/*
 * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
 *
 * Project and contact information: http://www.cascading.org/
 *
 * This file is part of the Cascading project.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cascading.flow;

import java.util.Collection;
import java.util.List;
import java.util.Set;

import cascading.flow.planner.Scope;
import cascading.flow.planner.graph.ElementGraph;
import cascading.flow.planner.process.ProcessModel;
import cascading.tap.Tap;

/**
 * Class FlowNode represents the smallest parallelizable unit of work. It is a child to a
 * {@link cascading.flow.FlowStep} and may have many siblings within the FlowStep.
 * <p/>
 * A FlowNode is commonly executed as one or more slices, where a slice is a JVM executing against a portion
 * of data.
 * <p/>
 * Most slices within a FlowNode are identical, except for the sub-set of data they will be processing against.
 * <p/>
 * But on some platforms, like MapReduce, a slice is executing a single flow pipeline. Thus a FlowNode may consist of
 * some set of pipelines (or pipeline graph). One pipeline per 'streamed' input source Tap.
 * <p/>
 * In a MapReduce model (like Apache Hadoop MapReduce) a FlowNode can be the Map or Reduce side of a job (where a job
 * is a FlowStep).
 * <p/>
 * In a DAG model (like Apache Tez), a FlowNode is a 'vertex', and the 'DAG' is a FlowStep.
 */
public interface FlowNode extends ProcessModel
  {
  /**
   * String constant with the value {@code "cascading.flow.node"}.
   * <p/>
   * NOTE(review): presumably a configuration/property key naming the current node; confirm against its usages.
   */
  String CASCADING_FLOW_NODE = "cascading.flow.node";

  /**
   * Method getID returns the identifier of this FlowNode.
   *
   * @return the ID as a String
   */
  String getID();

  /**
   * Method getFlowStep returns the {@link FlowStep} associated with this FlowNode.
   * <p/>
   * NOTE(review): per the class description this should be the parent step of this node; confirm with implementations.
   *
   * @return the FlowStep
   */
  FlowStep getFlowStep();

  /**
   * Method getFlowElementsFor returns the flow elements selected by the given annotation.
   * <p/>
   * NOTE(review): which Enum types are accepted, and how elements become annotated, is not visible in this
   * interface; verify against implementations.
   *
   * @param annotation the Enum value used to select elements
   * @return a Collection of matching FlowElement instances
   */
  Collection<? extends FlowElement> getFlowElementsFor( Enum annotation );

  /**
   * Method getSourceElements returns the source elements of this node selected by the given annotation.
   *
   * @param annotation the Enum value used to select source elements
   * @return a Set of matching source FlowElement instances
   */
  Set<? extends FlowElement> getSourceElements( Enum annotation );

  /**
   * Method getSinkElements returns the sink elements of this node selected by the given annotation.
   *
   * @param annotation the Enum value used to select sink elements
   * @return a Set of matching sink FlowElement instances
   */
  Set<? extends FlowElement> getSinkElements( Enum annotation );

  /**
   * Method getSourceElementNames returns the names of the source elements of this node.
   *
   * @return a Set of source element names
   */
  Set<String> getSourceElementNames();

  /**
   * Method getSinkElementNames returns the names of the sink elements of this node.
   *
   * @return a Set of sink element names
   */
  Set<String> getSinkElementNames();

  /**
   * Method getSourceTapNames returns the names associated with the given Tap when it is used as a source.
   *
   * @param flowElement the source Tap to look up
   * @return a Set of names for the given Tap
   */
  Set<String> getSourceTapNames( Tap flowElement );

  /**
   * Method getSinkTapNames returns the names associated with the given Tap when it is used as a sink.
   *
   * @param flowElement the sink Tap to look up
   * @return a Set of names for the given Tap
   */
  Set<String> getSinkTapNames( Tap flowElement );

  /**
   * Method getTrap returns the trap Tap registered for the given branch name.
   * <p/>
   * NOTE(review): the result when no trap exists for the branch is not specified here; check implementations
   * before relying on a non-null return.
   *
   * @param branchName the branch name to look up
   * @return the trap Tap for the branch
   */
  Tap getTrap( String branchName );

  /**
   * Method getTraps returns all trap Taps known to this node.
   *
   * @return a Collection of trap Taps
   */
  Collection<? extends Tap> getTraps();

  /**
   * Method getPreviousScopes returns the scopes that precede the given element.
   * <p/>
   * NOTE(review): presumably the incoming edges of the element in this node's graph; confirm.
   *
   * @param flowElement the element to look up
   * @return a Collection of Scope instances
   */
  Collection<? extends Scope> getPreviousScopes( FlowElement flowElement );

  /**
   * Method getNextScopes returns the scopes that follow the given element.
   * <p/>
   * NOTE(review): presumably the outgoing edges of the element in this node's graph; confirm.
   *
   * @param flowElement the element to look up
   * @return a Collection of Scope instances
   */
  Collection<? extends Scope> getNextScopes( FlowElement flowElement );

  /**
   * Method getPipelineGraphs returns the pipeline graphs that make up this node.
   *
   * @return a List of pipeline ElementGraph instances
   */
  List<? extends ElementGraph> getPipelineGraphs();

  /**
   * Method getPipelineGraphFor returns the pipeline graph belonging to the given streamed source.
   *
   * @param streamedSource the streamed source element identifying the pipeline
   * @return the pipeline ElementGraph for the source
   */
  ElementGraph getPipelineGraphFor( FlowElement streamedSource );
  }