001/* 002 * Copyright (c) 2007-2017 Xplenty, Inc. All Rights Reserved. 003 * 004 * Project and contact information: http://www.cascading.org/ 005 * 006 * This file is part of the Cascading project. 007 * 008 * Licensed under the Apache License, Version 2.0 (the "License"); 009 * you may not use this file except in compliance with the License. 010 * You may obtain a copy of the License at 011 * 012 * http://www.apache.org/licenses/LICENSE-2.0 013 * 014 * Unless required by applicable law or agreed to in writing, software 015 * distributed under the License is distributed on an "AS IS" BASIS, 016 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 017 * See the License for the specific language governing permissions and 018 * limitations under the License. 019 */ 020 021package cascading.flow; 022 023import java.util.Collection; 024import java.util.List; 025import java.util.Map; 026import java.util.Set; 027 028import cascading.flow.planner.Scope; 029import cascading.flow.planner.graph.ElementGraph; 030import cascading.flow.planner.process.ProcessModel; 031import cascading.stats.FlowNodeStats; 032import cascading.tap.Tap; 033 034/** 035 * Class FlowNode represents the smallest parallelizable unit of work. It is a child to a 036 * {@link cascading.flow.FlowStep} and may have many siblings within the FlowStep. 037 * <p/> 038 * A FlowNode is commonly executed as one or more slices, where a slice is a JVM executing against a portion 039 * of data. 040 * <p/> 041 * Most slices within a FlowNode are identical, except for the sub-set of data they will be processing against. 042 * <p/> 043 * But on some platforms, like MapReduce, a slice is executing a single flow pipeline. Thus a FlowNode may consist of 044 * some set of pipelines (or pipeline graph). One pipeline per 'streamed' input source Tap. 045 * <p/> 046 * In a MapReduce model (like Apache Hadoop MapReduce) a FlowNode can by the Map or Reduce side of a job (where a job 047 * is a FlowStep). 048 * <p/> 049 * In a DAG model (like Apache Tez), a FlowNode is a 'vertex', and the 'DAG' is a FlowStep. 050 */ 051public interface FlowNode extends ProcessModel 052 { 053 String CASCADING_FLOW_NODE = "cascading.flow.node"; 054 055 String getID(); 056 057 /** 058 * Returns an immutable map of properties giving more details about the FlowNode object. 059 * <p/> 060 * FlowNode descriptions provide meta-data to monitoring systems describing the workload a given FlowNode represents. 061 * For known description types, see {@link FlowNodeDescriptors}. 062 * 063 * @return Map<String,String> 064 */ 065 Map<String, String> getFlowNodeDescriptor(); 066 067 FlowNodeStats getFlowNodeStats(); 068 069 FlowStep getFlowStep(); 070 071 Collection<? extends FlowElement> getFlowElementsFor( Enum annotation ); 072 073 Set<? extends FlowElement> getSourceElements( Enum annotation ); 074 075 Set<? extends FlowElement> getSinkElements( Enum annotation ); 076 077 Set<String> getSourceElementNames(); 078 079 Set<String> getSinkElementNames(); 080 081 Set<String> getSourceTapNames( Tap flowElement ); 082 083 Set<String> getSinkTapNames( Tap flowElement ); 084 085 Tap getTrap( String branchName ); 086 087 Collection<? extends Tap> getTraps(); 088 089 Collection<? extends Scope> getPreviousScopes( FlowElement flowElement ); 090 091 Collection<? extends Scope> getNextScopes( FlowElement flowElement ); 092 093 List<? extends ElementGraph> getPipelineGraphs(); 094 095 ElementGraph getPipelineGraphFor( FlowElement streamedSource ); 096 }