/*
 * Copyright (c) 2007-2017 Xplenty, Inc. All Rights Reserved.
 *
 * Project and contact information: http://www.cascading.org/
 *
 * This file is part of the Cascading project.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
020
021package cascading.flow;
022
023import java.util.Collection;
024import java.util.List;
025import java.util.Map;
026import java.util.Set;
027
028import cascading.flow.planner.Scope;
029import cascading.flow.planner.graph.ElementGraph;
030import cascading.flow.planner.process.ProcessModel;
031import cascading.stats.FlowNodeStats;
032import cascading.tap.Tap;
033
034/**
035 * Class FlowNode represents the smallest parallelizable unit of work. It is a child to a
036 * {@link cascading.flow.FlowStep} and may have many siblings within the FlowStep.
037 * <p/>
038 * A FlowNode is commonly executed as one or more slices, where a slice is a JVM executing against a portion
039 * of data.
040 * <p/>
041 * Most slices within a FlowNode are identical, except for the sub-set of data they will be processing against.
042 * <p/>
043 * But on some platforms, like MapReduce, a slice is executing a single flow pipeline. Thus a FlowNode may consist of
044 * some set of pipelines (or pipeline graph). One pipeline per 'streamed' input source Tap.
045 * <p/>
046 * In a MapReduce model (like Apache Hadoop MapReduce) a FlowNode can by the Map or Reduce side of a job (where a job
047 * is a FlowStep).
048 * <p/>
049 * In a DAG model (like Apache Tez), a FlowNode is a 'vertex', and the 'DAG' is a FlowStep.
050 */
051public interface FlowNode extends ProcessModel
052  {
053  String CASCADING_FLOW_NODE = "cascading.flow.node";
054
055  String getID();
056
057  /**
058   * Returns an immutable map of properties giving more details about the FlowNode object.
059   * <p/>
060   * FlowNode descriptions provide meta-data to monitoring systems describing the workload a given FlowNode represents.
061   * For known description types, see {@link FlowNodeDescriptors}.
062   *
063   * @return Map<String,String>
064   */
065  Map<String, String> getFlowNodeDescriptor();
066
067  FlowNodeStats getFlowNodeStats();
068
069  FlowStep getFlowStep();
070
071  Collection<? extends FlowElement> getFlowElementsFor( Enum annotation );
072
073  Set<? extends FlowElement> getSourceElements( Enum annotation );
074
075  Set<? extends FlowElement> getSinkElements( Enum annotation );
076
077  Set<String> getSourceElementNames();
078
079  Set<String> getSinkElementNames();
080
081  Set<String> getSourceTapNames( Tap flowElement );
082
083  Set<String> getSinkTapNames( Tap flowElement );
084
085  Tap getTrap( String branchName );
086
087  Collection<? extends Tap> getTraps();
088
089  Collection<? extends Scope> getPreviousScopes( FlowElement flowElement );
090
091  Collection<? extends Scope> getNextScopes( FlowElement flowElement );
092
093  List<? extends ElementGraph> getPipelineGraphs();
094
095  ElementGraph getPipelineGraphFor( FlowElement streamedSource );
096  }