001/*
002 * Copyright (c) 2016-2017 Chris K Wensel <chris@wensel.net>. All Rights Reserved.
003 * Copyright (c) 2007-2017 Xplenty, Inc. All Rights Reserved.
004 *
005 * Project and contact information: http://www.cascading.org/
006 *
007 * This file is part of the Cascading project.
008 *
009 * Licensed under the Apache License, Version 2.0 (the "License");
010 * you may not use this file except in compliance with the License.
011 * You may obtain a copy of the License at
012 *
013 *     http://www.apache.org/licenses/LICENSE-2.0
014 *
015 * Unless required by applicable law or agreed to in writing, software
016 * distributed under the License is distributed on an "AS IS" BASIS,
017 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
018 * See the License for the specific language governing permissions and
019 * limitations under the License.
020 */
021
022package cascading.cascade;
023
024import java.util.Collection;
025import java.util.List;
026
027import cascading.flow.Flow;
028import cascading.flow.FlowSkipStrategy;
029import cascading.management.UnitOfWork;
030import cascading.stats.CascadeStats;
031import cascading.tap.Tap;
032
033/**
034 * A Cascade is an assembly of {@link cascading.flow.Flow} instances that share or depend on equivalent {@link Tap} instances and are executed as
035 * a single group. The most common case is where one Flow instance depends on a Tap created by a second Flow instance. This
036 * dependency chain can continue as practical.
037 * <p/>
038 * Note Flow instances that have no shared dependencies will be executed in parallel.
039 * <p/>
040 * Additionally, a Cascade allows for incremental builds of complex data processing processes. If a given source {@link Tap} is newer than
041 * a subsequent sink {@link Tap} in the assembly, the connecting {@link cascading.flow.Flow}(s) will be executed
042 * when the Cascade executed. If all the targets (sinks) are up to date, the Cascade exits immediately and does nothing.
043 * <p/>
044 * The concept of 'stale' is pluggable, see the {@link cascading.flow.FlowSkipStrategy} class.
045 * <p/>
046 * When a Cascade starts up, if first verifies which Flow instances have stale sinks, if the sinks are not stale, the
047 * method {@link cascading.flow.BaseFlow#deleteSinksIfNotUpdate()} is called. Before appends/updates were supported (logically)
048 * the Cascade deleted all the sinks in a Flow. Then and now, the {@link cascading.tap.SinkMode#KEEP} is not honored
049 * when the Tap is participating in a Cascade.
050 * <p/>
051 * The new consequence of this is if the Cascade fails, but does complete a Flow that appended or updated data, re-running
052 * the Cascade (and the successful append/update Flow) will re-update data to the source. Some systems may be idempotent and
053 * may not have any side-effects. So plan accordingly.
054 * <p/>
055 * Use the {@link CascadeListener} to receive any events on the life-cycle of the Cascade as it executes. Any
056 * {@link Tap} instances owned by managed Flows also implementing CascadeListener will automatically be added to the
057 * set of listeners.
058 *
059 * @see CascadeListener
060 * @see cascading.flow.Flow
061 * @see cascading.flow.FlowSkipStrategy
062 */
063public interface Cascade extends UnitOfWork<CascadeStats>
064  {
065  boolean hasListeners();
066
067  void addListener( CascadeListener cascadeListener );
068
069  boolean removeListener( CascadeListener flowListener );
070
071  /**
072   * Method getCascadeStats returns the cascadeStats of this Cascade object.
073   *
074   * @return the cascadeStats (type CascadeStats) of this Cascade object.
075   */
076  CascadeStats getCascadeStats();
077
078  /**
079   * Method getFlows returns the flows managed by this Cascade object. The returned {@link cascading.flow.Flow} instances
080   * will be in topological order.
081   *
082   * @return the flows (type Collection<Flow>) of this Cascade object.
083   */
084  List<Flow> getFlows();
085
086  /**
087   * Method findFlows returns a List of flows whose names match the given regex pattern.
088   *
089   * @param regex of type String
090   * @return List<Flow>
091   */
092  List<Flow> findFlows( String regex );
093
094  /**
095   * Method getHeadFlows returns all Flow instances that are at the "head" of the flow graph.
096   * <p/>
097   * That is, they are the first to execute and have no Tap source dependencies with Flow instances in the this Cascade
098   * instance.
099   *
100   * @return Collection<Flow>
101   */
102  Collection<Flow> getHeadFlows();
103
104  /**
105   * Method getTailFlows returns all Flow instances that are at the "tail" of the flow graph.
106   * <p/>
107   * That is, they are the last to execute and have no Tap sink dependencies with Flow instances in the this Cascade
108   * instance.
109   *
110   * @return Collection<Flow>
111   */
112  Collection<Flow> getTailFlows();
113
114  /**
115   * Method getIntermediateFlows returns all Flow instances that are neither at the "tail" or "tail" of the flow graph.
116   *
117   * @return Collection<Flow>
118   */
119  Collection<Flow> getIntermediateFlows();
120
121  /**
122   * Method getSourceTaps returns all source Tap instances in this Cascade instance.
123   * <p/>
124   * That is, none of returned Tap instances are the sinks of other Flow instances in this Cascade.
125   * <p/>
126   * All {@link cascading.tap.CompositeTap} instances are unwound if addressed directly by a managed Flow instance.
127   *
128   * @return Collection<Tap>
129   */
130  Collection<Tap> getSourceTaps();
131
132  /**
133   * Method getSinkTaps returns all sink Tap instances in this Cascade instance.
134   * <p/>
135   * That is, none of returned Tap instances are the sources of other Flow instances in this Cascade.
136   * <p/>
137   * All {@link cascading.tap.CompositeTap} instances are unwound if addressed directly by a managed Flow instance.
138   * <p/>
139   * This method will return checkpoint Taps managed by Flow instances if not used as a source by other Flow instances.
140   *
141   * @return Collection<Tap>
142   */
143  Collection<Tap> getSinkTaps();
144
145  /**
146   * Method getCheckpointTaps returns all checkpoint Tap instances from all the Flow instances in this Cascade instance.
147   *
148   * @return Collection<Tap>
149   */
150  Collection<Tap> getCheckpointsTaps();
151
152  /**
153   * Method getIntermediateTaps returns all Tap instances that are neither at the source or sink of the flow graph.
154   * <p/>
155   * This method does consider checkpoint Taps managed by Flow instances in this Cascade instance.
156   *
157   * @return Collection<Flow>
158   */
159  Collection<Tap> getIntermediateTaps();
160
161  /**
162   * Method getAllTaps returns all source, sink, and checkpoint Tap instances associated with the managed
163   * Flow instances in this Cascade instance.
164   *
165   * @return Collection<Tap>
166   */
167  Collection<Tap> getAllTaps();
168
169  /**
170   * Method getSuccessorFlows returns a Collection of all the Flow instances that will be
171   * executed after the given Flow instance.
172   *
173   * @param flow of type Flow
174   * @return Collection<Flow>
175   */
176  Collection<Flow> getSuccessorFlows( Flow flow );
177
178  /**
179   * Method getPredecessorFlows returns a Collection of all the Flow instances that will be
180   * executed before the given Flow instance.
181   *
182   * @param flow of type Flow
183   * @return Collection<Flow>
184   */
185  Collection<Flow> getPredecessorFlows( Flow flow );
186
187  /**
188   * Method findFlowsSourcingFrom returns all Flow instances that reads from a source with the given identifier.
189   *
190   * @param identifier of type String
191   * @return Collection<Flow>
192   */
193  Collection<Flow> findFlowsSourcingFrom( String identifier );
194
195  /**
196   * Method findFlowsSinkingTo returns all Flow instances that writes to a sink with the given identifier.
197   *
198   * @param identifier of type String
199   * @return Collection<Flow>
200   */
201  Collection<Flow> findFlowsSinkingTo( String identifier );
202
203  /**
204   * Method getFlowSkipStrategy returns the current {@link cascading.flow.FlowSkipStrategy} used by this Flow.
205   *
206   * @return FlowSkipStrategy
207   */
208  FlowSkipStrategy getFlowSkipStrategy();
209
210  /**
211   * Method setFlowSkipStrategy sets a new {@link cascading.flow.FlowSkipStrategy}, the current strategy, if any, is returned.
212   * If a strategy is given, it will be used as the strategy for all {@link cascading.flow.BaseFlow} instances managed by this Cascade instance.
213   * To revert back to consulting the strategies associated with each Flow instance, re-set this value to {@code null}, its
214   * default value.
215   * <p/>
216   * FlowSkipStrategy instances define when a Flow instance should be skipped. The default strategy is {@link cascading.flow.FlowSkipIfSinkNotStale}
217   * and is inherited from the Flow instance in question. An alternative strategy would be {@link cascading.flow.FlowSkipIfSinkExists}.
218   * <p/>
219   * A FlowSkipStrategy will not be consulted when executing a Flow directly through {@link #start()}
220   *
221   * @param flowSkipStrategy of type FlowSkipStrategy
222   * @return FlowSkipStrategy
223   */
224  FlowSkipStrategy setFlowSkipStrategy( FlowSkipStrategy flowSkipStrategy );
225
226  /**
227   * Method start begins the current Cascade process. It returns immediately. See method {@link #complete()} to block
228   * until the Cascade completes.
229   */
230  void start();
231
232  /**
233   * Method complete begins the current Cascade process if method {@link #start()} was not previously called. This method
234   * blocks until the process completes.
235   *
236   * @throws RuntimeException wrapping any exception thrown internally.
237   */
238  void complete();
239
240  void stop();
241
242  /**
243   * Method writeDOT writes this element graph to a DOT file for easy visualization and debugging.
244   *
245   * @param filename of type String
246   */
247  void writeDOT( String filename );
248  }