/*
 * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
 *
 * Project and contact information: http://www.cascading.org/
 *
 * This file is part of the Cascading project.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cascading.cascade;

import java.util.Collection;
import java.util.List;

import cascading.flow.Flow;
import cascading.flow.FlowSkipStrategy;
import cascading.management.UnitOfWork;
import cascading.stats.CascadeStats;
import cascading.tap.Tap;

032/**
033 * A Cascade is an assembly of {@link cascading.flow.Flow} instances that share or depend on equivalent {@link Tap} instances and are executed as
034 * a single group. The most common case is where one Flow instance depends on a Tap created by a second Flow instance. This
035 * dependency chain can continue as practical.
036 * <p/>
037 * Note Flow instances that have no shared dependencies will be executed in parallel.
038 * <p/>
039 * Additionally, a Cascade allows for incremental builds of complex data processing processes. If a given source {@link Tap} is newer than
040 * a subsequent sink {@link Tap} in the assembly, the connecting {@link cascading.flow.Flow}(s) will be executed
041 * when the Cascade executed. If all the targets (sinks) are up to date, the Cascade exits immediately and does nothing.
042 * <p/>
043 * The concept of 'stale' is pluggable, see the {@link cascading.flow.FlowSkipStrategy} class.
044 * <p/>
045 * When a Cascade starts up, if first verifies which Flow instances have stale sinks, if the sinks are not stale, the
046 * method {@link cascading.flow.BaseFlow#deleteSinksIfNotUpdate()} is called. Before appends/updates were supported (logically)
047 * the Cascade deleted all the sinks in a Flow.
048 * <p/>
049 * The new consequence of this is if the Cascade fails, but does complete a Flow that appended or updated data, re-running
050 * the Cascade (and the successful append/update Flow) will re-update data to the source. Some systems may be idempotent and
051 * may not have any side-effects. So plan accordingly.
052 * <p/>
053 * Use the {@link CascadeListener} to receive any events on the life-cycle of the Cascade as it executes. Any
054 * {@link Tap} instances owned by managed Flows also implementing CascadeListener will automatically be added to the
055 * set of listeners.
056 *
057 * @see CascadeListener
058 * @see cascading.flow.Flow
059 * @see cascading.flow.FlowSkipStrategy
060 */
061public interface Cascade extends UnitOfWork<CascadeStats>
062  {
063  boolean hasListeners();
064
065  void addListener( CascadeListener cascadeListener );
066
067  boolean removeListener( CascadeListener flowListener );
068
069  /**
070   * Method getCascadeStats returns the cascadeStats of this Cascade object.
071   *
072   * @return the cascadeStats (type CascadeStats) of this Cascade object.
073   */
074  CascadeStats getCascadeStats();
075
076  /**
077   * Method getFlows returns the flows managed by this Cascade object. The returned {@link cascading.flow.Flow} instances
078   * will be in topological order.
079   *
080   * @return the flows (type Collection<Flow>) of this Cascade object.
081   */
082  List<Flow> getFlows();
083
084  /**
085   * Method findFlows returns a List of flows whose names match the given regex pattern.
086   *
087   * @param regex of type String
088   * @return List<Flow>
089   */
090  List<Flow> findFlows( String regex );
091
092  /**
093   * Method getHeadFlows returns all Flow instances that are at the "head" of the flow graph.
094   * <p/>
095   * That is, they are the first to execute and have no Tap source dependencies with Flow instances in the this Cascade
096   * instance.
097   *
098   * @return Collection<Flow>
099   */
100  Collection<Flow> getHeadFlows();
101
102  /**
103   * Method getTailFlows returns all Flow instances that are at the "tail" of the flow graph.
104   * <p/>
105   * That is, they are the last to execute and have no Tap sink dependencies with Flow instances in the this Cascade
106   * instance.
107   *
108   * @return Collection<Flow>
109   */
110  Collection<Flow> getTailFlows();
111
112  /**
113   * Method getIntermediateFlows returns all Flow instances that are neither at the "tail" or "tail" of the flow graph.
114   *
115   * @return Collection<Flow>
116   */
117  Collection<Flow> getIntermediateFlows();
118
119  /**
120   * Method getSourceTaps returns all source Tap instances in this Cascade instance.
121   * <p/>
122   * That is, none of returned Tap instances are the sinks of other Flow instances in this Cascade.
123   * <p/>
124   * All {@link cascading.tap.CompositeTap} instances are unwound if addressed directly by a managed Flow instance.
125   *
126   * @return Collection<Tap>
127   */
128  Collection<Tap> getSourceTaps();
129
130  /**
131   * Method getSinkTaps returns all sink Tap instances in this Cascade instance.
132   * <p/>
133   * That is, none of returned Tap instances are the sources of other Flow instances in this Cascade.
134   * <p/>
135   * All {@link cascading.tap.CompositeTap} instances are unwound if addressed directly by a managed Flow instance.
136   * <p/>
137   * This method will return checkpoint Taps managed by Flow instances if not used as a source by other Flow instances.
138   *
139   * @return Collection<Tap>
140   */
141  Collection<Tap> getSinkTaps();
142
143  /**
144   * Method getCheckpointTaps returns all checkpoint Tap instances from all the Flow instances in this Cascade instance.
145   *
146   * @return Collection<Tap>
147   */
148  Collection<Tap> getCheckpointsTaps();
149
150  /**
151   * Method getIntermediateTaps returns all Tap instances that are neither at the source or sink of the flow graph.
152   * <p/>
153   * This method does consider checkpoint Taps managed by Flow instances in this Cascade instance.
154   *
155   * @return Collection<Flow>
156   */
157  Collection<Tap> getIntermediateTaps();
158
159  /**
160   * Method getAllTaps returns all source, sink, and checkpoint Tap instances associated with the managed
161   * Flow instances in this Cascade instance.
162   *
163   * @return Collection<Tap>
164   */
165  Collection<Tap> getAllTaps();
166
167  /**
168   * Method getSuccessorFlows returns a Collection of all the Flow instances that will be
169   * executed after the given Flow instance.
170   *
171   * @param flow of type Flow
172   * @return Collection<Flow>
173   */
174  Collection<Flow> getSuccessorFlows( Flow flow );
175
176  /**
177   * Method getPredecessorFlows returns a Collection of all the Flow instances that will be
178   * executed before the given Flow instance.
179   *
180   * @param flow of type Flow
181   * @return Collection<Flow>
182   */
183  Collection<Flow> getPredecessorFlows( Flow flow );
184
185  /**
186   * Method findFlowsSourcingFrom returns all Flow instances that reads from a source with the given identifier.
187   *
188   * @param identifier of type String
189   * @return Collection<Flow>
190   */
191  Collection<Flow> findFlowsSourcingFrom( String identifier );
192
193  /**
194   * Method findFlowsSinkingTo returns all Flow instances that writes to a sink with the given identifier.
195   *
196   * @param identifier of type String
197   * @return Collection<Flow>
198   */
199  Collection<Flow> findFlowsSinkingTo( String identifier );
200
201  /**
202   * Method getFlowSkipStrategy returns the current {@link cascading.flow.FlowSkipStrategy} used by this Flow.
203   *
204   * @return FlowSkipStrategy
205   */
206  FlowSkipStrategy getFlowSkipStrategy();
207
208  /**
209   * Method setFlowSkipStrategy sets a new {@link cascading.flow.FlowSkipStrategy}, the current strategy, if any, is returned.
210   * If a strategy is given, it will be used as the strategy for all {@link cascading.flow.BaseFlow} instances managed by this Cascade instance.
211   * To revert back to consulting the strategies associated with each Flow instance, re-set this value to {@code null}, its
212   * default value.
213   * <p/>
214   * FlowSkipStrategy instances define when a Flow instance should be skipped. The default strategy is {@link cascading.flow.FlowSkipIfSinkNotStale}
215   * and is inherited from the Flow instance in question. An alternative strategy would be {@link cascading.flow.FlowSkipIfSinkExists}.
216   * <p/>
217   * A FlowSkipStrategy will not be consulted when executing a Flow directly through {@link #start()}
218   *
219   * @param flowSkipStrategy of type FlowSkipStrategy
220   * @return FlowSkipStrategy
221   */
222  FlowSkipStrategy setFlowSkipStrategy( FlowSkipStrategy flowSkipStrategy );
223
224  /**
225   * Method start begins the current Cascade process. It returns immediately. See method {@link #complete()} to block
226   * until the Cascade completes.
227   */
228  void start();
229
230  /**
231   * Method complete begins the current Cascade process if method {@link #start()} was not previously called. This method
232   * blocks until the process completes.
233   *
234   * @throws RuntimeException wrapping any exception thrown internally.
235   */
236  void complete();
237
238  void stop();
239
240  /**
241   * Method writeDOT writes this element graph to a DOT file for easy visualization and debugging.
242   *
243   * @param filename of type String
244   */
245  void writeDOT( String filename );
246  }