001/* 002 * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 003 * 004 * Project and contact information: http://www.cascading.org/ 005 * 006 * This file is part of the Cascading project. 007 * 008 * Licensed under the Apache License, Version 2.0 (the "License"); 009 * you may not use this file except in compliance with the License. 010 * You may obtain a copy of the License at 011 * 012 * http://www.apache.org/licenses/LICENSE-2.0 013 * 014 * Unless required by applicable law or agreed to in writing, software 015 * distributed under the License is distributed on an "AS IS" BASIS, 016 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 017 * See the License for the specific language governing permissions and 018 * limitations under the License. 019 */ 020 021package cascading.cascade; 022 023import java.util.Collection; 024import java.util.List; 025 026import cascading.flow.Flow; 027import cascading.flow.FlowSkipStrategy; 028import cascading.management.UnitOfWork; 029import cascading.stats.CascadeStats; 030import cascading.tap.Tap; 031 032/** 033 * A Cascade is an assembly of {@link cascading.flow.Flow} instances that share or depend on equivalent {@link Tap} instances and are executed as 034 * a single group. The most common case is where one Flow instance depends on a Tap created by a second Flow instance. This 035 * dependency chain can continue as practical. 036 * <p/> 037 * Note Flow instances that have no shared dependencies will be executed in parallel. 038 * <p/> 039 * Additionally, a Cascade allows for incremental builds of complex data processing processes. If a given source {@link Tap} is newer than 040 * a subsequent sink {@link Tap} in the assembly, the connecting {@link cascading.flow.Flow}(s) will be executed 041 * when the Cascade executed. If all the targets (sinks) are up to date, the Cascade exits immediately and does nothing. 042 * <p/> 043 * The concept of 'stale' is pluggable, see the {@link cascading.flow.FlowSkipStrategy} class. 044 * <p/> 045 * When a Cascade starts up, if first verifies which Flow instances have stale sinks, if the sinks are not stale, the 046 * method {@link cascading.flow.BaseFlow#deleteSinksIfNotUpdate()} is called. Before appends/updates were supported (logically) 047 * the Cascade deleted all the sinks in a Flow. 048 * <p/> 049 * The new consequence of this is if the Cascade fails, but does complete a Flow that appended or updated data, re-running 050 * the Cascade (and the successful append/update Flow) will re-update data to the source. Some systems may be idempotent and 051 * may not have any side-effects. So plan accordingly. 052 * <p/> 053 * Use the {@link CascadeListener} to receive any events on the life-cycle of the Cascade as it executes. Any 054 * {@link Tap} instances owned by managed Flows also implementing CascadeListener will automatically be added to the 055 * set of listeners. 056 * 057 * @see CascadeListener 058 * @see cascading.flow.Flow 059 * @see cascading.flow.FlowSkipStrategy 060 */ 061public interface Cascade extends UnitOfWork<CascadeStats> 062 { 063 boolean hasListeners(); 064 065 void addListener( CascadeListener cascadeListener ); 066 067 boolean removeListener( CascadeListener flowListener ); 068 069 /** 070 * Method getCascadeStats returns the cascadeStats of this Cascade object. 071 * 072 * @return the cascadeStats (type CascadeStats) of this Cascade object. 073 */ 074 CascadeStats getCascadeStats(); 075 076 /** 077 * Method getFlows returns the flows managed by this Cascade object. The returned {@link cascading.flow.Flow} instances 078 * will be in topological order. 079 * 080 * @return the flows (type Collection<Flow>) of this Cascade object. 081 */ 082 List<Flow> getFlows(); 083 084 /** 085 * Method findFlows returns a List of flows whose names match the given regex pattern. 086 * 087 * @param regex of type String 088 * @return List<Flow> 089 */ 090 List<Flow> findFlows( String regex ); 091 092 /** 093 * Method getHeadFlows returns all Flow instances that are at the "head" of the flow graph. 094 * <p/> 095 * That is, they are the first to execute and have no Tap source dependencies with Flow instances in the this Cascade 096 * instance. 097 * 098 * @return Collection<Flow> 099 */ 100 Collection<Flow> getHeadFlows(); 101 102 /** 103 * Method getTailFlows returns all Flow instances that are at the "tail" of the flow graph. 104 * <p/> 105 * That is, they are the last to execute and have no Tap sink dependencies with Flow instances in the this Cascade 106 * instance. 107 * 108 * @return Collection<Flow> 109 */ 110 Collection<Flow> getTailFlows(); 111 112 /** 113 * Method getIntermediateFlows returns all Flow instances that are neither at the "tail" or "tail" of the flow graph. 114 * 115 * @return Collection<Flow> 116 */ 117 Collection<Flow> getIntermediateFlows(); 118 119 /** 120 * Method getSourceTaps returns all source Tap instances in this Cascade instance. 121 * <p/> 122 * That is, none of returned Tap instances are the sinks of other Flow instances in this Cascade. 123 * <p/> 124 * All {@link cascading.tap.CompositeTap} instances are unwound if addressed directly by a managed Flow instance. 125 * 126 * @return Collection<Tap> 127 */ 128 Collection<Tap> getSourceTaps(); 129 130 /** 131 * Method getSinkTaps returns all sink Tap instances in this Cascade instance. 132 * <p/> 133 * That is, none of returned Tap instances are the sources of other Flow instances in this Cascade. 134 * <p/> 135 * All {@link cascading.tap.CompositeTap} instances are unwound if addressed directly by a managed Flow instance. 136 * <p/> 137 * This method will return checkpoint Taps managed by Flow instances if not used as a source by other Flow instances. 138 * 139 * @return Collection<Tap> 140 */ 141 Collection<Tap> getSinkTaps(); 142 143 /** 144 * Method getCheckpointTaps returns all checkpoint Tap instances from all the Flow instances in this Cascade instance. 145 * 146 * @return Collection<Tap> 147 */ 148 Collection<Tap> getCheckpointsTaps(); 149 150 /** 151 * Method getIntermediateTaps returns all Tap instances that are neither at the source or sink of the flow graph. 152 * <p/> 153 * This method does consider checkpoint Taps managed by Flow instances in this Cascade instance. 154 * 155 * @return Collection<Flow> 156 */ 157 Collection<Tap> getIntermediateTaps(); 158 159 /** 160 * Method getAllTaps returns all source, sink, and checkpoint Tap instances associated with the managed 161 * Flow instances in this Cascade instance. 162 * 163 * @return Collection<Tap> 164 */ 165 Collection<Tap> getAllTaps(); 166 167 /** 168 * Method getSuccessorFlows returns a Collection of all the Flow instances that will be 169 * executed after the given Flow instance. 170 * 171 * @param flow of type Flow 172 * @return Collection<Flow> 173 */ 174 Collection<Flow> getSuccessorFlows( Flow flow ); 175 176 /** 177 * Method getPredecessorFlows returns a Collection of all the Flow instances that will be 178 * executed before the given Flow instance. 179 * 180 * @param flow of type Flow 181 * @return Collection<Flow> 182 */ 183 Collection<Flow> getPredecessorFlows( Flow flow ); 184 185 /** 186 * Method findFlowsSourcingFrom returns all Flow instances that reads from a source with the given identifier. 187 * 188 * @param identifier of type String 189 * @return Collection<Flow> 190 */ 191 Collection<Flow> findFlowsSourcingFrom( String identifier ); 192 193 /** 194 * Method findFlowsSinkingTo returns all Flow instances that writes to a sink with the given identifier. 195 * 196 * @param identifier of type String 197 * @return Collection<Flow> 198 */ 199 Collection<Flow> findFlowsSinkingTo( String identifier ); 200 201 /** 202 * Method getFlowSkipStrategy returns the current {@link cascading.flow.FlowSkipStrategy} used by this Flow. 203 * 204 * @return FlowSkipStrategy 205 */ 206 FlowSkipStrategy getFlowSkipStrategy(); 207 208 /** 209 * Method setFlowSkipStrategy sets a new {@link cascading.flow.FlowSkipStrategy}, the current strategy, if any, is returned. 210 * If a strategy is given, it will be used as the strategy for all {@link cascading.flow.BaseFlow} instances managed by this Cascade instance. 211 * To revert back to consulting the strategies associated with each Flow instance, re-set this value to {@code null}, its 212 * default value. 213 * <p/> 214 * FlowSkipStrategy instances define when a Flow instance should be skipped. The default strategy is {@link cascading.flow.FlowSkipIfSinkNotStale} 215 * and is inherited from the Flow instance in question. An alternative strategy would be {@link cascading.flow.FlowSkipIfSinkExists}. 216 * <p/> 217 * A FlowSkipStrategy will not be consulted when executing a Flow directly through {@link #start()} 218 * 219 * @param flowSkipStrategy of type FlowSkipStrategy 220 * @return FlowSkipStrategy 221 */ 222 FlowSkipStrategy setFlowSkipStrategy( FlowSkipStrategy flowSkipStrategy ); 223 224 /** 225 * Method start begins the current Cascade process. It returns immediately. See method {@link #complete()} to block 226 * until the Cascade completes. 227 */ 228 void start(); 229 230 /** 231 * Method complete begins the current Cascade process if method {@link #start()} was not previously called. This method 232 * blocks until the process completes. 233 * 234 * @throws RuntimeException wrapping any exception thrown internally. 235 */ 236 void complete(); 237 238 void stop(); 239 240 /** 241 * Method writeDOT writes this element graph to a DOT file for easy visualization and debugging. 242 * 243 * @param filename of type String 244 */ 245 void writeDOT( String filename ); 246 }