001/* 002 * Copyright (c) 2016-2017 Chris K Wensel <chris@wensel.net>. All Rights Reserved. 003 * Copyright (c) 2007-2017 Xplenty, Inc. All Rights Reserved. 004 * 005 * Project and contact information: http://www.cascading.org/ 006 * 007 * This file is part of the Cascading project. 008 * 009 * Licensed under the Apache License, Version 2.0 (the "License"); 010 * you may not use this file except in compliance with the License. 011 * You may obtain a copy of the License at 012 * 013 * http://www.apache.org/licenses/LICENSE-2.0 014 * 015 * Unless required by applicable law or agreed to in writing, software 016 * distributed under the License is distributed on an "AS IS" BASIS, 017 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 018 * See the License for the specific language governing permissions and 019 * limitations under the License. 020 */ 021 022package cascading.cascade; 023 024import java.util.Collection; 025import java.util.List; 026 027import cascading.flow.Flow; 028import cascading.flow.FlowSkipStrategy; 029import cascading.management.UnitOfWork; 030import cascading.stats.CascadeStats; 031import cascading.tap.Tap; 032 033/** 034 * A Cascade is an assembly of {@link cascading.flow.Flow} instances that share or depend on equivalent {@link Tap} instances and are executed as 035 * a single group. The most common case is where one Flow instance depends on a Tap created by a second Flow instance. This 036 * dependency chain can continue as practical. 037 * <p/> 038 * Note Flow instances that have no shared dependencies will be executed in parallel. 039 * <p/> 040 * Additionally, a Cascade allows for incremental builds of complex data processing processes. If a given source {@link Tap} is newer than 041 * a subsequent sink {@link Tap} in the assembly, the connecting {@link cascading.flow.Flow}(s) will be executed 042 * when the Cascade executed. If all the targets (sinks) are up to date, the Cascade exits immediately and does nothing. 043 * <p/> 044 * The concept of 'stale' is pluggable, see the {@link cascading.flow.FlowSkipStrategy} class. 045 * <p/> 046 * When a Cascade starts up, if first verifies which Flow instances have stale sinks, if the sinks are not stale, the 047 * method {@link cascading.flow.BaseFlow#deleteSinksIfNotUpdate()} is called. Before appends/updates were supported (logically) 048 * the Cascade deleted all the sinks in a Flow. Then and now, the {@link cascading.tap.SinkMode#KEEP} is not honored 049 * when the Tap is participating in a Cascade. 050 * <p/> 051 * The new consequence of this is if the Cascade fails, but does complete a Flow that appended or updated data, re-running 052 * the Cascade (and the successful append/update Flow) will re-update data to the source. Some systems may be idempotent and 053 * may not have any side-effects. So plan accordingly. 054 * <p/> 055 * Use the {@link CascadeListener} to receive any events on the life-cycle of the Cascade as it executes. Any 056 * {@link Tap} instances owned by managed Flows also implementing CascadeListener will automatically be added to the 057 * set of listeners. 058 * 059 * @see CascadeListener 060 * @see cascading.flow.Flow 061 * @see cascading.flow.FlowSkipStrategy 062 */ 063public interface Cascade extends UnitOfWork<CascadeStats> 064 { 065 boolean hasListeners(); 066 067 void addListener( CascadeListener cascadeListener ); 068 069 boolean removeListener( CascadeListener flowListener ); 070 071 /** 072 * Method getCascadeStats returns the cascadeStats of this Cascade object. 073 * 074 * @return the cascadeStats (type CascadeStats) of this Cascade object. 075 */ 076 CascadeStats getCascadeStats(); 077 078 /** 079 * Method getFlows returns the flows managed by this Cascade object. The returned {@link cascading.flow.Flow} instances 080 * will be in topological order. 081 * 082 * @return the flows (type Collection<Flow>) of this Cascade object. 083 */ 084 List<Flow> getFlows(); 085 086 /** 087 * Method findFlows returns a List of flows whose names match the given regex pattern. 088 * 089 * @param regex of type String 090 * @return List<Flow> 091 */ 092 List<Flow> findFlows( String regex ); 093 094 /** 095 * Method getHeadFlows returns all Flow instances that are at the "head" of the flow graph. 096 * <p/> 097 * That is, they are the first to execute and have no Tap source dependencies with Flow instances in the this Cascade 098 * instance. 099 * 100 * @return Collection<Flow> 101 */ 102 Collection<Flow> getHeadFlows(); 103 104 /** 105 * Method getTailFlows returns all Flow instances that are at the "tail" of the flow graph. 106 * <p/> 107 * That is, they are the last to execute and have no Tap sink dependencies with Flow instances in the this Cascade 108 * instance. 109 * 110 * @return Collection<Flow> 111 */ 112 Collection<Flow> getTailFlows(); 113 114 /** 115 * Method getIntermediateFlows returns all Flow instances that are neither at the "tail" or "tail" of the flow graph. 116 * 117 * @return Collection<Flow> 118 */ 119 Collection<Flow> getIntermediateFlows(); 120 121 /** 122 * Method getSourceTaps returns all source Tap instances in this Cascade instance. 123 * <p/> 124 * That is, none of returned Tap instances are the sinks of other Flow instances in this Cascade. 125 * <p/> 126 * All {@link cascading.tap.CompositeTap} instances are unwound if addressed directly by a managed Flow instance. 127 * 128 * @return Collection<Tap> 129 */ 130 Collection<Tap> getSourceTaps(); 131 132 /** 133 * Method getSinkTaps returns all sink Tap instances in this Cascade instance. 134 * <p/> 135 * That is, none of returned Tap instances are the sources of other Flow instances in this Cascade. 136 * <p/> 137 * All {@link cascading.tap.CompositeTap} instances are unwound if addressed directly by a managed Flow instance. 138 * <p/> 139 * This method will return checkpoint Taps managed by Flow instances if not used as a source by other Flow instances. 140 * 141 * @return Collection<Tap> 142 */ 143 Collection<Tap> getSinkTaps(); 144 145 /** 146 * Method getCheckpointTaps returns all checkpoint Tap instances from all the Flow instances in this Cascade instance. 147 * 148 * @return Collection<Tap> 149 */ 150 Collection<Tap> getCheckpointsTaps(); 151 152 /** 153 * Method getIntermediateTaps returns all Tap instances that are neither at the source or sink of the flow graph. 154 * <p/> 155 * This method does consider checkpoint Taps managed by Flow instances in this Cascade instance. 156 * 157 * @return Collection<Flow> 158 */ 159 Collection<Tap> getIntermediateTaps(); 160 161 /** 162 * Method getAllTaps returns all source, sink, and checkpoint Tap instances associated with the managed 163 * Flow instances in this Cascade instance. 164 * 165 * @return Collection<Tap> 166 */ 167 Collection<Tap> getAllTaps(); 168 169 /** 170 * Method getSuccessorFlows returns a Collection of all the Flow instances that will be 171 * executed after the given Flow instance. 172 * 173 * @param flow of type Flow 174 * @return Collection<Flow> 175 */ 176 Collection<Flow> getSuccessorFlows( Flow flow ); 177 178 /** 179 * Method getPredecessorFlows returns a Collection of all the Flow instances that will be 180 * executed before the given Flow instance. 181 * 182 * @param flow of type Flow 183 * @return Collection<Flow> 184 */ 185 Collection<Flow> getPredecessorFlows( Flow flow ); 186 187 /** 188 * Method findFlowsSourcingFrom returns all Flow instances that reads from a source with the given identifier. 189 * 190 * @param identifier of type String 191 * @return Collection<Flow> 192 */ 193 Collection<Flow> findFlowsSourcingFrom( String identifier ); 194 195 /** 196 * Method findFlowsSinkingTo returns all Flow instances that writes to a sink with the given identifier. 197 * 198 * @param identifier of type String 199 * @return Collection<Flow> 200 */ 201 Collection<Flow> findFlowsSinkingTo( String identifier ); 202 203 /** 204 * Method getFlowSkipStrategy returns the current {@link cascading.flow.FlowSkipStrategy} used by this Flow. 205 * 206 * @return FlowSkipStrategy 207 */ 208 FlowSkipStrategy getFlowSkipStrategy(); 209 210 /** 211 * Method setFlowSkipStrategy sets a new {@link cascading.flow.FlowSkipStrategy}, the current strategy, if any, is returned. 212 * If a strategy is given, it will be used as the strategy for all {@link cascading.flow.BaseFlow} instances managed by this Cascade instance. 213 * To revert back to consulting the strategies associated with each Flow instance, re-set this value to {@code null}, its 214 * default value. 215 * <p/> 216 * FlowSkipStrategy instances define when a Flow instance should be skipped. The default strategy is {@link cascading.flow.FlowSkipIfSinkNotStale} 217 * and is inherited from the Flow instance in question. An alternative strategy would be {@link cascading.flow.FlowSkipIfSinkExists}. 218 * <p/> 219 * A FlowSkipStrategy will not be consulted when executing a Flow directly through {@link #start()} 220 * 221 * @param flowSkipStrategy of type FlowSkipStrategy 222 * @return FlowSkipStrategy 223 */ 224 FlowSkipStrategy setFlowSkipStrategy( FlowSkipStrategy flowSkipStrategy ); 225 226 /** 227 * Method start begins the current Cascade process. It returns immediately. See method {@link #complete()} to block 228 * until the Cascade completes. 229 */ 230 void start(); 231 232 /** 233 * Method complete begins the current Cascade process if method {@link #start()} was not previously called. This method 234 * blocks until the process completes. 235 * 236 * @throws RuntimeException wrapping any exception thrown internally. 237 */ 238 void complete(); 239 240 void stop(); 241 242 /** 243 * Method writeDOT writes this element graph to a DOT file for easy visualization and debugging. 244 * 245 * @param filename of type String 246 */ 247 void writeDOT( String filename ); 248 }