public class MultiMapReduceFlow extends BaseMapReduceFlow
MultiMapReduceFlow is a HadoopFlow
subclass that supports custom MapReduce jobs
pre-configured via one or more JobConf
objects.
Use this class to group multiple JobConf instances together as a single Flow. MultiMapReduceFlow will automatically
topologically order the JobConf instances and schedule them on the cluster once BaseFlow.start()
or complete()
is called.
If you have a single JobConf instance, see MapReduceFlow
as an alternative to this class.
This class will not delete any sinks before execution; it is up to the developer to make sure any intermediate and
sink paths are removed/deleted before calling BaseFlow.start()
or complete(); otherwise Hadoop will throw
an exception.
JobConf instances can be incrementally added at any point before the complete()
method is called. But they must
logically (topologically) come after any previously provided JobConf instances. In practice the Flow will fail if
the input source path is missing because a prior JobConf was not provided before the Flow was started.
The ordering is done by comparing the input and output paths of the given JobConf instances. By default, this class
only works with JobConf instances that read and write from the Hadoop FileSystem (HDFS) (any path that would work
with the Hfs
Tap).
If the configured JobConf instance uses some other identifier instead of Hadoop FS paths, you should override the
BaseMapReduceFlow.createSources(org.apache.hadoop.mapred.JobConf)
, BaseMapReduceFlow.createSinks(org.apache.hadoop.mapred.JobConf)
, and
BaseMapReduceFlow.createTraps(org.apache.hadoop.mapred.JobConf)
methods to properly resolve the configured paths into
usable Tap
instances. By default createTraps returns an empty collection and should probably be left alone.
deleteSinkOnInit
Modifier | Constructor and Description |
---|---|
|
MultiMapReduceFlow(java.util.Map<java.lang.Object,java.lang.Object> properties,
java.lang.String name,
JobConf jobConf,
JobConf... jobConfs)
Constructor MultiMapReduceFlow creates a new MultiMapReduceFlow instance.
|
|
MultiMapReduceFlow(java.util.Map<java.lang.Object,java.lang.Object> properties,
java.lang.String name,
java.util.Map<java.lang.String,java.lang.String> flowDescriptor,
boolean stopJobsOnExit,
JobConf jobConf,
JobConf... jobConfs)
Constructor MultiMapReduceFlow creates a new MultiMapReduceFlow instance.
|
|
MultiMapReduceFlow(java.util.Map<java.lang.Object,java.lang.Object> properties,
java.lang.String name,
java.util.Map<java.lang.String,java.lang.String> flowDescriptor,
JobConf jobConf,
JobConf... jobConfs)
Constructor MultiMapReduceFlow creates a new MultiMapReduceFlow instance.
|
protected |
MultiMapReduceFlow(cascading.flow.planner.PlatformInfo platformInfo,
java.util.Map<java.lang.Object,java.lang.Object> properties,
java.lang.String name)
Constructor MultiMapReduceFlow creates a new MultiMapReduceFlow instance.
|
protected |
MultiMapReduceFlow(cascading.flow.planner.PlatformInfo platformInfo,
java.util.Map<java.lang.Object,java.lang.Object> properties,
java.lang.String name,
java.util.Map<java.lang.String,java.lang.String> flowDescriptor)
Constructor MultiMapReduceFlow creates a new MultiMapReduceFlow instance.
|
|
MultiMapReduceFlow(java.lang.String name,
JobConf jobConf,
JobConf... jobConfs)
Constructor MultiMapReduceFlow creates a new MultiMapReduceFlow instance.
|
Modifier and Type | Method and Description |
---|---|
protected void |
addFlowStep(MapReduceFlowStep flowStep) |
void |
attachFlowStep(JobConf jobConf) |
protected boolean |
blockingContinuePollingSteps() |
void |
complete() |
protected MapReduceFlowStep |
createMapReduceFlowStep(JobConf jobConf) |
protected cascading.tap.Tap |
createTap(JobConf jobConf,
Path path,
cascading.tap.SinkMode sinkMode) |
protected cascading.flow.planner.process.FlowStepGraph |
getOrCreateFlowStepGraph() |
protected void |
initializeFrom(java.util.List<JobConf> jobConfs) |
void |
notifyComplete() |
protected boolean |
spawnSteps() |
protected cascading.flow.planner.process.FlowStepGraph |
updateFlowStepGraph(cascading.flow.planner.process.FlowStepGraph flowStepGraph,
java.util.Collection<MapReduceFlowStep> flowSteps) |
protected java.util.Collection<MapReduceFlowStep> |
updateWithFlowSteps(java.util.Collection<MapReduceFlowStep> flowSteps) |
createFlowStep, createSinks, createSources, createTraps, fileInputToTaps, fileOutputToTaps, makeNameFromPath, makeStepGraph, toSinkTap, toSourceTap
copyToDistributedCache, getConfig, getConfigAsProperties, getConfigCopy, getFlowProcess, getMaxNumParallelSteps, getProperty, getTotalSliceCPUMilliSeconds, initConfig, initFromProperties, internalClean, internalShutdown, internalStart, isPreserveTemporaryFiles, newConfig, registerHadoopShutdownHook, setConfigProperty, stepsAreLocal
addListener, addPlannerProperties, addSessionProperties, addStepListener, areSinksStale, areSourcesNewer, cleanup, createConfig, createFlowCanonicalHash, createFlowStats, createFlowThread, createPrepareFlowStats, deleteCheckpointsIfNotUpdate, deleteCheckpointsIfReplace, deleteSinks, deleteSinksIfNotUpdate, deleteSinksIfReplace, deleteTrapsIfNotUpdate, deleteTrapsIfReplace, fireOnCompleted, fireOnStarting, fireOnStopping, fireOnThrowable, fireOnThrowable, getCascadeID, getCascadingServices, getCheckpointNames, getCheckpoints, getCheckpointsCollection, getClassPath, getClientState, getEligibleJobsSize, getFieldsFor, getFlowCanonicalHash, getFlowDescriptor, getFlowElementGraph, getFlowSession, getFlowSkipStrategy, getFlowStats, getFlowStepGraph, getFlowSteps, getFlowStepStrategy, getHolder, getID, getJobMapCallables, getName, getPlannerInfo, getPlatformInfo, getRunID, getSink, getSink, getSinkModified, getSinkNames, getSinks, getSinksCollection, getSource, getSourceNames, getSources, getSourcesCollection, getSpawnStrategy, getStats, getSubmitPriority, getTags, getTrapNames, getTraps, getTrapsCollection, handleExecutorShutdown, hasListeners, hasStepListeners, initialize, initializeChildStats, initializeNewJobsMap, initSteps, internalStopAllJobs, isDebugEnabled, isInfoEnabled, isJobsMapInitialized, isSkipFlow, isStopJobsOnExit, logDebug, logError, logError, logInfo, logWarn, logWarn, logWarn, openSink, openSink, openSource, openSource, openTapForRead, openTapForWrite, openTrap, openTrap, prepare, presentSinkFields, presentSourceFields, registerShutdownHook, removeListener, removeListeners, removeStepListener, resourceExists, retrieveSinkFields, retrieveSourceFields, setCascade, setCheckpoints, setFlowElementGraph, setFlowSkipStrategy, setFlowStepGraph, setFlowStepStrategy, setName, setPlannerInfo, setSinks, setSources, setSpawnStrategy, setSubmitPriority, setTraps, start, stop, toString, updateJobsMap, updateSchemes, writeDOT, writeStepsDOT
public MultiMapReduceFlow(java.lang.String name, JobConf jobConf, JobConf... jobConfs)
name - of String
jobConf - of JobConf
jobConfs - of JobConf...
public MultiMapReduceFlow(java.util.Map<java.lang.Object,java.lang.Object> properties, java.lang.String name, JobConf jobConf, JobConf... jobConfs)
properties - of Map
name - of String
jobConf - of JobConf
jobConfs - of JobConf...
public MultiMapReduceFlow(java.util.Map<java.lang.Object,java.lang.Object> properties, java.lang.String name, java.util.Map<java.lang.String,java.lang.String> flowDescriptor, JobConf jobConf, JobConf... jobConfs)
properties - of Map
name - of String
flowDescriptor - of Map
jobConf - of JobConf
jobConfs - of JobConf...
public MultiMapReduceFlow(java.util.Map<java.lang.Object,java.lang.Object> properties, java.lang.String name, java.util.Map<java.lang.String,java.lang.String> flowDescriptor, boolean stopJobsOnExit, JobConf jobConf, JobConf... jobConfs)
properties - of Map
name - of String
flowDescriptor - of Map
stopJobsOnExit - of boolean
jobConf - of JobConf
jobConfs - of JobConf...
protected MultiMapReduceFlow(cascading.flow.planner.PlatformInfo platformInfo, java.util.Map<java.lang.Object,java.lang.Object> properties, java.lang.String name)
platformInfo - of PlatformInfo
properties - of Map
name - of String
protected MultiMapReduceFlow(cascading.flow.planner.PlatformInfo platformInfo, java.util.Map<java.lang.Object,java.lang.Object> properties, java.lang.String name, java.util.Map<java.lang.String,java.lang.String> flowDescriptor)
platformInfo - of PlatformInfo
properties - of Map
name - of String
flowDescriptor - of Map
protected void initializeFrom(java.util.List<JobConf> jobConfs)
protected MapReduceFlowStep createMapReduceFlowStep(JobConf jobConf)
public void notifyComplete()
public void complete()
protected boolean spawnSteps() throws java.lang.InterruptedException, java.util.concurrent.ExecutionException
spawnSteps
in class cascading.flow.BaseFlow<JobConf>
Throws: java.lang.InterruptedException
Throws: java.util.concurrent.ExecutionException
protected boolean blockingContinuePollingSteps()
protected cascading.tap.Tap createTap(JobConf jobConf, Path path, cascading.tap.SinkMode sinkMode)
createTap
in class BaseMapReduceFlow
public void attachFlowStep(JobConf jobConf)
protected void addFlowStep(MapReduceFlowStep flowStep)
protected cascading.flow.planner.process.FlowStepGraph getOrCreateFlowStepGraph()
protected java.util.Collection<MapReduceFlowStep> updateWithFlowSteps(java.util.Collection<MapReduceFlowStep> flowSteps)
protected cascading.flow.planner.process.FlowStepGraph updateFlowStepGraph(cascading.flow.planner.process.FlowStepGraph flowStepGraph, java.util.Collection<MapReduceFlowStep> flowSteps)
Copyright © 2007-2015 Xplenty, Inc. All Rights Reserved.