/*
 * Copyright (c) 2007-2014 Concurrent, Inc. All Rights Reserved.
 *
 * Project and contact information: http://www.cascading.org/
 *
 * This file is part of the Cascading project.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cascading.pipe;

/**
 * The Checkpoint pipe, if supported by the current planner, will force data to be persisted at the point in
 * the tuple stream an instance of Checkpoint is inserted into the pipe assembly.
 * <p/>
 * If a checkpoint {@link cascading.tap.Tap} is added to the {@link cascading.flow.FlowDef} via the
 * {@link cascading.flow.FlowDef#addCheckpoint(Checkpoint, cascading.tap.Tap)} method, that Tap instance
 * will be used to capture the intermediate result sets.
 * <p/>
 * Any Scheme used as a checkpoint must source {@link cascading.tuple.Fields#UNKNOWN} and
 * sink {@link cascading.tuple.Fields#ALL}.
 * <p/>
 * If used with a {@link cascading.scheme.hadoop.TextDelimited} {@link cascading.scheme.Scheme} class and
 * the {@code hasHeader} value is {@code true}, a header with the resolved field names will be written to the file.
 * <p/>
 * This is especially useful for debugging complex flows.
 * <p/>
 * For the {@link cascading.flow.hadoop.HadoopFlowConnector} and Hadoop platform, a Checkpoint will force a new
 * MapReduce job ({@link cascading.flow.hadoop.HadoopFlowStep}) into the {@link cascading.flow.Flow} plan.
 * <p/>
 * This can be important when used in conjunction with a {@link HashJoin}, where the operations upstream
 * of the HashJoin significantly filter the data, allowing it to fit in memory.
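 * <p/>
 * A minimal usage sketch follows; the pipe names, tap paths, and surrounding source and sink taps are
 * illustrative only and assume the Hadoop platform.
 * <pre>{@code
 * Pipe assembly = new Pipe( "events" );
 * // ... upstream operations that reduce the tuple stream ...
 *
 * // persist the intermediate result at this point in the assembly
 * Checkpoint checkpoint = new Checkpoint( "persisted", assembly );
 * Pipe tail = checkpoint; // further operations could continue from here
 *
 * Tap sourceTap = new Hfs( new TextDelimited( true, "\t" ), "path/to/input" );
 * Tap sinkTap = new Hfs( new TextDelimited( true, "\t" ), "path/to/output" );
 *
 * // hasHeader = true writes the resolved field names as a header into the checkpoint file
 * Tap checkpointTap = new Hfs( new TextDelimited( true, "\t" ), "path/to/checkpoint" );
 *
 * FlowDef flowDef = FlowDef.flowDef()
 *   .addSource( assembly, sourceTap )
 *   .addTailSink( tail, sinkTap )
 *   .addCheckpoint( checkpoint, checkpointTap );
 *
 * Flow flow = new HadoopFlowConnector().connect( flowDef );
 * }</pre>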
 */
public class Checkpoint extends Pipe
  {
  /**
   * Constructor Checkpoint creates a new Checkpoint pipe which inherits the name of its previous pipe.
   *
   * @param previous of type Pipe
   */
  public Checkpoint( Pipe previous )
    {
    super( previous );
    }

  /**
   * Constructor Checkpoint creates a new Checkpoint pipe with the given name.
   *
   * @param name     of type String
   * @param previous of type Pipe
   */
  public Checkpoint( String name, Pipe previous )
    {
    super( name, previous );
    }
  }