001    /*
002     * Copyright (c) 2007-2014 Concurrent, Inc. All Rights Reserved.
003     *
004     * Project and contact information: http://www.cascading.org/
005     *
006     * This file is part of the Cascading project.
007     *
008     * Licensed under the Apache License, Version 2.0 (the "License");
009     * you may not use this file except in compliance with the License.
010     * You may obtain a copy of the License at
011     *
012     *     http://www.apache.org/licenses/LICENSE-2.0
013     *
014     * Unless required by applicable law or agreed to in writing, software
015     * distributed under the License is distributed on an "AS IS" BASIS,
016     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
017     * See the License for the specific language governing permissions and
018     * limitations under the License.
019     */
020    
021    package cascading.pipe.joiner;
022    
023    import java.util.Arrays;
024    import java.util.Iterator;
025    
026    import cascading.flow.FlowProcess;
027    import cascading.tuple.Fields;
028    import cascading.tuple.Tuple;
029    
030    /**
031     * Class JoinerClosure wraps all incoming tuple streams with iterator instances allowing for just join implementations.
032     * <p/>
033     * This class is provided to a {@link Joiner#getIterator(JoinerClosure)} implementation, or to a {@link cascading.operation.Buffer}
034     * via the {@link cascading.operation.BufferCall#getJoinerClosure()} method.
035     * <p/>
036     * All iterators returned by {@link #getIterator(int)} may be retrieved more than once to restart them except for the left
037     * most iterator at position {@code 0} (zero).
038     * <p/>
039     * This iterator may only be iterated across once. All other iterators are backed by memory and possibly disk.
040     */
041    public abstract class JoinerClosure
042      {
043      protected final FlowProcess flowProcess;
044    
045      protected final Fields[] joinFields;
046      protected final Fields[] valueFields;
047    
048      public JoinerClosure( FlowProcess flowProcess, Fields[] joinFields, Fields[] valueFields )
049        {
050        this.flowProcess = flowProcess;
051        this.joinFields = Arrays.copyOf( joinFields, joinFields.length );
052        this.valueFields = Arrays.copyOf( valueFields, valueFields.length );
053        }
054    
055      public FlowProcess getFlowProcess()
056        {
057        return flowProcess;
058        }
059    
060      /**
061       * Returns an array of {@link Fields} denoting the join fields or keys uses for each incoming pipe.
062       * <p/>
063       * The most left handed pipe will be in array position 0.
064       *
065       * @return an array of Fields
066       */
067      public Fields[] getJoinFields()
068        {
069        return joinFields;
070        }
071    
072      /**
073       * Returns an array of all the incoming fields for each incoming pipe.
074       * <p/>
075       * The most left handed pipe will be in array position 0;
076       *
077       * @return an array of Fields
078       */
079      public Fields[] getValueFields()
080        {
081        return valueFields;
082        }
083    
084      public boolean isSelfJoin()
085        {
086        return valueFields.length == 1 && size() != valueFields.length;
087        }
088    
089      public abstract int size();
090    
091      /**
092       * Returns a Tuple Iterator for the given pipe position. Position 0 is the most left handed pipe passed to the prior
093       * {@link cascading.pipe.CoGroup}.
094       * <p/>
095       * To restart an Iterator over a given pipe, this method must be called again.
096       *
097       * @param pos of type int
098       * @return an Iterator of Tuple instances.
099       */
100      public abstract Iterator<Tuple> getIterator( int pos );
101    
102      public abstract boolean isEmpty( int pos );
103    
104      public abstract Tuple getGroupTuple( Tuple keysTuple );
105      }