001    /*
002     * Copyright (c) 2007-2014 Concurrent, Inc. All Rights Reserved.
003     *
004     * Project and contact information: http://www.cascading.org/
005     *
006     * This file is part of the Cascading project.
007     *
008     * Licensed under the Apache License, Version 2.0 (the "License");
009     * you may not use this file except in compliance with the License.
010     * You may obtain a copy of the License at
011     *
012     *     http://www.apache.org/licenses/LICENSE-2.0
013     *
014     * Unless required by applicable law or agreed to in writing, software
015     * distributed under the License is distributed on an "AS IS" BASIS,
016     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
017     * See the License for the specific language governing permissions and
018     * limitations under the License.
019     */
020    
021    package cascading.tuple;
022    
023    import java.io.InputStream;
024    
025    /**
026     * The StreamComparator interface allows for two {@link InputStream} instances to be compared, typically bit-wise.
027     * This is most useful when defining custom types to be stored in {@link Tuple} instances and the values will need
028     * to be grouped on and/or secondary sorted (via {@link cascading.pipe.GroupBy} and/or {@link cascading.pipe.CoGroup})
029     * and the underlying serialization implementation enables a useful bit-wise comparison without deserializing the custom
030     * type into memory.
031     * <p/>
032     * Typically this interface is used to mark a {@link java.util.Comparator} as additionally
033     * supporting the ability to compare raw streams in tandem with comparing Object instances.
034     * Thus concrete implementations should implement this interface and the Comparator interface when being used
035     * as a "grouping" or "sorting" field Comparator.
036     * <p/>
037     * When used with Hadoop, a {@link cascading.tuple.hadoop.io.BufferedInputStream} is passed into the
038     * {@link #compare(java.io.InputStream, java.io.InputStream)}
039     * method. This class gives access to the underlying byte[] array so each individual byte need not be
040     * read via {@link java.io.InputStream#read()}.
041     * So it is useful to declare an implementation as
042     * {@code public class YourCustomComparator implements StreamComparator<BufferedInputStream>, Comparator<YourCustomType>, Serializable}
043     * <p/>
044     * Note the method {@link cascading.tuple.hadoop.io.BufferedInputStream#skip(long)} will need to be called with the number
045     * of bytes read from the underlying byte buffer before the compare() method returns.
046     *
047     * @param <T> the {@link InputStream} subtype accepted by the compare method
048     */
049    public interface StreamComparator<T extends InputStream>
050      {
         /**
          * Compares the two given streams against each other.
          *
          * @param lhsStream the left hand side stream of the comparison
          * @param rhsStream the right hand side stream of the comparison
          * @return the int result of the comparison; NOTE(review): presumably follows the
          *         {@link java.util.Comparator#compare(Object, Object)} sign convention
          *         (negative, zero, positive) -- not stated here, confirm against callers
          */
051      int compare( T lhsStream, T rhsStream );
052      }