Source code

001/*
002 * Copyright (c) 2007-2016 Concurrent, Inc. All Rights Reserved.
003 *
004 * Project and contact information: http://www.cascading.org/
005 *
006 * This file is part of the Cascading project.
007 *
008 * Licensed under the Apache License, Version 2.0 (the "License");
009 * you may not use this file except in compliance with the License.
010 * You may obtain a copy of the License at
011 *
012 *     http://www.apache.org/licenses/LICENSE-2.0
013 *
014 * Unless required by applicable law or agreed to in writing, software
015 * distributed under the License is distributed on an "AS IS" BASIS,
016 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
017 * See the License for the specific language governing permissions and
018 * limitations under the License.
019 */
020
021package cascading.tuple;
022
023import java.io.InputStream;
024
025/**
026 * The StreamComparator interface allows for two {@link InputStream} instances to be compared, typically bit-wise.
027 * This is most useful when defining custom types to be stored in {@link Tuple} instances and the values will need
028 * to be grouped on and/or secondary sorted (via {@link cascading.pipe.GroupBy} and/or {@link cascading.pipe.CoGroup})
029 * and the underlying serialization implementation enables a useful bit-wise comparison without deserializing the custom
030 * type into memory.
031 * <p>
032 * Typically this interface is used to mark a {@link java.util.Comparator} as additionally
033 * supporting the ability to compare raw streams in tandem with comparing Object instances.
034 * Thus concrete implementations should implement this interface and the Comparator interface when being used
035 * as a "grouping" or "sorting" field Comparator.
036 * <p>
037 * When used with Hadoop, a {@link cascading.tuple.hadoop.io.BufferedInputStream} is passed into the
038 * {@link #compare(java.io.InputStream, java.io.InputStream)}
039 * method. This class gives access to the underlying byte[] array so each individual byte need not be read via
040 * {@link java.io.InputStream#read()}.
041 * So it is useful to declare an implementation as
042 * {@code public class YourCustomComparator implements StreamComparator<BufferedInputStream>, Comparator<YourCustomType>, Serializable}
043 * <p>
044 * Note the method {@link cascading.tuple.hadoop.io.BufferedInputStream#skip(long)} will need to be called with the number
045 * of bytes read from the underlying byte buffer before the compare() method returns.
046 *
047 * @param <T> the {@link InputStream} sub-type of both streams passed to compare()
048 */
049public interface StreamComparator<T extends InputStream>
050  {
051  int compare( T lhsStream, T rhsStream ); // compares the two raw streams; NOTE(review): result presumably follows the java.util.Comparator sign convention - confirm
052  }