001/*
002 * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
003 *
004 * Project and contact information: http://www.cascading.org/
005 *
006 * This file is part of the Cascading project.
007 *
008 * Licensed under the Apache License, Version 2.0 (the "License");
009 * you may not use this file except in compliance with the License.
010 * You may obtain a copy of the License at
011 *
012 *     http://www.apache.org/licenses/LICENSE-2.0
013 *
014 * Unless required by applicable law or agreed to in writing, software
015 * distributed under the License is distributed on an "AS IS" BASIS,
016 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
017 * See the License for the specific language governing permissions and
018 * limitations under the License.
019 */
020
021package cascading.tuple.hadoop;
022
023import java.util.ArrayList;
024import java.util.LinkedHashMap;
025import java.util.List;
026import java.util.Map;
027import java.util.Properties;
028
029import cascading.property.Props;
030import cascading.util.Util;
031
032/**
033 * Class TupleSerializationProps is a fluent interface for building properties to be passed to a
034 * {@link cascading.flow.FlowConnector} before creating new {@link cascading.flow.Flow} instances.
035 * <p/>
036 * See {@link TupleSerialization} for details on these properties.
037 *
038 * @see TupleSerialization
039 */
040public class TupleSerializationProps extends Props
041  {
042  public static final String SERIALIZATION_TOKENS = "cascading.serialization.tokens";
043  public static final String HADOOP_IO_SERIALIZATIONS = "io.serializations";
044
045  Map<Integer, String> serializationTokens = new LinkedHashMap<Integer, String>();
046  List<String> hadoopSerializations = new ArrayList<String>();
047
048  /**
049   * Adds the given token and className pair as a serialization token property. During object serialization and deserialization,
050   * the given token will be used instead of the className when an instance of the className is encountered.
051   *
052   * @param properties of type Map
053   * @param token      of type int
054   * @param className  of type String
055   */
056  public static void addSerializationToken( Map<Object, Object> properties, int token, String className )
057    {
058    String tokens = getSerializationTokens( properties );
059
060    properties.put( SERIALIZATION_TOKENS, Util.join( ",", Util.removeNulls( tokens, token + "=" + className ) ) );
061    }
062
063  /**
064   * Returns the serialization tokens property.
065   *
066   * @param properties of type Map
067   * @return returns a String
068   */
069  public static String getSerializationTokens( Map<Object, Object> properties )
070    {
071    return (String) properties.get( SERIALIZATION_TOKENS );
072    }
073
074  /**
075   * Adds the given className as a Hadoop IO serialization class.
076   *
077   * @param properties of type Map
078   * @param className  of type String
079   */
080  public static void addSerialization( Map<Object, Object> properties, String className )
081    {
082    String serializations = (String) properties.get( HADOOP_IO_SERIALIZATIONS );
083
084    properties.put( HADOOP_IO_SERIALIZATIONS, Util.join( ",", Util.removeNulls( serializations, className ) ) );
085    }
086
087  /**
088   * Creates a new TupleSerializationProps instance.
089   *
090   * @return TupleSerializationProps instance
091   */
092  public static TupleSerializationProps tupleSerializationProps()
093    {
094    return new TupleSerializationProps();
095    }
096
097  public TupleSerializationProps()
098    {
099    }
100
101  public Map<Integer, String> getSerializationTokens()
102    {
103    return serializationTokens;
104    }
105
106  /**
107   * Method setSerializationTokens sets the given integer tokens and classNames Map as a serialization properties.
108   * <p/>
109   * During object serialization and deserialization, the given tokens will be used instead of the className when an
110   * instance of the className is encountered.
111   *
112   * @param serializationTokens Map of Integer tokens and String classnames
113   * @return this
114   */
115  public TupleSerializationProps setSerializationTokens( Map<Integer, String> serializationTokens )
116    {
117    this.serializationTokens = serializationTokens;
118
119    return this;
120    }
121
122  /**
123   * Method addSerializationTokens adds the given integer tokens and classNames Map as a serialization properties.
124   * <p/>
125   * During object serialization and deserialization, the given tokens will be used instead of the className when an
126   * instance of the className is encountered.
127   *
128   * @param serializationTokens Map of Integer tokens and String classnames
129   * @return this
130   */
131  public TupleSerializationProps addSerializationTokens( Map<Integer, String> serializationTokens )
132    {
133    this.serializationTokens.putAll( serializationTokens );
134
135    return this;
136    }
137
138  /**
139   * Method addSerializationToken adds the given integer token and classNames as a serialization properties.
140   * <p/>
141   * During object serialization and deserialization, the given tokens will be used instead of the className when an
142   * instance of the className is encountered.
143   *
144   * @param token                  type int
145   * @param serializationClassName type String
146   * @return this
147   */
148  public TupleSerializationProps addSerializationToken( int token, String serializationClassName )
149    {
150    this.serializationTokens.put( token, serializationClassName );
151
152    return this;
153    }
154
155  public List<String> getHadoopSerializations()
156    {
157    return hadoopSerializations;
158    }
159
160  /**
161   * Method setHadoopSerializations sets the Hadoop serialization classNames to be used as properties.
162   *
163   * @param hadoopSerializationClassNames List of classNames
164   * @return this
165   */
166  public TupleSerializationProps setHadoopSerializations( List<String> hadoopSerializationClassNames )
167    {
168    this.hadoopSerializations = hadoopSerializationClassNames;
169
170    return this;
171    }
172
173  /**
174   * Method addHadoopSerializations adds the Hadoop serialization classNames to be used as properties.
175   *
176   * @param hadoopSerializationClassNames List of classNames
177   * @return this
178   */
179  public TupleSerializationProps addHadoopSerializations( List<String> hadoopSerializationClassNames )
180    {
181    this.hadoopSerializations.addAll( hadoopSerializationClassNames );
182
183    return this;
184    }
185
186  /**
187   * Method addHadoopSerialization adds a Hadoop serialization className to be used as properties.
188   *
189   * @param hadoopSerializationClassName List of classNames
190   * @return this
191   */
192  public TupleSerializationProps addHadoopSerialization( String hadoopSerializationClassName )
193    {
194    this.hadoopSerializations.add( hadoopSerializationClassName );
195
196    return this;
197    }
198
199  @Override
200  protected void addPropertiesTo( Properties properties )
201    {
202    for( Map.Entry<Integer, String> entry : serializationTokens.entrySet() )
203      addSerializationToken( properties, entry.getKey(), entry.getValue() );
204
205    for( String hadoopSerialization : hadoopSerializations )
206      addSerialization( properties, hadoopSerialization );
207    }
208  }