001    /*
002     * Copyright (c) 2007-2014 Concurrent, Inc. All Rights Reserved.
003     *
004     * Project and contact information: http://www.cascading.org/
005     *
006     * This file is part of the Cascading project.
007     *
008     * Licensed under the Apache License, Version 2.0 (the "License");
009     * you may not use this file except in compliance with the License.
010     * You may obtain a copy of the License at
011     *
012     *     http://www.apache.org/licenses/LICENSE-2.0
013     *
014     * Unless required by applicable law or agreed to in writing, software
015     * distributed under the License is distributed on an "AS IS" BASIS,
016     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
017     * See the License for the specific language governing permissions and
018     * limitations under the License.
019     */
020    
021    package cascading.operation.text;
022    
023    import java.beans.ConstructorProperties;
024    import java.text.ParseException;
025    import java.text.SimpleDateFormat;
026    import java.util.Arrays;
027    import java.util.Calendar;
028    import java.util.Date;
029    import java.util.Locale;
030    import java.util.TimeZone;
031    
032    import cascading.flow.FlowProcess;
033    import cascading.operation.Function;
034    import cascading.operation.FunctionCall;
035    import cascading.operation.OperationException;
036    import cascading.tuple.Fields;
037    import cascading.tuple.Tuple;
038    import cascading.util.Pair;
039    
040    /**
041     * Class DateParser is used to convert a text date string to a timestamp, the number of milliseconds
042     * since January 1, 1970, 00:00:00 GMT, using the {@link SimpleDateFormat} syntax.
043     * <p/>
044     * If given, individual {@link Calendar} fields can be stored in unique fields for a given {@link TimeZone} and {@link Locale}.
045     */
046    public class DateParser extends DateOperation implements Function<Pair<SimpleDateFormat, Tuple>>
047      {
048      /** Field FIELD_NAME */
049      public static final String FIELD_NAME = "ts";
050    
051      /** Field calendarFields */
052      private int[] calendarFields;
053    
054      /**
055       * Constructor DateParser creates a new DateParser instance that creates a simple long time stamp of the parsed date.
056       *
057       * @param dateFormatString of type String
058       */
059      @ConstructorProperties({"dateFormatString"})
060      public DateParser( String dateFormatString )
061        {
062        super( 1, new Fields( FIELD_NAME ), dateFormatString );
063        }
064    
065      /**
066       * Constructor DateParser creates a new DateParser instance.
067       *
068       * @param fieldDeclaration of type Fields
069       * @param dateFormatString of type String
070       */
071      @ConstructorProperties({"fieldDeclaration", "dateFormatString"})
072      public DateParser( Fields fieldDeclaration, String dateFormatString )
073        {
074        super( 1, fieldDeclaration, dateFormatString );
075        }
076    
077      /**
078       * Constructor DateParser creates a new DateParser instance, where calendarFields is an int[] of {@link Calendar} field
079       * values. See {@link Calendar#get(int)}.
080       *
081       * @param fieldDeclaration of type Fields
082       * @param calendarFields   of type int[]
083       * @param dateFormatString of type String
084       */
085      @ConstructorProperties({"fieldDeclaration", "calendarFields", "dateFormatString"})
086      public DateParser( Fields fieldDeclaration, int[] calendarFields, String dateFormatString )
087        {
088        this( fieldDeclaration, calendarFields, null, null, dateFormatString );
089        }
090    
091      /**
092       * Constructor DateParser creates a new DateParser instance, where zone and locale are passed to the internal
093       * {@link SimpleDateFormat} instance.
094       *
095       * @param fieldDeclaration of type Fields
096       * @param zone             of type TimeZone
097       * @param locale           of type Locale
098       * @param dateFormatString of type String
099       */
100      @ConstructorProperties({"fieldDeclaration", "zone", "locale", "dateFormatString"})
101      public DateParser( Fields fieldDeclaration, TimeZone zone, Locale locale, String dateFormatString )
102        {
103        this( fieldDeclaration, null, zone, locale, dateFormatString );
104        }
105    
106      /**
107       * Constructor DateParser creates a new DateParser instance, where calendarFields is an int[] of {@link Calendar} field
108       * values. See {@link Calendar#get(int)}. The {@link TimeZone} and/or {@link Locale} may also be set.
109       *
110       * @param fieldDeclaration of type Fields
111       * @param calendarFields   of type int[]
112       * @param zone             of type TimeZone
113       * @param locale           of type Locale
114       * @param dateFormatString of type String
115       */
116      @ConstructorProperties({"fieldDeclaration", "calendarFields", "zone", "locale", "dateFormatString"})
117      public DateParser( Fields fieldDeclaration, int[] calendarFields, TimeZone zone, Locale locale, String dateFormatString )
118        {
119        super( 1, fieldDeclaration, dateFormatString, zone, locale );
120    
121        if( calendarFields != null )
122          {
123          this.calendarFields = Arrays.copyOf( calendarFields, calendarFields.length );
124    
125          if( fieldDeclaration.size() != calendarFields.length )
126            throw new IllegalArgumentException( "fieldDeclaration must be same size as calendarFields, was " + fieldDeclaration.print() + " with calendar size: " + calendarFields.length );
127          }
128        else
129          {
130          if( !fieldDeclaration.isSubstitution() && fieldDeclaration.size() != 1 )
131            throw new IllegalArgumentException( "fieldDeclaration may only declare one field name, got " + fieldDeclaration.print() );
132          }
133        }
134    
135      @Override
136      public void operate( FlowProcess flowProcess, FunctionCall<Pair<SimpleDateFormat, Tuple>> functionCall )
137        {
138        Tuple output = functionCall.getContext().getRhs();
139    
140        try
141          {
142          String value = functionCall.getArguments().getString( 0 );
143    
144          if( value == null ) // if null, return null for the field
145            {
146            output.set( 0, null ); // safe to call set, tuple is size of 1
147    
148            functionCall.getOutputCollector().add( output );
149    
150            return;
151            }
152    
153          Date date = functionCall.getContext().getLhs().parse( value );
154    
155          if( calendarFields == null )
156            output.set( 0, date.getTime() ); // safe to call set, tuple is size of 1
157          else
158            makeCalendarFields( output, date );
159          }
160        catch( ParseException exception )
161          {
162          throw new OperationException( "unable to parse input value: " + functionCall.getArguments().getObject( 0 ), exception );
163          }
164    
165        functionCall.getOutputCollector().add( output );
166        }
167    
168      private void makeCalendarFields( Tuple output, Date date )
169        {
170        output.clear();
171    
172        Calendar calendar = getCalendar();
173        calendar.setTime( date );
174    
175        for( int i = 0; i < calendarFields.length; i++ )
176        //noinspection MagicConstant
177          output.add( calendar.get( calendarFields[ i ] ) );
178        }
179    
180      @Override
181      public boolean equals( Object object )
182        {
183        if( this == object )
184          return true;
185        if( !( object instanceof DateParser ) )
186          return false;
187        if( !super.equals( object ) )
188          return false;
189    
190        DateParser that = (DateParser) object;
191    
192        if( !Arrays.equals( calendarFields, that.calendarFields ) )
193          return false;
194    
195        return true;
196        }
197    
198      @Override
199      public int hashCode()
200        {
201        int result = super.hashCode();
202        result = 31 * result + ( calendarFields != null ? Arrays.hashCode( calendarFields ) : 0 );
203        return result;
204        }
205      }