001    /*
002     * Copyright (c) 2007-2014 Concurrent, Inc. All Rights Reserved.
003     *
004     * Project and contact information: http://www.cascading.org/
005     *
006     * This file is part of the Cascading project.
007     *
008     * Licensed under the Apache License, Version 2.0 (the "License");
009     * you may not use this file except in compliance with the License.
010     * You may obtain a copy of the License at
011     *
012     *     http://www.apache.org/licenses/LICENSE-2.0
013     *
014     * Unless required by applicable law or agreed to in writing, software
015     * distributed under the License is distributed on an "AS IS" BASIS,
016     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
017     * See the License for the specific language governing permissions and
018     * limitations under the License.
019     */
020    
021    package cascading.operation.regex;
022    
023    import java.beans.ConstructorProperties;
024    import java.util.regex.Pattern;
025    
026    import cascading.flow.FlowProcess;
027    import cascading.operation.Function;
028    import cascading.operation.FunctionCall;
029    import cascading.operation.OperationCall;
030    import cascading.tuple.Fields;
031    import cascading.tuple.Tuple;
032    import cascading.util.Pair;
033    
034    /** Class RegexSplitter will split an incoming argument value by the given regex delimiter patternString. */
035    public class RegexSplitter extends RegexOperation<Pair<Pattern, Tuple>> implements Function<Pair<Pattern, Tuple>>
036      {
037      private final int length;
038    
039      /**
040       * Constructor RegexSplitter creates a new RegexSplitter instance.
041       *
042       * @param patternString of type String
043       */
044      @ConstructorProperties({"patternString"})
045      public RegexSplitter( String patternString )
046        {
047        super( 1, patternString );
048        length = fieldDeclaration.isUnknown() ? -1 : fieldDeclaration.size();
049        }
050    
051      /**
052       * Constructor RegexOperation creates a new RegexOperation instance, where the delimiter is the tab character.
053       *
054       * @param fieldDeclaration of type Fields
055       */
056      @ConstructorProperties({"fieldDeclaration"})
057      public RegexSplitter( Fields fieldDeclaration )
058        {
059        super( 1, fieldDeclaration, "\t" );
060        length = fieldDeclaration.isUnknown() ? -1 : fieldDeclaration.size();
061        }
062    
063      /**
064       * Constructor RegexSplitter creates a new RegexSplitter instance.
065       *
066       * @param fieldDeclaration of type Fields
067       * @param patternString    of type String
068       */
069      @ConstructorProperties({"fieldDeclaration", "patternString"})
070      public RegexSplitter( Fields fieldDeclaration, String patternString )
071        {
072        super( 1, fieldDeclaration, patternString );
073        length = fieldDeclaration.isUnknown() ? -1 : fieldDeclaration.size();
074        }
075    
076      @Override
077      public void prepare( FlowProcess flowProcess, OperationCall<Pair<Pattern, Tuple>> operationCall )
078        {
079        operationCall.setContext( new Pair<Pattern, Tuple>( getPattern(), new Tuple() ) );
080        }
081    
082      @Override
083      public void operate( FlowProcess flowProcess, FunctionCall<Pair<Pattern, Tuple>> functionCall )
084        {
085        String value = functionCall.getArguments().getString( 0 );
086    
087        if( value == null )
088          value = "";
089    
090        Tuple output = functionCall.getContext().getRhs();
091    
092        output.clear();
093    
094        String[] split = functionCall.getContext().getLhs().split( value, length );
095    
096        for( int i = 0; i < split.length; i++ )
097          output.add( split[ i ] );
098    
099        functionCall.getOutputCollector().add( output );
100        }
101      }