001    /*
002     * Copyright (c) 2007-2014 Concurrent, Inc. All Rights Reserved.
003     *
004     * Project and contact information: http://www.cascading.org/
005     *
006     * This file is part of the Cascading project.
007     *
008     * Licensed under the Apache License, Version 2.0 (the "License");
009     * you may not use this file except in compliance with the License.
010     * You may obtain a copy of the License at
011     *
012     *     http://www.apache.org/licenses/LICENSE-2.0
013     *
014     * Unless required by applicable law or agreed to in writing, software
015     * distributed under the License is distributed on an "AS IS" BASIS,
016     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
017     * See the License for the specific language governing permissions and
018     * limitations under the License.
019     */
020    
021    package cascading.operation.regex;
022    
023    import java.beans.ConstructorProperties;
024    import java.util.regex.Matcher;
025    
026    import cascading.flow.FlowProcess;
027    import cascading.operation.Function;
028    import cascading.operation.FunctionCall;
029    import cascading.operation.OperationCall;
030    import cascading.tuple.Fields;
031    import cascading.tuple.Tuple;
032    import cascading.util.Pair;
033    
034    /** Class RegexGenerator will emit a new Tuple for every matched regex group. */
035    public class RegexGenerator extends RegexOperation<Pair<Matcher, Tuple>> implements Function<Pair<Matcher, Tuple>>
036      {
037      /**
038       * Constructor RegexGenerator creates a new RegexGenerator instance.
039       *
040       * @param patternString of type String
041       */
042      @ConstructorProperties({"patternString"})
043      public RegexGenerator( String patternString )
044        {
045        super( 1, Fields.size( 1 ), patternString );
046        }
047    
048      /**
049       * Constructor RegexGenerator creates a new RegexGenerator instance.
050       *
051       * @param fieldDeclaration of type Fields
052       * @param patternString    of type String
053       */
054      @ConstructorProperties({"fieldDeclaration", "patternString"})
055      public RegexGenerator( Fields fieldDeclaration, String patternString )
056        {
057        super( 1, fieldDeclaration, patternString );
058    
059        if( fieldDeclaration.size() != 1 )
060          throw new IllegalArgumentException( "fieldDeclaration may only declare one field, was " + fieldDeclaration.print() );
061        }
062    
063      @Override
064      public void prepare( FlowProcess flowProcess, OperationCall<Pair<Matcher, Tuple>> operationCall )
065        {
066        operationCall.setContext( new Pair<Matcher, Tuple>( getPattern().matcher( "" ), Tuple.size( 1 ) ) );
067        }
068    
069      @Override
070      public void operate( FlowProcess flowProcess, FunctionCall<Pair<Matcher, Tuple>> functionCall )
071        {
072        String value = functionCall.getArguments().getString( 0 );
073    
074        if( value == null )
075          value = "";
076    
077        Matcher matcher = functionCall.getContext().getLhs().reset( value );
078    
079        while( matcher.find() )
080          {
081          functionCall.getContext().getRhs().set( 0, matcher.group() );
082          functionCall.getOutputCollector().add( functionCall.getContext().getRhs() );
083          }
084        }
085      }