001/*
002 * Copyright (c) 2007-2017 Xplenty, Inc. All Rights Reserved.
003 *
004 * Project and contact information: http://www.cascading.org/
005 *
006 * This file is part of the Cascading project.
007 *
008 * Licensed under the Apache License, Version 2.0 (the "License");
009 * you may not use this file except in compliance with the License.
010 * You may obtain a copy of the License at
011 *
012 *     http://www.apache.org/licenses/LICENSE-2.0
013 *
014 * Unless required by applicable law or agreed to in writing, software
015 * distributed under the License is distributed on an "AS IS" BASIS,
016 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
017 * See the License for the specific language governing permissions and
018 * limitations under the License.
019 */
020
021package cascading.operation.regex;
022
023import java.beans.ConstructorProperties;
024import java.util.regex.Matcher;
025
026import cascading.flow.FlowProcess;
027import cascading.operation.Function;
028import cascading.operation.FunctionCall;
029import cascading.operation.OperationCall;
030import cascading.tuple.Fields;
031import cascading.tuple.Tuple;
032import cascading.util.Pair;
033
034/**
035 * Class RegexGenerator will emit a new Tuple for every matched regex group.
036 * <p/>
037 * Note a {@code null} valued argument passed to the parser will be converted to an empty string ({@code ""}) before
038 * the regex is applied.
039 * <p/>
040 * Any Object value will be coerced to a String type if type information is provided. See the
041 * {@link cascading.tuple.type.CoercibleType} interface to control how custom Object types are converted to String
042 * values.
043 */
044public class RegexGenerator extends RegexOperation<Pair<Matcher, Tuple>> implements Function<Pair<Matcher, Tuple>>
045  {
046  /**
047   * Constructor RegexGenerator creates a new RegexGenerator instance.
048   *
049   * @param patternString of type String
050   */
051  @ConstructorProperties({"patternString"})
052  public RegexGenerator( String patternString )
053    {
054    super( 1, Fields.size( 1 ), patternString );
055    }
056
057  /**
058   * Constructor RegexGenerator creates a new RegexGenerator instance.
059   *
060   * @param fieldDeclaration of type Fields
061   * @param patternString    of type String
062   */
063  @ConstructorProperties({"fieldDeclaration", "patternString"})
064  public RegexGenerator( Fields fieldDeclaration, String patternString )
065    {
066    super( 1, fieldDeclaration, patternString );
067
068    if( fieldDeclaration.size() != 1 )
069      throw new IllegalArgumentException( "fieldDeclaration may only declare one field, was " + fieldDeclaration.print() );
070    }
071
072  @Override
073  public void prepare( FlowProcess flowProcess, OperationCall<Pair<Matcher, Tuple>> operationCall )
074    {
075    operationCall.setContext( new Pair<Matcher, Tuple>( getPattern().matcher( "" ), Tuple.size( 1 ) ) );
076    }
077
078  @Override
079  public void operate( FlowProcess flowProcess, FunctionCall<Pair<Matcher, Tuple>> functionCall )
080    {
081    String value = functionCall.getArguments().getString( 0 );
082
083    if( value == null )
084      value = "";
085
086    Matcher matcher = functionCall.getContext().getLhs().reset( value );
087
088    while( matcher.find() )
089      {
090      functionCall.getContext().getRhs().set( 0, matcher.group() );
091      functionCall.getOutputCollector().add( functionCall.getContext().getRhs() );
092      }
093    }
094  }