001/*
002 * Copyright (c) 2007-2016 Concurrent, Inc. All Rights Reserved.
003 *
004 * Project and contact information: http://www.cascading.org/
005 *
006 * This file is part of the Cascading project.
007 *
008 * Licensed under the Apache License, Version 2.0 (the "License");
009 * you may not use this file except in compliance with the License.
010 * You may obtain a copy of the License at
011 *
012 *     http://www.apache.org/licenses/LICENSE-2.0
013 *
014 * Unless required by applicable law or agreed to in writing, software
015 * distributed under the License is distributed on an "AS IS" BASIS,
016 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
017 * See the License for the specific language governing permissions and
018 * limitations under the License.
019 */
020
021package cascading.operation.regex;
022
023import java.beans.ConstructorProperties;
024import java.util.regex.Matcher;
025
026import cascading.flow.FlowProcess;
027import cascading.management.annotation.Property;
028import cascading.management.annotation.PropertyDescription;
029import cascading.management.annotation.Visibility;
030import cascading.operation.Function;
031import cascading.operation.FunctionCall;
032import cascading.operation.OperationCall;
033import cascading.tuple.Fields;
034import cascading.tuple.Tuple;
035import cascading.util.Pair;
036
037/**
038 * Class RegexReplace is used to replace a matched regex with a replacement value.
039 * <p/>
040 * RegexReplace only expects one field value. If more than one argument value is passed, only the
041 * first is handled, the remainder are ignored.
042 * <p/>
043 * Note a {@code null} valued argument passed to the parser will be converted to an empty string ({@code ""}) before
044 * the regex is applied.
045 * <p/>
046 * Any Object value will be coerced to a String type if type information is provided. See the
047 * {@link cascading.tuple.type.CoercibleType} interface to control how custom Object types are converted to String
048 * values.
049 */
050public class RegexReplace extends RegexOperation<Pair<Matcher, Tuple>> implements Function<Pair<Matcher, Tuple>>
051  {
052  /** Field replacement */
053  private final String replacement;
054  /** Field replaceAll */
055  private boolean replaceAll = true;
056
057  /**
058   * Constructor RegexReplace creates a new RegexReplace instance,
059   *
060   * @param fieldDeclaration of type Fields
061   * @param patternString    of type String
062   * @param replacement      of type String
063   * @param replaceAll       of type boolean
064   */
065  @ConstructorProperties({"fieldDeclaration", "patternString", "replacement", "replaceAll"})
066  public RegexReplace( Fields fieldDeclaration, String patternString, String replacement, boolean replaceAll )
067    {
068    this( fieldDeclaration, patternString, replacement );
069    this.replaceAll = replaceAll;
070    }
071
072  /**
073   * Constructor RegexReplace creates a new RegexReplace instance.
074   *
075   * @param fieldDeclaration of type Fields
076   * @param patternString    of type String
077   * @param replacement      of type String
078   */
079  @ConstructorProperties({"fieldDeclaration", "patternString", "replacement"})
080  public RegexReplace( Fields fieldDeclaration, String patternString, String replacement )
081    {
082    super( 1, fieldDeclaration, patternString );
083    this.replacement = replacement;
084    }
085
086  @Property(name = "replacement", visibility = Visibility.PUBLIC)
087  @PropertyDescription("The string replacement value.")
088  public String getReplacement()
089    {
090    return replacement;
091    }
092
093  @Property(name = "replaceAll", visibility = Visibility.PUBLIC)
094  @PropertyDescription("Will replace all occurrences of pattern.")
095  public boolean isReplaceAll()
096    {
097    return replaceAll;
098    }
099
100  @Override
101  public void prepare( FlowProcess flowProcess, OperationCall<Pair<Matcher, Tuple>> operationCall )
102    {
103    operationCall.setContext( new Pair<Matcher, Tuple>( getPattern().matcher( "" ), Tuple.size( 1 ) ) );
104    }
105
106  @Override
107  public void operate( FlowProcess flowProcess, FunctionCall<Pair<Matcher, Tuple>> functionCall )
108    {
109    // coerce to string
110    String value = functionCall.getArguments().getString( 0 );
111
112    // make safe
113    if( value == null )
114      value = "";
115
116    Tuple output = functionCall.getContext().getRhs();
117    Matcher matcher = functionCall.getContext().getLhs().reset( value );
118
119    if( replaceAll )
120      output.set( 0, matcher.replaceAll( replacement ) );
121    else
122      output.set( 0, matcher.replaceFirst( replacement ) );
123
124    functionCall.getOutputCollector().add( output );
125    }
126
127  @Override
128  public boolean equals( Object object )
129    {
130    if( this == object )
131      return true;
132    if( !( object instanceof RegexReplace ) )
133      return false;
134    if( !super.equals( object ) )
135      return false;
136
137    RegexReplace that = (RegexReplace) object;
138
139    if( replaceAll != that.replaceAll )
140      return false;
141    if( replacement != null ? !replacement.equals( that.replacement ) : that.replacement != null )
142      return false;
143
144    return true;
145    }
146
147  @Override
148  public int hashCode()
149    {
150    int result = super.hashCode();
151    result = 31 * result + ( replacement != null ? replacement.hashCode() : 0 );
152    result = 31 * result + ( replaceAll ? 1 : 0 );
153    return result;
154    }
155  }