001    /*
002     * Copyright (c) 2007-2014 Concurrent, Inc. All Rights Reserved.
003     *
004     * Project and contact information: http://www.cascading.org/
005     *
006     * This file is part of the Cascading project.
007     *
008     * Licensed under the Apache License, Version 2.0 (the "License");
009     * you may not use this file except in compliance with the License.
010     * You may obtain a copy of the License at
011     *
012     *     http://www.apache.org/licenses/LICENSE-2.0
013     *
014     * Unless required by applicable law or agreed to in writing, software
015     * distributed under the License is distributed on an "AS IS" BASIS,
016     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
017     * See the License for the specific language governing permissions and
018     * limitations under the License.
019     */
020    
021    package cascading.operation.regex;
022    
023    import java.beans.ConstructorProperties;
024    import java.util.regex.Matcher;
025    
026    import cascading.flow.FlowProcess;
027    import cascading.operation.OperationCall;
028    import cascading.tuple.Fields;
029    import cascading.tuple.Tuple;
030    import org.slf4j.Logger;
031    import org.slf4j.LoggerFactory;
032    
033    /**
034     * Class RegexMatcher is the base class for common regular expression operations.
035     * <p/>
036     * This operation uses {@link java.util.regex.Matcher} internally, specifically the method {@link java.util.regex.Matcher#find()}.
037     *
038     * @see java.util.regex.Matcher
039     * @see java.util.regex.Pattern
040     */
041    public class RegexMatcher extends RegexOperation<Matcher>
042      {
043      /** Field LOG */
044      private static final Logger LOG = LoggerFactory.getLogger( RegexMatcher.class );
045    
046      /** Field removeMatch */
047      protected final boolean negateMatch;
048    
049      @ConstructorProperties({"patternString"})
050      protected RegexMatcher( String patternString )
051        {
052        super( patternString );
053        this.negateMatch = false;
054        }
055    
056      @ConstructorProperties({"patternString", "negateMatch"})
057      protected RegexMatcher( String patternString, boolean negateMatch )
058        {
059        super( patternString );
060        this.negateMatch = negateMatch;
061        }
062    
063      @ConstructorProperties({"fieldDeclaration", "patternString"})
064      protected RegexMatcher( Fields fieldDeclaration, String patternString )
065        {
066        super( ANY, fieldDeclaration, patternString );
067        this.negateMatch = false;
068    
069        verify();
070        }
071    
072      @ConstructorProperties({"fieldDeclaration", "patternString", "negateMatch"})
073      protected RegexMatcher( Fields fieldDeclaration, String patternString, boolean negateMatch )
074        {
075        super( ANY, fieldDeclaration, patternString );
076        this.negateMatch = negateMatch;
077    
078        verify();
079        }
080    
081      public boolean isNegateMatch()
082        {
083        return negateMatch;
084        }
085    
086      private void verify()
087        {
088        if( fieldDeclaration.size() != 1 )
089          throw new IllegalArgumentException( "num fields in fieldDeclaration must be one, found: " + fieldDeclaration.printVerbose() );
090        }
091    
092      @Override
093      public void prepare( FlowProcess flowProcess, OperationCall<Matcher> operationCall )
094        {
095        operationCall.setContext( getPattern().matcher( "" ) );
096        }
097    
098      /**
099       * Method matchWholeTuple ...
100       *
101       * @param matcher
102       * @param input   of type Tuple @return boolean
103       */
104      protected boolean matchWholeTuple( Matcher matcher, Tuple input )
105        {
106        matcher.reset( input.toString( "\t", false ) );
107    
108        boolean matchFound = matcher.find();
109    
110        LOG.debug( "pattern: {}, matches: {}", getPatternString(), matchFound );
111    
112        return matchFound == negateMatch;
113        }
114    
115      /**
116       * Method matchEachElement ...
117       *
118       * @param matcher
119       * @param input   of type Tuple @return boolean
120       */
121      protected boolean matchEachElement( Matcher matcher, Tuple input )
122        {
123        return matchEachElementPos( matcher, input ) != -1;
124        }
125    
126      protected int matchEachElementPos( Matcher matcher, Tuple input )
127        {
128        int pos = 0;
129        for( Object value : input )
130          {
131          if( value == null )
132            value = "";
133    
134          matcher.reset( value.toString() );
135    
136          boolean matchFound = matcher.find();
137    
138          if( LOG.isDebugEnabled() )
139            LOG.debug( "pattern: " + getPatternString() + ", matches: " + matchFound + ", element: '" + value + "'" );
140    
141          if( matchFound == negateMatch )
142            return pos;
143    
144          pos++;
145          }
146    
147        return -1;
148        }
149    
150      @Override
151      public boolean equals( Object object )
152        {
153        if( this == object )
154          return true;
155        if( !( object instanceof RegexMatcher ) )
156          return false;
157        if( !super.equals( object ) )
158          return false;
159    
160        RegexMatcher that = (RegexMatcher) object;
161    
162        if( negateMatch != that.negateMatch )
163          return false;
164    
165        return true;
166        }
167    
168      @Override
169      public int hashCode()
170        {
171        int result = super.hashCode();
172        result = 31 * result + ( negateMatch ? 1 : 0 );
173        return result;
174        }
175      }