001 /* 002 * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 003 * 004 * Project and contact information: http://www.cascading.org/ 005 * 006 * This file is part of the Cascading project. 007 * 008 * Licensed under the Apache License, Version 2.0 (the "License"); 009 * you may not use this file except in compliance with the License. 010 * You may obtain a copy of the License at 011 * 012 * http://www.apache.org/licenses/LICENSE-2.0 013 * 014 * Unless required by applicable law or agreed to in writing, software 015 * distributed under the License is distributed on an "AS IS" BASIS, 016 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 017 * See the License for the specific language governing permissions and 018 * limitations under the License. 019 */ 020 021 package cascading.operation.regex; 022 023 import java.beans.ConstructorProperties; 024 import java.util.regex.Matcher; 025 026 import cascading.flow.FlowProcess; 027 import cascading.operation.OperationCall; 028 import cascading.tuple.Fields; 029 import cascading.tuple.Tuple; 030 import cascading.tuple.TupleEntry; 031 import org.slf4j.Logger; 032 import org.slf4j.LoggerFactory; 033 034 /** 035 * Class RegexMatcher is the base class for common regular expression operations. 036 * <p/> 037 * This operation uses {@link java.util.regex.Matcher} internally, specifically the method {@link java.util.regex.Matcher#find()}. 038 * 039 * @see java.util.regex.Matcher 040 * @see java.util.regex.Pattern 041 */ 042 public class RegexMatcher extends RegexOperation<Matcher> 043 { 044 /** Field LOG */ 045 private static final Logger LOG = LoggerFactory.getLogger( RegexMatcher.class ); 046 047 /** Field removeMatch */ 048 protected final boolean negateMatch; 049 050 @ConstructorProperties({"patternString"}) 051 protected RegexMatcher( String patternString ) 052 { 053 super( patternString ); 054 this.negateMatch = false; 055 } 056 057 @ConstructorProperties({"patternString", "negateMatch"}) 058 protected RegexMatcher( String patternString, boolean negateMatch ) 059 { 060 super( patternString ); 061 this.negateMatch = negateMatch; 062 } 063 064 @ConstructorProperties({"fieldDeclaration", "patternString"}) 065 protected RegexMatcher( Fields fieldDeclaration, String patternString ) 066 { 067 super( ANY, fieldDeclaration, patternString ); 068 this.negateMatch = false; 069 070 verify(); 071 } 072 073 @ConstructorProperties({"fieldDeclaration", "patternString", "negateMatch"}) 074 protected RegexMatcher( Fields fieldDeclaration, String patternString, boolean negateMatch ) 075 { 076 super( ANY, fieldDeclaration, patternString ); 077 this.negateMatch = negateMatch; 078 079 verify(); 080 } 081 082 public boolean isNegateMatch() 083 { 084 return negateMatch; 085 } 086 087 private void verify() 088 { 089 if( fieldDeclaration.size() != 1 ) 090 throw new IllegalArgumentException( "num fields in fieldDeclaration must be one, found: " + fieldDeclaration.printVerbose() ); 091 } 092 093 @Override 094 public void prepare( FlowProcess flowProcess, OperationCall<Matcher> operationCall ) 095 { 096 operationCall.setContext( getPattern().matcher( "" ) ); 097 } 098 099 protected boolean matchWholeTuple( Matcher matcher, TupleEntry input ) 100 { 101 return matchWholeTuple( matcher, input.getTuple() ); 102 } 103 104 /** 105 * @deprecated use {@link #matchWholeTuple(java.util.regex.Matcher, cascading.tuple.TupleEntry)} 106 */ 107 @Deprecated 108 protected boolean matchWholeTuple( Matcher matcher, Tuple input ) 109 { 110 matcher.reset( input.toString( "\t", false ) ); 111 112 boolean matchFound = matcher.find(); 113 114 if( LOG.isDebugEnabled() ) 115 LOG.debug( "pattern: {}, matches: {}", getPatternString(), matchFound ); 116 117 return matchFound == negateMatch; 118 } 119 120 protected boolean matchEachElement( Matcher matcher, TupleEntry input ) 121 { 122 return matchEachElement( matcher, input.getTuple() ); 123 } 124 125 /** 126 * @deprecated use {@link #matchEachElementPos(java.util.regex.Matcher, cascading.tuple.TupleEntry)} 127 */ 128 @Deprecated 129 protected boolean matchEachElement( Matcher matcher, Tuple input ) 130 { 131 return matchEachElementPos( matcher, input ) != -1; 132 } 133 134 protected int matchEachElementPos( Matcher matcher, TupleEntry input ) 135 { 136 return matchEachElementPos( matcher, input.getTuple() ); 137 } 138 139 /** 140 * @deprecated use {@link #matchEachElementPos(java.util.regex.Matcher, cascading.tuple.TupleEntry)} 141 */ 142 @Deprecated 143 protected int matchEachElementPos( Matcher matcher, Tuple input ) 144 { 145 int pos = 0; 146 for( Object value : input ) 147 { 148 if( value == null ) 149 value = ""; 150 151 matcher.reset( value.toString() ); 152 153 boolean matchFound = matcher.find(); 154 155 if( LOG.isDebugEnabled() ) 156 LOG.debug( "pattern: " + getPatternString() + ", matches: " + matchFound + ", element: '" + value + "'" ); 157 158 if( matchFound == negateMatch ) 159 return pos; 160 161 pos++; 162 } 163 164 return -1; 165 } 166 167 @Override 168 public boolean equals( Object object ) 169 { 170 if( this == object ) 171 return true; 172 if( !( object instanceof RegexMatcher ) ) 173 return false; 174 if( !super.equals( object ) ) 175 return false; 176 177 RegexMatcher that = (RegexMatcher) object; 178 179 if( negateMatch != that.negateMatch ) 180 return false; 181 182 return true; 183 } 184 185 @Override 186 public int hashCode() 187 { 188 int result = super.hashCode(); 189 result = 31 * result + ( negateMatch ? 1 : 0 ); 190 return result; 191 } 192 }