001 /* 002 * Copyright (c) 2007-2014 Concurrent, Inc. All Rights Reserved. 003 * 004 * Project and contact information: http://www.cascading.org/ 005 * 006 * This file is part of the Cascading project. 007 * 008 * Licensed under the Apache License, Version 2.0 (the "License"); 009 * you may not use this file except in compliance with the License. 010 * You may obtain a copy of the License at 011 * 012 * http://www.apache.org/licenses/LICENSE-2.0 013 * 014 * Unless required by applicable law or agreed to in writing, software 015 * distributed under the License is distributed on an "AS IS" BASIS, 016 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 017 * See the License for the specific language governing permissions and 018 * limitations under the License. 019 */ 020 021 package cascading.operation.regex; 022 023 import java.beans.ConstructorProperties; 024 import java.util.regex.Matcher; 025 026 import cascading.flow.FlowProcess; 027 import cascading.operation.Filter; 028 import cascading.operation.FilterCall; 029 030 /** 031 * Class RegexFilter will apply the regex patternString against every input Tuple value and filter 032 * the Tuple stream accordingly. 033 * <p/> 034 * By default, Tuples that match the given pattern are kept, and Tuples that do not 035 * match are filtered out. This can be changed by setting removeMatch to true. 036 * <p/> 037 * Also, by default, the whole Tuple is matched against the given patternString (tab delimited). If matchEachElement 038 * is set to true, the pattern is applied to each Tuple value individually. 039 * <p/> 040 * This operation uses {@link java.util.regex.Matcher} internally, specifically the method {@link java.util.regex.Matcher#find()}. 041 * 042 * @see java.util.regex.Matcher 043 * @see java.util.regex.Pattern 044 */ 045 public class RegexFilter extends RegexMatcher implements Filter<Matcher> 046 { 047 /** Field matchEachElement */ 048 protected final boolean matchEachElement; 049 050 /** 051 * Constructor RegexFilter creates a new RegexFilter instance. 052 * 053 * @param patternString of type String 054 */ 055 @ConstructorProperties({"patternString"}) 056 public RegexFilter( String patternString ) 057 { 058 super( patternString ); 059 this.matchEachElement = false; 060 } 061 062 /** 063 * Constructor RegexFilter creates a new RegexFilter instance. 064 * 065 * @param patternString of type String 066 * @param removeMatch of type boolean 067 */ 068 @ConstructorProperties({"patternString", "removeMatch"}) 069 public RegexFilter( String patternString, boolean removeMatch ) 070 { 071 super( patternString, removeMatch ); 072 this.matchEachElement = false; 073 074 } 075 076 /** 077 * @param patternString of type String 078 * @param removeMatch of type boolean, set to true if a match should be filtered 079 * @param matchEachElement of type boolean, set to true if each element should be matched individually 080 */ 081 @ConstructorProperties({"patternString", "removeMatch", "matchEachElement"}) 082 public RegexFilter( String patternString, boolean removeMatch, boolean matchEachElement ) 083 { 084 super( patternString, removeMatch ); 085 this.matchEachElement = matchEachElement; 086 } 087 088 public boolean isMatchEachElement() 089 { 090 return matchEachElement; 091 } 092 093 @Override 094 public boolean isRemove( FlowProcess flowProcess, FilterCall<Matcher> filterCall ) 095 { 096 if( matchEachElement ) 097 return matchEachElement( filterCall.getContext(), filterCall.getArguments().getTuple() ); 098 else 099 return matchWholeTuple( filterCall.getContext(), filterCall.getArguments().getTuple() ); 100 } 101 102 @Override 103 public boolean equals( Object object ) 104 { 105 if( this == object ) 106 return true; 107 if( !( object instanceof RegexFilter ) ) 108 return false; 109 if( !super.equals( object ) ) 110 return false; 111 112 RegexFilter that = (RegexFilter) object; 113 114 if( matchEachElement != that.matchEachElement ) 115 return false; 116 117 return true; 118 } 119 120 @Override 121 public int hashCode() 122 { 123 int result = super.hashCode(); 124 result = 31 * result + ( matchEachElement ? 1 : 0 ); 125 return result; 126 } 127 }