001/* 002 * Copyright (c) 2007-2016 Concurrent, Inc. All Rights Reserved. 003 * 004 * Project and contact information: http://www.cascading.org/ 005 * 006 * This file is part of the Cascading project. 007 * 008 * Licensed under the Apache License, Version 2.0 (the "License"); 009 * you may not use this file except in compliance with the License. 010 * You may obtain a copy of the License at 011 * 012 * http://www.apache.org/licenses/LICENSE-2.0 013 * 014 * Unless required by applicable law or agreed to in writing, software 015 * distributed under the License is distributed on an "AS IS" BASIS, 016 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 017 * See the License for the specific language governing permissions and 018 * limitations under the License. 019 */ 020 021package cascading.operation.regex; 022 023import java.beans.ConstructorProperties; 024import java.util.regex.Matcher; 025 026import cascading.flow.FlowProcess; 027import cascading.operation.Function; 028import cascading.operation.FunctionCall; 029import cascading.operation.OperationCall; 030import cascading.tuple.Fields; 031import cascading.tuple.Tuple; 032import cascading.util.Pair; 033 034/** 035 * Class RegexGenerator will emit a new Tuple for every matched regex group. 036 * <p/> 037 * Note a {@code null} valued argument passed to the parser will be converted to an empty string ({@code ""}) before 038 * the regex is applied. 039 * <p/> 040 * Any Object value will be coerced to a String type if type information is provided. See the 041 * {@link cascading.tuple.type.CoercibleType} interface to control how custom Object types are converted to String 042 * values. 043 */ 044public class RegexGenerator extends RegexOperation<Pair<Matcher, Tuple>> implements Function<Pair<Matcher, Tuple>> 045 { 046 /** 047 * Constructor RegexGenerator creates a new RegexGenerator instance. 048 * 049 * @param patternString of type String 050 */ 051 @ConstructorProperties({"patternString"}) 052 public RegexGenerator( String patternString ) 053 { 054 super( 1, Fields.size( 1 ), patternString ); 055 } 056 057 /** 058 * Constructor RegexGenerator creates a new RegexGenerator instance. 059 * 060 * @param fieldDeclaration of type Fields 061 * @param patternString of type String 062 */ 063 @ConstructorProperties({"fieldDeclaration", "patternString"}) 064 public RegexGenerator( Fields fieldDeclaration, String patternString ) 065 { 066 super( 1, fieldDeclaration, patternString ); 067 068 if( fieldDeclaration.size() != 1 ) 069 throw new IllegalArgumentException( "fieldDeclaration may only declare one field, was " + fieldDeclaration.print() ); 070 } 071 072 @Override 073 public void prepare( FlowProcess flowProcess, OperationCall<Pair<Matcher, Tuple>> operationCall ) 074 { 075 operationCall.setContext( new Pair<Matcher, Tuple>( getPattern().matcher( "" ), Tuple.size( 1 ) ) ); 076 } 077 078 @Override 079 public void operate( FlowProcess flowProcess, FunctionCall<Pair<Matcher, Tuple>> functionCall ) 080 { 081 String value = functionCall.getArguments().getString( 0 ); 082 083 if( value == null ) 084 value = ""; 085 086 Matcher matcher = functionCall.getContext().getLhs().reset( value ); 087 088 while( matcher.find() ) 089 { 090 functionCall.getContext().getRhs().set( 0, matcher.group() ); 091 functionCall.getOutputCollector().add( functionCall.getContext().getRhs() ); 092 } 093 } 094 }