001/* 002 * Copyright (c) 2007-2017 Xplenty, Inc. All Rights Reserved. 003 * 004 * Project and contact information: http://www.cascading.org/ 005 * 006 * This file is part of the Cascading project. 007 * 008 * Licensed under the Apache License, Version 2.0 (the "License"); 009 * you may not use this file except in compliance with the License. 010 * You may obtain a copy of the License at 011 * 012 * http://www.apache.org/licenses/LICENSE-2.0 013 * 014 * Unless required by applicable law or agreed to in writing, software 015 * distributed under the License is distributed on an "AS IS" BASIS, 016 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 017 * See the License for the specific language governing permissions and 018 * limitations under the License. 019 */ 020 021package cascading.operation.xml; 022 023import javax.xml.parsers.DocumentBuilder; 024import javax.xml.xpath.XPathConstants; 025import javax.xml.xpath.XPathExpressionException; 026 027import cascading.flow.FlowProcess; 028import cascading.operation.Function; 029import cascading.operation.FunctionCall; 030import cascading.operation.OperationException; 031import cascading.tuple.Fields; 032import cascading.tuple.Tuple; 033import cascading.tuple.TupleEntry; 034import cascading.util.Pair; 035import org.slf4j.Logger; 036import org.slf4j.LoggerFactory; 037import org.w3c.dom.Document; 038import org.w3c.dom.NodeList; 039 040/** 041 * XPathGenerator is a Generator function that will emit a new Tuple for every Node returned by 042 * the given XPath expression. 043 */ 044public class XPathGenerator extends XPathOperation implements Function<Pair<DocumentBuilder, Tuple>> 045 { 046 /** Field LOG */ 047 private static final Logger LOG = LoggerFactory.getLogger( XPathGenerator.class ); 048 049 /** 050 * Constructor XPathGenerator creates a new XPathGenerator instance. 051 * 052 * @param fieldDeclaration of type Fields 053 * @param namespaces of type String[][] 054 * @param paths of type String... 055 */ 056 public XPathGenerator( Fields fieldDeclaration, String[][] namespaces, String... paths ) 057 { 058 super( 1, fieldDeclaration, namespaces, paths ); 059 060 if( fieldDeclaration.size() != 1 ) 061 throw new IllegalArgumentException( "only one field can be declared: " + fieldDeclaration.print() ); 062 063 } 064 065 @Override 066 public void operate( FlowProcess flowProcess, FunctionCall<Pair<DocumentBuilder, Tuple>> functionCall ) 067 { 068 TupleEntry input = functionCall.getArguments(); 069 070 if( input.getObject( 0 ) == null || !( input.getObject( 0 ) instanceof String ) ) 071 return; 072 073 String value = input.getString( 0 ); 074 075 if( value.length() == 0 ) // intentionally not trim()ing this value 076 return; 077 078 Document document = parseDocument( functionCall.getContext().getLhs(), value ); 079 080 for( int i = 0; i < getExpressions().size(); i++ ) 081 { 082 try 083 { 084 NodeList nodeList = (NodeList) getExpressions().get( i ).evaluate( document, XPathConstants.NODESET ); 085 086 if( LOG.isDebugEnabled() ) 087 LOG.debug( "xpath: {} was: {}", paths[ i ], nodeList != null && nodeList.getLength() != 0 ); 088 089 if( nodeList == null ) 090 continue; 091 092 for( int j = 0; j < nodeList.getLength(); j++ ) 093 { 094 functionCall.getContext().getRhs().set( 0, writeAsXML( nodeList.item( j ) ) ); 095 functionCall.getOutputCollector().add( functionCall.getContext().getRhs() ); 096 } 097 098 } 099 catch( XPathExpressionException exception ) 100 { 101 throw new OperationException( "could not evaluate xpath expression: " + paths[ i ], exception ); 102 } 103 } 104 } 105 }