001/*
002 * Copyright (c) 2007-2017 Xplenty, Inc. All Rights Reserved.
003 *
004 * Project and contact information: http://www.cascading.org/
005 *
006 * This file is part of the Cascading project.
007 *
008 * Licensed under the Apache License, Version 2.0 (the "License");
009 * you may not use this file except in compliance with the License.
010 * You may obtain a copy of the License at
011 *
012 *     http://www.apache.org/licenses/LICENSE-2.0
013 *
014 * Unless required by applicable law or agreed to in writing, software
015 * distributed under the License is distributed on an "AS IS" BASIS,
016 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
017 * See the License for the specific language governing permissions and
018 * limitations under the License.
019 */
020
021package cascading.operation.xml;
022
023import javax.xml.parsers.DocumentBuilder;
024import javax.xml.xpath.XPathConstants;
025import javax.xml.xpath.XPathExpressionException;
026
027import cascading.flow.FlowProcess;
028import cascading.operation.Function;
029import cascading.operation.FunctionCall;
030import cascading.operation.OperationException;
031import cascading.tuple.Fields;
032import cascading.tuple.Tuple;
033import cascading.tuple.TupleEntry;
034import cascading.util.Pair;
035import org.slf4j.Logger;
036import org.slf4j.LoggerFactory;
037import org.w3c.dom.Document;
038import org.w3c.dom.NodeList;
039
040/**
041 * XPathGenerator is a Generator function that will emit a new Tuple for every Node returned by
042 * the given XPath expression.
043 */
044public class XPathGenerator extends XPathOperation implements Function<Pair<DocumentBuilder, Tuple>>
045  {
046  /** Field LOG */
047  private static final Logger LOG = LoggerFactory.getLogger( XPathGenerator.class );
048
049  /**
050   * Constructor XPathGenerator creates a new XPathGenerator instance.
051   *
052   * @param fieldDeclaration of type Fields
053   * @param namespaces       of type String[][]
054   * @param paths            of type String...
055   */
056  public XPathGenerator( Fields fieldDeclaration, String[][] namespaces, String... paths )
057    {
058    super( 1, fieldDeclaration, namespaces, paths );
059
060    if( fieldDeclaration.size() != 1 )
061      throw new IllegalArgumentException( "only one field can be declared: " + fieldDeclaration.print() );
062
063    }
064
065  @Override
066  public void operate( FlowProcess flowProcess, FunctionCall<Pair<DocumentBuilder, Tuple>> functionCall )
067    {
068    TupleEntry input = functionCall.getArguments();
069
070    if( input.getObject( 0 ) == null || !( input.getObject( 0 ) instanceof String ) )
071      return;
072
073    String value = input.getString( 0 );
074
075    if( value.length() == 0 ) // intentionally not trim()ing this value
076      return;
077
078    Document document = parseDocument( functionCall.getContext().getLhs(), value );
079
080    for( int i = 0; i < getExpressions().size(); i++ )
081      {
082      try
083        {
084        NodeList nodeList = (NodeList) getExpressions().get( i ).evaluate( document, XPathConstants.NODESET );
085
086        if( LOG.isDebugEnabled() )
087          LOG.debug( "xpath: {} was: {}", paths[ i ], nodeList != null && nodeList.getLength() != 0 );
088
089        if( nodeList == null )
090          continue;
091
092        for( int j = 0; j < nodeList.getLength(); j++ )
093          {
094          functionCall.getContext().getRhs().set( 0, writeAsXML( nodeList.item( j ) ) );
095          functionCall.getOutputCollector().add( functionCall.getContext().getRhs() );
096          }
097
098        }
099      catch( XPathExpressionException exception )
100        {
101        throw new OperationException( "could not evaluate xpath expression: " + paths[ i ], exception );
102        }
103      }
104    }
105  }