001/*
002 * Copyright (c) 2007-2017 Xplenty, Inc. All Rights Reserved.
003 *
004 * Project and contact information: http://www.cascading.org/
005 *
006 * This file is part of the Cascading project.
007 *
008 * Licensed under the Apache License, Version 2.0 (the "License");
009 * you may not use this file except in compliance with the License.
010 * You may obtain a copy of the License at
011 *
012 *     http://www.apache.org/licenses/LICENSE-2.0
013 *
014 * Unless required by applicable law or agreed to in writing, software
015 * distributed under the License is distributed on an "AS IS" BASIS,
016 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
017 * See the License for the specific language governing permissions and
018 * limitations under the License.
019 */
020
021package cascading.operation.xml;
022
023import java.io.IOException;
024import java.io.StringReader;
025import java.io.StringWriter;
026import java.util.ArrayList;
027import java.util.Arrays;
028import java.util.HashMap;
029import java.util.Iterator;
030import java.util.List;
031import java.util.Map;
032import javax.xml.namespace.NamespaceContext;
033import javax.xml.parsers.DocumentBuilder;
034import javax.xml.parsers.DocumentBuilderFactory;
035import javax.xml.parsers.ParserConfigurationException;
036import javax.xml.transform.OutputKeys;
037import javax.xml.transform.Result;
038import javax.xml.transform.Source;
039import javax.xml.transform.Transformer;
040import javax.xml.transform.TransformerConfigurationException;
041import javax.xml.transform.TransformerException;
042import javax.xml.transform.TransformerFactory;
043import javax.xml.transform.dom.DOMSource;
044import javax.xml.transform.stream.StreamResult;
045import javax.xml.xpath.XPath;
046import javax.xml.xpath.XPathExpression;
047import javax.xml.xpath.XPathExpressionException;
048import javax.xml.xpath.XPathFactory;
049
050import cascading.flow.FlowProcess;
051import cascading.operation.BaseOperation;
052import cascading.operation.OperationCall;
053import cascading.operation.OperationException;
054import cascading.tuple.Fields;
055import cascading.tuple.Tuple;
056import cascading.util.Pair;
057import org.slf4j.Logger;
058import org.slf4j.LoggerFactory;
059import org.w3c.dom.Document;
060import org.w3c.dom.Node;
061import org.xml.sax.InputSource;
062import org.xml.sax.SAXException;
063
064/** Class XPathOperation is the base class for all XPath operations. */
065public class XPathOperation extends BaseOperation<Pair<DocumentBuilder, Tuple>>
066  {
067  /** Field NAMESPACE_XHTML */
068  public static final String[][] NAMESPACE_XHTML = new String[][]{
069    new String[]{"xhtml", "http://www.w3.org/1999/xhtml"}};
070
071  /** Field LOG */
072  private static final Logger LOG = LoggerFactory.getLogger( XPathOperation.class );
073
074  /** Field namespaces */
075  protected final String[][] namespaces;
076  /** Field paths */
077  protected final String[] paths;
078
079  /** Field xPath */
080  private transient XPath xPath;
081  /** Field transformer */
082  private transient Transformer transformer;
083  /** Field expressions */
084  private transient List<XPathExpression> expressions;
085
086  protected XPathOperation( int numArgs, Fields fieldDeclaration, String[][] namespaces, String... paths )
087    {
088    super( numArgs, fieldDeclaration );
089    this.namespaces = namespaces;
090    this.paths = paths;
091
092    if( paths == null || paths.length == 0 )
093      throw new IllegalArgumentException( "a xpath expression must be given" );
094    }
095
096  protected XPathOperation( int numArgs, String[][] namespaces, String... paths )
097    {
098    super( numArgs );
099    this.namespaces = namespaces;
100    this.paths = paths;
101
102    if( paths == null || paths.length == 0 )
103      throw new IllegalArgumentException( "a xpath expression must be given" );
104    }
105
106  @Override
107  public void prepare( FlowProcess flowProcess, OperationCall<Pair<DocumentBuilder, Tuple>> operationCall )
108    {
109    try
110      {
111      DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
112
113      factory.setNamespaceAware( true );
114
115      operationCall.setContext( new Pair<DocumentBuilder, Tuple>( factory.newDocumentBuilder(), Tuple.size( 1 ) ) );
116      }
117    catch( ParserConfigurationException exception )
118      {
119      throw new OperationException( "could not create document builder", exception );
120      }
121    }
122
123  /**
124   * Method getXPath returns the XPath of this XPathOperation object.
125   *
126   * @return the XPath (type XPath) of this XPathOperation object.
127   */
128  public XPath getXPath()
129    {
130    if( xPath != null )
131      return xPath;
132
133    XPathFactory factory = XPathFactory.newInstance();
134
135    xPath = factory.newXPath();
136
137    if( namespaces != null )
138      {
139      MutableNamespaceContext namespaceContext = new MutableNamespaceContext();
140
141      for( String[] namespace : namespaces )
142        {
143        if( LOG.isDebugEnabled() )
144          LOG.debug( "adding namespace: {}:{}", namespace[ 0 ], namespace[ 1 ] );
145
146        namespaceContext.addNamespace( namespace[ 0 ], namespace[ 1 ] );
147        }
148
149      xPath.setNamespaceContext( namespaceContext );
150      }
151
152    return xPath;
153    }
154
155  /**
156   * Method getTransformer returns the transformer of this XPathOperation object.
157   *
158   * @return the transformer (type Transformer) of this XPathOperation object.
159   * @throws TransformerConfigurationException when
160   */
161  public Transformer getTransformer() throws TransformerConfigurationException
162    {
163    if( transformer != null )
164      return transformer;
165
166    transformer = TransformerFactory.newInstance().newTransformer();
167
168    transformer.setOutputProperty( OutputKeys.OMIT_XML_DECLARATION, "yes" );
169
170    return transformer;
171    }
172
173  protected String writeAsXML( Node node )
174    {
175    StringWriter stringWriter = new StringWriter();
176    Result result = new StreamResult( stringWriter );
177    Source source = new DOMSource( node );
178
179    try
180      {
181      getTransformer().transform( source, result );
182      }
183    catch( TransformerException exception )
184      {
185      throw new OperationException( "writing to xml failed", exception );
186      }
187
188    return stringWriter.toString();
189    }
190
191  protected List<XPathExpression> getExpressions()
192    {
193    if( expressions != null )
194      return expressions;
195
196    expressions = new ArrayList<XPathExpression>();
197
198    for( String path : paths )
199      {
200      try
201        {
202        expressions.add( getXPath().compile( path ) );
203        }
204      catch( XPathExpressionException exception )
205        {
206        throw new OperationException( "could not compile xpath expression", exception );
207        }
208      }
209
210    return expressions;
211    }
212
213  class MutableNamespaceContext implements NamespaceContext
214    {
215
216    private final Map<String, String> map = new HashMap<String, String>();
217
218    public MutableNamespaceContext()
219      {
220      }
221
222    public void addNamespace( String prefix, String namespaceURI )
223      {
224      map.put( prefix, namespaceURI );
225      }
226
227    public String getNamespaceURI( String prefix )
228      {
229      return map.get( prefix );
230      }
231
232    public String getPrefix( String namespaceURI )
233      {
234      for( String prefix : map.keySet() )
235        {
236        if( map.get( prefix ).equals( namespaceURI ) )
237          {
238          return prefix;
239          }
240        }
241      return null;
242      }
243
244    public Iterator getPrefixes( String namespaceURI )
245      {
246      List<String> prefixes = new ArrayList<String>();
247
248      for( String prefix : map.keySet() )
249        {
250        if( map.get( prefix ).equals( namespaceURI ) )
251          prefixes.add( prefix );
252        }
253
254      return prefixes.iterator();
255      }
256    }
257
258  protected Document parseDocument( DocumentBuilder documentBuilder, String argument )
259    {
260    Document document;
261    try
262      {
263      document = documentBuilder.parse( new InputSource( new StringReader( argument ) ) );
264      }
265    catch( SAXException exception )
266      {
267      throw new OperationException( "could not parse xml document", exception );
268      }
269    catch( IOException exception )
270      {
271      throw new OperationException( "could not parse xml document", exception );
272      }
273    return document;
274    }
275
276  @Override
277  public boolean equals( Object object )
278    {
279    if( this == object )
280      return true;
281    if( !( object instanceof XPathOperation ) )
282      return false;
283    if( !super.equals( object ) )
284      return false;
285
286    XPathOperation that = (XPathOperation) object;
287
288    if( expressions != null ? !expressions.equals( that.expressions ) : that.expressions != null )
289      return false;
290    if( !Arrays.equals( paths, that.paths ) )
291      return false;
292
293    return true;
294    }
295
296  @Override
297  public int hashCode()
298    {
299    int result = super.hashCode();
300    result = 31 * result + ( paths != null ? Arrays.hashCode( paths ) : 0 );
301    result = 31 * result + ( expressions != null ? expressions.hashCode() : 0 );
302    return result;
303    }
304  }