001    /*
002     * Copyright (c) 2007-2014 Concurrent, Inc. All Rights Reserved.
003     *
004     * Project and contact information: http://www.cascading.org/
005     *
006     * This file is part of the Cascading project.
007     *
008     * Licensed under the Apache License, Version 2.0 (the "License");
009     * you may not use this file except in compliance with the License.
010     * You may obtain a copy of the License at
011     *
012     *     http://www.apache.org/licenses/LICENSE-2.0
013     *
014     * Unless required by applicable law or agreed to in writing, software
015     * distributed under the License is distributed on an "AS IS" BASIS,
016     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
017     * See the License for the specific language governing permissions and
018     * limitations under the License.
019     */
020    
021    package cascading.operation.xml;
022    
023    import java.io.IOException;
024    import java.io.StringReader;
025    import java.io.StringWriter;
026    import java.util.ArrayList;
027    import java.util.Arrays;
028    import java.util.HashMap;
029    import java.util.Iterator;
030    import java.util.List;
031    import java.util.Map;
032    import javax.xml.namespace.NamespaceContext;
033    import javax.xml.parsers.DocumentBuilder;
034    import javax.xml.parsers.DocumentBuilderFactory;
035    import javax.xml.parsers.ParserConfigurationException;
036    import javax.xml.transform.OutputKeys;
037    import javax.xml.transform.Result;
038    import javax.xml.transform.Source;
039    import javax.xml.transform.Transformer;
040    import javax.xml.transform.TransformerConfigurationException;
041    import javax.xml.transform.TransformerException;
042    import javax.xml.transform.TransformerFactory;
043    import javax.xml.transform.dom.DOMSource;
044    import javax.xml.transform.stream.StreamResult;
045    import javax.xml.xpath.XPath;
046    import javax.xml.xpath.XPathExpression;
047    import javax.xml.xpath.XPathExpressionException;
048    import javax.xml.xpath.XPathFactory;
049    
050    import cascading.flow.FlowProcess;
051    import cascading.operation.BaseOperation;
052    import cascading.operation.OperationCall;
053    import cascading.operation.OperationException;
054    import cascading.tuple.Fields;
055    import cascading.tuple.Tuple;
056    import cascading.util.Pair;
057    import org.slf4j.Logger;
058    import org.slf4j.LoggerFactory;
059    import org.w3c.dom.Document;
060    import org.w3c.dom.Node;
061    import org.xml.sax.InputSource;
062    import org.xml.sax.SAXException;
063    
064    /** Class XPathOperation is the base class for all XPath operations. */
065    public class XPathOperation extends BaseOperation<Pair<DocumentBuilder, Tuple>>
066      {
067      /** Field NAMESPACE_XHTML */
068      public static final String[][] NAMESPACE_XHTML = new String[][]{
069        new String[]{"xhtml", "http://www.w3.org/1999/xhtml"}};
070    
071      /** Field LOG */
072      private static final Logger LOG = LoggerFactory.getLogger( XPathOperation.class );
073    
074      /** Field namespaces */
075      protected final String[][] namespaces;
076      /** Field paths */
077      protected final String[] paths;
078    
079      /** Field xPath */
080      private transient XPath xPath;
081      /** Field transformer */
082      private transient Transformer transformer;
083      /** Field expressions */
084      private transient List<XPathExpression> expressions;
085    
086      protected XPathOperation( int numArgs, Fields fieldDeclaration, String[][] namespaces, String... paths )
087        {
088        super( numArgs, fieldDeclaration );
089        this.namespaces = namespaces;
090        this.paths = paths;
091    
092        if( paths == null || paths.length == 0 )
093          throw new IllegalArgumentException( "a xpath expression must be given" );
094        }
095    
096      protected XPathOperation( int numArgs, String[][] namespaces, String... paths )
097        {
098        super( numArgs );
099        this.namespaces = namespaces;
100        this.paths = paths;
101    
102        if( paths == null || paths.length == 0 )
103          throw new IllegalArgumentException( "a xpath expression must be given" );
104        }
105    
106      @Override
107      public void prepare( FlowProcess flowProcess, OperationCall<Pair<DocumentBuilder, Tuple>> operationCall )
108        {
109        try
110          {
111          DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
112    
113          factory.setNamespaceAware( true );
114    
115          operationCall.setContext( new Pair<DocumentBuilder, Tuple>( factory.newDocumentBuilder(), Tuple.size( 1 ) ) );
116          }
117        catch( ParserConfigurationException exception )
118          {
119          throw new OperationException( "could not create document builder", exception );
120          }
121        }
122    
123      /**
124       * Method getXPath returns the XPath of this XPathOperation object.
125       *
126       * @return the XPath (type XPath) of this XPathOperation object.
127       */
128      public XPath getXPath()
129        {
130        if( xPath != null )
131          return xPath;
132    
133        XPathFactory factory = XPathFactory.newInstance();
134    
135        xPath = factory.newXPath();
136    
137        if( namespaces != null )
138          {
139          MutableNamespaceContext namespaceContext = new MutableNamespaceContext();
140    
141          for( String[] namespace : namespaces )
142            {
143            if( LOG.isDebugEnabled() )
144              LOG.debug( "adding namespace: {}:{}", namespace[ 0 ], namespace[ 1 ] );
145    
146            namespaceContext.addNamespace( namespace[ 0 ], namespace[ 1 ] );
147            }
148    
149          xPath.setNamespaceContext( namespaceContext );
150          }
151    
152        return xPath;
153        }
154    
155      /**
156       * Method getTransformer returns the transformer of this XPathOperation object.
157       *
158       * @return the transformer (type Transformer) of this XPathOperation object.
159       * @throws TransformerConfigurationException
160       *          when
161       */
162      public Transformer getTransformer() throws TransformerConfigurationException
163        {
164        if( transformer != null )
165          return transformer;
166    
167        transformer = TransformerFactory.newInstance().newTransformer();
168    
169        transformer.setOutputProperty( OutputKeys.OMIT_XML_DECLARATION, "yes" );
170    
171        return transformer;
172        }
173    
174      protected String writeAsXML( Node node )
175        {
176        StringWriter stringWriter = new StringWriter();
177        Result result = new StreamResult( stringWriter );
178        Source source = new DOMSource( node );
179    
180        try
181          {
182          getTransformer().transform( source, result );
183          }
184        catch( TransformerException exception )
185          {
186          throw new OperationException( "writing to xml failed", exception );
187          }
188    
189        return stringWriter.toString();
190        }
191    
192      protected List<XPathExpression> getExpressions()
193        {
194        if( expressions != null )
195          return expressions;
196    
197        expressions = new ArrayList<XPathExpression>();
198    
199        for( String path : paths )
200          {
201          try
202            {
203            expressions.add( getXPath().compile( path ) );
204            }
205          catch( XPathExpressionException exception )
206            {
207            throw new OperationException( "could not compile xpath expression", exception );
208            }
209          }
210    
211        return expressions;
212        }
213    
214      class MutableNamespaceContext implements NamespaceContext
215        {
216    
217        private final Map<String, String> map = new HashMap<String, String>();
218    
219        public MutableNamespaceContext()
220          {
221          }
222    
223        public void addNamespace( String prefix, String namespaceURI )
224          {
225          map.put( prefix, namespaceURI );
226          }
227    
228        public String getNamespaceURI( String prefix )
229          {
230          return map.get( prefix );
231          }
232    
233        public String getPrefix( String namespaceURI )
234          {
235          for( String prefix : map.keySet() )
236            {
237            if( map.get( prefix ).equals( namespaceURI ) )
238              {
239              return prefix;
240              }
241            }
242          return null;
243          }
244    
245        public Iterator getPrefixes( String namespaceURI )
246          {
247          List<String> prefixes = new ArrayList<String>();
248    
249          for( String prefix : map.keySet() )
250            {
251            if( map.get( prefix ).equals( namespaceURI ) )
252              prefixes.add( prefix );
253            }
254    
255          return prefixes.iterator();
256          }
257        }
258    
259      protected Document parseDocument( DocumentBuilder documentBuilder, String argument )
260        {
261        Document document;
262        try
263          {
264          document = documentBuilder.parse( new InputSource( new StringReader( argument ) ) );
265          }
266        catch( SAXException exception )
267          {
268          throw new OperationException( "could not parse xml document", exception );
269          }
270        catch( IOException exception )
271          {
272          throw new OperationException( "could not parse xml document", exception );
273          }
274        return document;
275        }
276    
277      @Override
278      public boolean equals( Object object )
279        {
280        if( this == object )
281          return true;
282        if( !( object instanceof XPathOperation ) )
283          return false;
284        if( !super.equals( object ) )
285          return false;
286    
287        XPathOperation that = (XPathOperation) object;
288    
289        if( expressions != null ? !expressions.equals( that.expressions ) : that.expressions != null )
290          return false;
291        if( !Arrays.equals( paths, that.paths ) )
292          return false;
293    
294        return true;
295        }
296    
297      @Override
298      public int hashCode()
299        {
300        int result = super.hashCode();
301        result = 31 * result + ( paths != null ? Arrays.hashCode( paths ) : 0 );
302        result = 31 * result + ( expressions != null ? expressions.hashCode() : 0 );
303        return result;
304        }
305      }