/*
 * Copyright (c) 2007-2017 Xplenty, Inc. All Rights Reserved.
 *
 * Project and contact information: http://www.cascading.org/
 *
 * This file is part of the Cascading project.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cascading.operation.xml;

import java.io.IOException;
import java.io.StringReader;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import javax.xml.namespace.NamespaceContext;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Result;
import javax.xml.transform.Source;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;

import cascading.flow.FlowProcess;
import cascading.operation.BaseOperation;
import cascading.operation.OperationCall;
import cascading.operation.OperationException;
import cascading.tuple.Fields;
import cascading.tuple.Tuple;
import cascading.util.Pair;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

/**
 * Class XPathOperation is the base class for all XPath operations.
 * <p>
 * It holds the configured XPath path strings and optional namespace bindings, and lazily creates the
 * {@link XPath}, {@link Transformer} and compiled {@link XPathExpression} instances derived from them.
 * Those derived objects are kept in {@code transient} fields and are rebuilt on demand; they take no
 * part in {@link #equals(Object)} or {@link #hashCode()}.
 */
public class XPathOperation extends BaseOperation<Pair<DocumentBuilder, Tuple>>
  {
  /** Field NAMESPACE_XHTML binds the prefix {@code xhtml} to the XHTML namespace URI. */
  public static final String[][] NAMESPACE_XHTML = new String[][]{
    new String[]{"xhtml", "http://www.w3.org/1999/xhtml"}};

  /** Field LOG */
  private static final Logger LOG = LoggerFactory.getLogger( XPathOperation.class );

  /** Field namespaces holds {@code {prefix, uri}} pairs, may be null when no bindings are required. */
  protected final String[][] namespaces;
  /** Field paths holds the XPath expressions as given to the constructor, never null or empty. */
  protected final String[] paths;

  /** Field xPath is created on first use by {@link #getXPath()}. */
  private transient XPath xPath;
  /** Field transformer is created on first use by {@link #getTransformer()}. */
  private transient Transformer transformer;
  /** Field expressions is created on first use by {@link #getExpressions()}. */
  private transient List<XPathExpression> expressions;

  /**
   * Constructor XPathOperation creates a new XPathOperation instance.
   *
   * @param numArgs          of type int, handed to the parent constructor
   * @param fieldDeclaration of type Fields, handed to the parent constructor
   * @param namespaces       of type String[][], {@code {prefix, uri}} pairs or null
   * @param paths            of type String..., the XPath expressions to evaluate
   * @throws IllegalArgumentException if paths is null or empty
   */
  protected XPathOperation( int numArgs, Fields fieldDeclaration, String[][] namespaces, String... paths )
    {
    super( numArgs, fieldDeclaration );
    this.namespaces = namespaces;
    this.paths = verifyPaths( paths );
    }

  /**
   * Constructor XPathOperation creates a new XPathOperation instance without an explicit field declaration.
   *
   * @param numArgs    of type int, handed to the parent constructor
   * @param namespaces of type String[][], {@code {prefix, uri}} pairs or null
   * @param paths      of type String..., the XPath expressions to evaluate
   * @throws IllegalArgumentException if paths is null or empty
   */
  protected XPathOperation( int numArgs, String[][] namespaces, String... paths )
    {
    super( numArgs );
    this.namespaces = namespaces;
    this.paths = verifyPaths( paths );
    }

  /** Rejects a null or empty path array, otherwise returns it unchanged. */
  private static String[] verifyPaths( String[] paths )
    {
    if( paths == null || paths.length == 0 )
      throw new IllegalArgumentException( "a xpath expression must be given" );

    return paths;
    }

  /**
   * Installs the operation context: a namespace aware {@link DocumentBuilder} paired with a
   * {@link Tuple} of size one.
   *
   * @throws OperationException if the document builder cannot be created
   */
  @Override
  public void prepare( FlowProcess flowProcess, OperationCall<Pair<DocumentBuilder, Tuple>> operationCall )
    {
    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();

    factory.setNamespaceAware( true );

    // NOTE(review): DTD and external entity handling are left at the parser defaults. If the incoming
    // arguments can carry untrusted XML, consider hardening the factory; it is not done here because
    // rejecting DOCTYPE declarations would change behavior for documents that declare one.
    DocumentBuilder documentBuilder;

    try
      {
      documentBuilder = factory.newDocumentBuilder();
      }
    catch( ParserConfigurationException exception )
      {
      throw new OperationException( "could not create document builder", exception );
      }

    operationCall.setContext( new Pair<DocumentBuilder, Tuple>( documentBuilder, Tuple.size( 1 ) ) );
    }

  /**
   * Method getXPath returns the XPath of this XPathOperation object.
   * <p>
   * The instance is created on the first call and cached. When namespaces were given, they are
   * registered on it through a {@link NamespaceContext}.
   *
   * @return the XPath (type XPath) of this XPathOperation object.
   */
  public XPath getXPath()
    {
    if( xPath != null )
      return xPath;

    xPath = XPathFactory.newInstance().newXPath();

    if( namespaces == null )
      return xPath;

    MutableNamespaceContext namespaceContext = new MutableNamespaceContext();

    for( String[] namespace : namespaces )
      {
      String prefix = namespace[ 0 ];
      String namespaceURI = namespace[ 1 ];

      if( LOG.isDebugEnabled() )
        LOG.debug( "adding namespace: {}:{}", prefix, namespaceURI );

      namespaceContext.addNamespace( prefix, namespaceURI );
      }

    xPath.setNamespaceContext( namespaceContext );

    return xPath;
    }

  /**
   * Method getTransformer returns the transformer of this XPathOperation object.
   * <p>
   * The instance is created on the first call and cached. It is configured to omit the XML declaration.
   *
   * @return the transformer (type Transformer) of this XPathOperation object.
   * @throws TransformerConfigurationException when a new Transformer instance cannot be created
   */
  public Transformer getTransformer() throws TransformerConfigurationException
    {
    if( transformer == null )
      {
      transformer = TransformerFactory.newInstance().newTransformer();
      transformer.setOutputProperty( OutputKeys.OMIT_XML_DECLARATION, "yes" );
      }

    return transformer;
    }

  /**
   * Method writeAsXML serializes the given node to a String using {@link #getTransformer()}.
   *
   * @param node of type Node
   * @return the serialized form of the node
   * @throws OperationException if the transformer cannot be created or the transformation fails
   */
  protected String writeAsXML( Node node )
    {
    StringWriter stringWriter = new StringWriter();
    Result result = new StreamResult( stringWriter );
    Source source = new DOMSource( node );

    try
      {
      getTransformer().transform( source, result );
      }
    catch( TransformerException exception )
      {
      // also covers TransformerConfigurationException, which is a subclass
      throw new OperationException( "writing to xml failed", exception );
      }

    return stringWriter.toString();
    }

  /**
   * Method getExpressions returns the compiled form of every configured path, in declaration order.
   * <p>
   * The list is built on the first call and cached.
   *
   * @return the compiled expressions
   * @throws OperationException if a path cannot be compiled
   */
  protected List<XPathExpression> getExpressions()
    {
    if( expressions != null )
      return expressions;

    expressions = new ArrayList<XPathExpression>();

    for( String path : paths )
      {
      try
        {
        expressions.add( getXPath().compile( path ) );
        }
      catch( XPathExpressionException exception )
        {
        throw new OperationException( "could not compile xpath expression", exception );
        }
      }

    return expressions;
    }

  /**
   * Class MutableNamespaceContext is a map backed {@link NamespaceContext} that bindings can be added to.
   * <p>
   * It is a static nested class because it never touches the enclosing operation.
   */
  static class MutableNamespaceContext implements NamespaceContext
    {
    /** Field map holds prefix to namespace URI bindings. */
    private final Map<String, String> map = new HashMap<String, String>();

    public MutableNamespaceContext()
      {
      }

    /**
     * Binds the given prefix to the given namespace URI, replacing any previous binding of the prefix.
     *
     * @param prefix       of type String
     * @param namespaceURI of type String
     */
    public void addNamespace( String prefix, String namespaceURI )
      {
      map.put( prefix, namespaceURI );
      }

    /** Returns the URI bound to the given prefix, or null when the prefix was never added. */
    @Override
    public String getNamespaceURI( String prefix )
      {
      return map.get( prefix );
      }

    /** Returns one prefix bound to the given URI, or null when there is none. */
    @Override
    public String getPrefix( String namespaceURI )
      {
      for( Map.Entry<String, String> entry : map.entrySet() )
        {
        if( entry.getValue().equals( namespaceURI ) )
          return entry.getKey();
        }

      return null;
      }

    /** Returns an iterator over every prefix bound to the given URI, empty when there is none. */
    @Override
    public Iterator<String> getPrefixes( String namespaceURI )
      {
      List<String> prefixes = new ArrayList<String>();

      for( Map.Entry<String, String> entry : map.entrySet() )
        {
        if( entry.getValue().equals( namespaceURI ) )
          prefixes.add( entry.getKey() );
        }

      return prefixes.iterator();
      }
    }

  /**
   * Method parseDocument parses the given String into a DOM Document.
   *
   * @param documentBuilder of type DocumentBuilder, the parser to use
   * @param argument        of type String, the XML text
   * @return the parsed Document
   * @throws OperationException if the text cannot be read or is not parsable
   */
  protected Document parseDocument( DocumentBuilder documentBuilder, String argument )
    {
    InputSource inputSource = new InputSource( new StringReader( argument ) );

    try
      {
      return documentBuilder.parse( inputSource );
      }
    catch( SAXException exception )
      {
      throw new OperationException( "could not parse xml document", exception );
      }
    catch( IOException exception )
      {
      throw new OperationException( "could not parse xml document", exception );
      }
    }

  /**
   * Two XPathOperation instances are equal when the parent class considers them equal and they were
   * configured with the same paths and namespace bindings.
   * <p>
   * The lazily created expressions are deliberately not compared: they are derived from the paths, and
   * comparing them made equality depend on whether {@link #getExpressions()} had been called yet.
   */
  @Override
  public boolean equals( Object object )
    {
    if( this == object )
      return true;
    if( !( object instanceof XPathOperation ) )
      return false;
    if( !super.equals( object ) )
      return false;

    XPathOperation that = (XPathOperation) object;

    if( !Arrays.equals( paths, that.paths ) )
      return false;

    return Arrays.deepEquals( namespaces, that.namespaces );
    }

  /**
   * Hashes the same configuration that {@link #equals(Object)} compares. No lazily created field
   * contributes to the result.
   */
  @Override
  public int hashCode()
    {
    int result = super.hashCode();
    result = 31 * result + Arrays.hashCode( paths ); // yields 0 for a null array
    result = 31 * result + Arrays.deepHashCode( namespaces ); // yields 0 for a null array
    return result;
    }
  }