001 /* 002 * Copyright (c) 2007-2014 Concurrent, Inc. All Rights Reserved. 003 * 004 * Project and contact information: http://www.cascading.org/ 005 * 006 * This file is part of the Cascading project. 007 * 008 * Licensed under the Apache License, Version 2.0 (the "License"); 009 * you may not use this file except in compliance with the License. 010 * You may obtain a copy of the License at 011 * 012 * http://www.apache.org/licenses/LICENSE-2.0 013 * 014 * Unless required by applicable law or agreed to in writing, software 015 * distributed under the License is distributed on an "AS IS" BASIS, 016 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 017 * See the License for the specific language governing permissions and 018 * limitations under the License. 019 */ 020 021 package cascading.operation.xml; 022 023 import java.io.IOException; 024 import java.io.StringReader; 025 import java.io.StringWriter; 026 import java.util.ArrayList; 027 import java.util.Arrays; 028 import java.util.HashMap; 029 import java.util.Iterator; 030 import java.util.List; 031 import java.util.Map; 032 import javax.xml.namespace.NamespaceContext; 033 import javax.xml.parsers.DocumentBuilder; 034 import javax.xml.parsers.DocumentBuilderFactory; 035 import javax.xml.parsers.ParserConfigurationException; 036 import javax.xml.transform.OutputKeys; 037 import javax.xml.transform.Result; 038 import javax.xml.transform.Source; 039 import javax.xml.transform.Transformer; 040 import javax.xml.transform.TransformerConfigurationException; 041 import javax.xml.transform.TransformerException; 042 import javax.xml.transform.TransformerFactory; 043 import javax.xml.transform.dom.DOMSource; 044 import javax.xml.transform.stream.StreamResult; 045 import javax.xml.xpath.XPath; 046 import javax.xml.xpath.XPathExpression; 047 import javax.xml.xpath.XPathExpressionException; 048 import javax.xml.xpath.XPathFactory; 049 050 import cascading.flow.FlowProcess; 051 import cascading.operation.BaseOperation; 052 import cascading.operation.OperationCall; 053 import cascading.operation.OperationException; 054 import cascading.tuple.Fields; 055 import cascading.tuple.Tuple; 056 import cascading.util.Pair; 057 import org.slf4j.Logger; 058 import org.slf4j.LoggerFactory; 059 import org.w3c.dom.Document; 060 import org.w3c.dom.Node; 061 import org.xml.sax.InputSource; 062 import org.xml.sax.SAXException; 063 064 /** Class XPathOperation is the base class for all XPath operations. */ 065 public class XPathOperation extends BaseOperation<Pair<DocumentBuilder, Tuple>> 066 { 067 /** Field NAMESPACE_XHTML */ 068 public static final String[][] NAMESPACE_XHTML = new String[][]{ 069 new String[]{"xhtml", "http://www.w3.org/1999/xhtml"}}; 070 071 /** Field LOG */ 072 private static final Logger LOG = LoggerFactory.getLogger( XPathOperation.class ); 073 074 /** Field namespaces */ 075 protected final String[][] namespaces; 076 /** Field paths */ 077 protected final String[] paths; 078 079 /** Field xPath */ 080 private transient XPath xPath; 081 /** Field transformer */ 082 private transient Transformer transformer; 083 /** Field expressions */ 084 private transient List<XPathExpression> expressions; 085 086 protected XPathOperation( int numArgs, Fields fieldDeclaration, String[][] namespaces, String... paths ) 087 { 088 super( numArgs, fieldDeclaration ); 089 this.namespaces = namespaces; 090 this.paths = paths; 091 092 if( paths == null || paths.length == 0 ) 093 throw new IllegalArgumentException( "a xpath expression must be given" ); 094 } 095 096 protected XPathOperation( int numArgs, String[][] namespaces, String... paths ) 097 { 098 super( numArgs ); 099 this.namespaces = namespaces; 100 this.paths = paths; 101 102 if( paths == null || paths.length == 0 ) 103 throw new IllegalArgumentException( "a xpath expression must be given" ); 104 } 105 106 @Override 107 public void prepare( FlowProcess flowProcess, OperationCall<Pair<DocumentBuilder, Tuple>> operationCall ) 108 { 109 try 110 { 111 DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); 112 113 factory.setNamespaceAware( true ); 114 115 operationCall.setContext( new Pair<DocumentBuilder, Tuple>( factory.newDocumentBuilder(), Tuple.size( 1 ) ) ); 116 } 117 catch( ParserConfigurationException exception ) 118 { 119 throw new OperationException( "could not create document builder", exception ); 120 } 121 } 122 123 /** 124 * Method getXPath returns the XPath of this XPathOperation object. 125 * 126 * @return the XPath (type XPath) of this XPathOperation object. 127 */ 128 public XPath getXPath() 129 { 130 if( xPath != null ) 131 return xPath; 132 133 XPathFactory factory = XPathFactory.newInstance(); 134 135 xPath = factory.newXPath(); 136 137 if( namespaces != null ) 138 { 139 MutableNamespaceContext namespaceContext = new MutableNamespaceContext(); 140 141 for( String[] namespace : namespaces ) 142 { 143 if( LOG.isDebugEnabled() ) 144 LOG.debug( "adding namespace: {}:{}", namespace[ 0 ], namespace[ 1 ] ); 145 146 namespaceContext.addNamespace( namespace[ 0 ], namespace[ 1 ] ); 147 } 148 149 xPath.setNamespaceContext( namespaceContext ); 150 } 151 152 return xPath; 153 } 154 155 /** 156 * Method getTransformer returns the transformer of this XPathOperation object. 157 * 158 * @return the transformer (type Transformer) of this XPathOperation object. 159 * @throws TransformerConfigurationException 160 * when 161 */ 162 public Transformer getTransformer() throws TransformerConfigurationException 163 { 164 if( transformer != null ) 165 return transformer; 166 167 transformer = TransformerFactory.newInstance().newTransformer(); 168 169 transformer.setOutputProperty( OutputKeys.OMIT_XML_DECLARATION, "yes" ); 170 171 return transformer; 172 } 173 174 protected String writeAsXML( Node node ) 175 { 176 StringWriter stringWriter = new StringWriter(); 177 Result result = new StreamResult( stringWriter ); 178 Source source = new DOMSource( node ); 179 180 try 181 { 182 getTransformer().transform( source, result ); 183 } 184 catch( TransformerException exception ) 185 { 186 throw new OperationException( "writing to xml failed", exception ); 187 } 188 189 return stringWriter.toString(); 190 } 191 192 protected List<XPathExpression> getExpressions() 193 { 194 if( expressions != null ) 195 return expressions; 196 197 expressions = new ArrayList<XPathExpression>(); 198 199 for( String path : paths ) 200 { 201 try 202 { 203 expressions.add( getXPath().compile( path ) ); 204 } 205 catch( XPathExpressionException exception ) 206 { 207 throw new OperationException( "could not compile xpath expression", exception ); 208 } 209 } 210 211 return expressions; 212 } 213 214 class MutableNamespaceContext implements NamespaceContext 215 { 216 217 private final Map<String, String> map = new HashMap<String, String>(); 218 219 public MutableNamespaceContext() 220 { 221 } 222 223 public void addNamespace( String prefix, String namespaceURI ) 224 { 225 map.put( prefix, namespaceURI ); 226 } 227 228 public String getNamespaceURI( String prefix ) 229 { 230 return map.get( prefix ); 231 } 232 233 public String getPrefix( String namespaceURI ) 234 { 235 for( String prefix : map.keySet() ) 236 { 237 if( map.get( prefix ).equals( namespaceURI ) ) 238 { 239 return prefix; 240 } 241 } 242 return null; 243 } 244 245 public Iterator getPrefixes( String namespaceURI ) 246 { 247 List<String> prefixes = new ArrayList<String>(); 248 249 for( String prefix : map.keySet() ) 250 { 251 if( map.get( prefix ).equals( namespaceURI ) ) 252 prefixes.add( prefix ); 253 } 254 255 return prefixes.iterator(); 256 } 257 } 258 259 protected Document parseDocument( DocumentBuilder documentBuilder, String argument ) 260 { 261 Document document; 262 try 263 { 264 document = documentBuilder.parse( new InputSource( new StringReader( argument ) ) ); 265 } 266 catch( SAXException exception ) 267 { 268 throw new OperationException( "could not parse xml document", exception ); 269 } 270 catch( IOException exception ) 271 { 272 throw new OperationException( "could not parse xml document", exception ); 273 } 274 return document; 275 } 276 277 @Override 278 public boolean equals( Object object ) 279 { 280 if( this == object ) 281 return true; 282 if( !( object instanceof XPathOperation ) ) 283 return false; 284 if( !super.equals( object ) ) 285 return false; 286 287 XPathOperation that = (XPathOperation) object; 288 289 if( expressions != null ? !expressions.equals( that.expressions ) : that.expressions != null ) 290 return false; 291 if( !Arrays.equals( paths, that.paths ) ) 292 return false; 293 294 return true; 295 } 296 297 @Override 298 public int hashCode() 299 { 300 int result = super.hashCode(); 301 result = 31 * result + ( paths != null ? Arrays.hashCode( paths ) : 0 ); 302 result = 31 * result + ( expressions != null ? expressions.hashCode() : 0 ); 303 return result; 304 } 305 }