/*
 * Copyright (c) 2007-2014 Concurrent, Inc. All Rights Reserved.
 *
 * Project and contact information: http://www.cascading.org/
 *
 * This file is part of the Cascading project.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cascading.management.annotation;

import java.net.URI;
import java.util.Collections;
import java.util.Set;
import java.util.TreeSet;

import cascading.util.Util;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * URISanitizer is an implementation of the Sanitizer interface to sanitize URIs of different kinds
 * (file, HTTP, HDFS, JDBC etc.). Depending on the visibility, the Sanitizer will return different values:
 * <ul>
 * <li>PUBLIC: Only return the path of the URI</li>
 * <li>PROTECTED: Same as PUBLIC + query parameters</li>
 * <li>PRIVATE: Same as PROTECTED + URI scheme and authority (host/port)</li>
 * </ul>
 * <p>Parameters containing sensitive information like user-names, passwords, API-keys etc. can be filtered out by
 * setting the {@link cascading.management.annotation.URISanitizer#PARAMETER_FILTER_PROPERTY} System property to a
 * comma separated list of names that should never show up in the {@link cascading.management.DocumentService}.
 * The remaining parameters are joined with a single {@code &}; no trailing {@code &} is emitted, and the
 * {@code ?} is omitted entirely when no parameter remains.</p>
 * <p>Some systems may use non-standard URIs, which cannot be parsed by {@link java.net.URI}. If the sanitizer
 * encounters one of those URIs it will catch the Exception and return an empty String. This can be overruled by
 * setting the {@link cascading.management.annotation.URISanitizer#FAILURE_MODE_PASS_THROUGH} System property to
 * <code>true</code>, which will cause the actual value to be returned. <b>Note</b> that this might leak sensitive
 * information to the {@link cascading.management.DocumentService}.</p>
 */
public class URISanitizer implements Sanitizer
  {
  /** Logger. */
  private static final Logger LOG = LoggerFactory.getLogger( URISanitizer.class );

  /**
   * System property for listing URI parameters to be filtered out (usernames, passwords etc.)
   * <p/>
   * Value cases are ignored, thus {@code UserName} will be equivalent to {@code username}.
   */
  public static final String PARAMETER_FILTER_PROPERTY = "cascading.management.annotation.urisanitizer.parameternames";

  /** System property to allow values to pass through a parse exception. */
  public static final String FAILURE_MODE_PASS_THROUGH = "cascading.management.annotation.urisanitizer.failurepassthrough";

  /** Names of the query parameters that are removed from every sanitized URI; never contains an empty name. */
  private final Set<String> parametersToFilter;

  /**
   * Creates a sanitizer whose parameter filter is read once, at construction time, from the
   * {@link #PARAMETER_FILTER_PROPERTY} System property. Names are trimmed and blank entries are ignored.
   */
  public URISanitizer()
    {
    String parameterProperty = System.getProperty( PARAMETER_FILTER_PROPERTY );

    if( Util.isEmpty( parameterProperty ) )
      {
      parametersToFilter = Collections.emptySet();
      return;
      }

    // treat "UserName" equal to "username"
    Set<String> names = new TreeSet<String>( String.CASE_INSENSITIVE_ORDER );

    for( String parameterName : parameterProperty.split( "," ) )
      {
      String trimmed = parameterName.trim();

      if( !Util.isEmpty( trimmed ) )
        names.add( trimmed );
      }

    parametersToFilter = names;
    }

  /**
   * Sanitizes the given value, which is either a {@link URI} or an object whose {@code toString()} is parsed
   * as one.
   *
   * @param visibility controls how much of the URI is returned, see the class documentation
   * @param value      the URI to sanitize, may be {@code null}
   * @return {@code null} if value is {@code null}; the sanitized URI otherwise. If the value cannot be parsed,
   *         an empty String is returned, or the raw value if {@link #FAILURE_MODE_PASS_THROUGH} is set to true.
   */
  @Override
  public String apply( Visibility visibility, Object value )
    {
    if( value == null )
      return null;

    URI uri;

    if( value instanceof URI )
      {
      uri = (URI) value;
      }
    else
      {
      try
        {
        uri = URI.create( value.toString() );
        }
      catch( IllegalArgumentException exception )
        {
        LOG.warn( "failed to parse uri: {}", value, exception );

        if( Boolean.parseBoolean( System.getProperty( FAILURE_MODE_PASS_THROUGH ) ) )
          {
          LOG.warn( "ignoring failures, returning raw value" );
          return value.toString();
          }

        // return an empty string, to avoid the leakage of sensitive information.
        return "";
        }
      }

    StringBuilder buffer = new StringBuilder();

    if( visibility == Visibility.PRIVATE )
      {
      if( uri.getScheme() != null )
        buffer.append( uri.getScheme() ).append( "://" );

      // NOTE(review): URI#getAuthority() includes any user-info component ("user:password@host") -
      // confirm whether that is acceptable for PRIVATE visibility or should be stripped.
      if( uri.getAuthority() != null )
        buffer.append( uri.getAuthority() );
      }

    if( uri.getPath() != null ) // the path is null for opaque URIs, e.g. "mailto:a@b.org"
      buffer.append( uri.getPath() );

    boolean includeQuery = visibility == Visibility.PROTECTED || visibility == Visibility.PRIVATE;

    if( includeQuery && uri.getQuery() != null )
      {
      String query = sanitizeQuery( uri.getQuery() );

      // do not leave a dangling "?" behind if every parameter has been filtered out
      if( query.length() > 0 )
        buffer.append( '?' ).append( query );
      }

    return buffer.toString();
    }

  /**
   * Removes all parameters whose name is listed in {@link #parametersToFilter} from the given query.
   * <p/>
   * Empty segments (as in {@code a=1&&b=2}) are dropped and the remaining segments are joined with a single
   * {@code &}, so the result never starts or ends with a separator.
   *
   * @param query the decoded query component of a URI, never {@code null}
   * @return the query without the filtered parameters, possibly empty
   */
  private String sanitizeQuery( String query )
    {
    StringBuilder buffer = new StringBuilder();

    for( String part : query.split( "&" ) )
      {
      if( part.length() == 0 )
        continue;

      // indexOf instead of split( "=" ): splitting a segment like "=" yields a zero-length array
      int separator = part.indexOf( '=' );
      String key = separator < 0 ? part : part.substring( 0, separator );

      if( parametersToFilter.contains( key ) )
        continue;

      if( buffer.length() > 0 )
        buffer.append( '&' );

      buffer.append( part );
      }

    return buffer.toString();
    }
  }