001/*
002 * Copyright (c) 2007-2016 Concurrent, Inc. All Rights Reserved.
003 *
004 * Project and contact information: http://www.cascading.org/
005 *
006 * This file is part of the Cascading project.
007 *
008 * Licensed under the Apache License, Version 2.0 (the "License");
009 * you may not use this file except in compliance with the License.
010 * You may obtain a copy of the License at
011 *
012 *     http://www.apache.org/licenses/LICENSE-2.0
013 *
014 * Unless required by applicable law or agreed to in writing, software
015 * distributed under the License is distributed on an "AS IS" BASIS,
016 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
017 * See the License for the specific language governing permissions and
018 * limitations under the License.
019 */
020
021package cascading.management.annotation;
022
023import java.net.URI;
024import java.util.Collections;
025import java.util.Set;
026import java.util.TreeSet;
027
028import cascading.util.Util;
029import org.slf4j.Logger;
030import org.slf4j.LoggerFactory;
031
032/**
033 * URISanitizer is an implementation of the Sanitizer interface to sanitize URIs of different kinds
034 * (file, HTTP, HDFS, JDBC etc.) Depending on the visibility, the Sanitizer will return different values:
035 * <p/>
036 * For hierarchical URIs (jdbc://...):
037 * <ul>
038 * <li>PUBLIC: Only return the path of the URI</li>
039 * <li>PROTECTED: Same as PUBLIC + query parameters</li>
040 * <li>PRIVATE: Same as PROTECTED + URI scheme and authority (host/port)</li>
041 * </ul>
042 * <p/>
043 * For opaque URIs (mailto:someone@email.com):
044 * <ul>
045 * <li>PUBLIC: Only return the scheme of the URI, 'mailto:' etc</li>
046 * <li>PROTECTED: Same as PUBLIC</li>
047 * <li>PRIVATE: The whole URI</li>
048 * </ul>
049 * <p>
050 * Parameters containing sensitive information like user-names, passwords, API-keys etc. can be filtered out by setting
051 * the {@link cascading.management.annotation.URISanitizer#PARAMETER_FILTER_PROPERTY} System property to a comma separated
052 * list of names that should never show up in the {@link cascading.management.DocumentService}. Some systems may use
053 * non-standard URIs, which cannot be parsed by {@link java.net.URI}.</p>
054 * <p/>
055 * <p>If the sanitizer encounters one of those URIs it
056 * will catch the Exception and return an empty String. This can be overruled by setting the
057 * {@link cascading.management.annotation.URISanitizer#FAILURE_MODE_PASS_THROUGH} System property to <code>true</code>,
058 * which will cause the actual value being returned. <b>Note</b> that this might leak sensitive information to the
059 * {@link cascading.management.DocumentService}.</p>
060 */
061public class URISanitizer implements Sanitizer
062  {
063  /**
064   * Logger.
065   */
066  private static final Logger LOG = LoggerFactory.getLogger( URISanitizer.class );
067
068  /**
069   * System property for listing URI parameters to be filtered out (usernames, passwords etc.)
070   * <p/>
071   * Value cases are ignored, thus {@code UserName} will be equivalent to {@code username}.
072   */
073  public static final String PARAMETER_FILTER_PROPERTY = "cascading.management.annotation.urisanitizer.parameternames";
074
075  /** System property to allow values to pass through a parse exception. */
076  public static final String FAILURE_MODE_PASS_THROUGH = "cascading.management.annotation.urisanitizer.failurepassthrough";
077
078  private Set<String> parametersToFilter;
079
080  public URISanitizer()
081    {
082    String parameterProperty = System.getProperty( PARAMETER_FILTER_PROPERTY );
083
084    if( Util.isEmpty( parameterProperty ) )
085      {
086      parametersToFilter = Collections.emptySet();
087      }
088    else
089      {
090      // treat "UserName" equal to "username"
091      parametersToFilter = new TreeSet<String>( String.CASE_INSENSITIVE_ORDER );
092
093      String[] parameterNames = parameterProperty.split( "," );
094
095      for( String parameterName : parameterNames )
096        {
097        if( parameterName != null )
098          parameterName = parameterName.trim();
099
100        if( !Util.isEmpty( parameterName ) )
101          parametersToFilter.add( parameterName );
102        }
103      }
104    }
105
106  @Override
107  public String apply( Visibility visibility, Object value )
108    {
109    if( value == null )
110      return null;
111
112    URI uri;
113
114    if( value instanceof URI )
115      {
116      uri = (URI) value;
117      }
118    else
119      {
120      try
121        {
122        uri = URI.create( encode( value.toString() ) );
123        }
124      catch( IllegalArgumentException exception )
125        {
126        LOG.warn( "failed to parse uri: {}, message: {}", value, exception.getMessage() );
127        LOG.debug( "failed to parse uri: {}", value, exception );
128
129        if( Boolean.parseBoolean( System.getProperty( FAILURE_MODE_PASS_THROUGH ) ) )
130          {
131          LOG.warn( "ignoring uri sanitizer failures, returning unsanitized value, property '{}' set to true", FAILURE_MODE_PASS_THROUGH );
132          return value.toString();
133          }
134
135        // return an empty string, to avoid the leakage of sensitive information.
136        LOG.info( "set property: '{}', to true to return unsanitized value, returning empty string", FAILURE_MODE_PASS_THROUGH );
137        return "";
138        }
139      }
140
141    if( uri.isOpaque() )
142      {
143      switch( visibility )
144        {
145        case PRIVATE:
146          return value.toString();
147        case PROTECTED:
148        case PUBLIC:
149          return uri.getScheme() + ":";
150        }
151      }
152
153    StringBuilder buffer = new StringBuilder();
154
155    if( uri.getPath() != null ) // can happen according to the javadoc
156      buffer.append( uri.getPath() );
157
158    if( ( visibility == Visibility.PROTECTED || visibility == Visibility.PRIVATE ) && uri.getQuery() != null )
159      buffer.append( "?" ).append( sanitizeQuery( uri.getQuery() ) );
160
161    if( visibility == Visibility.PRIVATE )
162      {
163      String currentString = buffer.toString(); // preserve before creating a new instance
164      buffer = new StringBuilder();
165
166      if( uri.getScheme() != null )
167        buffer.append( uri.getScheme() ).append( "://" );
168
169      if( uri.getAuthority() != null )
170        buffer.append( uri.getAuthority() );
171
172      buffer.append( currentString );
173      }
174
175    return buffer.toString();
176    }
177
178  private String encode( String input )
179    {
180    String[] parts = input.split( "://", 2 );
181    String protocol = "";
182    String rest;
183
184    if( parts.length == 2 )
185      protocol = parts[ 0 ];
186
187    rest = parts[ parts.length - 1 ];
188
189    rest = rest.replaceAll( "\\[", "%5B" );
190    rest = rest.replaceAll( "\\]", "%5D" );
191    rest = rest.replaceAll( "\\{", "%7B" );
192    rest = rest.replaceAll( "\\}", "%7D" );
193    rest = rest.replaceAll( "\\\\", "/" );
194    rest = rest.replaceAll( ";", "%3B" );
195    rest = rest.replaceAll( ",", "%2C" );
196
197    StringBuilder builder = new StringBuilder();
198
199    if( !protocol.isEmpty() )
200      builder.append( protocol ).append( "://" );
201
202    builder.append( rest );
203
204    return builder.toString();
205    }
206
207  private String sanitizeQuery( String query )
208    {
209    StringBuilder buffer = new StringBuilder();
210    String[] parts = query.split( "&" );
211
212    for( String part : parts )
213      {
214      String[] keyValuePair = part.split( "=" );
215      String key = keyValuePair[ 0 ];
216
217      if( parametersToFilter.contains( key ) )
218        continue;
219
220      buffer.append( part ).append( "&" );
221      }
222
223    return buffer.toString();
224    }
225  }