001    /*
002     * Copyright (c) 2007-2014 Concurrent, Inc. All Rights Reserved.
003     *
004     * Project and contact information: http://www.cascading.org/
005     *
006     * This file is part of the Cascading project.
007     *
008     * Licensed under the Apache License, Version 2.0 (the "License");
009     * you may not use this file except in compliance with the License.
010     * You may obtain a copy of the License at
011     *
012     *     http://www.apache.org/licenses/LICENSE-2.0
013     *
014     * Unless required by applicable law or agreed to in writing, software
015     * distributed under the License is distributed on an "AS IS" BASIS,
016     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
017     * See the License for the specific language governing permissions and
018     * limitations under the License.
019     */
020    
021    package cascading.management.annotation;
022    
023    import java.net.URI;
024    import java.util.Collections;
025    import java.util.Set;
026    import java.util.TreeSet;
027    
028    import cascading.util.Util;
029    import org.slf4j.Logger;
030    import org.slf4j.LoggerFactory;
031    
032    /**
033     * URISanitizer is an implementation of the Sanitizer interface to sanitize URIs of different kinds
034     * (file, HTTP, HDFS, JDBC etc.) Depending on the visibility, the Sanitizer will return different values:
035     * <ul>
036     * <li>PUBLIC: Only return the path of the URI</li>
037     * <li>PROTECTED: Same as PUBLIC + query parameters</li>
038     * <li>PRIVATE: Same as PROTECTED + URI scheme and authority (host/port)</li>
039     * </ul>
040     * <p/>
041     * <p>Parameters containing sensitive information like user-names, passwords, API-keys etc. can be filtered out by setting
042     * the {@link cascading.management.annotation.URISanitizer#PARAMETER_FILTER_PROPERTY} System property to a comma separated
043     * list of names that should never show up in the {@link cascading.management.DocumentService}. Some systems may use
044     * non-standard URIs, which cannot be parsed by {@link java.net.URI}.</p>
045     * <p/>
046     * <p>If the sanitizer encounters one of those URIs it
047     * will catch the Exception and return an empty String. This can be overruled by setting the
048     * {@link cascading.management.annotation.URISanitizer#FAILURE_MODE_PASS_THROUGH} System property to <code>true</code>,
049     * which will cause the actual value being returned. <b>Note</b> that this might leak sensitive information to the
050     * {@link cascading.management.DocumentService}.</p>
051     */
052    public class URISanitizer implements Sanitizer
053      {
054      /**
055       * Logger.
056       */
057      private static final Logger LOG = LoggerFactory.getLogger( URISanitizer.class );
058    
059      /**
060       * System property for listing URI parameters to be filtered out (usernames, passwords etc.)
061       * <p/>
062       * Value cases are ignored, thus {@code UserName} will be equivalent to {@code username}.
063       */
064      public static final String PARAMETER_FILTER_PROPERTY = "cascading.management.annotation.urisanitizer.parameternames";
065    
066      /** System property to allow values to pass through a parse exception. */
067      public static final String FAILURE_MODE_PASS_THROUGH = "cascading.management.annotation.urisanitizer.failurepassthrough";
068    
069      private Set<String> parametersToFilter;
070    
071      public URISanitizer()
072        {
073        String parameterProperty = System.getProperty( PARAMETER_FILTER_PROPERTY );
074    
075        if( Util.isEmpty( parameterProperty ) )
076          {
077          parametersToFilter = Collections.emptySet();
078          }
079        else
080          {
081          // treat "UserName" equal to "username"
082          parametersToFilter = new TreeSet<String>( String.CASE_INSENSITIVE_ORDER );
083    
084          String[] parameterNames = parameterProperty.split( "," );
085    
086          for( String parameterName : parameterNames )
087            {
088            if( parameterName != null )
089              parameterName = parameterName.trim();
090    
091            if( !Util.isEmpty( parameterName ) )
092              parametersToFilter.add( parameterName );
093            }
094          }
095        }
096    
097      @Override
098      public String apply( Visibility visibility, Object value )
099        {
100        if( value == null )
101          return null;
102    
103        URI uri;
104    
105        if( value instanceof URI )
106          {
107          uri = (URI) value;
108          }
109        else
110          {
111          try
112            {
113            uri = URI.create( value.toString() );
114            }
115          catch( IllegalArgumentException exception )
116            {
117            LOG.warn( "failed to parse uri: {}", value, exception );
118    
119            if( Boolean.parseBoolean( System.getProperty( FAILURE_MODE_PASS_THROUGH ) ) )
120              {
121              LOG.warn( "ignoring failures, returning raw value" );
122              return value.toString();
123              }
124    
125            // return an empty string, to avoid the leakage of sensitive information.
126            return "";
127            }
128          }
129    
130        StringBuilder buffer = new StringBuilder();
131    
132        if( uri.getPath() != null ) // can happen according to the javadoc
133          buffer.append( uri.getPath() );
134    
135        if( ( visibility == Visibility.PROTECTED || visibility == Visibility.PRIVATE ) && uri.getQuery() != null )
136          buffer.append( "?" ).append( sanitizeQuery( uri.getQuery() ) );
137    
138        if( visibility == Visibility.PRIVATE )
139          {
140          String currentString = buffer.toString(); // preserve before creating a new instance
141          buffer = new StringBuilder();
142    
143          if( uri.getScheme() != null )
144            buffer.append( uri.getScheme() ).append( "://" );
145    
146          if( uri.getAuthority() != null )
147            buffer.append( uri.getAuthority() );
148    
149          buffer.append( currentString );
150          }
151    
152        return buffer.toString();
153        }
154    
155      private String sanitizeQuery( String query )
156        {
157        StringBuilder buffer = new StringBuilder();
158        String[] parts = query.split( "&" );
159    
160        for( String part : parts )
161          {
162          String[] keyValuePair = part.split( "=" );
163          String key = keyValuePair[ 0 ];
164    
165          if( parametersToFilter.contains( key ) )
166            continue;
167    
168          buffer.append( part ).append( "&" );
169          }
170    
171        return buffer.toString();
172        }
173      }