001/*
002 * Copyright (c) 2007-2017 Xplenty, Inc. All Rights Reserved.
003 *
004 * Project and contact information: http://www.cascading.org/
005 *
006 * This file is part of the Cascading project.
007 *
008 * Licensed under the Apache License, Version 2.0 (the "License");
009 * you may not use this file except in compliance with the License.
010 * You may obtain a copy of the License at
011 *
012 *     http://www.apache.org/licenses/LICENSE-2.0
013 *
014 * Unless required by applicable law or agreed to in writing, software
015 * distributed under the License is distributed on an "AS IS" BASIS,
016 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
017 * See the License for the specific language governing permissions and
018 * limitations under the License.
019 */
020
021package cascading.tap.hadoop;
022
023import java.io.IOException;
024import java.net.URI;
025
026import cascading.flow.FlowProcess;
027import org.apache.hadoop.conf.Configuration;
028import org.apache.hadoop.filecache.DistributedCache;
029import org.apache.hadoop.fs.Path;
030
031/**
032 * Class DistCacheTap is a Tap decorator for Hfs and can be used to move a file to the
033 * {@link org.apache.hadoop.filecache.DistributedCache} on read when accessed cluster side.
034 * <p/>
035 * This is useful for {@link cascading.pipe.HashJoin}s.
036 * <p/>
037 * The distributed cache is only used when the Tap is used as a source. If the DistCacheTap is used as a sink,
038 * it will delegate to the provided parent instance and not use the DistributedCache.
039 */
040public class DistCacheTap extends BaseDistCacheTap
041  {
042  /**
043   * Constructs a new DistCacheTap instance with the given Hfs.
044   *
045   * @param parent an Hfs or GlobHfs instance representing a small file.
046   */
047  public DistCacheTap( Hfs parent )
048    {
049    super( parent );
050    }
051
052  @Override
053  protected Path[] getLocalCacheFiles( FlowProcess<? extends Configuration> flowProcess ) throws IOException
054    {
055    return DistributedCache.getLocalCacheFiles( flowProcess.getConfig() );
056    }
057
058  @Override
059  protected void addLocalCacheFiles( Configuration conf, URI uri )
060    {
061    DistributedCache.addCacheFile( uri, conf );
062    }
063  }