001/*
002 * Copyright (c) 2007-2017 Xplenty, Inc. All Rights Reserved.
003 *
004 * Project and contact information: http://www.cascading.org/
005 *
006 * This file is part of the Cascading project.
007 *
008 * Licensed under the Apache License, Version 2.0 (the "License");
009 * you may not use this file except in compliance with the License.
010 * You may obtain a copy of the License at
011 *
012 *     http://www.apache.org/licenses/LICENSE-2.0
013 *
014 * Unless required by applicable law or agreed to in writing, software
015 * distributed under the License is distributed on an "AS IS" BASIS,
016 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
017 * See the License for the specific language governing permissions and
018 * limitations under the License.
019 */
020
021package cascading;
022
023import java.io.File;
024import java.io.IOException;
025import java.util.Collection;
026import java.util.Collections;
027import java.util.LinkedList;
028import java.util.List;
029import java.util.ListIterator;
030
031import junit.framework.Test;
032import junit.framework.TestCase;
033import junit.framework.TestSuite;
034import org.apache.commons.io.FileUtils;
035import org.apache.commons.io.LineIterator;
036import org.apache.commons.io.filefilter.RegexFileFilter;
037import org.apache.commons.io.filefilter.TrueFileFilter;
038import org.junit.internal.runners.SuiteMethod;
039import org.junit.runner.RunWith;
040import org.slf4j.Logger;
041import org.slf4j.LoggerFactory;
042
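/**
 * Builds a JUnit suite that compares the output found under the local platform output root against the
 * output found at the same relative paths under the hadoop, hadoop2-mr1 and hadoop2-tez output roots.
 * <p>
 * The roots are resolved from the comma separated {@code test.output.roots} system property. When that
 * property is not set, the suite is empty.
 */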
046@RunWith(SuiteMethod.class)
047public class ComparePlatformsTest extends CascadingTestCase
048  {
049  private static final Logger LOG = LoggerFactory.getLogger( ComparePlatformsTest.class );
050  public static final String NONDETERMINISTIC = "-nondeterministic";
051
052  public static Test suite() throws Exception
053    {
054    String root = System.getProperty( "test.output.roots" );
055
056    if( root == null )
057      return new TestSuite();
058
059    LOG.info( "output roots: {}", root );
060
061    String[] roots = root.split( "," );
062
063    File localRoot = new File( find( roots, "/cascading-local/" ), "local" );
064    File hadoopRoot = new File( find( roots, "/cascading-hadoop/" ), "hadoop" );
065    File hadoop2Root = new File( find( roots, "/cascading-hadoop2-mr1/" ), "hadoop2-mr1" );
066    File hadoop2TezRoot = new File( find( roots, "/cascading-hadoop2-tez/" ), "hadoop2-tez" );
067
068    LOG.info( "local path: {}", localRoot );
069    LOG.info( "hadoop path: {}", hadoopRoot );
070    LOG.info( "hadoop2-mr1 path: {}", hadoop2Root );
071    LOG.info( "hadoop2-tez path: {}", hadoop2TezRoot );
072
073    TestSuite suite = new TestSuite();
074
075    createComparisons( "local~hadoop", localRoot, hadoopRoot, suite );
076    createComparisons( "local~hadoop2-mr1", localRoot, hadoop2Root, suite );
077    createComparisons( "local~hadoop2-tez", localRoot, hadoop2TezRoot, suite );
078
079    return suite;
080    }
081
082  private static void createComparisons( String comparison, File lhsRoot, File rhsRoot, TestSuite suite )
083    {
084    LOG.info( "comparing directory: {}, with: {}", lhsRoot, rhsRoot );
085
086    LinkedList<File> lhsFiles = new LinkedList<File>( FileUtils.listFiles( lhsRoot, new RegexFileFilter( "^[\\w-]+" ), TrueFileFilter.INSTANCE ) );
087    LinkedList<File> rhsFiles = new LinkedList<File>();
088
089    LOG.info( "found lhs files: {}", lhsFiles.size() );
090
091    int rootLength = lhsRoot.toString().length() + 1;
092
093    ListIterator<File> iterator = lhsFiles.listIterator();
094    while( iterator.hasNext() )
095      {
096      File localFile = iterator.next();
097      File file = new File( rhsRoot, localFile.toString().substring( rootLength ) );
098
099      if( localFile.toString().endsWith( NONDETERMINISTIC ) )
100        iterator.remove();
101      else if( file.exists() )
102        rhsFiles.add( file );
103      else
104        iterator.remove();
105      }
106
107    LOG.info( "running {} comparisons", lhsFiles.size() );
108
109    for( int i = 0; i < lhsFiles.size(); i++ )
110      {
111      File localFile = lhsFiles.get( i );
112      File hadoopFile = rhsFiles.get( i );
113
114      suite.addTest( new CompareTestCase( comparison, localFile, hadoopFile ) );
115      }
116    }
117
118  private static String find( String[] roots, String string )
119    {
120    for( String root : roots )
121      {
122      if( root.contains( string ) )
123        return root;
124      }
125
126    throw new IllegalStateException( "not found in roots: " + string );
127    }
128
129  public static class CompareTestCase extends TestCase
130    {
131    File localFile;
132    File hadoopFile;
133
134    public CompareTestCase( String comparison, File localFile, File hadoopFile )
135      {
136      super( "testFiles" );
137
138      this.localFile = localFile;
139      this.hadoopFile = hadoopFile;
140
141      // craps out junit, unsure how to set display name
142//      setName( String.format( "%s..%s", comparison, localFile.getName() ) ); // relevant bits have same file name
143      }
144
145    @org.junit.Test
146    public void testFiles() throws IOException
147      {
148      LinkedList<String> localLines = getLines( localFile );
149      LinkedList<String> hadoopLines = getLines( hadoopFile );
150
151      assertEquals( localFile + " != " + hadoopFile, localLines.size(), hadoopLines.size() );
152
153      if( localLines.size() == 0 )
154        return;
155
156      Collections.sort( localLines );
157      Collections.sort( hadoopLines );
158
159      if( hasLineNumbers( localLines ) )
160        {
161        trimLineNumbers( localLines );
162        trimLineNumbers( hadoopLines );
163        }
164
165      for( int i = 0; i < localLines.size(); i++ )
166        {
167        String localLine = localLines.get( i );
168
169        assertTrue( localFile + " - not in hadoop lines: " + localLine, hadoopLines.contains( localLine ) );
170        }
171      }
172
173    private void trimLineNumbers( LinkedList<String> lines )
174      {
175      ListIterator<String> iterator = lines.listIterator();
176
177      while( iterator.hasNext() )
178        iterator.set( iterator.next().replaceFirst( "^\\d+\\s(.*)$", "$1" ) );
179      }
180
181    private boolean hasLineNumbers( List<String> lines )
182      {
183      List<Integer> values = new LinkedList<Integer>();
184
185      for( String line : lines )
186        {
187        if( !line.matches( "^\\d+\\s.*$" ) )
188          return false;
189
190        String value = line.replaceFirst( "^(\\d+)\\s.*$", "$1" );
191
192        if( value == null || value.isEmpty() )
193          return false;
194
195        values.add( Integer.parseInt( value ) );
196        }
197
198      Collections.sort( values );
199
200      int last = -1;
201      for( Integer value : values )
202        {
203        if( last >= value )
204          return false;
205
206        last = value;
207        }
208
209      return true;
210      }
211
212    private LinkedList<String> getLines( File localFile ) throws IOException
213      {
214      LinkedList<String> lines = new LinkedList<String>();
215
216      if( !localFile.isDirectory() )
217        return populate( localFile, lines );
218
219      Collection<File> subFiles = FileUtils.listFiles( localFile, new RegexFileFilter( "^part-.*" ), null );
220
221      for( File subFile : subFiles )
222        populate( subFile, lines );
223
224      return lines;
225      }
226
227    private LinkedList<String> populate( File localFile, LinkedList<String> lines ) throws IOException
228      {
229      LineIterator iterator = FileUtils.lineIterator( localFile, "UTF-8" );
230
231      while( iterator.hasNext() )
232        lines.add( iterator.next() );
233
234      return lines;
235      }
236    }
237  }