001    /*
002     * Copyright (c) 2007-2014 Concurrent, Inc. All Rights Reserved.
003     *
004     * Project and contact information: http://www.cascading.org/
005     *
006     * This file is part of the Cascading project.
007     *
008     * Licensed under the Apache License, Version 2.0 (the "License");
009     * you may not use this file except in compliance with the License.
010     * You may obtain a copy of the License at
011     *
012     *     http://www.apache.org/licenses/LICENSE-2.0
013     *
014     * Unless required by applicable law or agreed to in writing, software
015     * distributed under the License is distributed on an "AS IS" BASIS,
016     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
017     * See the License for the specific language governing permissions and
018     * limitations under the License.
019     */
020    
021    package cascading;
022    
import java.io.File;
import java.io.IOException;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.ListIterator;
import java.util.Set;

import junit.framework.Test;
import junit.framework.TestCase;
import junit.framework.TestSuite;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.LineIterator;
import org.apache.commons.io.filefilter.RegexFileFilter;
import org.apache.commons.io.filefilter.TrueFileFilter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
040    
041    /**
042     *
043     */
044    public class ComparePlatformsTest extends CascadingTestCase
045      {
046      private static final Logger LOG = LoggerFactory.getLogger( ComparePlatformsTest.class );
047      public static final String NONDETERMINISTIC = "-nondeterministic";
048    
049      public static Test suite() throws Exception
050        {
051        String root = System.getProperty( "test.output.roots" );
052    
053        LOG.info( "output roots: {}", root );
054    
055        String[] roots = root.split( "," );
056    
057        File localRoot = new File( find( roots, "/cascading-local/" ), "local" );
058        File hadoopRoot = new File( find( roots, "/cascading-hadoop/" ), "hadoop" );
059        File hadoop2Root = new File( find( roots, "/cascading-hadoop2-mr1/" ), "hadoop2-mr1" );
060    
061        LOG.info( "local path: {}", localRoot );
062        LOG.info( "hadoop path: {}", hadoopRoot );
063        LOG.info( "hadoop2 path: {}", hadoop2Root );
064    
065        TestSuite suite = new TestSuite();
066    
067        createComparisons( "local~hadoop", localRoot, hadoopRoot, suite );
068        createComparisons( "local~hadoop2-mr1", localRoot, hadoop2Root, suite );
069    
070        return suite;
071        }
072    
073      private static void createComparisons( String comparison, File lhsRoot, File rhsRoot, TestSuite suite )
074        {
075        LOG.info( "comparing directory: {}, with: {}", lhsRoot, rhsRoot );
076    
077        LinkedList<File> lhsFiles = new LinkedList<File>( FileUtils.listFiles( lhsRoot, new RegexFileFilter( "^[\\w-]+" ), TrueFileFilter.INSTANCE ) );
078        LinkedList<File> rhsFiles = new LinkedList<File>();
079    
080        LOG.info( "found lhs files: {}", lhsFiles.size() );
081    
082        int rootLength = lhsRoot.toString().length() + 1;
083    
084        ListIterator<File> iterator = lhsFiles.listIterator();
085        while( iterator.hasNext() )
086          {
087          File localFile = iterator.next();
088          File file = new File( rhsRoot, localFile.toString().substring( rootLength ) );
089    
090          if( localFile.toString().endsWith( NONDETERMINISTIC ) )
091            iterator.remove();
092          else if( file.exists() )
093            rhsFiles.add( file );
094          else
095            iterator.remove();
096          }
097    
098        LOG.info( "running {} comparisons", lhsFiles.size() );
099    
100        for( int i = 0; i < lhsFiles.size(); i++ )
101          {
102          File localFile = lhsFiles.get( i );
103          File hadoopFile = rhsFiles.get( i );
104    
105          suite.addTest( new CompareTestCase( comparison, localFile, hadoopFile ) );
106          }
107        }
108    
109      private static String find( String[] roots, String string )
110        {
111        for( String root : roots )
112          {
113          if( root.contains( string ) )
114            return root;
115          }
116    
117        throw new IllegalStateException( "not found in roots: " + string );
118        }
119    
120      public static class CompareTestCase extends TestCase
121        {
122        File localFile;
123        File hadoopFile;
124    
125        public CompareTestCase( String comparison, File localFile, File hadoopFile )
126          {
127          super( "testFiles" );
128    
129          this.localFile = localFile;
130          this.hadoopFile = hadoopFile;
131    
132          // craps out junit, unsure how to set display name
133    //      setName( String.format( "%s..%s", comparison, localFile.getName() ) ); // relevant bits have same file name
134          }
135    
136        @org.junit.Test
137        public void testFiles() throws IOException
138          {
139          LinkedList<String> localLines = getLines( localFile );
140          LinkedList<String> hadoopLines = getLines( hadoopFile );
141    
142          assertEquals( localFile + " != " + hadoopFile, localLines.size(), hadoopLines.size() );
143    
144          if( localLines.size() == 0 )
145            return;
146    
147          Collections.sort( localLines );
148          Collections.sort( hadoopLines );
149    
150          if( hasLineNumbers( localLines ) )
151            {
152            trimLineNumbers( localLines );
153            trimLineNumbers( hadoopLines );
154            }
155    
156          for( int i = 0; i < localLines.size(); i++ )
157            {
158            String localLine = localLines.get( i );
159    
160            assertTrue( localFile + " - not in hadoop lines: " + localLine, hadoopLines.contains( localLine ) );
161            }
162          }
163    
164        private void trimLineNumbers( LinkedList<String> lines )
165          {
166          ListIterator<String> iterator = lines.listIterator();
167    
168          while( iterator.hasNext() )
169            iterator.set( iterator.next().replaceFirst( "^\\d+\\s(.*)$", "$1" ) );
170          }
171    
172        private boolean hasLineNumbers( List<String> lines )
173          {
174          List<Integer> values = new LinkedList<Integer>();
175    
176          for( String line : lines )
177            {
178            if( !line.matches( "^\\d+\\s.*$" ) )
179              return false;
180    
181            String value = line.replaceFirst( "^(\\d+)\\s.*$", "$1" );
182    
183            if( value == null || value.isEmpty() )
184              return false;
185    
186            values.add( Integer.parseInt( value ) );
187            }
188    
189          Collections.sort( values );
190    
191          int last = -1;
192          for( Integer value : values )
193            {
194            if( last >= value )
195              return false;
196    
197            last = value;
198            }
199    
200          return true;
201          }
202    
203        private LinkedList<String> getLines( File localFile ) throws IOException
204          {
205          LinkedList<String> lines = new LinkedList<String>();
206    
207          if( !localFile.isDirectory() )
208            return populate( localFile, lines );
209    
210          Collection<File> subFiles = FileUtils.listFiles( localFile, new RegexFileFilter( "^part-.*" ), null );
211    
212          for( File subFile : subFiles )
213            populate( subFile, lines );
214    
215          return lines;
216          }
217    
218        private LinkedList<String> populate( File localFile, LinkedList<String> lines ) throws IOException
219          {
220          LineIterator iterator = FileUtils.lineIterator( localFile, "UTF-8" );
221    
222          while( iterator.hasNext() )
223            lines.add( iterator.next() );
224    
225          return lines;
226          }
227        }
228      }