/*
 * Copyright (c) 2007-2017 Xplenty, Inc. All Rights Reserved.
 *
 * Project and contact information: http://www.cascading.org/
 *
 * This file is part of the Cascading project.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cascading;

import java.io.File;
import java.io.IOException;
import java.util.Collection;
import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
import java.util.ListIterator;

import junit.framework.Test;
import junit.framework.TestCase;
import junit.framework.TestSuite;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.LineIterator;
import org.apache.commons.io.filefilter.RegexFileFilter;
import org.apache.commons.io.filefilter.TrueFileFilter;
import org.junit.internal.runners.SuiteMethod;
import org.junit.runner.RunWith;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Builds a JUnit suite that compares the output files found under the local platform output root
 * with the files of the same relative path under the hadoop, hadoop2-mr1 and hadoop2-tez output roots.
 * <p>
 * The roots are read from the comma separated {@code test.output.roots} system property. When the
 * property is not set, an empty suite is returned.
 */
@RunWith(SuiteMethod.class)
public class ComparePlatformsTest extends CascadingTestCase
  {
  private static final Logger LOG = LoggerFactory.getLogger( ComparePlatformsTest.class );

  /** Files whose path ends with this suffix are excluded from every comparison. */
  public static final String NONDETERMINISTIC = "-nondeterministic";

  /**
   * Creates the suite of file comparisons.
   *
   * @return one {@link CompareTestCase} per comparable file pair, or an empty suite when the
   * {@code test.output.roots} system property is not set
   * @throws IllegalStateException if one of the expected platform roots is missing from the property
   */
  public static Test suite() throws Exception
    {
    String root = System.getProperty( "test.output.roots" );

    if( root == null )
      return new TestSuite();

    LOG.info( "output roots: {}", root );

    String[] roots = root.split( "," );

    File localRoot = new File( find( roots, "/cascading-local/" ), "local" );
    File hadoopRoot = new File( find( roots, "/cascading-hadoop/" ), "hadoop" );
    File hadoop2Root = new File( find( roots, "/cascading-hadoop2-mr1/" ), "hadoop2-mr1" );
    File hadoop2TezRoot = new File( find( roots, "/cascading-hadoop2-tez/" ), "hadoop2-tez" );

    LOG.info( "local path: {}", localRoot );
    LOG.info( "hadoop path: {}", hadoopRoot );
    LOG.info( "hadoop2-mr1 path: {}", hadoop2Root );
    LOG.info( "hadoop2-tez path: {}", hadoop2TezRoot );

    TestSuite suite = new TestSuite();

    createComparisons( "local~hadoop", localRoot, hadoopRoot, suite );
    createComparisons( "local~hadoop2-mr1", localRoot, hadoop2Root, suite );
    createComparisons( "local~hadoop2-tez", localRoot, hadoop2TezRoot, suite );

    return suite;
    }

  /**
   * Adds one {@link CompareTestCase} to the suite for every file under {@code lhsRoot} that also
   * exists, at the same relative path, under {@code rhsRoot}.
   * <p>
   * Files whose path ends with {@link #NONDETERMINISTIC} and files without a counterpart under
   * {@code rhsRoot} are skipped.
   *
   * @param comparison label describing the two platforms being compared
   * @param lhsRoot    directory searched recursively for files to compare
   * @param rhsRoot    directory expected to hold the counterpart of each lhs file
   * @param suite      suite receiving the created test cases
   */
  private static void createComparisons( String comparison, File lhsRoot, File rhsRoot, TestSuite suite )
    {
    LOG.info( "comparing directory: {}, with: {}", lhsRoot, rhsRoot );

    LinkedList<File> lhsFiles = new LinkedList<File>( FileUtils.listFiles( lhsRoot, new RegexFileFilter( "^[\\w-]+" ), TrueFileFilter.INSTANCE ) );
    LinkedList<File> rhsFiles = new LinkedList<File>();

    LOG.info( "found lhs files: {}", lhsFiles.size() );

    // length of the root prefix plus the path separator following it
    int rootLength = lhsRoot.toString().length() + 1;

    // prune lhsFiles in place so that lhsFiles and rhsFiles end up pairwise aligned
    ListIterator<File> iterator = lhsFiles.listIterator();

    while( iterator.hasNext() )
      {
      File localFile = iterator.next();
      File file = new File( rhsRoot, localFile.toString().substring( rootLength ) );

      if( localFile.toString().endsWith( NONDETERMINISTIC ) )
        iterator.remove();
      else if( file.exists() )
        rhsFiles.add( file );
      else
        iterator.remove();
      }

    LOG.info( "running {} comparisons", lhsFiles.size() );

    // walk both linked lists in lock step, indexed get() on a LinkedList is linear per call
    ListIterator<File> lhsIterator = lhsFiles.listIterator();
    ListIterator<File> rhsIterator = rhsFiles.listIterator();

    while( lhsIterator.hasNext() )
      suite.addTest( new CompareTestCase( comparison, lhsIterator.next(), rhsIterator.next() ) );
    }

  /**
   * Returns the first root containing the given path fragment.
   *
   * @param roots  candidate root paths
   * @param string path fragment to look for
   * @return the first element of {@code roots} that contains {@code string}
   * @throws IllegalStateException if no root contains the fragment
   */
  private static String find( String[] roots, String string )
    {
    for( String root : roots )
      {
      if( root.contains( string ) )
        return root;
      }

    throw new IllegalStateException( "not found in roots: " + string );
    }

  /**
   * Compares the lines of a single local output file with the lines of its counterpart, which may
   * be a plain file or a directory of {@code part-*} files.
   */
  public static class CompareTestCase extends TestCase
    {
    File localFile;
    File hadoopFile;

    /**
     * @param comparison label of the platform comparison, currently unused
     * @param localFile  file written by the local platform
     * @param hadoopFile file or directory written by the other platform
     */
    public CompareTestCase( String comparison, File localFile, File hadoopFile )
      {
      super( "testFiles" );

      this.localFile = localFile;
      this.hadoopFile = hadoopFile;

      // setting a custom name breaks the junit run, unsure how to set a display name
//      setName( String.format( "%s..%s", comparison, localFile.getName() ) ); // relevant bits have same file name
      }

    /**
     * Asserts both sides hold the same number of lines and that every local line is present on the
     * other side. When every local line starts with a distinct numeric prefix, the prefix is
     * stripped from both sides before the lines are compared.
     *
     * @throws IOException if either side cannot be read
     */
    @org.junit.Test
    public void testFiles() throws IOException
      {
      LinkedList<String> localLines = getLines( localFile );
      LinkedList<String> hadoopLines = getLines( hadoopFile );

      assertEquals( localFile + " != " + hadoopFile, localLines.size(), hadoopLines.size() );

      if( localLines.isEmpty() )
        return;

      // sorted so the first reported mismatch does not depend on the order lines were read in
      Collections.sort( localLines );

      if( hasLineNumbers( localLines ) )
        {
        trimLineNumbers( localLines );
        trimLineNumbers( hadoopLines );
        }

      // hash based lookup, LinkedList.contains() would scan the whole list for every local line
      Collection<String> hadoopLookup = new java.util.HashSet<String>( hadoopLines );

      for( String localLine : localLines )
        assertTrue( localFile + " - not in hadoop lines: " + localLine, hadoopLookup.contains( localLine ) );
      }

    /**
     * Strips, in place, a leading run of digits and the single whitespace character following it
     * from every line that has one.
     */
    private void trimLineNumbers( LinkedList<String> lines )
      {
      ListIterator<String> iterator = lines.listIterator();

      while( iterator.hasNext() )
        iterator.set( iterator.next().replaceFirst( "^\\d+\\s(.*)$", "$1" ) );
      }

    /**
     * Returns true if every line starts with digits followed by whitespace and no two lines share
     * the same numeric prefix.
     */
    private boolean hasLineNumbers( List<String> lines )
      {
      List<Long> values = new LinkedList<Long>();

      for( String line : lines )
        {
        if( !line.matches( "^\\d+\\s.*$" ) )
          return false;

        // the match above guarantees a non-empty, all digit prefix
        String value = line.replaceFirst( "^(\\d+)\\s.*$", "$1" );

        try
          {
          values.add( Long.parseLong( value ) );
          }
        catch( NumberFormatException exception )
          {
          // too many digits to parse as a number, treat the prefix as data instead of failing the test
          return false;
          }
        }

      Collections.sort( values );

      // once sorted, any duplicate prefix shows up as a non-increasing neighbour
      long last = -1;

      for( Long value : values )
        {
        if( last >= value )
          return false;

        last = value;
        }

      return true;
      }

    /**
     * Reads all lines of the given file, or of every {@code part-*} file directly inside it when it
     * is a directory.
     *
     * @throws IOException if a file cannot be read
     */
    private LinkedList<String> getLines( File localFile ) throws IOException
      {
      LinkedList<String> lines = new LinkedList<String>();

      if( !localFile.isDirectory() )
        return populate( localFile, lines );

      // null directory filter, sub directories are not searched
      Collection<File> subFiles = FileUtils.listFiles( localFile, new RegexFileFilter( "^part-.*" ), null );

      for( File subFile : subFiles )
        populate( subFile, lines );

      return lines;
      }

    /**
     * Appends every line of the given UTF-8 file to {@code lines} and returns that same list.
     *
     * @throws IOException if the file cannot be opened or read
     */
    private LinkedList<String> populate( File localFile, LinkedList<String> lines ) throws IOException
      {
      LineIterator iterator = FileUtils.lineIterator( localFile, "UTF-8" );

      try
        {
        while( iterator.hasNext() )
          lines.add( iterator.next() );
        }
      finally
        {
        // release the underlying reader even if reading fails part way through
        iterator.close();
        }

      return lines;
      }
    }
  }