/*
 * Copyright (c) 2007-2014 Concurrent, Inc. All Rights Reserved.
 *
 * Project and contact information: http://www.cascading.org/
 *
 * This file is part of the Cascading project.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cascading;

import java.io.File;
import java.io.IOException;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.ListIterator;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import junit.framework.Test;
import junit.framework.TestCase;
import junit.framework.TestSuite;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.LineIterator;
import org.apache.commons.io.filefilter.RegexFileFilter;
import org.apache.commons.io.filefilter.TrueFileFilter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Builds a JUnit suite that compares output files found under the "local" output root
 * against the files with the same relative path under the "hadoop" and "hadoop2-mr1"
 * output roots.
 * <p/>
 * The output roots are read from the comma separated system property {@code test.output.roots}.
 * One {@link CompareTestCase} is added to the suite for every file under the left hand root that
 * also exists under the right hand root and whose path does not end with {@link #NONDETERMINISTIC}.
 */
public class ComparePlatformsTest extends CascadingTestCase
{
  private static final Logger LOG = LoggerFactory.getLogger( ComparePlatformsTest.class );

  /** Files whose path ends with this suffix are excluded from comparison. */
  public static final String NONDETERMINISTIC = "-nondeterministic";

  /**
   * Creates the suite of file comparisons, local against hadoop and local against hadoop2-mr1.
   *
   * @return a suite holding one {@link CompareTestCase} per comparable file
   * @throws IllegalStateException if {@code test.output.roots} is not set, or does not contain a
   *                               root for one of the expected platforms
   * @throws Exception             declared for the JUnit {@code suite()} convention
   */
  public static Test suite() throws Exception
  {
    String root = System.getProperty( "test.output.roots" );

    LOG.info( "output roots: {}", root );

    // fail with a descriptive message instead of a bare NullPointerException from split()
    if( root == null )
      throw new IllegalStateException( "system property not set: test.output.roots" );

    String[] roots = root.split( "," );

    File localRoot = new File( find( roots, "/cascading-local/" ), "local" );
    File hadoopRoot = new File( find( roots, "/cascading-hadoop/" ), "hadoop" );
    File hadoop2Root = new File( find( roots, "/cascading-hadoop2-mr1/" ), "hadoop2-mr1" );

    LOG.info( "local path: {}", localRoot );
    LOG.info( "hadoop path: {}", hadoopRoot );
    LOG.info( "hadoop2 path: {}", hadoop2Root );

    TestSuite suite = new TestSuite();

    createComparisons( "local~hadoop", localRoot, hadoopRoot, suite );
    createComparisons( "local~hadoop2-mr1", localRoot, hadoop2Root, suite );

    return suite;
  }

  /**
   * Adds one {@link CompareTestCase} to the suite for every file below {@code lhsRoot} that has a
   * counterpart with the same relative path below {@code rhsRoot}.
   * <p/>
   * Files whose path ends with {@link #NONDETERMINISTIC}, and files without an existing
   * counterpart, are skipped.
   *
   * @param comparison label for the pair of platforms being compared
   * @param lhsRoot    directory that is listed recursively for candidate files
   * @param rhsRoot    directory the relative paths are resolved against
   * @param suite      suite receiving the created test cases
   */
  private static void createComparisons( String comparison, File lhsRoot, File rhsRoot, TestSuite suite )
  {
    LOG.info( "comparing directory: {}, with: {}", lhsRoot, rhsRoot );

    Collection<File> lhsFiles = FileUtils.listFiles( lhsRoot, new RegexFileFilter( "^[\\w-]+" ), TrueFileFilter.INSTANCE );

    LOG.info( "found lhs files: {}", lhsFiles.size() );

    // length of the root path plus the trailing separator, used to derive the relative path
    int rootLength = lhsRoot.toString().length() + 1;

    List<CompareTestCase> comparisons = new LinkedList<CompareTestCase>();

    for( File lhsFile : lhsFiles )
    {
      String lhsPath = lhsFile.toString();

      if( lhsPath.endsWith( NONDETERMINISTIC ) )
        continue;

      File rhsFile = new File( rhsRoot, lhsPath.substring( rootLength ) );

      if( rhsFile.exists() )
        comparisons.add( new CompareTestCase( comparison, lhsFile, rhsFile ) );
    }

    LOG.info( "running {} comparisons", comparisons.size() );

    for( CompareTestCase testCase : comparisons )
      suite.addTest( testCase );
  }

  /**
   * Returns the first root that contains the given string.
   *
   * @param roots  candidate root paths
   * @param string fragment to look for
   * @return the first matching root
   * @throws IllegalStateException if no root contains the fragment
   */
  private static String find( String[] roots, String string )
  {
    for( String root : roots )
    {
      if( root.contains( string ) )
        return root;
    }

    throw new IllegalStateException( "not found in roots: " + string );
  }

  /**
   * Compares the lines of one local output file (or directory of part files) with the lines of
   * its hadoop counterpart, ignoring line order and, when present, leading line numbers.
   */
  public static class CompareTestCase extends TestCase
  {
    /** Matches a line starting with digits followed by one whitespace character; group 1 is the number, group 2 the remainder. */
    private static final Pattern NUMBERED_LINE = Pattern.compile( "^(\\d+)\\s(.*)$" );

    File localFile;
    File hadoopFile;

    /**
     * @param comparison label for the pair of platforms being compared, currently unused
     * @param localFile  file or directory produced by the local platform
     * @param hadoopFile file or directory produced by the hadoop platform
     */
    public CompareTestCase( String comparison, File localFile, File hadoopFile )
    {
      super( "testFiles" );

      this.localFile = localFile;
      this.hadoopFile = hadoopFile;

      // craps out junit, unsure how to set display name
      // setName( String.format( "%s..%s", comparison, localFile.getName() ) ); // relevant bits have same file name
    }

    /**
     * Asserts both sides have the same number of lines and that every local line is also present
     * in the hadoop lines. If every local line carries a unique numeric prefix, the prefix is
     * stripped from both sides before the lines are compared.
     *
     * @throws IOException if either file cannot be read
     */
    @org.junit.Test
    public void testFiles() throws IOException
    {
      LinkedList<String> localLines = getLines( localFile );
      LinkedList<String> hadoopLines = getLines( hadoopFile );

      assertEquals( localFile + " != " + hadoopFile, localLines.size(), hadoopLines.size() );

      if( localLines.isEmpty() )
        return;

      // sorted so the first reported mismatch is deterministic
      Collections.sort( localLines );

      if( hasLineNumbers( localLines ) )
      {
        trimLineNumbers( localLines );
        trimLineNumbers( hadoopLines );
      }

      // set lookup keeps the membership semantics of List#contains without the quadratic scan
      Set<String> hadoopLookup = new HashSet<String>( hadoopLines );

      for( String localLine : localLines )
        assertTrue( localFile + " - not in hadoop lines: " + localLine, hadoopLookup.contains( localLine ) );
    }

    /**
     * Removes the leading number and the single whitespace character following it from every
     * line that has one; other lines are left unchanged.
     *
     * @param lines lines to modify in place
     */
    private void trimLineNumbers( LinkedList<String> lines )
    {
      ListIterator<String> iterator = lines.listIterator();

      while( iterator.hasNext() )
        iterator.set( NUMBERED_LINE.matcher( iterator.next() ).replaceFirst( "$2" ) );
    }

    /**
     * Returns true if every line starts with a number followed by whitespace and no number
     * occurs more than once.
     *
     * @param lines lines to inspect
     * @return false if any line lacks a numeric prefix, a prefix repeats, or a prefix is too
     *         large to be parsed as a long
     */
    private boolean hasLineNumbers( List<String> lines )
    {
      Set<Long> seen = new HashSet<Long>();

      for( String line : lines )
      {
        Matcher matcher = NUMBERED_LINE.matcher( line );

        if( !matcher.matches() )
          return false;

        long value;

        try
        {
          value = Long.parseLong( matcher.group( 1 ) );
        }
        catch( NumberFormatException exception )
        {
          // a digit run that overflows a long is not treated as a line number
          return false;
        }

        // a repeated number means the prefix is data, not a line number
        if( !seen.add( value ) )
          return false;
      }

      return true;
    }

    /**
     * Reads all lines of the given file, or if it is a directory, all lines of the files directly
     * inside it whose name starts with {@code part-}.
     *
     * @param localFile file or directory to read
     * @return the lines read
     * @throws IOException if a file cannot be read
     */
    private LinkedList<String> getLines( File localFile ) throws IOException
    {
      LinkedList<String> lines = new LinkedList<String>();

      if( !localFile.isDirectory() )
        return populate( localFile, lines );

      Collection<File> subFiles = FileUtils.listFiles( localFile, new RegexFileFilter( "^part-.*" ), null );

      for( File subFile : subFiles )
        populate( subFile, lines );

      return lines;
    }

    /**
     * Appends every line of the given UTF-8 file to {@code lines}.
     *
     * @param localFile file to read
     * @param lines     list receiving the lines
     * @return the same list instance passed in
     * @throws IOException if the file cannot be opened or closed
     */
    private LinkedList<String> populate( File localFile, LinkedList<String> lines ) throws IOException
    {
      LineIterator iterator = FileUtils.lineIterator( localFile, "UTF-8" );

      try
      {
        while( iterator.hasNext() )
          lines.add( iterator.nextLine() );
      }
      finally
      {
        // the iterator holds an open reader that must be released explicitly
        iterator.close();
      }

      return lines;
    }
  }
}