
Moving dependencies off CDH to Apache + Hive2 support

- Tests redone in the process
 - Main changes are to RealtimeRecordReader and how it treats maps/arrays (see the sketch after this list)
 - Make Hive sync work with Hive 1/2 and CDH environments
 - Fixes for corner cases in Hive queries
 - Spark Hive integration - working version across Apache and CDH versions
 - Known issue - https://github.com/uber/hudi/issues/439
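A minimal sketch of the map/array change the updated tests assert: a map (or array) field now arrives wrapped in one extra ArrayWritable level, so consumers unwrap it once before indexing the entries. The class, field names, and values below are illustrative only, not code from this commit.

import org.apache.hadoop.io.ArrayWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;

public class NestingSketch {
  public static void main(String[] args) {
    // One map entry, itself an ArrayWritable of (key, value)
    ArrayWritable entry = new ArrayWritable(Text.class, new Writable[]{new Text("k"), new Text("v")});
    // All entries of the hypothetical map field
    ArrayWritable entries = new ArrayWritable(ArrayWritable.class, new Writable[]{entry});
    // The extra wrapper level introduced by this change
    ArrayWritable mapItem = new ArrayWritable(ArrayWritable.class, new Writable[]{entries});

    // Old code indexed mapItem.get() directly; now one more unwrap is needed:
    Writable[] mapItemValues = ((ArrayWritable) mapItem.get()[0]).get();
    ArrayWritable firstEntry = (ArrayWritable) mapItemValues[0];
    System.out.println(firstEntry.get()[0] + " -> " + firstEntry.get()[1]); // prints: k -> v
  }
}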
Author: Vinoth Chandar, 2018-07-15 22:34:02 -07:00 (committed by vinoth chandar)
parent 2b1af18941
commit a5359662be
32 changed files with 1983 additions and 407 deletions

TestRecordReaderValueIterator.java

@@ -18,11 +18,11 @@
 package com.uber.hoodie.hadoop;
 
-import groovy.lang.Tuple2;
 import java.io.IOException;
 import java.util.List;
 import java.util.stream.Collectors;
 import java.util.stream.IntStream;
+import org.apache.commons.lang3.tuple.Pair;
 import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapred.RecordReader;
@@ -41,8 +41,8 @@ public class TestRecordReaderValueIterator {
         "spark",
         "dataset",
     };
-    List<Tuple2<Integer, String>> entries = IntStream.range(0, values.length)
-        .boxed().map(idx -> new Tuple2<>(idx, values[idx])).collect(Collectors.toList());
+    List<Pair<Integer, String>> entries = IntStream.range(0, values.length)
+        .boxed().map(idx -> Pair.of(idx, values[idx])).collect(Collectors.toList());
     TestRecordReader reader = new TestRecordReader(entries);
     RecordReaderValueIterator<IntWritable, Text> itr = new RecordReaderValueIterator<IntWritable, Text>(reader);
     for (int i = 0; i < values.length; i++) {
@@ -58,10 +58,10 @@ public class TestRecordReaderValueIterator {
   */
  private static class TestRecordReader implements RecordReader<IntWritable, Text> {
 
-    private final List<Tuple2<Integer, String>> entries;
+    private final List<Pair<Integer, String>> entries;
     private int currIndex = 0;
 
-    public TestRecordReader(List<Tuple2<Integer, String>> entries) {
+    public TestRecordReader(List<Pair<Integer, String>> entries) {
       this.entries = entries;
     }
@@ -71,8 +71,8 @@ public class TestRecordReaderValueIterator {
       if (currIndex >= entries.size()) {
         return false;
       }
-      key.set(entries.get(currIndex).getFirst());
-      value.set(entries.get(currIndex).getSecond());
+      key.set(entries.get(currIndex).getLeft());
+      value.set(entries.get(currIndex).getRight());
       currIndex++;
       return true;
     }
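The three hunks above all follow from one substitution: groovy.lang.Tuple2 is replaced with org.apache.commons.lang3.tuple.Pair, so the test no longer pulls in a Groovy dependency. A minimal sketch of the accessor mapping (the class and values here are illustrative only, not part of the commit):

import org.apache.commons.lang3.tuple.Pair;

public class PairSketch {
  public static void main(String[] args) {
    // was: Tuple2<Integer, String> entry = new Tuple2<>(0, "hoodie");
    Pair<Integer, String> entry = Pair.of(0, "hoodie");
    int key = entry.getLeft();        // was: entry.getFirst()
    String value = entry.getRight();  // was: entry.getSecond()
    System.out.println(key + " -> " + value);
  }
}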

HoodieRealtimeRecordReaderTest.java

@@ -104,7 +104,6 @@ public class HoodieRealtimeRecordReaderTest {
     header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, writeSchema.toString());
     HoodieAvroDataBlock dataBlock = new HoodieAvroDataBlock(records, header);
     writer = writer.appendBlock(dataBlock);
-    long size = writer.getCurrentSize();
     return writer;
   }
@@ -348,7 +347,7 @@ public class HoodieRealtimeRecordReaderTest {
       // Assert type MAP
       ArrayWritable mapItem = (ArrayWritable) values[12];
-      Writable[] mapItemValues = mapItem.get();
+      Writable[] mapItemValues = ((ArrayWritable) mapItem.get()[0]).get();
       ArrayWritable mapItemValue1 = (ArrayWritable) mapItemValues[0];
       ArrayWritable mapItemValue2 = (ArrayWritable) mapItemValues[1];
       Assert.assertEquals("test value for field: tags", mapItemValue1.get()[0].toString(),
@@ -381,10 +380,10 @@ public class HoodieRealtimeRecordReaderTest {
       // Assert type ARRAY
       ArrayWritable arrayValue = (ArrayWritable) values[14];
-      Writable[] arrayValues = arrayValue.get();
+      Writable[] arrayValues = ((ArrayWritable) arrayValue.get()[0]).get();
       for (int i = 0; i < arrayValues.length; i++) {
-        Assert.assertEquals("test value for field: stringArray", arrayValues[i].toString(),
-            "stringArray" + i + recordCommitTimeSuffix);
+        Assert.assertEquals("test value for field: stringArray", "stringArray" + i + recordCommitTimeSuffix,
+            arrayValues[i].toString());
       }
     }
   }
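The last hunk also corrects the JUnit argument order: Assert.assertEquals(message, expected, actual) takes the expected value before the actual one, so the original call would have labeled the two backwards in failure output. A small illustrative test (the class name and value below are hypothetical):

import org.junit.Assert;
import org.junit.Test;

public class ArgumentOrderSketch {
  @Test
  public void expectedBeforeActual() {
    String actual = "stringArray0"; // stands in for arrayValues[i].toString()
    // assertEquals(message, expected, actual): the literal expectation comes
    // before the computed value, so a failure report labels both correctly.
    Assert.assertEquals("test value for field: stringArray", "stringArray0", actual);
  }
}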