From 93bc5e2153c90e44d35dafc468466196306f4227 Mon Sep 17 00:00:00 2001 From: Balaji Varadarajan Date: Wed, 11 Sep 2019 11:31:07 -0700 Subject: [PATCH] HUDI-243 Rename HoodieInputFormat and HoodieRealtimeInputFormat to HoodieParquetInputFormat and HoodieParquetRealtimeInputFormat --- .../hudi/common/HoodieMergeOnReadTestUtils.java | 8 ++++---- .../com/uber/hoodie/hadoop/HoodieInputFormat.java | 4 +++- .../hadoop/realtime/HoodieRealtimeInputFormat.java | 4 +++- ...utFormat.java => HoodieParquetInputFormat.java} | 4 ++-- .../hadoop/hive/HoodieCombineHiveInputFormat.java | 14 +++++++------- ....java => HoodieParquetRealtimeInputFormat.java} | 6 +++--- .../realtime/RealtimeCompactedRecordReader.java | 2 +- .../org/apache/hudi/hadoop/AnnotationTest.java | 4 ++-- .../apache/hudi/hadoop/HoodieInputFormatTest.java | 4 ++-- .../java/org/apache/hudi/hive/HiveSyncTool.java | 8 ++++---- 10 files changed, 31 insertions(+), 27 deletions(-) rename hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/{HoodieInputFormat.java => HoodieParquetInputFormat.java} (98%) rename hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/{HoodieRealtimeInputFormat.java => HoodieParquetRealtimeInputFormat.java} (98%) diff --git a/hudi-client/src/test/java/org/apache/hudi/common/HoodieMergeOnReadTestUtils.java b/hudi-client/src/test/java/org/apache/hudi/common/HoodieMergeOnReadTestUtils.java index 5d03f3af4..aa1948811 100644 --- a/hudi-client/src/test/java/org/apache/hudi/common/HoodieMergeOnReadTestUtils.java +++ b/hudi-client/src/test/java/org/apache/hudi/common/HoodieMergeOnReadTestUtils.java @@ -35,7 +35,7 @@ import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.RecordReader; import org.apache.hudi.common.model.HoodieTestUtils; import org.apache.hudi.common.util.HoodieAvroUtils; -import org.apache.hudi.hadoop.realtime.HoodieRealtimeInputFormat; +import org.apache.hudi.hadoop.realtime.HoodieParquetRealtimeInputFormat; /** * Utility methods to aid in testing MergeOnRead (workaround for HoodieReadClient for MOR) @@ -46,7 +46,7 @@ public class HoodieMergeOnReadTestUtils { throws IOException { JobConf jobConf = new JobConf(); Schema schema = HoodieAvroUtils.addMetadataFields(Schema.parse(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA)); - HoodieRealtimeInputFormat inputFormat = new HoodieRealtimeInputFormat(); + HoodieParquetRealtimeInputFormat inputFormat = new HoodieParquetRealtimeInputFormat(); setPropsForInputFormat(inputFormat, jobConf, schema, basePath); return inputPaths.stream().map(path -> { setInputPath(jobConf, path); @@ -76,8 +76,8 @@ public class HoodieMergeOnReadTestUtils { }).get(); } - private static void setPropsForInputFormat(HoodieRealtimeInputFormat inputFormat, JobConf jobConf, Schema schema, - String basePath) { + private static void setPropsForInputFormat(HoodieParquetRealtimeInputFormat inputFormat, JobConf jobConf, + Schema schema, String basePath) { List fields = schema.getFields(); String names = fields.stream().map(f -> f.name().toString()).collect(Collectors.joining(",")); String postions = fields.stream().map(f -> String.valueOf(f.pos())).collect(Collectors.joining(",")); diff --git a/hudi-hadoop-mr/src/main/java/com/uber/hoodie/hadoop/HoodieInputFormat.java b/hudi-hadoop-mr/src/main/java/com/uber/hoodie/hadoop/HoodieInputFormat.java index 1b1ad423b..b22820e69 100644 --- a/hudi-hadoop-mr/src/main/java/com/uber/hoodie/hadoop/HoodieInputFormat.java +++ b/hudi-hadoop-mr/src/main/java/com/uber/hoodie/hadoop/HoodieInputFormat.java @@ -18,9 +18,11 @@ package com.uber.hoodie.hadoop; +import org.apache.hudi.hadoop.HoodieParquetInputFormat; + /** * Temporary class to allow seamless migration of com.uber.hoodie to org.apache.hudi */ -public class HoodieInputFormat extends org.apache.hudi.hadoop.HoodieInputFormat { +public class HoodieInputFormat extends HoodieParquetInputFormat { } diff --git a/hudi-hadoop-mr/src/main/java/com/uber/hoodie/hadoop/realtime/HoodieRealtimeInputFormat.java b/hudi-hadoop-mr/src/main/java/com/uber/hoodie/hadoop/realtime/HoodieRealtimeInputFormat.java index 627876741..99d3c3a26 100644 --- a/hudi-hadoop-mr/src/main/java/com/uber/hoodie/hadoop/realtime/HoodieRealtimeInputFormat.java +++ b/hudi-hadoop-mr/src/main/java/com/uber/hoodie/hadoop/realtime/HoodieRealtimeInputFormat.java @@ -18,9 +18,11 @@ package com.uber.hoodie.hadoop.realtime; +import org.apache.hudi.hadoop.realtime.HoodieParquetRealtimeInputFormat; + /** * Temporary class to allow seamless migration of com.uber.hoodie to org.apache.hudi */ -public class HoodieRealtimeInputFormat extends org.apache.hudi.hadoop.realtime.HoodieRealtimeInputFormat { +public class HoodieRealtimeInputFormat extends HoodieParquetRealtimeInputFormat { } diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieInputFormat.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieParquetInputFormat.java similarity index 98% rename from hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieInputFormat.java rename to hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieParquetInputFormat.java index a12579c18..5e08da6f8 100644 --- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieInputFormat.java +++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieParquetInputFormat.java @@ -57,9 +57,9 @@ import org.apache.log4j.Logger; * Hoodie/Non-Hoodie datasets */ @UseFileSplitsFromInputFormat -public class HoodieInputFormat extends MapredParquetInputFormat implements Configurable { +public class HoodieParquetInputFormat extends MapredParquetInputFormat implements Configurable { - private static final transient Logger LOG = LogManager.getLogger(HoodieInputFormat.class); + private static final transient Logger LOG = LogManager.getLogger(HoodieParquetInputFormat.class); protected Configuration conf; diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/hive/HoodieCombineHiveInputFormat.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/hive/HoodieCombineHiveInputFormat.java index 501b0c475..d7e634762 100644 --- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/hive/HoodieCombineHiveInputFormat.java +++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/hive/HoodieCombineHiveInputFormat.java @@ -70,8 +70,8 @@ import org.apache.hadoop.mapred.Reporter; import org.apache.hadoop.mapred.lib.CombineFileInputFormat; import org.apache.hadoop.mapred.lib.CombineFileSplit; import org.apache.hadoop.mapreduce.JobContext; -import org.apache.hudi.hadoop.HoodieInputFormat; -import org.apache.hudi.hadoop.realtime.HoodieRealtimeInputFormat; +import org.apache.hudi.hadoop.HoodieParquetInputFormat; +import org.apache.hudi.hadoop.realtime.HoodieParquetRealtimeInputFormat; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; @@ -403,9 +403,9 @@ public class HoodieCombineHiveInputFormat " + inputFormatClass.getName()); // **MOD** Set the hoodie filter in the combine - if (inputFormatClass.getName().equals(HoodieInputFormat.class.getName())) { + if (inputFormatClass.getName().equals(HoodieParquetInputFormat.class.getName())) { combine.setHoodieFilter(true); - } else if (inputFormatClass.getName().equals(HoodieRealtimeInputFormat.class.getName())) { + } else if (inputFormatClass.getName().equals(HoodieParquetRealtimeInputFormat.class.getName())) { LOG.info("Setting hoodie filter and realtime input format"); combine.setHoodieFilter(true); combine.setRealTime(true); @@ -857,13 +857,13 @@ public class HoodieCombineHiveInputFormat result; if (hoodieFilter) { - HoodieInputFormat input; + HoodieParquetInputFormat input; if (isRealTime) { LOG.info("Using HoodieRealtimeInputFormat"); - input = new HoodieRealtimeInputFormat(); + input = new HoodieParquetRealtimeInputFormat(); } else { LOG.info("Using HoodieInputFormat"); - input = new HoodieInputFormat(); + input = new HoodieParquetInputFormat(); } input.setConf(job.getConfiguration()); result = new ArrayList( diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieRealtimeInputFormat.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieParquetRealtimeInputFormat.java similarity index 98% rename from hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieRealtimeInputFormat.java rename to hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieParquetRealtimeInputFormat.java index bd354e05a..1359b15f3 100644 --- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieRealtimeInputFormat.java +++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieParquetRealtimeInputFormat.java @@ -52,7 +52,7 @@ import org.apache.hudi.common.util.FSUtils; import org.apache.hudi.common.util.Option; import org.apache.hudi.exception.HoodieException; import org.apache.hudi.exception.HoodieIOException; -import org.apache.hudi.hadoop.HoodieInputFormat; +import org.apache.hudi.hadoop.HoodieParquetInputFormat; import org.apache.hudi.hadoop.UseFileSplitsFromInputFormat; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; @@ -61,9 +61,9 @@ import org.apache.log4j.Logger; * Input Format, that provides a real-time view of data in a Hoodie dataset */ @UseFileSplitsFromInputFormat -public class HoodieRealtimeInputFormat extends HoodieInputFormat implements Configurable { +public class HoodieParquetRealtimeInputFormat extends HoodieParquetInputFormat implements Configurable { - private static final transient Logger LOG = LogManager.getLogger(HoodieRealtimeInputFormat.class); + private static final transient Logger LOG = LogManager.getLogger(HoodieParquetRealtimeInputFormat.class); // These positions have to be deterministic across all tables public static final int HOODIE_COMMIT_TIME_COL_POS = 0; diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/RealtimeCompactedRecordReader.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/RealtimeCompactedRecordReader.java index 9c54b56d4..7d3a9bc5a 100644 --- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/RealtimeCompactedRecordReader.java +++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/RealtimeCompactedRecordReader.java @@ -80,7 +80,7 @@ class RealtimeCompactedRecordReader extends AbstractRealtimeRecordReader impleme // TODO(VC): Right now, we assume all records in log, have a matching base record. (which // would be true until we have a way to index logs too) // return from delta records map if we have some match. - String key = arrayWritable.get()[HoodieRealtimeInputFormat.HOODIE_RECORD_KEY_COL_POS] + String key = arrayWritable.get()[HoodieParquetRealtimeInputFormat.HOODIE_RECORD_KEY_COL_POS] .toString(); if (deltaRecordMap.containsKey(key)) { // TODO(NA): Invoke preCombine here by converting arrayWritable to Avro. This is required since the diff --git a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/AnnotationTest.java b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/AnnotationTest.java index 765b19ebb..771ed7e4a 100644 --- a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/AnnotationTest.java +++ b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/AnnotationTest.java @@ -27,8 +27,8 @@ public class AnnotationTest { @Test public void testAnnotation() { - assertTrue(HoodieInputFormat.class.isAnnotationPresent(UseFileSplitsFromInputFormat.class)); - Annotation[] annotations = HoodieInputFormat.class.getAnnotations(); + assertTrue(HoodieParquetInputFormat.class.isAnnotationPresent(UseFileSplitsFromInputFormat.class)); + Annotation[] annotations = HoodieParquetInputFormat.class.getAnnotations(); boolean found = false; for (Annotation annotation : annotations) { if ("UseFileSplitsFromInputFormat".equals(annotation.annotationType().getSimpleName())) { diff --git a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/HoodieInputFormatTest.java b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/HoodieInputFormatTest.java index e5c79a928..73915b7b9 100644 --- a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/HoodieInputFormatTest.java +++ b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/HoodieInputFormatTest.java @@ -38,12 +38,12 @@ import org.junit.rules.TemporaryFolder; public class HoodieInputFormatTest { - private HoodieInputFormat inputFormat; + private HoodieParquetInputFormat inputFormat; private JobConf jobConf; @Before public void setUp() { - inputFormat = new HoodieInputFormat(); + inputFormat = new HoodieParquetInputFormat(); jobConf = new JobConf(); inputFormat.setConf(jobConf); } diff --git a/hudi-hive/src/main/java/org/apache/hudi/hive/HiveSyncTool.java b/hudi-hive/src/main/java/org/apache/hudi/hive/HiveSyncTool.java index 6e8e82a23..02afff622 100644 --- a/hudi-hive/src/main/java/org/apache/hudi/hive/HiveSyncTool.java +++ b/hudi-hive/src/main/java/org/apache/hudi/hive/HiveSyncTool.java @@ -31,8 +31,8 @@ import org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe; import org.apache.hudi.common.util.FSUtils; import org.apache.hudi.common.util.Option; import org.apache.hudi.exception.InvalidDatasetException; -import org.apache.hudi.hadoop.HoodieInputFormat; -import org.apache.hudi.hadoop.realtime.HoodieRealtimeInputFormat; +import org.apache.hudi.hadoop.HoodieParquetInputFormat; +import org.apache.hudi.hadoop.realtime.HoodieParquetRealtimeInputFormat; import org.apache.hudi.hive.HoodieHiveClient.PartitionEvent; import org.apache.hudi.hive.HoodieHiveClient.PartitionEvent.PartitionEventType; import org.apache.hudi.hive.util.SchemaUtil; @@ -128,7 +128,7 @@ public class HiveSyncTool { // for now) String inputFormatClassName = cfg.usePreApacheInputFormat ? com.uber.hoodie.hadoop.HoodieInputFormat.class.getName() - : HoodieInputFormat.class.getName(); + : HoodieParquetInputFormat.class.getName(); hoodieHiveClient.createTable(schema, inputFormatClassName, MapredParquetOutputFormat.class.getName(), ParquetHiveSerDe.class.getName()); } else { @@ -137,7 +137,7 @@ public class HiveSyncTool { // /ql/exec/DDLTask.java#L3488 String inputFormatClassName = cfg.usePreApacheInputFormat ? com.uber.hoodie.hadoop.realtime.HoodieRealtimeInputFormat.class.getName() - : HoodieRealtimeInputFormat.class.getName(); + : HoodieParquetRealtimeInputFormat.class.getName(); hoodieHiveClient.createTable(schema, inputFormatClassName, MapredParquetOutputFormat.class.getName(), ParquetHiveSerDe.class.getName()); }