HUDI-243 Rename HoodieInputFormat and HoodieRealtimeInputFormat to HoodieParquetInputFormat and HoodieParquetRealtimeInputFormat
This commit is contained in:
committed by
vinoth chandar
parent
d0b9b56b7d
commit
93bc5e2153
@@ -18,9 +18,11 @@
|
||||
|
||||
package com.uber.hoodie.hadoop;
|
||||
|
||||
import org.apache.hudi.hadoop.HoodieParquetInputFormat;
|
||||
|
||||
/**
|
||||
* Temporary class to allow seamless migration of com.uber.hoodie to org.apache.hudi
|
||||
*/
|
||||
public class HoodieInputFormat extends org.apache.hudi.hadoop.HoodieInputFormat {
|
||||
public class HoodieInputFormat extends HoodieParquetInputFormat {
|
||||
|
||||
}
|
||||
|
||||
@@ -18,9 +18,11 @@
|
||||
|
||||
package com.uber.hoodie.hadoop.realtime;
|
||||
|
||||
import org.apache.hudi.hadoop.realtime.HoodieParquetRealtimeInputFormat;
|
||||
|
||||
/**
|
||||
* Temporary class to allow seamless migration of com.uber.hoodie to org.apache.hudi
|
||||
*/
|
||||
public class HoodieRealtimeInputFormat extends org.apache.hudi.hadoop.realtime.HoodieRealtimeInputFormat {
|
||||
public class HoodieRealtimeInputFormat extends HoodieParquetRealtimeInputFormat {
|
||||
|
||||
}
|
||||
|
||||
@@ -57,9 +57,9 @@ import org.apache.log4j.Logger;
|
||||
* Hoodie/Non-Hoodie datasets
|
||||
*/
|
||||
@UseFileSplitsFromInputFormat
|
||||
public class HoodieInputFormat extends MapredParquetInputFormat implements Configurable {
|
||||
public class HoodieParquetInputFormat extends MapredParquetInputFormat implements Configurable {
|
||||
|
||||
private static final transient Logger LOG = LogManager.getLogger(HoodieInputFormat.class);
|
||||
private static final transient Logger LOG = LogManager.getLogger(HoodieParquetInputFormat.class);
|
||||
|
||||
protected Configuration conf;
|
||||
|
||||
@@ -70,8 +70,8 @@ import org.apache.hadoop.mapred.Reporter;
|
||||
import org.apache.hadoop.mapred.lib.CombineFileInputFormat;
|
||||
import org.apache.hadoop.mapred.lib.CombineFileSplit;
|
||||
import org.apache.hadoop.mapreduce.JobContext;
|
||||
import org.apache.hudi.hadoop.HoodieInputFormat;
|
||||
import org.apache.hudi.hadoop.realtime.HoodieRealtimeInputFormat;
|
||||
import org.apache.hudi.hadoop.HoodieParquetInputFormat;
|
||||
import org.apache.hudi.hadoop.realtime.HoodieParquetRealtimeInputFormat;
|
||||
import org.apache.log4j.LogManager;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
@@ -403,9 +403,9 @@ public class HoodieCombineHiveInputFormat<K extends WritableComparable, V extend
|
||||
InputFormat inputFormat = getInputFormatFromCache(inputFormatClass, job);
|
||||
LOG.info("Input Format => " + inputFormatClass.getName());
|
||||
// **MOD** Set the hoodie filter in the combine
|
||||
if (inputFormatClass.getName().equals(HoodieInputFormat.class.getName())) {
|
||||
if (inputFormatClass.getName().equals(HoodieParquetInputFormat.class.getName())) {
|
||||
combine.setHoodieFilter(true);
|
||||
} else if (inputFormatClass.getName().equals(HoodieRealtimeInputFormat.class.getName())) {
|
||||
} else if (inputFormatClass.getName().equals(HoodieParquetRealtimeInputFormat.class.getName())) {
|
||||
LOG.info("Setting hoodie filter and realtime input format");
|
||||
combine.setHoodieFilter(true);
|
||||
combine.setRealTime(true);
|
||||
@@ -857,13 +857,13 @@ public class HoodieCombineHiveInputFormat<K extends WritableComparable, V extend
|
||||
LOG.info("Listing status in HoodieCombineHiveInputFormat.HoodieCombineFileInputFormatShim");
|
||||
List<FileStatus> result;
|
||||
if (hoodieFilter) {
|
||||
HoodieInputFormat input;
|
||||
HoodieParquetInputFormat input;
|
||||
if (isRealTime) {
|
||||
LOG.info("Using HoodieRealtimeInputFormat");
|
||||
input = new HoodieRealtimeInputFormat();
|
||||
input = new HoodieParquetRealtimeInputFormat();
|
||||
} else {
|
||||
LOG.info("Using HoodieInputFormat");
|
||||
input = new HoodieInputFormat();
|
||||
input = new HoodieParquetInputFormat();
|
||||
}
|
||||
input.setConf(job.getConfiguration());
|
||||
result = new ArrayList<FileStatus>(
|
||||
|
||||
@@ -52,7 +52,7 @@ import org.apache.hudi.common.util.FSUtils;
|
||||
import org.apache.hudi.common.util.Option;
|
||||
import org.apache.hudi.exception.HoodieException;
|
||||
import org.apache.hudi.exception.HoodieIOException;
|
||||
import org.apache.hudi.hadoop.HoodieInputFormat;
|
||||
import org.apache.hudi.hadoop.HoodieParquetInputFormat;
|
||||
import org.apache.hudi.hadoop.UseFileSplitsFromInputFormat;
|
||||
import org.apache.log4j.LogManager;
|
||||
import org.apache.log4j.Logger;
|
||||
@@ -61,9 +61,9 @@ import org.apache.log4j.Logger;
|
||||
* Input format that provides a real-time view of data in a Hoodie dataset.
|
||||
*/
|
||||
@UseFileSplitsFromInputFormat
|
||||
public class HoodieRealtimeInputFormat extends HoodieInputFormat implements Configurable {
|
||||
public class HoodieParquetRealtimeInputFormat extends HoodieParquetInputFormat implements Configurable {
|
||||
|
||||
private static final transient Logger LOG = LogManager.getLogger(HoodieRealtimeInputFormat.class);
|
||||
private static final transient Logger LOG = LogManager.getLogger(HoodieParquetRealtimeInputFormat.class);
|
||||
|
||||
// These positions have to be deterministic across all tables
|
||||
public static final int HOODIE_COMMIT_TIME_COL_POS = 0;
|
||||
@@ -80,7 +80,7 @@ class RealtimeCompactedRecordReader extends AbstractRealtimeRecordReader impleme
|
||||
// TODO(VC): Right now, we assume all records in the log have a matching base record (which
|
||||
// would be true until we have a way to index logs too)
|
||||
// return from delta records map if we have some match.
|
||||
String key = arrayWritable.get()[HoodieRealtimeInputFormat.HOODIE_RECORD_KEY_COL_POS]
|
||||
String key = arrayWritable.get()[HoodieParquetRealtimeInputFormat.HOODIE_RECORD_KEY_COL_POS]
|
||||
.toString();
|
||||
if (deltaRecordMap.containsKey(key)) {
|
||||
// TODO(NA): Invoke preCombine here by converting arrayWritable to Avro. This is required since the
|
||||
|
||||
Reference in New Issue
Block a user