[HUDI-822] decouple Hudi related logics from HoodieInputFormat (#1592)
- Refactors business logic out of HoodieInputFormat into utility helper classes (HoodieHiveUtils, HoodieRealtimeConfig).
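In short, the Hive-facing helpers and reader config constants move out of the input-format classes into org.apache.hudi.hadoop.utils.HoodieHiveUtils and org.apache.hudi.hadoop.config.HoodieRealtimeConfig, and the tests switch to the new homes. A minimal sketch of what call sites look like after the move (class, constant, and method names are taken from the diff below; the surrounding class is illustrative only):

```java
import org.apache.hadoop.mapred.JobConf;
import org.apache.hudi.hadoop.config.HoodieRealtimeConfig;
import org.apache.hudi.hadoop.utils.HoodieHiveUtils;

public class RefactoredCallSites {
  public static void configure(JobConf jobConf) {
    // Formerly AbstractRealtimeRecordReader.MAX_DFS_STREAM_BUFFER_SIZE_PROP.
    jobConf.set(HoodieRealtimeConfig.MAX_DFS_STREAM_BUFFER_SIZE_PROP, String.valueOf(1024 * 1024));

    // Formerly HoodieHiveUtil.HOODIE_CONSUME_MODE_PATTERN / INCREMENTAL_SCAN_MODE.
    String modeKey = String.format(HoodieHiveUtils.HOODIE_CONSUME_MODE_PATTERN, "db1.raw_trips");
    jobConf.set(modeKey, HoodieHiveUtils.INCREMENTAL_SCAN_MODE);
  }
}
```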
@@ -29,6 +29,7 @@ import org.apache.hudi.common.table.timeline.HoodieTimeline;
 import org.apache.hudi.common.table.timeline.TimelineMetadataUtils;
 import org.apache.hudi.common.testutils.HoodieTestUtils;
 import org.apache.hudi.hadoop.testutils.InputFormatTestUtil;
+import org.apache.hudi.hadoop.utils.HoodieHiveUtils;

 import org.apache.avro.Schema;
 import org.apache.hadoop.fs.FileStatus;
@@ -103,7 +104,7 @@ public class TestHoodieParquetInputFormat {
     timeline.setInstants(instants);

     // Verify getCommitsTimelineBeforePendingCompaction does not return instants after first compaction instant
-    HoodieTimeline filteredTimeline = new HoodieParquetInputFormat().filterInstantsTimeline(timeline);
+    HoodieTimeline filteredTimeline = inputFormat.filterInstantsTimeline(timeline);
     assertTrue(filteredTimeline.containsInstant(t1));
     assertTrue(filteredTimeline.containsInstant(t2));
     assertFalse(filteredTimeline.containsInstant(t3));
@@ -116,7 +117,7 @@ public class TestHoodieParquetInputFormat {
     instants.remove(t3);
     timeline = new HoodieActiveTimeline(metaClient);
     timeline.setInstants(instants);
-    filteredTimeline = new HoodieParquetInputFormat().filterInstantsTimeline(timeline);
+    filteredTimeline = inputFormat.filterInstantsTimeline(timeline);

     // verify all remaining instants are returned.
     assertTrue(filteredTimeline.containsInstant(t1));
@@ -130,7 +131,7 @@ public class TestHoodieParquetInputFormat {
     instants.remove(t5);
     timeline = new HoodieActiveTimeline(metaClient);
     timeline.setInstants(instants);
-    filteredTimeline = new HoodieParquetInputFormat().filterInstantsTimeline(timeline);
+    filteredTimeline = inputFormat.filterInstantsTimeline(timeline);

     // verify all remaining instants are returned.
     assertTrue(filteredTimeline.containsInstant(t1));
@@ -267,7 +268,7 @@ public class TestHoodieParquetInputFormat {
     ensureFilesInCommit("Pulling 3 commits from 100, should get us the 1 files from 300 commit", files, "300", 1);
     ensureFilesInCommit("Pulling 3 commits from 100, should get us the 1 files from 200 commit", files, "200", 1);

-    InputFormatTestUtil.setupIncremental(jobConf, "100", HoodieHiveUtil.MAX_COMMIT_ALL);
+    InputFormatTestUtil.setupIncremental(jobConf, "100", HoodieHiveUtils.MAX_COMMIT_ALL);
     files = inputFormat.listStatus(jobConf);

     assertEquals(5, files.length,
@@ -312,15 +313,15 @@ public class TestHoodieParquetInputFormat {
   public void testGetIncrementalTableNames() throws IOException {
     String[] expectedincrTables = {"db1.raw_trips", "db2.model_trips", "db3.model_trips"};
     JobConf conf = new JobConf();
-    String incrementalMode1 = String.format(HoodieHiveUtil.HOODIE_CONSUME_MODE_PATTERN, expectedincrTables[0]);
-    conf.set(incrementalMode1, HoodieHiveUtil.INCREMENTAL_SCAN_MODE);
-    String incrementalMode2 = String.format(HoodieHiveUtil.HOODIE_CONSUME_MODE_PATTERN, expectedincrTables[1]);
-    conf.set(incrementalMode2,HoodieHiveUtil.INCREMENTAL_SCAN_MODE);
-    String incrementalMode3 = String.format(HoodieHiveUtil.HOODIE_CONSUME_MODE_PATTERN, "db3.model_trips");
-    conf.set(incrementalMode3, HoodieHiveUtil.INCREMENTAL_SCAN_MODE.toLowerCase());
-    String defaultmode = String.format(HoodieHiveUtil.HOODIE_CONSUME_MODE_PATTERN, "db3.first_trips");
-    conf.set(defaultmode, HoodieHiveUtil.DEFAULT_SCAN_MODE);
-    List<String> actualincrTables = HoodieHiveUtil.getIncrementalTableNames(Job.getInstance(conf));
+    String incrementalMode1 = String.format(HoodieHiveUtils.HOODIE_CONSUME_MODE_PATTERN, expectedincrTables[0]);
+    conf.set(incrementalMode1, HoodieHiveUtils.INCREMENTAL_SCAN_MODE);
+    String incrementalMode2 = String.format(HoodieHiveUtils.HOODIE_CONSUME_MODE_PATTERN, expectedincrTables[1]);
+    conf.set(incrementalMode2, HoodieHiveUtils.INCREMENTAL_SCAN_MODE);
+    String incrementalMode3 = String.format(HoodieHiveUtils.HOODIE_CONSUME_MODE_PATTERN, "db3.model_trips");
+    conf.set(incrementalMode3, HoodieHiveUtils.INCREMENTAL_SCAN_MODE.toLowerCase());
+    String defaultmode = String.format(HoodieHiveUtils.HOODIE_CONSUME_MODE_PATTERN, "db3.first_trips");
+    conf.set(defaultmode, HoodieHiveUtils.DEFAULT_SCAN_MODE);
+    List<String> actualincrTables = HoodieHiveUtils.getIncrementalTableNames(Job.getInstance(conf));
     for (String expectedincrTable : expectedincrTables) {
       assertTrue(actualincrTables.contains(expectedincrTable));
     }
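For context on the hunk above: the test registers an incremental consume mode for three tables (one lower-cased) plus one default-mode table, and expects getIncrementalTableNames to return exactly the incremental ones. A hedged, self-contained sketch of driving that helper directly, using only names visible in this diff:

```java
import java.io.IOException;
import java.util.List;

import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hudi.hadoop.utils.HoodieHiveUtils;

public class IncrementalTablesExample {
  public static void main(String[] args) throws IOException {
    JobConf conf = new JobConf();
    // Mark db1.raw_trips for incremental consumption; unset tables keep the default scan mode.
    String modeKey = String.format(HoodieHiveUtils.HOODIE_CONSUME_MODE_PATTERN, "db1.raw_trips");
    conf.set(modeKey, HoodieHiveUtils.INCREMENTAL_SCAN_MODE);

    List<String> tables = HoodieHiveUtils.getIncrementalTableNames(Job.getInstance(conf));
    System.out.println(tables); // expected to print [db1.raw_trips]
  }
}
```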
@@ -31,6 +31,7 @@ import org.apache.hudi.common.testutils.SchemaTestUtil;
 import org.apache.hudi.common.util.collection.Pair;
 import org.apache.hudi.exception.HoodieException;
 import org.apache.hudi.hadoop.testutils.InputFormatTestUtil;
+import org.apache.hudi.hadoop.config.HoodieRealtimeConfig;

 import org.apache.avro.Schema;
 import org.apache.avro.Schema.Field;
@@ -80,7 +81,7 @@ public class TestHoodieRealtimeRecordReader {
   @BeforeEach
   public void setUp() {
     jobConf = new JobConf();
-    jobConf.set(AbstractRealtimeRecordReader.MAX_DFS_STREAM_BUFFER_SIZE_PROP, String.valueOf(1024 * 1024));
+    jobConf.set(HoodieRealtimeConfig.MAX_DFS_STREAM_BUFFER_SIZE_PROP, String.valueOf(1024 * 1024));
     hadoopConf = HoodieTestUtils.getDefaultHadoopConf();
     fs = FSUtils.getFs(basePath.toString(), hadoopConf);
   }
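The only change in this hunk is the constant's new home: MAX_DFS_STREAM_BUFFER_SIZE_PROP now lives on HoodieRealtimeConfig instead of AbstractRealtimeRecordReader. A minimal sketch of the updated setup, assuming nothing beyond what the hunk shows:

```java
import org.apache.hadoop.mapred.JobConf;
import org.apache.hudi.hadoop.config.HoodieRealtimeConfig;

public class RealtimeReaderSetup {
  public static JobConf newJobConf() {
    JobConf jobConf = new JobConf();
    // 1 MiB DFS stream buffer, matching the test's setUp().
    jobConf.set(HoodieRealtimeConfig.MAX_DFS_STREAM_BUFFER_SIZE_PROP, String.valueOf(1024 * 1024));
    return jobConf;
  }
}
```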
@@ -28,7 +28,7 @@ import org.apache.hudi.common.table.log.block.HoodieCommandBlock;
 import org.apache.hudi.common.table.log.block.HoodieLogBlock;
 import org.apache.hudi.common.testutils.HoodieTestUtils;
 import org.apache.hudi.common.testutils.SchemaTestUtil;
-import org.apache.hudi.hadoop.HoodieHiveUtil;
+import org.apache.hudi.hadoop.utils.HoodieHiveUtils;

 import org.apache.avro.Schema;
 import org.apache.avro.generic.GenericRecord;
@@ -104,15 +104,15 @@ public class InputFormatTestUtil {

   public static void setupIncremental(JobConf jobConf, String startCommit, int numberOfCommitsToPull) {
     String modePropertyName =
-        String.format(HoodieHiveUtil.HOODIE_CONSUME_MODE_PATTERN, HoodieTestUtils.RAW_TRIPS_TEST_NAME);
-    jobConf.set(modePropertyName, HoodieHiveUtil.INCREMENTAL_SCAN_MODE);
+        String.format(HoodieHiveUtils.HOODIE_CONSUME_MODE_PATTERN, HoodieTestUtils.RAW_TRIPS_TEST_NAME);
+    jobConf.set(modePropertyName, HoodieHiveUtils.INCREMENTAL_SCAN_MODE);

     String startCommitTimestampName =
-        String.format(HoodieHiveUtil.HOODIE_START_COMMIT_PATTERN, HoodieTestUtils.RAW_TRIPS_TEST_NAME);
+        String.format(HoodieHiveUtils.HOODIE_START_COMMIT_PATTERN, HoodieTestUtils.RAW_TRIPS_TEST_NAME);
     jobConf.set(startCommitTimestampName, startCommit);

     String maxCommitPulls =
-        String.format(HoodieHiveUtil.HOODIE_MAX_COMMIT_PATTERN, HoodieTestUtils.RAW_TRIPS_TEST_NAME);
+        String.format(HoodieHiveUtils.HOODIE_MAX_COMMIT_PATTERN, HoodieTestUtils.RAW_TRIPS_TEST_NAME);
     jobConf.setInt(maxCommitPulls, numberOfCommitsToPull);
   }
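Taken together, setupIncremental wires the three per-table properties that HoodieHiveUtils later reads back: the consume mode, the start commit, and the maximum number of commits to pull. A hedged usage sketch (the helper and constants are exactly those shown above; RAW_TRIPS_TEST_NAME is the test table name):

```java
import org.apache.hadoop.mapred.JobConf;
import org.apache.hudi.common.testutils.HoodieTestUtils;
import org.apache.hudi.hadoop.testutils.InputFormatTestUtil;
import org.apache.hudi.hadoop.utils.HoodieHiveUtils;

public class IncrementalSetupExample {
  public static void main(String[] args) {
    JobConf jobConf = new JobConf();
    // Ask for at most 3 commits after commit "100" on the test table.
    InputFormatTestUtil.setupIncremental(jobConf, "100", 3);

    String modeKey = String.format(HoodieHiveUtils.HOODIE_CONSUME_MODE_PATTERN,
        HoodieTestUtils.RAW_TRIPS_TEST_NAME);
    // Should print the value of HoodieHiveUtils.INCREMENTAL_SCAN_MODE.
    System.out.println(jobConf.get(modeKey));
  }
}
```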