1
0

[HUDI-822] decouple Hudi related logics from HoodieInputFormat (#1592)

- Refactoring business logic out of InputFormat into Utils helpers.
This commit is contained in:
Gary Li
2020-06-09 06:10:16 -07:00
committed by GitHub
parent 3387b3841f
commit 37838cea60
18 changed files with 919 additions and 624 deletions

View File

@@ -29,6 +29,7 @@ import org.apache.hudi.common.table.timeline.HoodieTimeline;
import org.apache.hudi.common.table.timeline.TimelineMetadataUtils;
import org.apache.hudi.common.testutils.HoodieTestUtils;
import org.apache.hudi.hadoop.testutils.InputFormatTestUtil;
import org.apache.hudi.hadoop.utils.HoodieHiveUtils;
import org.apache.avro.Schema;
import org.apache.hadoop.fs.FileStatus;
@@ -103,7 +104,7 @@ public class TestHoodieParquetInputFormat {
timeline.setInstants(instants);
// Verify getCommitsTimelineBeforePendingCompaction does not return instants after first compaction instant
HoodieTimeline filteredTimeline = new HoodieParquetInputFormat().filterInstantsTimeline(timeline);
HoodieTimeline filteredTimeline = inputFormat.filterInstantsTimeline(timeline);
assertTrue(filteredTimeline.containsInstant(t1));
assertTrue(filteredTimeline.containsInstant(t2));
assertFalse(filteredTimeline.containsInstant(t3));
@@ -116,7 +117,7 @@ public class TestHoodieParquetInputFormat {
instants.remove(t3);
timeline = new HoodieActiveTimeline(metaClient);
timeline.setInstants(instants);
filteredTimeline = new HoodieParquetInputFormat().filterInstantsTimeline(timeline);
filteredTimeline = inputFormat.filterInstantsTimeline(timeline);
// verify all remaining instants are returned.
assertTrue(filteredTimeline.containsInstant(t1));
@@ -130,7 +131,7 @@ public class TestHoodieParquetInputFormat {
instants.remove(t5);
timeline = new HoodieActiveTimeline(metaClient);
timeline.setInstants(instants);
filteredTimeline = new HoodieParquetInputFormat().filterInstantsTimeline(timeline);
filteredTimeline = inputFormat.filterInstantsTimeline(timeline);
// verify all remaining instants are returned.
assertTrue(filteredTimeline.containsInstant(t1));
@@ -267,7 +268,7 @@ public class TestHoodieParquetInputFormat {
ensureFilesInCommit("Pulling 3 commits from 100, should get us the 1 files from 300 commit", files, "300", 1);
ensureFilesInCommit("Pulling 3 commits from 100, should get us the 1 files from 200 commit", files, "200", 1);
InputFormatTestUtil.setupIncremental(jobConf, "100", HoodieHiveUtil.MAX_COMMIT_ALL);
InputFormatTestUtil.setupIncremental(jobConf, "100", HoodieHiveUtils.MAX_COMMIT_ALL);
files = inputFormat.listStatus(jobConf);
assertEquals(5, files.length,
@@ -312,15 +313,15 @@ public class TestHoodieParquetInputFormat {
public void testGetIncrementalTableNames() throws IOException {
String[] expectedincrTables = {"db1.raw_trips", "db2.model_trips", "db3.model_trips"};
JobConf conf = new JobConf();
String incrementalMode1 = String.format(HoodieHiveUtil.HOODIE_CONSUME_MODE_PATTERN, expectedincrTables[0]);
conf.set(incrementalMode1, HoodieHiveUtil.INCREMENTAL_SCAN_MODE);
String incrementalMode2 = String.format(HoodieHiveUtil.HOODIE_CONSUME_MODE_PATTERN, expectedincrTables[1]);
conf.set(incrementalMode2,HoodieHiveUtil.INCREMENTAL_SCAN_MODE);
String incrementalMode3 = String.format(HoodieHiveUtil.HOODIE_CONSUME_MODE_PATTERN, "db3.model_trips");
conf.set(incrementalMode3, HoodieHiveUtil.INCREMENTAL_SCAN_MODE.toLowerCase());
String defaultmode = String.format(HoodieHiveUtil.HOODIE_CONSUME_MODE_PATTERN, "db3.first_trips");
conf.set(defaultmode, HoodieHiveUtil.DEFAULT_SCAN_MODE);
List<String> actualincrTables = HoodieHiveUtil.getIncrementalTableNames(Job.getInstance(conf));
String incrementalMode1 = String.format(HoodieHiveUtils.HOODIE_CONSUME_MODE_PATTERN, expectedincrTables[0]);
conf.set(incrementalMode1, HoodieHiveUtils.INCREMENTAL_SCAN_MODE);
String incrementalMode2 = String.format(HoodieHiveUtils.HOODIE_CONSUME_MODE_PATTERN, expectedincrTables[1]);
conf.set(incrementalMode2,HoodieHiveUtils.INCREMENTAL_SCAN_MODE);
String incrementalMode3 = String.format(HoodieHiveUtils.HOODIE_CONSUME_MODE_PATTERN, "db3.model_trips");
conf.set(incrementalMode3, HoodieHiveUtils.INCREMENTAL_SCAN_MODE.toLowerCase());
String defaultmode = String.format(HoodieHiveUtils.HOODIE_CONSUME_MODE_PATTERN, "db3.first_trips");
conf.set(defaultmode, HoodieHiveUtils.DEFAULT_SCAN_MODE);
List<String> actualincrTables = HoodieHiveUtils.getIncrementalTableNames(Job.getInstance(conf));
for (String expectedincrTable : expectedincrTables) {
assertTrue(actualincrTables.contains(expectedincrTable));
}

View File

@@ -31,6 +31,7 @@ import org.apache.hudi.common.testutils.SchemaTestUtil;
import org.apache.hudi.common.util.collection.Pair;
import org.apache.hudi.exception.HoodieException;
import org.apache.hudi.hadoop.testutils.InputFormatTestUtil;
import org.apache.hudi.hadoop.config.HoodieRealtimeConfig;
import org.apache.avro.Schema;
import org.apache.avro.Schema.Field;
@@ -80,7 +81,7 @@ public class TestHoodieRealtimeRecordReader {
@BeforeEach
public void setUp() {
jobConf = new JobConf();
jobConf.set(AbstractRealtimeRecordReader.MAX_DFS_STREAM_BUFFER_SIZE_PROP, String.valueOf(1024 * 1024));
jobConf.set(HoodieRealtimeConfig.MAX_DFS_STREAM_BUFFER_SIZE_PROP, String.valueOf(1024 * 1024));
hadoopConf = HoodieTestUtils.getDefaultHadoopConf();
fs = FSUtils.getFs(basePath.toString(), hadoopConf);
}

View File

@@ -28,7 +28,7 @@ import org.apache.hudi.common.table.log.block.HoodieCommandBlock;
import org.apache.hudi.common.table.log.block.HoodieLogBlock;
import org.apache.hudi.common.testutils.HoodieTestUtils;
import org.apache.hudi.common.testutils.SchemaTestUtil;
import org.apache.hudi.hadoop.HoodieHiveUtil;
import org.apache.hudi.hadoop.utils.HoodieHiveUtils;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericRecord;
@@ -104,15 +104,15 @@ public class InputFormatTestUtil {
public static void setupIncremental(JobConf jobConf, String startCommit, int numberOfCommitsToPull) {
String modePropertyName =
String.format(HoodieHiveUtil.HOODIE_CONSUME_MODE_PATTERN, HoodieTestUtils.RAW_TRIPS_TEST_NAME);
jobConf.set(modePropertyName, HoodieHiveUtil.INCREMENTAL_SCAN_MODE);
String.format(HoodieHiveUtils.HOODIE_CONSUME_MODE_PATTERN, HoodieTestUtils.RAW_TRIPS_TEST_NAME);
jobConf.set(modePropertyName, HoodieHiveUtils.INCREMENTAL_SCAN_MODE);
String startCommitTimestampName =
String.format(HoodieHiveUtil.HOODIE_START_COMMIT_PATTERN, HoodieTestUtils.RAW_TRIPS_TEST_NAME);
String.format(HoodieHiveUtils.HOODIE_START_COMMIT_PATTERN, HoodieTestUtils.RAW_TRIPS_TEST_NAME);
jobConf.set(startCommitTimestampName, startCommit);
String maxCommitPulls =
String.format(HoodieHiveUtil.HOODIE_MAX_COMMIT_PATTERN, HoodieTestUtils.RAW_TRIPS_TEST_NAME);
String.format(HoodieHiveUtils.HOODIE_MAX_COMMIT_PATTERN, HoodieTestUtils.RAW_TRIPS_TEST_NAME);
jobConf.setInt(maxCommitPulls, numberOfCommitsToPull);
}