[HUDI-2837] Add support for using database name in incremental query (#4083)
This commit is contained in:
@@ -22,6 +22,7 @@ import org.apache.hudi.avro.model.HoodieCompactionPlan;
|
||||
import org.apache.hudi.common.fs.FSUtils;
|
||||
import org.apache.hudi.common.model.HoodieCommitMetadata;
|
||||
import org.apache.hudi.common.model.HoodieFileFormat;
|
||||
import org.apache.hudi.common.model.HoodieTableType;
|
||||
import org.apache.hudi.common.model.HoodieWriteStat;
|
||||
import org.apache.hudi.common.table.HoodieTableMetaClient;
|
||||
import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
|
||||
@@ -232,9 +233,90 @@ public class TestHoodieHFileInputFormat {
|
||||
|
||||
InputFormatTestUtil.setupIncremental(jobConf, "100", 1);
|
||||
|
||||
HoodieTableMetaClient metaClient = HoodieTestUtils.init(HoodieTestUtils.getDefaultHadoopConf(), basePath.toString(),
|
||||
HoodieTableType.COPY_ON_WRITE, baseFileFormat);
|
||||
assertEquals(null, metaClient.getTableConfig().getDatabaseName(),
|
||||
"When hoodie.database.name is not set, it should default to null");
|
||||
|
||||
FileStatus[] files = inputFormat.listStatus(jobConf);
|
||||
assertEquals(0, files.length,
|
||||
"We should exclude commit 100 when returning incremental pull with start commit time as 100");
|
||||
|
||||
InputFormatTestUtil.setupIncremental(jobConf, "100", 1, true);
|
||||
|
||||
files = inputFormat.listStatus(jobConf);
|
||||
assertEquals(0, files.length,
|
||||
"We should exclude commit 100 when returning incremental pull with start commit time as 100");
|
||||
|
||||
metaClient = HoodieTestUtils.init(HoodieTestUtils.getDefaultHadoopConf(), basePath.toString(), HoodieTableType.COPY_ON_WRITE,
|
||||
baseFileFormat, HoodieTestUtils.HOODIE_DATABASE);
|
||||
assertEquals(HoodieTestUtils.HOODIE_DATABASE, metaClient.getTableConfig().getDatabaseName(),
|
||||
String.format("The hoodie.database.name should be %s ", HoodieTestUtils.HOODIE_DATABASE));
|
||||
|
||||
files = inputFormat.listStatus(jobConf);
|
||||
assertEquals(10, files.length,
|
||||
"When hoodie.incremental.use.database is true and hoodie.database.name is not null or empty"
|
||||
+ " and the incremental database name is not set, then the incremental query will not take effect");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testIncrementalWithDatabaseName() throws IOException {
|
||||
// initial commit
|
||||
File partitionDir = InputFormatTestUtil.prepareTable(basePath, baseFileFormat, 10, "100");
|
||||
createCommitFile(basePath, "100", "2016/05/01");
|
||||
|
||||
// Add the paths
|
||||
FileInputFormat.setInputPaths(jobConf, partitionDir.getPath());
|
||||
|
||||
InputFormatTestUtil.setupIncremental(jobConf, "100", 1, HoodieTestUtils.HOODIE_DATABASE, true);
|
||||
|
||||
HoodieTableMetaClient metaClient = HoodieTestUtils.init(HoodieTestUtils.getDefaultHadoopConf(), basePath.toString(),
|
||||
HoodieTableType.COPY_ON_WRITE, baseFileFormat);
|
||||
assertEquals(null, metaClient.getTableConfig().getDatabaseName(),
|
||||
"When hoodie.database.name is not set, it should default to null");
|
||||
|
||||
FileStatus[] files = inputFormat.listStatus(jobConf);
|
||||
assertEquals(10, files.length,
|
||||
"When hoodie.database.name is null, then the incremental query will not take effect");
|
||||
|
||||
metaClient = HoodieTestUtils.init(HoodieTestUtils.getDefaultHadoopConf(), basePath.toString(), HoodieTableType.COPY_ON_WRITE,
|
||||
baseFileFormat, "");
|
||||
assertEquals("", metaClient.getTableConfig().getDatabaseName(),
|
||||
"The hoodie.database.name should be empty");
|
||||
|
||||
files = inputFormat.listStatus(jobConf);
|
||||
assertEquals(10, files.length,
|
||||
"When hoodie.database.name is empty, then the incremental query will not take effect");
|
||||
|
||||
metaClient = HoodieTestUtils.init(HoodieTestUtils.getDefaultHadoopConf(), basePath.toString(), HoodieTableType.COPY_ON_WRITE,
|
||||
baseFileFormat, HoodieTestUtils.HOODIE_DATABASE);
|
||||
assertEquals(HoodieTestUtils.HOODIE_DATABASE, metaClient.getTableConfig().getDatabaseName(),
|
||||
String.format("The hoodie.database.name should be %s ", HoodieTestUtils.HOODIE_DATABASE));
|
||||
|
||||
files = inputFormat.listStatus(jobConf);
|
||||
assertEquals(0, files.length,
|
||||
"We should exclude commit 100 when returning incremental pull with start commit time as 100");
|
||||
|
||||
InputFormatTestUtil.setupIncremental(jobConf, "100", 1, HoodieTestUtils.HOODIE_DATABASE, false);
|
||||
|
||||
files = inputFormat.listStatus(jobConf);
|
||||
assertEquals(10, files.length,
|
||||
"When hoodie.incremental.use.database is false and the incremental database name is set,"
|
||||
+ "then the incremental query will not take effect");
|
||||
|
||||
// The configuration with and without database name exists together
|
||||
InputFormatTestUtil.setupIncremental(jobConf, "1", 1, true);
|
||||
|
||||
files = inputFormat.listStatus(jobConf);
|
||||
assertEquals(0, files.length,
|
||||
"When hoodie.incremental.use.database is true, "
|
||||
+ "We should exclude commit 100 because the returning incremental pull with start commit time is 100");
|
||||
|
||||
InputFormatTestUtil.setupIncremental(jobConf, "1", 1, false);
|
||||
files = inputFormat.listStatus(jobConf);
|
||||
assertEquals(10, files.length,
|
||||
"When hoodie.incremental.use.database is false, "
|
||||
+ "We should include commit 100 because the returning incremental pull with start commit time is 1");
|
||||
}
|
||||
|
||||
private void createCommitFile(java.nio.file.Path basePath, String commitNumber, String partitionPath)
|
||||
@@ -316,7 +398,7 @@ public class TestHoodieHFileInputFormat {
|
||||
ensureFilesInCommit("Pulling all commits from 100, should get us the 1 files from 200 commit", files, "200", 1);
|
||||
}
|
||||
|
||||
// TODO enable this after enabling predicate pushdown
|
||||
// TODO enable this after enabling predicate push down
|
||||
public void testPredicatePushDown() throws IOException {
|
||||
// initial commit
|
||||
Schema schema = getSchemaFromResource(TestHoodieHFileInputFormat.class, "/sample1.avsc");
|
||||
@@ -337,7 +419,7 @@ public class TestHoodieHFileInputFormat {
|
||||
// check whether we have 2 records at this point
|
||||
ensureRecordsInCommit("We need to have 2 records that was modified at commit " + commit2 + " and no more", commit2,
|
||||
2, 2);
|
||||
// Make sure we have the 10 records if we roll back the stattime
|
||||
// Make sure we have the 10 records if we roll back the start time
|
||||
InputFormatTestUtil.setupIncremental(jobConf, "0", 2);
|
||||
ensureRecordsInCommit("We need to have 8 records that was modified at commit " + commit1 + " and no more", commit1,
|
||||
8, 10);
|
||||
@@ -347,19 +429,19 @@ public class TestHoodieHFileInputFormat {
|
||||
|
||||
@Test
|
||||
public void testGetIncrementalTableNames() throws IOException {
|
||||
String[] expectedincrTables = {"db1.raw_trips", "db2.model_trips", "db3.model_trips"};
|
||||
String[] expectedIncrTables = {"db1.raw_trips", "db2.model_trips", "db3.model_trips"};
|
||||
JobConf conf = new JobConf();
|
||||
String incrementalMode1 = String.format(HoodieHiveUtils.HOODIE_CONSUME_MODE_PATTERN, expectedincrTables[0]);
|
||||
String incrementalMode1 = String.format(HoodieHiveUtils.HOODIE_CONSUME_MODE_PATTERN, expectedIncrTables[0]);
|
||||
conf.set(incrementalMode1, HoodieHiveUtils.INCREMENTAL_SCAN_MODE);
|
||||
String incrementalMode2 = String.format(HoodieHiveUtils.HOODIE_CONSUME_MODE_PATTERN, expectedincrTables[1]);
|
||||
String incrementalMode2 = String.format(HoodieHiveUtils.HOODIE_CONSUME_MODE_PATTERN, expectedIncrTables[1]);
|
||||
conf.set(incrementalMode2,HoodieHiveUtils.INCREMENTAL_SCAN_MODE);
|
||||
String incrementalMode3 = String.format(HoodieHiveUtils.HOODIE_CONSUME_MODE_PATTERN, "db3.model_trips");
|
||||
conf.set(incrementalMode3, HoodieHiveUtils.INCREMENTAL_SCAN_MODE.toLowerCase());
|
||||
String defaultmode = String.format(HoodieHiveUtils.HOODIE_CONSUME_MODE_PATTERN, "db3.first_trips");
|
||||
conf.set(defaultmode, HoodieHiveUtils.DEFAULT_SCAN_MODE);
|
||||
List<String> actualincrTables = HoodieHiveUtils.getIncrementalTableNames(Job.getInstance(conf));
|
||||
for (String expectedincrTable : expectedincrTables) {
|
||||
assertTrue(actualincrTables.contains(expectedincrTable));
|
||||
String defaultMode = String.format(HoodieHiveUtils.HOODIE_CONSUME_MODE_PATTERN, "db3.first_trips");
|
||||
conf.set(defaultMode, HoodieHiveUtils.DEFAULT_SCAN_MODE);
|
||||
List<String> actualIncrTables = HoodieHiveUtils.getIncrementalTableNames(Job.getInstance(conf));
|
||||
for (String expectedIncrTable : expectedIncrTables) {
|
||||
assertTrue(actualIncrTables.contains(expectedIncrTable));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -22,6 +22,7 @@ import org.apache.hudi.avro.model.HoodieCompactionPlan;
|
||||
import org.apache.hudi.common.fs.FSUtils;
|
||||
import org.apache.hudi.common.model.HoodieCommitMetadata;
|
||||
import org.apache.hudi.common.model.HoodieFileFormat;
|
||||
import org.apache.hudi.common.model.HoodieTableType;
|
||||
import org.apache.hudi.common.model.HoodieWriteStat;
|
||||
import org.apache.hudi.common.table.HoodieTableMetaClient;
|
||||
import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
|
||||
@@ -286,9 +287,90 @@ public class TestHoodieParquetInputFormat {
|
||||
|
||||
InputFormatTestUtil.setupIncremental(jobConf, "100", 1);
|
||||
|
||||
HoodieTableMetaClient metaClient = HoodieTestUtils.init(HoodieTestUtils.getDefaultHadoopConf(), basePath.toString(),
|
||||
HoodieTableType.COPY_ON_WRITE, baseFileFormat);
|
||||
assertEquals(null, metaClient.getTableConfig().getDatabaseName(),
|
||||
"When hoodie.database.name is not set, it should default to null");
|
||||
|
||||
FileStatus[] files = inputFormat.listStatus(jobConf);
|
||||
assertEquals(0, files.length,
|
||||
"We should exclude commit 100 when returning incremental pull with start commit time as 100");
|
||||
|
||||
InputFormatTestUtil.setupIncremental(jobConf, "100", 1, true);
|
||||
|
||||
files = inputFormat.listStatus(jobConf);
|
||||
assertEquals(0, files.length,
|
||||
"We should exclude commit 100 when returning incremental pull with start commit time as 100");
|
||||
|
||||
metaClient = HoodieTestUtils.init(HoodieTestUtils.getDefaultHadoopConf(), basePath.toString(), HoodieTableType.COPY_ON_WRITE,
|
||||
baseFileFormat, HoodieTestUtils.HOODIE_DATABASE);
|
||||
assertEquals(HoodieTestUtils.HOODIE_DATABASE, metaClient.getTableConfig().getDatabaseName(),
|
||||
String.format("The hoodie.database.name should be %s ", HoodieTestUtils.HOODIE_DATABASE));
|
||||
|
||||
files = inputFormat.listStatus(jobConf);
|
||||
assertEquals(10, files.length,
|
||||
"When hoodie.incremental.use.database is true and hoodie.database.name is not null or empty"
|
||||
+ " and the incremental database name is not set, then the incremental query will not take effect");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testIncrementalWithDatabaseName() throws IOException {
|
||||
// initial commit
|
||||
File partitionDir = InputFormatTestUtil.prepareTable(basePath, baseFileFormat, 10, "100");
|
||||
createCommitFile(basePath, "100", "2016/05/01");
|
||||
|
||||
// Add the paths
|
||||
FileInputFormat.setInputPaths(jobConf, partitionDir.getPath());
|
||||
|
||||
InputFormatTestUtil.setupIncremental(jobConf, "100", 1, HoodieTestUtils.HOODIE_DATABASE, true);
|
||||
|
||||
HoodieTableMetaClient metaClient = HoodieTestUtils.init(HoodieTestUtils.getDefaultHadoopConf(), basePath.toString(),
|
||||
HoodieTableType.COPY_ON_WRITE, baseFileFormat);
|
||||
assertEquals(null, metaClient.getTableConfig().getDatabaseName(),
|
||||
"When hoodie.database.name is not set, it should default to null");
|
||||
|
||||
FileStatus[] files = inputFormat.listStatus(jobConf);
|
||||
assertEquals(10, files.length,
|
||||
"When hoodie.database.name is null, then the incremental query will not take effect");
|
||||
|
||||
metaClient = HoodieTestUtils.init(HoodieTestUtils.getDefaultHadoopConf(), basePath.toString(), HoodieTableType.COPY_ON_WRITE,
|
||||
baseFileFormat, "");
|
||||
assertEquals("", metaClient.getTableConfig().getDatabaseName(),
|
||||
"The hoodie.database.name should be empty");
|
||||
|
||||
files = inputFormat.listStatus(jobConf);
|
||||
assertEquals(10, files.length,
|
||||
"When hoodie.database.name is empty, then the incremental query will not take effect");
|
||||
|
||||
metaClient = HoodieTestUtils.init(HoodieTestUtils.getDefaultHadoopConf(), basePath.toString(), HoodieTableType.COPY_ON_WRITE,
|
||||
baseFileFormat, HoodieTestUtils.HOODIE_DATABASE);
|
||||
assertEquals(HoodieTestUtils.HOODIE_DATABASE, metaClient.getTableConfig().getDatabaseName(),
|
||||
String.format("The hoodie.database.name should be %s ", HoodieTestUtils.HOODIE_DATABASE));
|
||||
|
||||
files = inputFormat.listStatus(jobConf);
|
||||
assertEquals(0, files.length,
|
||||
"We should exclude commit 100 when returning incremental pull with start commit time as 100");
|
||||
|
||||
InputFormatTestUtil.setupIncremental(jobConf, "100", 1, HoodieTestUtils.HOODIE_DATABASE, false);
|
||||
|
||||
files = inputFormat.listStatus(jobConf);
|
||||
assertEquals(10, files.length,
|
||||
"When hoodie.incremental.use.database is false and the incremental database name is set, "
|
||||
+ "then the incremental query will not take effect");
|
||||
|
||||
// The configuration with and without database name exists together
|
||||
InputFormatTestUtil.setupIncremental(jobConf, "1", 1, true);
|
||||
|
||||
files = inputFormat.listStatus(jobConf);
|
||||
assertEquals(0, files.length,
|
||||
"When hoodie.incremental.use.database is true, "
|
||||
+ "We should exclude commit 100 because the returning incremental pull with start commit time is 100");
|
||||
|
||||
InputFormatTestUtil.setupIncremental(jobConf, "1", 1, false);
|
||||
files = inputFormat.listStatus(jobConf);
|
||||
assertEquals(10, files.length,
|
||||
"When hoodie.incremental.use.database is false, "
|
||||
+ "We should include commit 100 because the returning incremental pull with start commit time is 1");
|
||||
}
|
||||
|
||||
@Test
|
||||
@@ -429,7 +511,7 @@ public class TestHoodieParquetInputFormat {
|
||||
ensureFilesInCommit("Pulling all commits from 100, should get us the 1 files from 200 commit", files, "200", 1);
|
||||
}
|
||||
|
||||
@Disabled("enable this after enabling predicate pushdown")
|
||||
@Disabled("enable this after enabling predicate push down")
|
||||
@Test
|
||||
public void testPredicatePushDown() throws IOException {
|
||||
// initial commit
|
||||
@@ -451,7 +533,7 @@ public class TestHoodieParquetInputFormat {
|
||||
// check whether we have 2 records at this point
|
||||
ensureRecordsInCommit("We need to have 2 records that was modified at commit " + commit2 + " and no more", commit2,
|
||||
2, 2);
|
||||
// Make sure we have the 10 records if we roll back the stattime
|
||||
// Make sure we have the 10 records if we roll back the start time
|
||||
InputFormatTestUtil.setupIncremental(jobConf, "0", 2);
|
||||
ensureRecordsInCommit("We need to have 8 records that was modified at commit " + commit1 + " and no more", commit1,
|
||||
8, 10);
|
||||
@@ -461,19 +543,19 @@ public class TestHoodieParquetInputFormat {
|
||||
|
||||
@Test
|
||||
public void testGetIncrementalTableNames() throws IOException {
|
||||
String[] expectedincrTables = {"db1.raw_trips", "db2.model_trips", "db3.model_trips"};
|
||||
String[] expectedIncrTables = {"db1.raw_trips", "db2.model_trips", "db3.model_trips"};
|
||||
JobConf conf = new JobConf();
|
||||
String incrementalMode1 = String.format(HoodieHiveUtils.HOODIE_CONSUME_MODE_PATTERN, expectedincrTables[0]);
|
||||
String incrementalMode1 = String.format(HoodieHiveUtils.HOODIE_CONSUME_MODE_PATTERN, expectedIncrTables[0]);
|
||||
conf.set(incrementalMode1, HoodieHiveUtils.INCREMENTAL_SCAN_MODE);
|
||||
String incrementalMode2 = String.format(HoodieHiveUtils.HOODIE_CONSUME_MODE_PATTERN, expectedincrTables[1]);
|
||||
String incrementalMode2 = String.format(HoodieHiveUtils.HOODIE_CONSUME_MODE_PATTERN, expectedIncrTables[1]);
|
||||
conf.set(incrementalMode2, HoodieHiveUtils.INCREMENTAL_SCAN_MODE);
|
||||
String incrementalMode3 = String.format(HoodieHiveUtils.HOODIE_CONSUME_MODE_PATTERN, "db3.model_trips");
|
||||
conf.set(incrementalMode3, HoodieHiveUtils.INCREMENTAL_SCAN_MODE.toLowerCase());
|
||||
String defaultmode = String.format(HoodieHiveUtils.HOODIE_CONSUME_MODE_PATTERN, "db3.first_trips");
|
||||
conf.set(defaultmode, HoodieHiveUtils.DEFAULT_SCAN_MODE);
|
||||
List<String> actualincrTables = HoodieHiveUtils.getIncrementalTableNames(Job.getInstance(conf));
|
||||
for (String expectedincrTable : expectedincrTables) {
|
||||
assertTrue(actualincrTables.contains(expectedincrTable));
|
||||
String defaultMode = String.format(HoodieHiveUtils.HOODIE_CONSUME_MODE_PATTERN, "db3.first_trips");
|
||||
conf.set(defaultMode, HoodieHiveUtils.DEFAULT_SCAN_MODE);
|
||||
List<String> actualIncrTables = HoodieHiveUtils.getIncrementalTableNames(Job.getInstance(conf));
|
||||
for (String expectedIncrTable : expectedIncrTables) {
|
||||
assertTrue(actualIncrTables.contains(expectedIncrTable));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -130,6 +130,10 @@ public class InputFormatTestUtil {
|
||||
}
|
||||
|
||||
public static void setupIncremental(JobConf jobConf, String startCommit, int numberOfCommitsToPull) {
|
||||
setupIncremental(jobConf, startCommit, numberOfCommitsToPull, false);
|
||||
}
|
||||
|
||||
public static void setupIncremental(JobConf jobConf, String startCommit, int numberOfCommitsToPull, boolean isIncrementalUseDatabase) {
|
||||
String modePropertyName =
|
||||
String.format(HoodieHiveUtils.HOODIE_CONSUME_MODE_PATTERN, HoodieTestUtils.RAW_TRIPS_TEST_NAME);
|
||||
jobConf.set(modePropertyName, HoodieHiveUtils.INCREMENTAL_SCAN_MODE);
|
||||
@@ -141,8 +145,26 @@ public class InputFormatTestUtil {
|
||||
String maxCommitPulls =
|
||||
String.format(HoodieHiveUtils.HOODIE_MAX_COMMIT_PATTERN, HoodieTestUtils.RAW_TRIPS_TEST_NAME);
|
||||
jobConf.setInt(maxCommitPulls, numberOfCommitsToPull);
|
||||
|
||||
jobConf.setBoolean(HoodieHiveUtils.HOODIE_INCREMENTAL_USE_DATABASE, isIncrementalUseDatabase);
|
||||
}
|
||||
|
||||
|
||||
public static void setupIncremental(JobConf jobConf, String startCommit, int numberOfCommitsToPull, String databaseName, boolean isIncrementalUseDatabase) {
|
||||
String modePropertyName =
|
||||
String.format(HoodieHiveUtils.HOODIE_CONSUME_MODE_PATTERN, databaseName + "." + HoodieTestUtils.RAW_TRIPS_TEST_NAME);
|
||||
jobConf.set(modePropertyName, HoodieHiveUtils.INCREMENTAL_SCAN_MODE);
|
||||
|
||||
String startCommitTimestampName =
|
||||
String.format(HoodieHiveUtils.HOODIE_START_COMMIT_PATTERN, databaseName + "." + HoodieTestUtils.RAW_TRIPS_TEST_NAME);
|
||||
jobConf.set(startCommitTimestampName, startCommit);
|
||||
|
||||
String maxCommitPulls =
|
||||
String.format(HoodieHiveUtils.HOODIE_MAX_COMMIT_PATTERN, databaseName + "." + HoodieTestUtils.RAW_TRIPS_TEST_NAME);
|
||||
jobConf.setInt(maxCommitPulls, numberOfCommitsToPull);
|
||||
|
||||
jobConf.setBoolean(HoodieHiveUtils.HOODIE_INCREMENTAL_USE_DATABASE, isIncrementalUseDatabase);
|
||||
}
|
||||
|
||||
public static void setupSnapshotIncludePendingCommits(JobConf jobConf, String instantTime) {
|
||||
setupSnapshotScanMode(jobConf, true);
|
||||
String validateTimestampName =
|
||||
|
||||
Reference in New Issue
Block a user