[HUDI-2837] Add support for using database name in incremental query (#4083)
@@ -121,7 +121,7 @@ public abstract class HoodieFileInputFormatBase extends FileInputFormat<NullWrit
        continue;
      }
      List<Path> inputPaths = inputPathHandler.getGroupedIncrementalPaths().get(metaClient);
      List<FileStatus> result = listStatusForIncrementalMode(job, metaClient, inputPaths);
      List<FileStatus> result = listStatusForIncrementalMode(job, metaClient, inputPaths, table);
      if (result != null) {
        returns.addAll(result);
      }
@@ -229,14 +229,14 @@ public abstract class HoodieFileInputFormatBase extends FileInputFormat<NullWrit
   * partitions and then filtering based on the commits of interest, this logic first extracts the
   * partitions touched by the desired commits and then lists only those partitions.
   */
  protected List<FileStatus> listStatusForIncrementalMode(JobConf job, HoodieTableMetaClient tableMetaClient, List<Path> inputPaths) throws IOException {
    String tableName = tableMetaClient.getTableConfig().getTableName();
  protected List<FileStatus> listStatusForIncrementalMode(JobConf job, HoodieTableMetaClient tableMetaClient,
                                                          List<Path> inputPaths, String incrementalTable) throws IOException {
    Job jobContext = Job.getInstance(job);
    Option<HoodieTimeline> timeline = HoodieInputFormatUtils.getFilteredCommitsTimeline(jobContext, tableMetaClient);
    if (!timeline.isPresent()) {
      return null;
    }
    Option<List<HoodieInstant>> commitsToCheck = HoodieInputFormatUtils.getCommitsForIncrementalQuery(jobContext, tableName, timeline.get());
    Option<List<HoodieInstant>> commitsToCheck = HoodieInputFormatUtils.getCommitsForIncrementalQuery(jobContext, incrementalTable, timeline.get());
    if (!commitsToCheck.isPresent()) {
      return null;
    }
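The signature change above threads the resolved incremental table identifier (the bare table name, or database.table once database qualification is enabled) from the input path handler down to the timeline filtering, instead of re-reading only the table name from the table config. As a rough illustration of what that identifier is used for, the per-table consume options are keyed by it via the pattern constants in HoodieHiveUtils shown later in this diff. This is a hedged sketch, not part of the patch: the class name and the hard-coded identifier are illustrative only.

import org.apache.hudi.hadoop.utils.HoodieHiveUtils;

public class IncrementalKeySketch {
  public static void main(String[] args) {
    // "db1.raw_trips" mirrors the identifiers used in testGetIncrementalTableNames below;
    // with database qualification disabled this would simply be "raw_trips".
    String identifier = "db1.raw_trips";
    // The three per-table consume options are keyed by that identifier:
    System.out.println(String.format(HoodieHiveUtils.HOODIE_CONSUME_MODE_PATTERN, identifier));  // hoodie.db1.raw_trips.consume.mode
    System.out.println(String.format(HoodieHiveUtils.HOODIE_START_COMMIT_PATTERN, identifier));  // hoodie.db1.raw_trips.consume.start.timestamp
    System.out.println(String.format(HoodieHiveUtils.HOODIE_MAX_COMMIT_PATTERN, identifier));    // hoodie.db1.raw_trips.consume.max.commits
  }
}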
@@ -19,11 +19,13 @@
package org.apache.hudi.hadoop;

import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.util.StringUtils;
import org.apache.hudi.exception.InvalidTableException;
import org.apache.hudi.exception.TableNotFoundException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hudi.hadoop.utils.HoodieHiveUtils;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;

@@ -53,11 +55,12 @@ public class InputPathHandler {
  public static final Logger LOG = LogManager.getLogger(InputPathHandler.class);

  private final Configuration conf;
  // tablename to metadata mapping for all Hoodie tables(both incremental & snapshot)
  // tableName to metadata mapping for all Hoodie tables(both incremental & snapshot)
  private final Map<String, HoodieTableMetaClient> tableMetaClientMap;
  private final Map<HoodieTableMetaClient, List<Path>> groupedIncrementalPaths;
  private final List<Path> snapshotPaths;
  private final List<Path> nonHoodieInputPaths;
  private boolean isIncrementalUseDatabase;

  public InputPathHandler(Configuration conf, Path[] inputPaths, List<String> incrementalTables) throws IOException {
    this.conf = conf;
@@ -65,13 +68,14 @@ public class InputPathHandler {
    snapshotPaths = new ArrayList<>();
    nonHoodieInputPaths = new ArrayList<>();
    groupedIncrementalPaths = new HashMap<>();
    this.isIncrementalUseDatabase = HoodieHiveUtils.isIncrementalUseDatabase(conf);
    parseInputPaths(inputPaths, incrementalTables);
  }

  /**
   * Takes in the original InputPaths and classifies each of them into incremental, snapshot and
   * non-hoodie InputPaths. The logic is as follows:
   * 1. Check if an inputPath starts with the same basepath as any of the metadata basepaths we know
   * 1. Check if an inputPath starts with the same basePath as any of the metadata basePaths we know
   * 1a. If yes, this belongs to a Hoodie table that we already know about. Simply classify this
   *     as incremental or snapshot - We can get the table name of this inputPath from the
   *     metadata. Then based on the list of incrementalTables, we can classify this inputPath.
@@ -95,19 +99,17 @@ public class InputPathHandler {
        // We already know the base path for this inputPath.
        basePathKnown = true;
        // Check if this is for a snapshot query
        String tableName = metaClient.getTableConfig().getTableName();
        tagAsIncrementalOrSnapshot(inputPath, tableName, metaClient, incrementalTables);
        tagAsIncrementalOrSnapshot(inputPath, metaClient, incrementalTables);
        break;
      }
    }
    if (!basePathKnown) {
      // This path is for a table that we dont know about yet.
      // This path is for a table that we don't know about yet.
      HoodieTableMetaClient metaClient;
      try {
        metaClient = getTableMetaClientForBasePath(inputPath.getFileSystem(conf), inputPath);
        String tableName = metaClient.getTableConfig().getTableName();
        tableMetaClientMap.put(tableName, metaClient);
        tagAsIncrementalOrSnapshot(inputPath, tableName, metaClient, incrementalTables);
        tableMetaClientMap.put(getIncrementalTable(metaClient), metaClient);
        tagAsIncrementalOrSnapshot(inputPath, metaClient, incrementalTables);
      } catch (TableNotFoundException | InvalidTableException e) {
        // This is a non Hoodie inputPath
        LOG.info("Handling a non-hoodie path " + inputPath);
@@ -117,9 +119,8 @@ public class InputPathHandler {
    }
  }

  private void tagAsIncrementalOrSnapshot(Path inputPath, String tableName,
      HoodieTableMetaClient metaClient, List<String> incrementalTables) {
    if (!incrementalTables.contains(tableName)) {
  private void tagAsIncrementalOrSnapshot(Path inputPath, HoodieTableMetaClient metaClient, List<String> incrementalTables) {
    if (!incrementalTables.contains(getIncrementalTable(metaClient))) {
      snapshotPaths.add(inputPath);
    } else {
      // Group incremental Paths belonging to same table.
@@ -145,4 +146,11 @@ public class InputPathHandler {
  public List<Path> getNonHoodieInputPaths() {
    return nonHoodieInputPaths;
  }

  private String getIncrementalTable(HoodieTableMetaClient metaClient) {
    String databaseName = metaClient.getTableConfig().getDatabaseName();
    String tableName = metaClient.getTableConfig().getTableName();
    return isIncrementalUseDatabase && !StringUtils.isNullOrEmpty(databaseName)
        ? databaseName + "." + tableName : tableName;
  }
}
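The key resolution added here is small but central: when hoodie.incremental.use.database is enabled and the table config carries a non-empty database name, the handler keys its metaclient map and its incremental/snapshot classification by database.table rather than the bare table name. Below is a standalone sketch of that rule; the class and method names are illustrative, and only the resolution expression mirrors getIncrementalTable above.

import org.apache.hudi.common.util.StringUtils;

public class IncrementalTableKeySketch {
  // Mirrors getIncrementalTable(...) above: prefer "db.table" only when the flag is on
  // and the table actually has a database name configured.
  static String resolve(boolean isIncrementalUseDatabase, String databaseName, String tableName) {
    return isIncrementalUseDatabase && !StringUtils.isNullOrEmpty(databaseName)
        ? databaseName + "." + tableName : tableName;
  }

  public static void main(String[] args) {
    System.out.println(resolve(true, "db1", "raw_trips"));   // db1.raw_trips
    System.out.println(resolve(true, "", "raw_trips"));      // raw_trips (empty database name)
    System.out.println(resolve(false, "db1", "raw_trips"));  // raw_trips (flag disabled)
  }
}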
@@ -113,9 +113,8 @@ public class HoodieParquetRealtimeInputFormat extends HoodieParquetInputFormat i
   */
  @Override
  protected List<FileStatus> listStatusForIncrementalMode(
      JobConf job, HoodieTableMetaClient tableMetaClient, List<Path> inputPaths) throws IOException {
      JobConf job, HoodieTableMetaClient tableMetaClient, List<Path> inputPaths, String incrementalTable) throws IOException {
    List<FileStatus> result = new ArrayList<>();
    String tableName = tableMetaClient.getTableConfig().getTableName();
    Job jobContext = Job.getInstance(job);

    // step1
@@ -123,7 +122,7 @@ public class HoodieParquetRealtimeInputFormat extends HoodieParquetInputFormat i
    if (!timeline.isPresent()) {
      return result;
    }
    HoodieTimeline commitsTimelineToReturn = HoodieInputFormatUtils.getHoodieTimelineForIncrementalQuery(jobContext, tableName, timeline.get());
    HoodieTimeline commitsTimelineToReturn = HoodieInputFormatUtils.getHoodieTimelineForIncrementalQuery(jobContext, incrementalTable, timeline.get());
    Option<List<HoodieInstant>> commitsToCheck = Option.of(commitsTimelineToReturn.getInstants().collect(Collectors.toList()));
    if (!commitsToCheck.isPresent()) {
      return result;
@@ -18,6 +18,7 @@
package org.apache.hudi.hadoop.utils;

import org.apache.hadoop.conf.Configuration;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
import org.apache.hudi.common.util.CollectionUtils;
@@ -43,6 +44,7 @@ public class HoodieHiveUtils {

  public static final Logger LOG = LogManager.getLogger(HoodieHiveUtils.class);

  public static final String HOODIE_INCREMENTAL_USE_DATABASE = "hoodie.incremental.use.database";
  public static final String HOODIE_CONSUME_MODE_PATTERN = "hoodie.%s.consume.mode";
  public static final String HOODIE_START_COMMIT_PATTERN = "hoodie.%s.consume.start.timestamp";
  public static final String HOODIE_MAX_COMMIT_PATTERN = "hoodie.%s.consume.max.commits";
@@ -178,4 +180,8 @@ public class HoodieHiveUtils {
    }
    return timeline.findInstantsBeforeOrEquals(maxCommit);
  }

  public static boolean isIncrementalUseDatabase(Configuration conf) {
    return conf.getBoolean(HOODIE_INCREMENTAL_USE_DATABASE, false);
  }
}
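isIncrementalUseDatabase is just a boolean read of the new hoodie.incremental.use.database property with a false default, so existing jobs keep the old, unqualified behavior unless they opt in. A hedged sketch of a consumer opting in follows; the property constants and INCREMENTAL_SCAN_MODE come from HoodieHiveUtils as shown in this diff, while the class name, the db1.raw_trips identifier, and the commit values are placeholders.

import org.apache.hadoop.mapred.JobConf;
import org.apache.hudi.hadoop.utils.HoodieHiveUtils;

public class EnableDatabaseQualifiedIncremental {
  public static void main(String[] args) {
    JobConf jobConf = new JobConf();
    // Opt in to database-qualified incremental queries (defaults to false).
    jobConf.setBoolean(HoodieHiveUtils.HOODIE_INCREMENTAL_USE_DATABASE, true);
    // Per-table consume options are then keyed by "database.table".
    jobConf.set(String.format(HoodieHiveUtils.HOODIE_CONSUME_MODE_PATTERN, "db1.raw_trips"),
        HoodieHiveUtils.INCREMENTAL_SCAN_MODE);
    jobConf.set(String.format(HoodieHiveUtils.HOODIE_START_COMMIT_PATTERN, "db1.raw_trips"), "100");
    jobConf.setInt(String.format(HoodieHiveUtils.HOODIE_MAX_COMMIT_PATTERN, "db1.raw_trips"), 1);

    System.out.println(HoodieHiveUtils.isIncrementalUseDatabase(jobConf)); // true
  }
}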
@@ -22,6 +22,7 @@ import org.apache.hudi.avro.model.HoodieCompactionPlan;
import org.apache.hudi.common.fs.FSUtils;
import org.apache.hudi.common.model.HoodieCommitMetadata;
import org.apache.hudi.common.model.HoodieFileFormat;
import org.apache.hudi.common.model.HoodieTableType;
import org.apache.hudi.common.model.HoodieWriteStat;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
@@ -232,9 +233,90 @@ public class TestHoodieHFileInputFormat {

    InputFormatTestUtil.setupIncremental(jobConf, "100", 1);

    HoodieTableMetaClient metaClient = HoodieTestUtils.init(HoodieTestUtils.getDefaultHadoopConf(), basePath.toString(),
        HoodieTableType.COPY_ON_WRITE, baseFileFormat);
    assertEquals(null, metaClient.getTableConfig().getDatabaseName(),
        "When hoodie.database.name is not set, it should default to null");

    FileStatus[] files = inputFormat.listStatus(jobConf);
    assertEquals(0, files.length,
        "We should exclude commit 100 when returning incremental pull with start commit time as 100");

    InputFormatTestUtil.setupIncremental(jobConf, "100", 1, true);

    files = inputFormat.listStatus(jobConf);
    assertEquals(0, files.length,
        "We should exclude commit 100 when returning incremental pull with start commit time as 100");

    metaClient = HoodieTestUtils.init(HoodieTestUtils.getDefaultHadoopConf(), basePath.toString(), HoodieTableType.COPY_ON_WRITE,
        baseFileFormat, HoodieTestUtils.HOODIE_DATABASE);
    assertEquals(HoodieTestUtils.HOODIE_DATABASE, metaClient.getTableConfig().getDatabaseName(),
        String.format("The hoodie.database.name should be %s ", HoodieTestUtils.HOODIE_DATABASE));

    files = inputFormat.listStatus(jobConf);
    assertEquals(10, files.length,
        "When hoodie.incremental.use.database is true and hoodie.database.name is not null or empty"
            + " and the incremental database name is not set, then the incremental query will not take effect");
  }

  @Test
  public void testIncrementalWithDatabaseName() throws IOException {
    // initial commit
    File partitionDir = InputFormatTestUtil.prepareTable(basePath, baseFileFormat, 10, "100");
    createCommitFile(basePath, "100", "2016/05/01");

    // Add the paths
    FileInputFormat.setInputPaths(jobConf, partitionDir.getPath());

    InputFormatTestUtil.setupIncremental(jobConf, "100", 1, HoodieTestUtils.HOODIE_DATABASE, true);

    HoodieTableMetaClient metaClient = HoodieTestUtils.init(HoodieTestUtils.getDefaultHadoopConf(), basePath.toString(),
        HoodieTableType.COPY_ON_WRITE, baseFileFormat);
    assertEquals(null, metaClient.getTableConfig().getDatabaseName(),
        "When hoodie.database.name is not set, it should default to null");

    FileStatus[] files = inputFormat.listStatus(jobConf);
    assertEquals(10, files.length,
        "When hoodie.database.name is null, then the incremental query will not take effect");

    metaClient = HoodieTestUtils.init(HoodieTestUtils.getDefaultHadoopConf(), basePath.toString(), HoodieTableType.COPY_ON_WRITE,
        baseFileFormat, "");
    assertEquals("", metaClient.getTableConfig().getDatabaseName(),
        "The hoodie.database.name should be empty");

    files = inputFormat.listStatus(jobConf);
    assertEquals(10, files.length,
        "When hoodie.database.name is empty, then the incremental query will not take effect");

    metaClient = HoodieTestUtils.init(HoodieTestUtils.getDefaultHadoopConf(), basePath.toString(), HoodieTableType.COPY_ON_WRITE,
        baseFileFormat, HoodieTestUtils.HOODIE_DATABASE);
    assertEquals(HoodieTestUtils.HOODIE_DATABASE, metaClient.getTableConfig().getDatabaseName(),
        String.format("The hoodie.database.name should be %s ", HoodieTestUtils.HOODIE_DATABASE));

    files = inputFormat.listStatus(jobConf);
    assertEquals(0, files.length,
        "We should exclude commit 100 when returning incremental pull with start commit time as 100");

    InputFormatTestUtil.setupIncremental(jobConf, "100", 1, HoodieTestUtils.HOODIE_DATABASE, false);

    files = inputFormat.listStatus(jobConf);
    assertEquals(10, files.length,
        "When hoodie.incremental.use.database is false and the incremental database name is set,"
            + "then the incremental query will not take effect");

    // The configuration with and without database name exists together
    InputFormatTestUtil.setupIncremental(jobConf, "1", 1, true);

    files = inputFormat.listStatus(jobConf);
    assertEquals(0, files.length,
        "When hoodie.incremental.use.database is true, "
            + "We should exclude commit 100 because the returning incremental pull with start commit time is 100");

    InputFormatTestUtil.setupIncremental(jobConf, "1", 1, false);
    files = inputFormat.listStatus(jobConf);
    assertEquals(10, files.length,
        "When hoodie.incremental.use.database is false, "
            + "We should include commit 100 because the returning incremental pull with start commit time is 1");
  }

  private void createCommitFile(java.nio.file.Path basePath, String commitNumber, String partitionPath)
@@ -316,7 +398,7 @@ public class TestHoodieHFileInputFormat {
    ensureFilesInCommit("Pulling all commits from 100, should get us the 1 files from 200 commit", files, "200", 1);
  }

  // TODO enable this after enabling predicate pushdown
  // TODO enable this after enabling predicate push down
  public void testPredicatePushDown() throws IOException {
    // initial commit
    Schema schema = getSchemaFromResource(TestHoodieHFileInputFormat.class, "/sample1.avsc");
@@ -337,7 +419,7 @@ public class TestHoodieHFileInputFormat {
    // check whether we have 2 records at this point
    ensureRecordsInCommit("We need to have 2 records that was modified at commit " + commit2 + " and no more", commit2,
        2, 2);
    // Make sure we have the 10 records if we roll back the stattime
    // Make sure we have the 10 records if we roll back the start time
    InputFormatTestUtil.setupIncremental(jobConf, "0", 2);
    ensureRecordsInCommit("We need to have 8 records that was modified at commit " + commit1 + " and no more", commit1,
        8, 10);
@@ -347,19 +429,19 @@ public class TestHoodieHFileInputFormat {

  @Test
  public void testGetIncrementalTableNames() throws IOException {
    String[] expectedincrTables = {"db1.raw_trips", "db2.model_trips", "db3.model_trips"};
    String[] expectedIncrTables = {"db1.raw_trips", "db2.model_trips", "db3.model_trips"};
    JobConf conf = new JobConf();
    String incrementalMode1 = String.format(HoodieHiveUtils.HOODIE_CONSUME_MODE_PATTERN, expectedincrTables[0]);
    String incrementalMode1 = String.format(HoodieHiveUtils.HOODIE_CONSUME_MODE_PATTERN, expectedIncrTables[0]);
    conf.set(incrementalMode1, HoodieHiveUtils.INCREMENTAL_SCAN_MODE);
    String incrementalMode2 = String.format(HoodieHiveUtils.HOODIE_CONSUME_MODE_PATTERN, expectedincrTables[1]);
    String incrementalMode2 = String.format(HoodieHiveUtils.HOODIE_CONSUME_MODE_PATTERN, expectedIncrTables[1]);
    conf.set(incrementalMode2,HoodieHiveUtils.INCREMENTAL_SCAN_MODE);
    String incrementalMode3 = String.format(HoodieHiveUtils.HOODIE_CONSUME_MODE_PATTERN, "db3.model_trips");
    conf.set(incrementalMode3, HoodieHiveUtils.INCREMENTAL_SCAN_MODE.toLowerCase());
    String defaultmode = String.format(HoodieHiveUtils.HOODIE_CONSUME_MODE_PATTERN, "db3.first_trips");
    conf.set(defaultmode, HoodieHiveUtils.DEFAULT_SCAN_MODE);
    List<String> actualincrTables = HoodieHiveUtils.getIncrementalTableNames(Job.getInstance(conf));
    for (String expectedincrTable : expectedincrTables) {
      assertTrue(actualincrTables.contains(expectedincrTable));
    String defaultMode = String.format(HoodieHiveUtils.HOODIE_CONSUME_MODE_PATTERN, "db3.first_trips");
    conf.set(defaultMode, HoodieHiveUtils.DEFAULT_SCAN_MODE);
    List<String> actualIncrTables = HoodieHiveUtils.getIncrementalTableNames(Job.getInstance(conf));
    for (String expectedIncrTable : expectedIncrTables) {
      assertTrue(actualIncrTables.contains(expectedIncrTable));
    }
  }
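The Parquet input format test below repeats the same scenarios. Read together, the assertions pin down when the database-qualified incremental mode takes effect: hoodie.incremental.use.database must be true, hoodie.database.name must be set to a non-empty value in the table config, and the consume properties in the JobConf must be keyed by the matching database.table identifier. If any of these is missing, the incremental filter does not apply and the listing falls back to returning all files, as the assertEquals(10, ...) checks show.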
@@ -22,6 +22,7 @@ import org.apache.hudi.avro.model.HoodieCompactionPlan;
import org.apache.hudi.common.fs.FSUtils;
import org.apache.hudi.common.model.HoodieCommitMetadata;
import org.apache.hudi.common.model.HoodieFileFormat;
import org.apache.hudi.common.model.HoodieTableType;
import org.apache.hudi.common.model.HoodieWriteStat;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
@@ -286,9 +287,90 @@ public class TestHoodieParquetInputFormat {

    InputFormatTestUtil.setupIncremental(jobConf, "100", 1);

    HoodieTableMetaClient metaClient = HoodieTestUtils.init(HoodieTestUtils.getDefaultHadoopConf(), basePath.toString(),
        HoodieTableType.COPY_ON_WRITE, baseFileFormat);
    assertEquals(null, metaClient.getTableConfig().getDatabaseName(),
        "When hoodie.database.name is not set, it should default to null");

    FileStatus[] files = inputFormat.listStatus(jobConf);
    assertEquals(0, files.length,
        "We should exclude commit 100 when returning incremental pull with start commit time as 100");

    InputFormatTestUtil.setupIncremental(jobConf, "100", 1, true);

    files = inputFormat.listStatus(jobConf);
    assertEquals(0, files.length,
        "We should exclude commit 100 when returning incremental pull with start commit time as 100");

    metaClient = HoodieTestUtils.init(HoodieTestUtils.getDefaultHadoopConf(), basePath.toString(), HoodieTableType.COPY_ON_WRITE,
        baseFileFormat, HoodieTestUtils.HOODIE_DATABASE);
    assertEquals(HoodieTestUtils.HOODIE_DATABASE, metaClient.getTableConfig().getDatabaseName(),
        String.format("The hoodie.database.name should be %s ", HoodieTestUtils.HOODIE_DATABASE));

    files = inputFormat.listStatus(jobConf);
    assertEquals(10, files.length,
        "When hoodie.incremental.use.database is true and hoodie.database.name is not null or empty"
            + " and the incremental database name is not set, then the incremental query will not take effect");
  }

  @Test
  public void testIncrementalWithDatabaseName() throws IOException {
    // initial commit
    File partitionDir = InputFormatTestUtil.prepareTable(basePath, baseFileFormat, 10, "100");
    createCommitFile(basePath, "100", "2016/05/01");

    // Add the paths
    FileInputFormat.setInputPaths(jobConf, partitionDir.getPath());

    InputFormatTestUtil.setupIncremental(jobConf, "100", 1, HoodieTestUtils.HOODIE_DATABASE, true);

    HoodieTableMetaClient metaClient = HoodieTestUtils.init(HoodieTestUtils.getDefaultHadoopConf(), basePath.toString(),
        HoodieTableType.COPY_ON_WRITE, baseFileFormat);
    assertEquals(null, metaClient.getTableConfig().getDatabaseName(),
        "When hoodie.database.name is not set, it should default to null");

    FileStatus[] files = inputFormat.listStatus(jobConf);
    assertEquals(10, files.length,
        "When hoodie.database.name is null, then the incremental query will not take effect");

    metaClient = HoodieTestUtils.init(HoodieTestUtils.getDefaultHadoopConf(), basePath.toString(), HoodieTableType.COPY_ON_WRITE,
        baseFileFormat, "");
    assertEquals("", metaClient.getTableConfig().getDatabaseName(),
        "The hoodie.database.name should be empty");

    files = inputFormat.listStatus(jobConf);
    assertEquals(10, files.length,
        "When hoodie.database.name is empty, then the incremental query will not take effect");

    metaClient = HoodieTestUtils.init(HoodieTestUtils.getDefaultHadoopConf(), basePath.toString(), HoodieTableType.COPY_ON_WRITE,
        baseFileFormat, HoodieTestUtils.HOODIE_DATABASE);
    assertEquals(HoodieTestUtils.HOODIE_DATABASE, metaClient.getTableConfig().getDatabaseName(),
        String.format("The hoodie.database.name should be %s ", HoodieTestUtils.HOODIE_DATABASE));

    files = inputFormat.listStatus(jobConf);
    assertEquals(0, files.length,
        "We should exclude commit 100 when returning incremental pull with start commit time as 100");

    InputFormatTestUtil.setupIncremental(jobConf, "100", 1, HoodieTestUtils.HOODIE_DATABASE, false);

    files = inputFormat.listStatus(jobConf);
    assertEquals(10, files.length,
        "When hoodie.incremental.use.database is false and the incremental database name is set, "
            + "then the incremental query will not take effect");

    // The configuration with and without database name exists together
    InputFormatTestUtil.setupIncremental(jobConf, "1", 1, true);

    files = inputFormat.listStatus(jobConf);
    assertEquals(0, files.length,
        "When hoodie.incremental.use.database is true, "
            + "We should exclude commit 100 because the returning incremental pull with start commit time is 100");

    InputFormatTestUtil.setupIncremental(jobConf, "1", 1, false);
    files = inputFormat.listStatus(jobConf);
    assertEquals(10, files.length,
        "When hoodie.incremental.use.database is false, "
            + "We should include commit 100 because the returning incremental pull with start commit time is 1");
  }

  @Test
@@ -429,7 +511,7 @@ public class TestHoodieParquetInputFormat {
    ensureFilesInCommit("Pulling all commits from 100, should get us the 1 files from 200 commit", files, "200", 1);
  }

  @Disabled("enable this after enabling predicate pushdown")
  @Disabled("enable this after enabling predicate push down")
  @Test
  public void testPredicatePushDown() throws IOException {
    // initial commit
@@ -451,7 +533,7 @@ public class TestHoodieParquetInputFormat {
    // check whether we have 2 records at this point
    ensureRecordsInCommit("We need to have 2 records that was modified at commit " + commit2 + " and no more", commit2,
        2, 2);
    // Make sure we have the 10 records if we roll back the stattime
    // Make sure we have the 10 records if we roll back the start time
    InputFormatTestUtil.setupIncremental(jobConf, "0", 2);
    ensureRecordsInCommit("We need to have 8 records that was modified at commit " + commit1 + " and no more", commit1,
        8, 10);
@@ -461,19 +543,19 @@ public class TestHoodieParquetInputFormat {

  @Test
  public void testGetIncrementalTableNames() throws IOException {
    String[] expectedincrTables = {"db1.raw_trips", "db2.model_trips", "db3.model_trips"};
    String[] expectedIncrTables = {"db1.raw_trips", "db2.model_trips", "db3.model_trips"};
    JobConf conf = new JobConf();
    String incrementalMode1 = String.format(HoodieHiveUtils.HOODIE_CONSUME_MODE_PATTERN, expectedincrTables[0]);
    String incrementalMode1 = String.format(HoodieHiveUtils.HOODIE_CONSUME_MODE_PATTERN, expectedIncrTables[0]);
    conf.set(incrementalMode1, HoodieHiveUtils.INCREMENTAL_SCAN_MODE);
    String incrementalMode2 = String.format(HoodieHiveUtils.HOODIE_CONSUME_MODE_PATTERN, expectedincrTables[1]);
    String incrementalMode2 = String.format(HoodieHiveUtils.HOODIE_CONSUME_MODE_PATTERN, expectedIncrTables[1]);
    conf.set(incrementalMode2, HoodieHiveUtils.INCREMENTAL_SCAN_MODE);
    String incrementalMode3 = String.format(HoodieHiveUtils.HOODIE_CONSUME_MODE_PATTERN, "db3.model_trips");
    conf.set(incrementalMode3, HoodieHiveUtils.INCREMENTAL_SCAN_MODE.toLowerCase());
    String defaultmode = String.format(HoodieHiveUtils.HOODIE_CONSUME_MODE_PATTERN, "db3.first_trips");
    conf.set(defaultmode, HoodieHiveUtils.DEFAULT_SCAN_MODE);
    List<String> actualincrTables = HoodieHiveUtils.getIncrementalTableNames(Job.getInstance(conf));
    for (String expectedincrTable : expectedincrTables) {
      assertTrue(actualincrTables.contains(expectedincrTable));
    String defaultMode = String.format(HoodieHiveUtils.HOODIE_CONSUME_MODE_PATTERN, "db3.first_trips");
    conf.set(defaultMode, HoodieHiveUtils.DEFAULT_SCAN_MODE);
    List<String> actualIncrTables = HoodieHiveUtils.getIncrementalTableNames(Job.getInstance(conf));
    for (String expectedIncrTable : expectedIncrTables) {
      assertTrue(actualIncrTables.contains(expectedIncrTable));
    }
  }
@@ -130,6 +130,10 @@ public class InputFormatTestUtil {
  }

  public static void setupIncremental(JobConf jobConf, String startCommit, int numberOfCommitsToPull) {
    setupIncremental(jobConf, startCommit, numberOfCommitsToPull, false);
  }

  public static void setupIncremental(JobConf jobConf, String startCommit, int numberOfCommitsToPull, boolean isIncrementalUseDatabase) {
    String modePropertyName =
        String.format(HoodieHiveUtils.HOODIE_CONSUME_MODE_PATTERN, HoodieTestUtils.RAW_TRIPS_TEST_NAME);
    jobConf.set(modePropertyName, HoodieHiveUtils.INCREMENTAL_SCAN_MODE);
@@ -141,8 +145,26 @@ public class InputFormatTestUtil {
    String maxCommitPulls =
        String.format(HoodieHiveUtils.HOODIE_MAX_COMMIT_PATTERN, HoodieTestUtils.RAW_TRIPS_TEST_NAME);
    jobConf.setInt(maxCommitPulls, numberOfCommitsToPull);

    jobConf.setBoolean(HoodieHiveUtils.HOODIE_INCREMENTAL_USE_DATABASE, isIncrementalUseDatabase);
  }

  public static void setupIncremental(JobConf jobConf, String startCommit, int numberOfCommitsToPull, String databaseName, boolean isIncrementalUseDatabase) {
    String modePropertyName =
        String.format(HoodieHiveUtils.HOODIE_CONSUME_MODE_PATTERN, databaseName + "." + HoodieTestUtils.RAW_TRIPS_TEST_NAME);
    jobConf.set(modePropertyName, HoodieHiveUtils.INCREMENTAL_SCAN_MODE);

    String startCommitTimestampName =
        String.format(HoodieHiveUtils.HOODIE_START_COMMIT_PATTERN, databaseName + "." + HoodieTestUtils.RAW_TRIPS_TEST_NAME);
    jobConf.set(startCommitTimestampName, startCommit);

    String maxCommitPulls =
        String.format(HoodieHiveUtils.HOODIE_MAX_COMMIT_PATTERN, databaseName + "." + HoodieTestUtils.RAW_TRIPS_TEST_NAME);
    jobConf.setInt(maxCommitPulls, numberOfCommitsToPull);

    jobConf.setBoolean(HoodieHiveUtils.HOODIE_INCREMENTAL_USE_DATABASE, isIncrementalUseDatabase);
  }

  public static void setupSnapshotIncludePendingCommits(JobConf jobConf, String instantTime) {
    setupSnapshotScanMode(jobConf, true);
    String validateTimestampName =
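Finally, a hedged sketch of how the two new setupIncremental overloads interact, mirroring the "configuration with and without database name exists together" scenario in the tests above. It assumes it runs inside one of those test classes, so jobConf, inputFormat, InputFormatTestUtil and the HoodieTestUtils constants are already in scope; the behavioral comments restate the test assertions rather than new claims.

// Database-qualified keys: hoodie.<db>.raw_trips.consume.* with start commit 100, and the flag turned on.
InputFormatTestUtil.setupIncremental(jobConf, "100", 1, HoodieTestUtils.HOODIE_DATABASE, true);
// Unqualified keys: hoodie.raw_trips.consume.* with start commit 1 - both key families now coexist.
InputFormatTestUtil.setupIncremental(jobConf, "1", 1, true);
// With hoodie.incremental.use.database=true (set by the last call) the database-qualified start
// commit (100) applies, so commit 100 is excluded from the listing; flipping the flag to false
// makes the unqualified start commit (1) apply instead, and commit 100 is returned.
InputFormatTestUtil.setupIncremental(jobConf, "1", 1, false);
FileStatus[] files = inputFormat.listStatus(jobConf); // all 10 files, per the assertions above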