[HUDI-2711] Fallback to fulltable scan for IncrementalRelation if underlying files have been cleared or moved by cleaner (#3946)
Co-authored-by: sivabalan <n.siva.b@gmail.com>
This commit is contained in:
@@ -135,7 +135,10 @@ public class HoodieIncrSource extends RowSource {
|
||||
DataFrameReader reader = sparkSession.read().format("org.apache.hudi")
|
||||
.option(DataSourceReadOptions.QUERY_TYPE().key(), DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL())
|
||||
.option(DataSourceReadOptions.BEGIN_INSTANTTIME().key(), instantEndpts.getLeft())
|
||||
.option(DataSourceReadOptions.END_INSTANTTIME().key(), instantEndpts.getRight());
|
||||
.option(DataSourceReadOptions.END_INSTANTTIME().key(), instantEndpts.getRight())
|
||||
.option(DataSourceReadOptions.INCREMENTAL_FALLBACK_TO_FULL_TABLE_SCAN_FOR_NON_EXISTING_FILES().key(),
|
||||
props.getString(DataSourceReadOptions.INCREMENTAL_FALLBACK_TO_FULL_TABLE_SCAN_FOR_NON_EXISTING_FILES().key(),
|
||||
DataSourceReadOptions.INCREMENTAL_FALLBACK_TO_FULL_TABLE_SCAN_FOR_NON_EXISTING_FILES().defaultValue()));
|
||||
|
||||
Dataset<Row> source = reader.load(srcPath);
|
||||
|
||||
|
||||
@@ -19,6 +19,7 @@
|
||||
package org.apache.hudi.utilities.functional;
|
||||
|
||||
import org.apache.hudi.AvroConversionUtils;
|
||||
import org.apache.hudi.DataSourceReadOptions;
|
||||
import org.apache.hudi.DataSourceWriteOptions;
|
||||
import org.apache.hudi.client.SparkRDDWriteClient;
|
||||
import org.apache.hudi.common.config.DFSPropertiesConfiguration;
|
||||
@@ -1739,6 +1740,54 @@ public class TestHoodieDeltaStreamer extends HoodieDeltaStreamerTestBase {
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHoodieIncrFallback() throws Exception {
|
||||
String tableBasePath = dfsBasePath + "/incr_test_table";
|
||||
String downstreamTableBasePath = dfsBasePath + "/incr_test_downstream_table";
|
||||
|
||||
insertInTable(tableBasePath, 1, WriteOperationType.BULK_INSERT);
|
||||
HoodieDeltaStreamer.Config downstreamCfg =
|
||||
TestHelpers.makeConfigForHudiIncrSrc(tableBasePath, downstreamTableBasePath,
|
||||
WriteOperationType.BULK_INSERT, true, null);
|
||||
new HoodieDeltaStreamer(downstreamCfg, jsc).sync();
|
||||
|
||||
insertInTable(tableBasePath, 9, WriteOperationType.UPSERT);
|
||||
//No change as this fails with Path not exist error
|
||||
assertThrows(org.apache.spark.sql.AnalysisException.class, () -> new HoodieDeltaStreamer(downstreamCfg, jsc).sync());
|
||||
TestHelpers.assertRecordCount(1000, downstreamTableBasePath + "/*/*", sqlContext);
|
||||
|
||||
if (downstreamCfg.configs == null) {
|
||||
downstreamCfg.configs = new ArrayList<>();
|
||||
}
|
||||
|
||||
downstreamCfg.configs.add(DataSourceReadOptions.INCREMENTAL_FALLBACK_TO_FULL_TABLE_SCAN_FOR_NON_EXISTING_FILES().key() + "=true");
|
||||
//Adding this conf to make testing easier :)
|
||||
downstreamCfg.configs.add("hoodie.deltastreamer.source.hoodieincr.num_instants=10");
|
||||
downstreamCfg.operation = WriteOperationType.UPSERT;
|
||||
new HoodieDeltaStreamer(downstreamCfg, jsc).sync();
|
||||
new HoodieDeltaStreamer(downstreamCfg, jsc).sync();
|
||||
|
||||
long baseTableRecords = sqlContext.read().format("org.apache.hudi").load(tableBasePath + "/*/*.parquet").count();
|
||||
long downStreamTableRecords = sqlContext.read().format("org.apache.hudi").load(downstreamTableBasePath + "/*/*.parquet").count();
|
||||
assertEquals(baseTableRecords, downStreamTableRecords);
|
||||
}
|
||||
|
||||
private void insertInTable(String tableBasePath, int count, WriteOperationType operationType) throws Exception {
|
||||
HoodieDeltaStreamer.Config cfg = TestHelpers.makeConfig(tableBasePath, operationType,
|
||||
Collections.singletonList(SqlQueryBasedTransformer.class.getName()), PROPS_FILENAME_TEST_SOURCE, false);
|
||||
if (cfg.configs == null) {
|
||||
cfg.configs = new ArrayList<>();
|
||||
}
|
||||
cfg.configs.add("hoodie.cleaner.commits.retained=3");
|
||||
cfg.configs.add("hoodie.keep.min.commits=4");
|
||||
cfg.configs.add("hoodie.keep.max.commits=5");
|
||||
cfg.configs.add("hoodie.test.source.generate.inserts=true");
|
||||
|
||||
for (int i = 0; i < count; i++) {
|
||||
new HoodieDeltaStreamer(cfg, jsc).sync();
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testInsertOverwrite() throws Exception {
|
||||
testDeltaStreamerWithSpecifiedOperation(dfsBasePath + "/insert_overwrite", WriteOperationType.INSERT_OVERWRITE);
|
||||
|
||||
Reference in New Issue
Block a user