[HUDI-3213] Making commit preserve metadata to true for compaction (#4811)
* Making commit preserve metadata to true * Fixing integ tests * Fixing preserve commit metadata for metadata table * fixed bootstrap tests * temp diff * Fixing merge handle * renaming fallback record * fixing build issue * Fixing test failures
This commit is contained in:
committed by
GitHub
parent
6f57bbfac4
commit
3539578ccb
@@ -364,6 +364,7 @@ public class HoodieJavaStreamingApp {
|
||||
.option(HoodieCompactionConfig.INLINE_COMPACT_NUM_DELTA_COMMITS.key(), "1")
|
||||
.option(DataSourceWriteOptions.ASYNC_COMPACT_ENABLE().key(), "true")
|
||||
.option(DataSourceWriteOptions.ASYNC_CLUSTERING_ENABLE().key(), "true")
|
||||
.option(HoodieCompactionConfig.PRESERVE_COMMIT_METADATA.key(), "false")
|
||||
.option(HoodieWriteConfig.TBL_NAME.key(), tableName).option("checkpointLocation", checkpointLocation)
|
||||
.outputMode(OutputMode.Append());
|
||||
|
||||
|
||||
@@ -256,7 +256,7 @@ public class TestBootstrap extends HoodieClientTestBase {
|
||||
SparkRDDWriteClient client = new SparkRDDWriteClient(context, config);
|
||||
client.bootstrap(Option.empty());
|
||||
checkBootstrapResults(totalRecords, schema, bootstrapCommitInstantTs, checkNumRawFiles, numInstantsAfterBootstrap,
|
||||
numInstantsAfterBootstrap, timestamp, timestamp, deltaCommit, bootstrapInstants);
|
||||
numInstantsAfterBootstrap, timestamp, timestamp, deltaCommit, bootstrapInstants, true);
|
||||
|
||||
// Rollback Bootstrap
|
||||
HoodieActiveTimeline.deleteInstantFile(metaClient.getFs(), metaClient.getMetaPath(), new HoodieInstant(State.COMPLETED,
|
||||
@@ -284,7 +284,7 @@ public class TestBootstrap extends HoodieClientTestBase {
|
||||
}
|
||||
|
||||
checkBootstrapResults(totalRecords, schema, bootstrapCommitInstantTs, checkNumRawFiles, numInstantsAfterBootstrap,
|
||||
numInstantsAfterBootstrap, timestamp, timestamp, deltaCommit, bootstrapInstants);
|
||||
numInstantsAfterBootstrap, timestamp, timestamp, deltaCommit, bootstrapInstants, true);
|
||||
|
||||
// Upsert case
|
||||
long updateTimestamp = Instant.now().toEpochMilli();
|
||||
@@ -296,7 +296,7 @@ public class TestBootstrap extends HoodieClientTestBase {
|
||||
String newInstantTs = client.startCommit();
|
||||
client.upsert(updateBatch, newInstantTs);
|
||||
checkBootstrapResults(totalRecords, schema, newInstantTs, false, numInstantsAfterBootstrap + 1,
|
||||
updateTimestamp, deltaCommit ? timestamp : updateTimestamp, deltaCommit);
|
||||
updateTimestamp, deltaCommit ? timestamp : updateTimestamp, deltaCommit, true);
|
||||
|
||||
if (deltaCommit) {
|
||||
Option<String> compactionInstant = client.scheduleCompaction(Option.empty());
|
||||
@@ -304,7 +304,7 @@ public class TestBootstrap extends HoodieClientTestBase {
|
||||
client.compact(compactionInstant.get());
|
||||
checkBootstrapResults(totalRecords, schema, compactionInstant.get(), checkNumRawFiles,
|
||||
numInstantsAfterBootstrap + 2, 2, updateTimestamp, updateTimestamp, !deltaCommit,
|
||||
Arrays.asList(compactionInstant.get()));
|
||||
Arrays.asList(compactionInstant.get()), !config.isPreserveHoodieCommitMetadataForCompaction());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -334,14 +334,14 @@ public class TestBootstrap extends HoodieClientTestBase {
|
||||
}
|
||||
|
||||
private void checkBootstrapResults(int totalRecords, Schema schema, String maxInstant, boolean checkNumRawFiles,
|
||||
int expNumInstants, long expTimestamp, long expROTimestamp, boolean isDeltaCommit) throws Exception {
|
||||
int expNumInstants, long expTimestamp, long expROTimestamp, boolean isDeltaCommit, boolean validateRecordsForCommitTime) throws Exception {
|
||||
checkBootstrapResults(totalRecords, schema, maxInstant, checkNumRawFiles, expNumInstants, expNumInstants,
|
||||
expTimestamp, expROTimestamp, isDeltaCommit, Arrays.asList(maxInstant));
|
||||
expTimestamp, expROTimestamp, isDeltaCommit, Arrays.asList(maxInstant), validateRecordsForCommitTime);
|
||||
}
|
||||
|
||||
private void checkBootstrapResults(int totalRecords, Schema schema, String instant, boolean checkNumRawFiles,
|
||||
int expNumInstants, int numVersions, long expTimestamp, long expROTimestamp, boolean isDeltaCommit,
|
||||
List<String> instantsWithValidRecords) throws Exception {
|
||||
List<String> instantsWithValidRecords, boolean validateRecordsForCommitTime) throws Exception {
|
||||
metaClient.reloadActiveTimeline();
|
||||
assertEquals(expNumInstants, metaClient.getCommitsTimeline().filterCompletedInstants().countInstants());
|
||||
assertEquals(instant, metaClient.getActiveTimeline()
|
||||
@@ -361,8 +361,10 @@ public class TestBootstrap extends HoodieClientTestBase {
|
||||
if (!isDeltaCommit) {
|
||||
String predicate = String.join(", ",
|
||||
instantsWithValidRecords.stream().map(p -> "\"" + p + "\"").collect(Collectors.toList()));
|
||||
assertEquals(totalRecords, sqlContext.sql("select * from bootstrapped where _hoodie_commit_time IN "
|
||||
+ "(" + predicate + ")").count());
|
||||
if (validateRecordsForCommitTime) {
|
||||
assertEquals(totalRecords, sqlContext.sql("select * from bootstrapped where _hoodie_commit_time IN "
|
||||
+ "(" + predicate + ")").count());
|
||||
}
|
||||
Dataset<Row> missingOriginal = sqlContext.sql("select a._row_key from original a where a._row_key not "
|
||||
+ "in (select _hoodie_record_key from bootstrapped)");
|
||||
assertEquals(0, missingOriginal.count());
|
||||
|
||||
@@ -248,7 +248,7 @@ public class TestOrcBootstrap extends HoodieClientTestBase {
|
||||
SparkRDDWriteClient client = new SparkRDDWriteClient(context, config);
|
||||
client.bootstrap(Option.empty());
|
||||
checkBootstrapResults(totalRecords, schema, bootstrapCommitInstantTs, checkNumRawFiles, numInstantsAfterBootstrap,
|
||||
numInstantsAfterBootstrap, timestamp, timestamp, deltaCommit, bootstrapInstants);
|
||||
numInstantsAfterBootstrap, timestamp, timestamp, deltaCommit, bootstrapInstants, true);
|
||||
|
||||
// Rollback Bootstrap
|
||||
if (deltaCommit) {
|
||||
@@ -278,7 +278,7 @@ public class TestOrcBootstrap extends HoodieClientTestBase {
|
||||
}
|
||||
|
||||
checkBootstrapResults(totalRecords, schema, bootstrapCommitInstantTs, checkNumRawFiles, numInstantsAfterBootstrap,
|
||||
numInstantsAfterBootstrap, timestamp, timestamp, deltaCommit, bootstrapInstants);
|
||||
numInstantsAfterBootstrap, timestamp, timestamp, deltaCommit, bootstrapInstants, true);
|
||||
|
||||
// Upsert case
|
||||
long updateTimestamp = Instant.now().toEpochMilli();
|
||||
@@ -290,7 +290,7 @@ public class TestOrcBootstrap extends HoodieClientTestBase {
|
||||
String newInstantTs = client.startCommit();
|
||||
client.upsert(updateBatch, newInstantTs);
|
||||
checkBootstrapResults(totalRecords, schema, newInstantTs, false, numInstantsAfterBootstrap + 1,
|
||||
updateTimestamp, deltaCommit ? timestamp : updateTimestamp, deltaCommit);
|
||||
updateTimestamp, deltaCommit ? timestamp : updateTimestamp, deltaCommit, true);
|
||||
|
||||
if (deltaCommit) {
|
||||
Option<String> compactionInstant = client.scheduleCompaction(Option.empty());
|
||||
@@ -298,7 +298,7 @@ public class TestOrcBootstrap extends HoodieClientTestBase {
|
||||
client.compact(compactionInstant.get());
|
||||
checkBootstrapResults(totalRecords, schema, compactionInstant.get(), checkNumRawFiles,
|
||||
numInstantsAfterBootstrap + 2, 2, updateTimestamp, updateTimestamp, !deltaCommit,
|
||||
Arrays.asList(compactionInstant.get()));
|
||||
Arrays.asList(compactionInstant.get()), !config.isPreserveHoodieCommitMetadataForCompaction());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -328,14 +328,14 @@ public class TestOrcBootstrap extends HoodieClientTestBase {
|
||||
}
|
||||
|
||||
private void checkBootstrapResults(int totalRecords, Schema schema, String maxInstant, boolean checkNumRawFiles,
|
||||
int expNumInstants, long expTimestamp, long expROTimestamp, boolean isDeltaCommit) throws Exception {
|
||||
int expNumInstants, long expTimestamp, long expROTimestamp, boolean isDeltaCommit, boolean validateRecordsForCommitTime) throws Exception {
|
||||
checkBootstrapResults(totalRecords, schema, maxInstant, checkNumRawFiles, expNumInstants, expNumInstants,
|
||||
expTimestamp, expROTimestamp, isDeltaCommit, Arrays.asList(maxInstant));
|
||||
expTimestamp, expROTimestamp, isDeltaCommit, Arrays.asList(maxInstant), validateRecordsForCommitTime);
|
||||
}
|
||||
|
||||
private void checkBootstrapResults(int totalRecords, Schema schema, String instant, boolean checkNumRawFiles,
|
||||
int expNumInstants, int numVersions, long expTimestamp, long expROTimestamp, boolean isDeltaCommit,
|
||||
List<String> instantsWithValidRecords) throws Exception {
|
||||
List<String> instantsWithValidRecords, boolean validateCommitRecords) throws Exception {
|
||||
metaClient.reloadActiveTimeline();
|
||||
assertEquals(expNumInstants, metaClient.getCommitsTimeline().filterCompletedInstants().countInstants());
|
||||
assertEquals(instant, metaClient.getActiveTimeline()
|
||||
@@ -355,8 +355,10 @@ public class TestOrcBootstrap extends HoodieClientTestBase {
|
||||
if (!isDeltaCommit) {
|
||||
String predicate = String.join(", ",
|
||||
instantsWithValidRecords.stream().map(p -> "\"" + p + "\"").collect(Collectors.toList()));
|
||||
assertEquals(totalRecords, sqlContext.sql("select * from bootstrapped where _hoodie_commit_time IN "
|
||||
+ "(" + predicate + ")").count());
|
||||
if (validateCommitRecords) {
|
||||
assertEquals(totalRecords, sqlContext.sql("select * from bootstrapped where _hoodie_commit_time IN "
|
||||
+ "(" + predicate + ")").count());
|
||||
}
|
||||
Dataset<Row> missingOriginal = sqlContext.sql("select a._row_key from original a where a._row_key not "
|
||||
+ "in (select _hoodie_record_key from bootstrapped)");
|
||||
assertEquals(0, missingOriginal.count());
|
||||
|
||||
Reference in New Issue
Block a user