1
0

[HUDI-1513] Introduce WriteClient#preWrite() and relocate metadata table syncing (#2413)

- Syncing to metadata table, setting operation type, starting async cleaner done in preWrite()
 - Fixes an issue where delete() was not starting async cleaner correctly
 - Fixed tests and enabled metadata table for TestAsyncCompaction
This commit is contained in:
vinoth chandar
2021-01-06 23:08:30 -08:00
committed by GitHub
parent b593f10629
commit 5ff8e88d58
8 changed files with 54 additions and 57 deletions

View File

@@ -45,7 +45,7 @@ public class HoodieSparkCompactor<T extends HoodieRecordPayload> extends Abstrac
@Override
public void compact(HoodieInstant instant) throws IOException {
LOG.info("Compactor executing compaction " + instant);
SparkRDDWriteClient<T> writeClient = (SparkRDDWriteClient<T>)compactionClient;
SparkRDDWriteClient<T> writeClient = (SparkRDDWriteClient<T>) compactionClient;
JavaRDD<WriteStatus> res = writeClient.compact(instant.getTimestamp());
this.context.setJobStatus(this.getClass().getSimpleName(), "Collect compaction write status");
long numWriteErrors = res.collect().stream().filter(WriteStatus::hasErrors).count();

View File

@@ -140,8 +140,7 @@ public class SparkRDDWriteClient<T extends HoodieRecordPayload> extends
HoodieTable<T, JavaRDD<HoodieRecord<T>>, JavaRDD<HoodieKey>, JavaRDD<WriteStatus>> table =
getTableAndInitCtx(WriteOperationType.UPSERT, instantTime);
table.validateUpsertSchema();
setOperationType(WriteOperationType.UPSERT);
this.asyncCleanerService = AsyncCleanerService.startAsyncCleaningIfEnabled(this);
preWrite(instantTime, WriteOperationType.UPSERT);
HoodieWriteMetadata<JavaRDD<WriteStatus>> result = table.upsert(context, instantTime, records);
if (result.getIndexLookupDuration().isPresent()) {
metrics.updateIndexMetrics(LOOKUP_STR, result.getIndexLookupDuration().get().toMillis());
@@ -154,8 +153,7 @@ public class SparkRDDWriteClient<T extends HoodieRecordPayload> extends
HoodieTable<T, JavaRDD<HoodieRecord<T>>, JavaRDD<HoodieKey>, JavaRDD<WriteStatus>> table =
getTableAndInitCtx(WriteOperationType.UPSERT_PREPPED, instantTime);
table.validateUpsertSchema();
setOperationType(WriteOperationType.UPSERT_PREPPED);
this.asyncCleanerService = AsyncCleanerService.startAsyncCleaningIfEnabled(this);
preWrite(instantTime, WriteOperationType.UPSERT_PREPPED);
HoodieWriteMetadata<JavaRDD<WriteStatus>> result = table.upsertPrepped(context,instantTime, preppedRecords);
return postWrite(result, instantTime, table);
}
@@ -165,8 +163,7 @@ public class SparkRDDWriteClient<T extends HoodieRecordPayload> extends
HoodieTable<T, JavaRDD<HoodieRecord<T>>, JavaRDD<HoodieKey>, JavaRDD<WriteStatus>> table =
getTableAndInitCtx(WriteOperationType.INSERT, instantTime);
table.validateInsertSchema();
setOperationType(WriteOperationType.INSERT);
this.asyncCleanerService = AsyncCleanerService.startAsyncCleaningIfEnabled(this);
preWrite(instantTime, WriteOperationType.INSERT);
HoodieWriteMetadata<JavaRDD<WriteStatus>> result = table.insert(context,instantTime, records);
return postWrite(result, instantTime, table);
}
@@ -176,8 +173,7 @@ public class SparkRDDWriteClient<T extends HoodieRecordPayload> extends
HoodieTable<T, JavaRDD<HoodieRecord<T>>, JavaRDD<HoodieKey>, JavaRDD<WriteStatus>> table =
getTableAndInitCtx(WriteOperationType.INSERT_PREPPED, instantTime);
table.validateInsertSchema();
setOperationType(WriteOperationType.INSERT_PREPPED);
this.asyncCleanerService = AsyncCleanerService.startAsyncCleaningIfEnabled(this);
preWrite(instantTime, WriteOperationType.INSERT_PREPPED);
HoodieWriteMetadata<JavaRDD<WriteStatus>> result = table.insertPrepped(context,instantTime, preppedRecords);
return postWrite(result, instantTime, table);
}
@@ -192,8 +188,7 @@ public class SparkRDDWriteClient<T extends HoodieRecordPayload> extends
// Runs an INSERT_OVERWRITE write of `records` at `instantTime` and wraps the postWrite(...) outcome
// together with result.getPartitionToReplaceFileIds() in a HoodieWriteResult.
public HoodieWriteResult insertOverwrite(JavaRDD<HoodieRecord<T>> records, final String instantTime) {
// Obtain the table handle for this operation and validate the insert schema before any write work starts.
HoodieTable table = getTableAndInitCtx(WriteOperationType.INSERT_OVERWRITE, instantTime);
table.validateInsertSchema();
// NOTE(review): this listing is a diff rendered without +/- markers. Judging by the hunk header
// (8 old lines, 7 new) the next two statements look like the removed pre-change code and the
// preWrite(...) call like their replacement — confirm against the repository.
setOperationType(WriteOperationType.INSERT_OVERWRITE);
this.asyncCleanerService = AsyncCleanerService.startAsyncCleaningIfEnabled(this);
preWrite(instantTime, WriteOperationType.INSERT_OVERWRITE);
// Delegate the actual write to the table, then hand the metadata to postWrite(...).
HoodieWriteMetadata result = table.insertOverwrite(context, instantTime, records);
return new HoodieWriteResult(postWrite(result, instantTime, table), result.getPartitionToReplaceFileIds());
}
@@ -209,8 +204,7 @@ public class SparkRDDWriteClient<T extends HoodieRecordPayload> extends
// Runs an INSERT_OVERWRITE_TABLE write of `records` at `instantTime` and wraps the postWrite(...) outcome
// together with result.getPartitionToReplaceFileIds() in a HoodieWriteResult.
public HoodieWriteResult insertOverwriteTable(JavaRDD<HoodieRecord<T>> records, final String instantTime) {
// Obtain the table handle for this operation and validate the insert schema before any write work starts.
HoodieTable table = getTableAndInitCtx(WriteOperationType.INSERT_OVERWRITE_TABLE, instantTime);
table.validateInsertSchema();
// NOTE(review): this listing is a diff rendered without +/- markers. Judging by the hunk header
// (8 old lines, 7 new) the next two statements look like the removed pre-change code and the
// preWrite(...) call like their replacement — confirm against the repository.
setOperationType(WriteOperationType.INSERT_OVERWRITE_TABLE);
this.asyncCleanerService = AsyncCleanerService.startAsyncCleaningIfEnabled(this);
preWrite(instantTime, WriteOperationType.INSERT_OVERWRITE_TABLE);
// Delegate the actual write to the table, then hand the metadata to postWrite(...).
HoodieWriteMetadata result = table.insertOverwriteTable(context, instantTime, records);
return new HoodieWriteResult(postWrite(result, instantTime, table), result.getPartitionToReplaceFileIds());
}
@@ -225,8 +219,7 @@ public class SparkRDDWriteClient<T extends HoodieRecordPayload> extends
HoodieTable<T, JavaRDD<HoodieRecord<T>>, JavaRDD<HoodieKey>, JavaRDD<WriteStatus>> table =
getTableAndInitCtx(WriteOperationType.BULK_INSERT, instantTime);
table.validateInsertSchema();
setOperationType(WriteOperationType.BULK_INSERT);
this.asyncCleanerService = AsyncCleanerService.startAsyncCleaningIfEnabled(this);
preWrite(instantTime, WriteOperationType.BULK_INSERT);
HoodieWriteMetadata<JavaRDD<WriteStatus>> result = table.bulkInsert(context,instantTime, records, userDefinedBulkInsertPartitioner);
return postWrite(result, instantTime, table);
}
@@ -236,8 +229,7 @@ public class SparkRDDWriteClient<T extends HoodieRecordPayload> extends
HoodieTable<T, JavaRDD<HoodieRecord<T>>, JavaRDD<HoodieKey>, JavaRDD<WriteStatus>> table =
getTableAndInitCtx(WriteOperationType.BULK_INSERT_PREPPED, instantTime);
table.validateInsertSchema();
setOperationType(WriteOperationType.BULK_INSERT_PREPPED);
this.asyncCleanerService = AsyncCleanerService.startAsyncCleaningIfEnabled(this);
preWrite(instantTime, WriteOperationType.BULK_INSERT_PREPPED);
HoodieWriteMetadata<JavaRDD<WriteStatus>> result = table.bulkInsertPrepped(context,instantTime, preppedRecords, bulkInsertPartitioner);
return postWrite(result, instantTime, table);
}
@@ -245,14 +237,14 @@ public class SparkRDDWriteClient<T extends HoodieRecordPayload> extends
// Runs a DELETE operation for `keys` at `instantTime` through table.delete(...) and returns
// whatever postWrite(...) produces for the resulting write metadata.
@Override
public JavaRDD<WriteStatus> delete(JavaRDD<HoodieKey> keys, String instantTime) {
HoodieTable<T, JavaRDD<HoodieRecord<T>>, JavaRDD<HoodieKey>, JavaRDD<WriteStatus>> table = getTableAndInitCtx(WriteOperationType.DELETE, instantTime);
// NOTE(review): this listing is a diff rendered without +/- markers; setOperationType(...) appears to be
// the removed pre-change line and preWrite(...) its replacement — confirm against the repository.
// The commit description states that preWrite() is where the async cleaner now gets started for delete().
setOperationType(WriteOperationType.DELETE);
preWrite(instantTime, WriteOperationType.DELETE);
HoodieWriteMetadata<JavaRDD<WriteStatus>> result = table.delete(context,instantTime, keys);
return postWrite(result, instantTime, table);
}
// Runs a DELETE_PARTITION operation for `partitions` at `instantTime` and wraps the postWrite(...) outcome
// together with result.getPartitionToReplaceFileIds() in a HoodieWriteResult.
public HoodieWriteResult deletePartitions(List<String> partitions, String instantTime) {
HoodieTable<T, JavaRDD<HoodieRecord<T>>, JavaRDD<HoodieKey>, JavaRDD<WriteStatus>> table = getTableAndInitCtx(WriteOperationType.DELETE_PARTITION, instantTime);
// NOTE(review): this listing is a diff rendered without +/- markers; setOperationType(...) appears to be
// the removed pre-change line and preWrite(...) its replacement — confirm against the repository.
setOperationType(WriteOperationType.DELETE_PARTITION);
preWrite(instantTime, WriteOperationType.DELETE_PARTITION);
// Delegate the partition deletion to the table, then hand the metadata to postWrite(...).
HoodieWriteMetadata<JavaRDD<WriteStatus>> result = table.deletePartitions(context,instantTime, partitions);
return new HoodieWriteResult(postWrite(result, instantTime, table), result.getPartitionToReplaceFileIds());
}