[HUDI-332] Add operation type (insert/upsert/bulkinsert/delete) to HoodieCommitMetadata (#1157)
@@ -28,6 +28,7 @@ import org.apache.hudi.common.model.HoodieRecordPayload;
 import org.apache.hudi.common.model.HoodieRollingStat;
 import org.apache.hudi.common.model.HoodieRollingStatMetadata;
 import org.apache.hudi.common.model.HoodieWriteStat;
+import org.apache.hudi.common.model.WriteOperationType;
 import org.apache.hudi.common.table.HoodieTableMetaClient;
 import org.apache.hudi.common.table.HoodieTimeline;
 import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
@@ -71,6 +72,15 @@ public abstract class AbstractHoodieWriteClient<T extends HoodieRecordPayload> e
   private final transient HoodieIndex<T> index;
 
   private transient Timer.Context writeContext = null;
+  private transient WriteOperationType operationType;
+
+  public void setOperationType(WriteOperationType operationType) {
+    this.operationType = operationType;
+  }
+
+  public WriteOperationType getOperationType() {
+    return this.operationType;
+  }
 
   protected AbstractHoodieWriteClient(JavaSparkContext jsc, HoodieIndex index, HoodieWriteConfig clientConfig) {
     super(jsc, clientConfig);
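The hunks above reference WriteOperationType without showing its definition. A minimal sketch of such an enum, with the constant set and string values assumed from the operations touched in this commit (the real class lives in hudi-common under org.apache.hudi.common.model):

// Sketch only; constants and values assumed, not copied from the actual class.
public enum WriteOperationType {
  INSERT("insert"),
  INSERT_PREPPED("insert_prepped"),
  UPSERT("upsert"),
  UPSERT_PREPPED("upsert_prepped"),
  BULK_INSERT("bulk_insert"),
  BULK_INSERT_PREPPED("bulk_insert_prepped"),
  DELETE("delete");

  private final String value;

  WriteOperationType(String value) {
    this.value = value;
  }

  public String value() {
    return value;
  }
}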
@@ -149,6 +159,7 @@ public abstract class AbstractHoodieWriteClient<T extends HoodieRecordPayload> e
       extraMetadata.get().forEach(metadata::addMetadata);
     }
     metadata.addMetadata(HoodieCommitMetadata.SCHEMA_KEY, config.getSchema());
+    metadata.setOperationType(operationType);
 
     try {
       activeTimeline.saveAsComplete(new HoodieInstant(true, actionType, commitTime),
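With the commit path now stamping the operation type, it is serialized alongside the rest of the commit metadata. A round-trip sketch, assuming HoodieCommitMetadata's existing JSON helpers (toJsonString/fromJsonString) behave as elsewhere in the codebase:

// Sketch: the new field survives the trip to/from the .commit file payload.
static void roundTrip() throws Exception {
  HoodieCommitMetadata metadata = new HoodieCommitMetadata();
  metadata.setOperationType(WriteOperationType.UPSERT);
  String json = metadata.toJsonString();
  HoodieCommitMetadata copy = HoodieCommitMetadata.fromJsonString(json, HoodieCommitMetadata.class);
  assert copy.getOperationType() == WriteOperationType.UPSERT;
}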
@@ -255,9 +266,9 @@ public abstract class AbstractHoodieWriteClient<T extends HoodieRecordPayload> e
     return index;
   }
 
-  protected HoodieTable getTableAndInitCtx(OperationType operationType) {
+  protected HoodieTable getTableAndInitCtx(WriteOperationType operationType) {
     HoodieTableMetaClient metaClient = createMetaClient(true);
-    if (operationType == OperationType.DELETE) {
+    if (operationType == WriteOperationType.DELETE) {
       setWriteSchemaFromLastInstant(metaClient);
     }
     // Create a Hoodie table which encapsulated the commits and files visible
@@ -32,6 +32,7 @@ import org.apache.hudi.common.model.HoodieRecord;
 import org.apache.hudi.common.model.HoodieRecordPayload;
 import org.apache.hudi.common.model.HoodieTableType;
 import org.apache.hudi.common.model.HoodieWriteStat;
+import org.apache.hudi.common.model.WriteOperationType;
 import org.apache.hudi.common.table.HoodieTableMetaClient;
 import org.apache.hudi.common.table.HoodieTimeline;
 import org.apache.hudi.common.table.TableFileSystemView.BaseFileOnlyView;
@@ -98,7 +99,6 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
   private final transient HoodieCleanClient<T> cleanClient;
   private transient Timer.Context compactionTimer;
-
 
   /**
    * Create a write client, without cleaning up failed/inflight commits.
    *
@@ -174,7 +174,8 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
    * @return JavaRDD[WriteStatus] - RDD of WriteStatus to inspect errors and counts
    */
   public JavaRDD<WriteStatus> upsert(JavaRDD<HoodieRecord<T>> records, final String commitTime) {
-    HoodieTable<T> table = getTableAndInitCtx(OperationType.UPSERT);
+    HoodieTable<T> table = getTableAndInitCtx(WriteOperationType.UPSERT);
+    setOperationType(WriteOperationType.UPSERT);
     try {
       // De-dupe/merge if needed
       JavaRDD<HoodieRecord<T>> dedupedRecords =
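Each public write path in this file follows the same two-step pattern: pass the type to getTableAndInitCtx and remember it via setOperationType. A caller-side sketch (client setup and variable names are illustrative only, not taken from this commit):

// Illustrative: what a caller observes after this change.
HoodieWriteClient<HoodieAvroPayload> client = new HoodieWriteClient<>(jsc, config);
String commitTime = client.startCommit();
JavaRDD<WriteStatus> statuses = client.upsert(records, commitTime);
// The client now remembers the kind of write that ran, and the commit path
// stamps it onto the HoodieCommitMetadata it persists.
WriteOperationType type = client.getOperationType(); // WriteOperationType.UPSERT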
@@ -203,7 +204,8 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
    * @return JavaRDD[WriteStatus] - RDD of WriteStatus to inspect errors and counts
    */
   public JavaRDD<WriteStatus> upsertPreppedRecords(JavaRDD<HoodieRecord<T>> preppedRecords, final String commitTime) {
-    HoodieTable<T> table = getTableAndInitCtx(OperationType.UPSERT_PREPPED);
+    HoodieTable<T> table = getTableAndInitCtx(WriteOperationType.UPSERT_PREPPED);
+    setOperationType(WriteOperationType.UPSERT_PREPPED);
     try {
       return upsertRecordsInternal(preppedRecords, commitTime, table, true);
     } catch (Throwable e) {
@@ -225,7 +227,8 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
    * @return JavaRDD[WriteStatus] - RDD of WriteStatus to inspect errors and counts
    */
   public JavaRDD<WriteStatus> insert(JavaRDD<HoodieRecord<T>> records, final String commitTime) {
-    HoodieTable<T> table = getTableAndInitCtx(OperationType.INSERT);
+    HoodieTable<T> table = getTableAndInitCtx(WriteOperationType.INSERT);
+    setOperationType(WriteOperationType.INSERT);
     try {
       // De-dupe/merge if needed
       JavaRDD<HoodieRecord<T>> dedupedRecords =
@@ -252,7 +255,8 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
    * @return JavaRDD[WriteStatus] - RDD of WriteStatus to inspect errors and counts
    */
   public JavaRDD<WriteStatus> insertPreppedRecords(JavaRDD<HoodieRecord<T>> preppedRecords, final String commitTime) {
-    HoodieTable<T> table = getTableAndInitCtx(OperationType.INSERT_PREPPED);
+    HoodieTable<T> table = getTableAndInitCtx(WriteOperationType.INSERT_PREPPED);
+    setOperationType(WriteOperationType.INSERT_PREPPED);
     try {
       return upsertRecordsInternal(preppedRecords, commitTime, table, false);
     } catch (Throwable e) {
@@ -295,7 +299,8 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
    */
   public JavaRDD<WriteStatus> bulkInsert(JavaRDD<HoodieRecord<T>> records, final String commitTime,
       Option<UserDefinedBulkInsertPartitioner> bulkInsertPartitioner) {
-    HoodieTable<T> table = getTableAndInitCtx(OperationType.BULK_INSERT);
+    HoodieTable<T> table = getTableAndInitCtx(WriteOperationType.BULK_INSERT);
+    setOperationType(WriteOperationType.BULK_INSERT);
     try {
       // De-dupe/merge if needed
       JavaRDD<HoodieRecord<T>> dedupedRecords =
@@ -328,7 +333,8 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
    */
   public JavaRDD<WriteStatus> bulkInsertPreppedRecords(JavaRDD<HoodieRecord<T>> preppedRecords, final String commitTime,
       Option<UserDefinedBulkInsertPartitioner> bulkInsertPartitioner) {
-    HoodieTable<T> table = getTableAndInitCtx(OperationType.BULK_INSERT_PREPPED);
+    HoodieTable<T> table = getTableAndInitCtx(WriteOperationType.BULK_INSERT_PREPPED);
+    setOperationType(WriteOperationType.BULK_INSERT_PREPPED);
     try {
       return bulkInsertInternal(preppedRecords, commitTime, table, bulkInsertPartitioner);
     } catch (Throwable e) {
@@ -341,14 +347,15 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
 
   /**
    * Deletes a list of {@link HoodieKey}s from the Hoodie table, at the supplied commitTime {@link HoodieKey}s will be
-   * deduped and non existant keys will be removed before deleting.
+   * de-duped and non existent keys will be removed before deleting.
    *
    * @param keys {@link List} of {@link HoodieKey}s to be deleted
    * @param commitTime Commit time handle
    * @return JavaRDD[WriteStatus] - RDD of WriteStatus to inspect errors and counts
    */
   public JavaRDD<WriteStatus> delete(JavaRDD<HoodieKey> keys, final String commitTime) {
-    HoodieTable<T> table = getTableAndInitCtx(OperationType.DELETE);
+    HoodieTable<T> table = getTableAndInitCtx(WriteOperationType.DELETE);
+    setOperationType(WriteOperationType.DELETE);
     try {
       // De-dupe/merge if needed
       JavaRDD<HoodieKey> dedupedKeys =
@@ -435,6 +442,7 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
         metadata.addWriteStat(path.toString(), writeStat);
       });
     });
+    metadata.setOperationType(getOperationType());
 
     HoodieActiveTimeline activeTimeline = table.getActiveTimeline();
     String commitActionType = table.getMetaClient().getCommitActionType();
@@ -331,7 +331,7 @@ public class HoodieCommitArchiveLog {
     return archivedMetaWrapper;
   }
 
-  private org.apache.hudi.avro.model.HoodieCommitMetadata commitMetadataConverter(
+  public org.apache.hudi.avro.model.HoodieCommitMetadata commitMetadataConverter(
       HoodieCommitMetadata hoodieCommitMetadata) {
     ObjectMapper mapper = new ObjectMapper();
     // Need this to ignore other public get() methods
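commitMetadataConverter is widened from private to public so the new test below can call it directly. The conversion itself is plain Jackson; a minimal sketch of the idea, assuming the avro model's fields mirror the POJO's:

// Sketch of a Jackson-based POJO-to-avro-model conversion; exception handling omitted.
ObjectMapper mapper = new ObjectMapper();
// Ignore public get() methods on the POJO that have no counterpart in the avro model.
mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
org.apache.hudi.avro.model.HoodieCommitMetadata avroMetadata =
    mapper.convertValue(hoodieCommitMetadata, org.apache.hudi.avro.model.HoodieCommitMetadata.class);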
@@ -20,7 +20,9 @@ package org.apache.hudi.io;
 
 import org.apache.hudi.common.HoodieClientTestHarness;
 import org.apache.hudi.common.HoodieTestDataGenerator;
+import org.apache.hudi.common.model.HoodieCommitMetadata;
 import org.apache.hudi.common.model.HoodieTestUtils;
+import org.apache.hudi.common.model.WriteOperationType;
 import org.apache.hudi.common.table.HoodieTableMetaClient;
 import org.apache.hudi.common.table.HoodieTimeline;
 import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
@@ -413,4 +415,20 @@ public class TestHoodieCommitArchiveLog extends HoodieClientTestHarness {
     assertEquals("Loaded inflight clean actions and the count should match", expectedTotalInstants,
         timeline.countInstants());
   }
+
+  @Test
+  public void testCommitMetadataConverter() {
+    HoodieCommitMetadata hoodieCommitMetadata = new HoodieCommitMetadata();
+    hoodieCommitMetadata.setOperationType(WriteOperationType.INSERT);
+
+    HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder().withPath(basePath)
+        .withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA).withParallelism(2, 2).forTable("test-commitMetadata-converter")
+        .withCompactionConfig(HoodieCompactionConfig.newBuilder().retainCommits(1).archiveCommitsWith(2, 5).build())
+        .build();
+    metaClient = HoodieTableMetaClient.reload(metaClient);
+    HoodieCommitArchiveLog archiveLog = new HoodieCommitArchiveLog(cfg, metaClient);
+
+    org.apache.hudi.avro.model.HoodieCommitMetadata expectedCommitMetadata = archiveLog.commitMetadataConverter(hoodieCommitMetadata);
+    assertEquals(expectedCommitMetadata.getOperationType(), WriteOperationType.INSERT.toString());
+  }
 }
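End to end, the recorded type can be read back off the timeline once a commit completes. A hedged sketch against the timeline API of this era of the codebase (names assumed, exception handling omitted):

// Sketch: read the latest completed commit and inspect its operation type.
HoodieActiveTimeline timeline = metaClient.getActiveTimeline();
HoodieInstant latest = timeline.getCommitTimeline().filterCompletedInstants().lastInstant().get();
HoodieCommitMetadata commitMetadata = HoodieCommitMetadata.fromBytes(
    timeline.getInstantDetails(latest).get(), HoodieCommitMetadata.class);
WriteOperationType op = commitMetadata.getOperationType(); // insert/upsert/bulkinsert/delete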