1
0

[HUDI-1738] Emit deletes for flink MOR table streaming read (#2742)

Currently we do a soft delete for DELETE row data when writing into a hoodie
table. For streaming read of a MOR table, the Flink reader detects the
delete records and still emits them if the record key semantics are still
kept.

This is useful, and actually a must, for incremental computation in
streaming ETL pipelines.
This commit is contained in:
Danny Chan
2021-04-01 15:25:31 +08:00
committed by GitHub
parent fe16d0de7c
commit 9804662bc8
20 changed files with 557 additions and 158 deletions

View File

@@ -31,6 +31,7 @@ import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.List;
public class KeyGenUtils {
@@ -41,6 +42,32 @@ public class KeyGenUtils {
// Partition path literal "default". NOTE(review): usages are not visible in this chunk — presumably the fallback partition path; confirm.
protected static final String DEFAULT_PARTITION_PATH = "default";
// Separator literal "/". NOTE(review): presumably joins partition path segments; confirm against usages.
protected static final String DEFAULT_PARTITION_PATH_SEPARATOR = "/";
/**
 * Extracts the record key field values, as strings, out of the given record key.
 * This is the reverse operation of {@link #getRecordKey(GenericRecord, String)}.
 *
 * <p>A key that contains no {@code ,} separator is treated as a simple key and is returned
 * unchanged in a single-element array. Otherwise the key is treated as a complex key made of
 * comma-separated {@code field:value} segments, and the value of each segment is returned in
 * order. Only the first {@code :} of a segment separates field from value, so values that
 * themselves contain {@code :} are preserved.
 *
 * @param recordKey the record key string to decode, must not be {@code null}
 * @return the key field values; an element is {@code null} where the encoded value equals
 *         {@code NULL_RECORDKEY_PLACEHOLDER} and {@code ""} where it equals
 *         {@code EMPTY_RECORDKEY_PLACEHOLDER}
 * @see SimpleAvroKeyGenerator
 * @see org.apache.hudi.keygen.ComplexAvroKeyGenerator
 */
public static String[] extractRecordKeys(String recordKey) {
  String[] fieldKV = recordKey.split(",");
  if (fieldKV.length == 1) {
    // a simple key: hand it back untouched
    return fieldKV;
  }
  // a complex key
  return Arrays.stream(fieldKV).map(kv -> {
    // Limit the split to 2 parts so a value containing ':' is not truncated,
    // and so a trailing ':' yields an empty value instead of a 1-element array.
    final String[] kvArray = kv.split(":", 2);
    if (kvArray.length == 1) {
      // No "field:" prefix in this segment; return it verbatim rather than
      // failing with ArrayIndexOutOfBoundsException on kvArray[1].
      return kvArray[0];
    }
    final String value = kvArray[1];
    if (value.equals(NULL_RECORDKEY_PLACEHOLDER)) {
      return null;
    } else if (value.equals(EMPTY_RECORDKEY_PLACEHOLDER)) {
      return "";
    } else {
      return value;
    }
  }).toArray(String[]::new);
}
public static String getRecordKey(GenericRecord record, List<String> recordKeyFields) {
boolean keyIsNullEmpty = true;
StringBuilder recordKey = new StringBuilder();