[HUDI-296] Explore use of spotless to auto fix formatting errors (#945)
- Add spotless format fixing to the project
- One-time reformatting of existing code for conformity
- Build now fails on formatting violations; running `mvn spotless:apply` auto-fixes them
This commit is contained in:
@@ -33,7 +33,7 @@ public abstract class BaseAvroPayload implements Serializable {
|
||||
/**
|
||||
* Avro data extracted from the source converted to bytes
|
||||
*/
|
||||
protected final byte [] recordBytes;
|
||||
protected final byte[] recordBytes;
|
||||
|
||||
/**
|
||||
* For purposes of preCombining
|
||||
|
||||
@@ -26,8 +26,7 @@ import org.apache.hudi.common.util.TypedProperties;
|
||||
import org.apache.hudi.exception.HoodieException;
|
||||
|
||||
/**
|
||||
* Complex key generator, which takes names of fields to be used for recordKey and partitionPath as
|
||||
* configs.
|
||||
* Complex key generator, which takes names of fields to be used for recordKey and partitionPath as configs.
|
||||
*/
|
||||
public class ComplexKeyGenerator extends KeyGenerator {
|
||||
|
||||
@@ -42,15 +41,14 @@ public class ComplexKeyGenerator extends KeyGenerator {
|
||||
public ComplexKeyGenerator(TypedProperties props) {
|
||||
super(props);
|
||||
this.recordKeyFields = Arrays.asList(props.getString(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY()).split(","));
|
||||
this.partitionPathFields = Arrays.asList(props
|
||||
.getString(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY()).split(","));
|
||||
this.partitionPathFields =
|
||||
Arrays.asList(props.getString(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY()).split(","));
|
||||
}
|
||||
|
||||
@Override
|
||||
public HoodieKey getKey(GenericRecord record) {
|
||||
if (recordKeyFields == null || partitionPathFields == null) {
|
||||
throw new HoodieException(
|
||||
"Unable to find field names for record key or partition path in cfg");
|
||||
throw new HoodieException("Unable to find field names for record key or partition path in cfg");
|
||||
}
|
||||
StringBuilder recordKey = new StringBuilder();
|
||||
for (String recordKeyField : recordKeyFields) {
|
||||
|
||||
@@ -66,7 +66,7 @@ public class DataSourceUtils {
|
||||
String[] parts = fieldName.split("\\.");
|
||||
GenericRecord valueNode = record;
|
||||
int i = 0;
|
||||
for (;i < parts.length; i++) {
|
||||
for (; i < parts.length; i++) {
|
||||
String part = parts[i];
|
||||
Object val = valueNode.get(part);
|
||||
if (val == null) {
|
||||
@@ -84,23 +84,21 @@ public class DataSourceUtils {
|
||||
valueNode = (GenericRecord) val;
|
||||
}
|
||||
}
|
||||
throw new HoodieException(fieldName + "(Part -" + parts[i] + ") field not found in record. "
|
||||
+ "Acceptable fields were :" + valueNode.getSchema().getFields()
|
||||
.stream().map(Field::name).collect(Collectors.toList()));
|
||||
throw new HoodieException(
|
||||
fieldName + "(Part -" + parts[i] + ") field not found in record. " + "Acceptable fields were :"
|
||||
+ valueNode.getSchema().getFields().stream().map(Field::name).collect(Collectors.toList()));
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a key generator class via reflection, passing in any configs needed.
|
||||
*
|
||||
* If the class name of key generator is configured through the properties file, i.e., {@code
|
||||
* props}, use the corresponding key generator class; otherwise, use the default key generator
|
||||
* class specified in {@code DataSourceWriteOptions}.
|
||||
* props}, use the corresponding key generator class; otherwise, use the default key generator class specified in
|
||||
* {@code DataSourceWriteOptions}.
|
||||
*/
|
||||
public static KeyGenerator createKeyGenerator(TypedProperties props) throws IOException {
|
||||
String keyGeneratorClass = props.getString(
|
||||
DataSourceWriteOptions.KEYGENERATOR_CLASS_OPT_KEY(),
|
||||
DataSourceWriteOptions.DEFAULT_KEYGENERATOR_CLASS_OPT_VAL()
|
||||
);
|
||||
String keyGeneratorClass = props.getString(DataSourceWriteOptions.KEYGENERATOR_CLASS_OPT_KEY(),
|
||||
DataSourceWriteOptions.DEFAULT_KEYGENERATOR_CLASS_OPT_VAL());
|
||||
try {
|
||||
return (KeyGenerator) ReflectionUtils.loadClass(keyGeneratorClass, props);
|
||||
} catch (Throwable e) {
|
||||
@@ -111,7 +109,7 @@ public class DataSourceUtils {
|
||||
/**
|
||||
* Create a partition value extractor class via reflection, passing in any configs needed
|
||||
*/
|
||||
public static PartitionValueExtractor createPartitionExtractor(String partitionExtractorClass) {
|
||||
public static PartitionValueExtractor createPartitionExtractor(String partitionExtractorClass) {
|
||||
try {
|
||||
return (PartitionValueExtractor) ReflectionUtils.loadClass(partitionExtractorClass);
|
||||
} catch (Throwable e) {
|
||||
@@ -122,18 +120,17 @@ public class DataSourceUtils {
|
||||
/**
|
||||
* Create a payload class via reflection, passing in an ordering/precombine value.
|
||||
*/
|
||||
public static HoodieRecordPayload createPayload(String payloadClass, GenericRecord record,
|
||||
Comparable orderingVal) throws IOException {
|
||||
public static HoodieRecordPayload createPayload(String payloadClass, GenericRecord record, Comparable orderingVal)
|
||||
throws IOException {
|
||||
try {
|
||||
return (HoodieRecordPayload) ReflectionUtils
|
||||
.loadClass(payloadClass, new Class<?>[]{GenericRecord.class, Comparable.class}, record, orderingVal);
|
||||
return (HoodieRecordPayload) ReflectionUtils.loadClass(payloadClass,
|
||||
new Class<?>[] {GenericRecord.class, Comparable.class}, record, orderingVal);
|
||||
} catch (Throwable e) {
|
||||
throw new IOException("Could not create payload for class: " + payloadClass, e);
|
||||
}
|
||||
}
|
||||
|
||||
public static void checkRequiredProperties(TypedProperties props,
|
||||
List<String> checkPropNames) {
|
||||
public static void checkRequiredProperties(TypedProperties props, List<String> checkPropNames) {
|
||||
checkPropNames.stream().forEach(prop -> {
|
||||
if (!props.containsKey(prop)) {
|
||||
throw new HoodieNotSupportedException("Required property " + prop + " is missing");
|
||||
@@ -141,28 +138,22 @@ public class DataSourceUtils {
|
||||
});
|
||||
}
|
||||
|
||||
public static HoodieWriteClient createHoodieClient(JavaSparkContext jssc, String schemaStr,
|
||||
String basePath, String tblName, Map<String, String> parameters) throws Exception {
|
||||
public static HoodieWriteClient createHoodieClient(JavaSparkContext jssc, String schemaStr, String basePath,
|
||||
String tblName, Map<String, String> parameters) throws Exception {
|
||||
|
||||
// inline compaction is on by default for MOR
|
||||
boolean inlineCompact = parameters.get(DataSourceWriteOptions.STORAGE_TYPE_OPT_KEY())
|
||||
.equals(DataSourceWriteOptions.MOR_STORAGE_TYPE_OPT_VAL());
|
||||
|
||||
// insert/bulk-insert combining to be true, if filtering for duplicates
|
||||
boolean combineInserts = Boolean.parseBoolean(parameters.get(
|
||||
DataSourceWriteOptions.INSERT_DROP_DUPS_OPT_KEY()));
|
||||
boolean combineInserts = Boolean.parseBoolean(parameters.get(DataSourceWriteOptions.INSERT_DROP_DUPS_OPT_KEY()));
|
||||
|
||||
HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder()
|
||||
.withPath(basePath).withAutoCommit(false)
|
||||
.combineInput(combineInserts, true)
|
||||
.withSchema(schemaStr).forTable(tblName).withIndexConfig(
|
||||
HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build())
|
||||
HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder().withPath(basePath).withAutoCommit(false)
|
||||
.combineInput(combineInserts, true).withSchema(schemaStr).forTable(tblName)
|
||||
.withIndexConfig(HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build())
|
||||
.withCompactionConfig(HoodieCompactionConfig.newBuilder()
|
||||
.withPayloadClass(parameters.get(
|
||||
DataSourceWriteOptions
|
||||
.PAYLOAD_CLASS_OPT_KEY()))
|
||||
.withInlineCompaction(inlineCompact)
|
||||
.build())
|
||||
.withPayloadClass(parameters.get(DataSourceWriteOptions.PAYLOAD_CLASS_OPT_KEY()))
|
||||
.withInlineCompaction(inlineCompact).build())
|
||||
// override above with Hoodie configs specified as options.
|
||||
.withProps(parameters).build();
|
||||
|
||||
@@ -170,27 +161,26 @@ public class DataSourceUtils {
|
||||
}
|
||||
|
||||
|
||||
public static JavaRDD<WriteStatus> doWriteOperation(HoodieWriteClient client,
|
||||
JavaRDD<HoodieRecord> hoodieRecords, String commitTime, String operation) {
|
||||
public static JavaRDD<WriteStatus> doWriteOperation(HoodieWriteClient client, JavaRDD<HoodieRecord> hoodieRecords,
|
||||
String commitTime, String operation) {
|
||||
if (operation.equals(DataSourceWriteOptions.BULK_INSERT_OPERATION_OPT_VAL())) {
|
||||
return client.bulkInsert(hoodieRecords, commitTime);
|
||||
} else if (operation.equals(DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL())) {
|
||||
return client.insert(hoodieRecords, commitTime);
|
||||
} else {
|
||||
//default is upsert
|
||||
// default is upsert
|
||||
return client.upsert(hoodieRecords, commitTime);
|
||||
}
|
||||
}
|
||||
|
||||
public static HoodieRecord createHoodieRecord(GenericRecord gr, Comparable orderingVal,
|
||||
HoodieKey hKey, String payloadClass) throws IOException {
|
||||
public static HoodieRecord createHoodieRecord(GenericRecord gr, Comparable orderingVal, HoodieKey hKey,
|
||||
String payloadClass) throws IOException {
|
||||
HoodieRecordPayload payload = DataSourceUtils.createPayload(payloadClass, gr, orderingVal);
|
||||
return new HoodieRecord<>(hKey, payload);
|
||||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
public static JavaRDD<HoodieRecord> dropDuplicates(JavaSparkContext jssc,
|
||||
JavaRDD<HoodieRecord> incomingHoodieRecords,
|
||||
public static JavaRDD<HoodieRecord> dropDuplicates(JavaSparkContext jssc, JavaRDD<HoodieRecord> incomingHoodieRecords,
|
||||
HoodieWriteConfig writeConfig, Option<EmbeddedTimelineService> timelineService) throws Exception {
|
||||
HoodieReadClient client = null;
|
||||
try {
|
||||
@@ -209,15 +199,10 @@ public class DataSourceUtils {
|
||||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
public static JavaRDD<HoodieRecord> dropDuplicates(JavaSparkContext jssc,
|
||||
JavaRDD<HoodieRecord> incomingHoodieRecords,
|
||||
Map<String, String> parameters,
|
||||
Option<EmbeddedTimelineService> timelineService)
|
||||
throws Exception {
|
||||
HoodieWriteConfig writeConfig = HoodieWriteConfig
|
||||
.newBuilder()
|
||||
.withPath(parameters.get("path"))
|
||||
.withProps(parameters).build();
|
||||
public static JavaRDD<HoodieRecord> dropDuplicates(JavaSparkContext jssc, JavaRDD<HoodieRecord> incomingHoodieRecords,
|
||||
Map<String, String> parameters, Option<EmbeddedTimelineService> timelineService) throws Exception {
|
||||
HoodieWriteConfig writeConfig =
|
||||
HoodieWriteConfig.newBuilder().withPath(parameters.get("path")).withProps(parameters).build();
|
||||
return dropDuplicates(jssc, incomingHoodieRecords, writeConfig, timelineService);
|
||||
}
|
||||
|
||||
@@ -234,17 +219,17 @@ public class DataSourceUtils {
|
||||
hiveSyncConfig.databaseName = props.getString(DataSourceWriteOptions.HIVE_DATABASE_OPT_KEY(),
|
||||
DataSourceWriteOptions.DEFAULT_HIVE_DATABASE_OPT_VAL());
|
||||
hiveSyncConfig.tableName = props.getString(DataSourceWriteOptions.HIVE_TABLE_OPT_KEY());
|
||||
hiveSyncConfig.hiveUser = props.getString(DataSourceWriteOptions.HIVE_USER_OPT_KEY(),
|
||||
DataSourceWriteOptions.DEFAULT_HIVE_USER_OPT_VAL());
|
||||
hiveSyncConfig.hivePass = props.getString(DataSourceWriteOptions.HIVE_PASS_OPT_KEY(),
|
||||
DataSourceWriteOptions.DEFAULT_HIVE_PASS_OPT_VAL());
|
||||
hiveSyncConfig.jdbcUrl = props.getString(DataSourceWriteOptions.HIVE_URL_OPT_KEY(),
|
||||
DataSourceWriteOptions.DEFAULT_HIVE_URL_OPT_VAL());
|
||||
hiveSyncConfig.hiveUser =
|
||||
props.getString(DataSourceWriteOptions.HIVE_USER_OPT_KEY(), DataSourceWriteOptions.DEFAULT_HIVE_USER_OPT_VAL());
|
||||
hiveSyncConfig.hivePass =
|
||||
props.getString(DataSourceWriteOptions.HIVE_PASS_OPT_KEY(), DataSourceWriteOptions.DEFAULT_HIVE_PASS_OPT_VAL());
|
||||
hiveSyncConfig.jdbcUrl =
|
||||
props.getString(DataSourceWriteOptions.HIVE_URL_OPT_KEY(), DataSourceWriteOptions.DEFAULT_HIVE_URL_OPT_VAL());
|
||||
hiveSyncConfig.partitionFields =
|
||||
props.getStringList(DataSourceWriteOptions.HIVE_PARTITION_FIELDS_OPT_KEY(), ",", new ArrayList<>());
|
||||
props.getStringList(DataSourceWriteOptions.HIVE_PARTITION_FIELDS_OPT_KEY(), ",", new ArrayList<>());
|
||||
hiveSyncConfig.partitionValueExtractorClass =
|
||||
props.getString(DataSourceWriteOptions.HIVE_PARTITION_EXTRACTOR_CLASS_OPT_KEY(),
|
||||
SlashEncodedDayPartitionValueExtractor.class.getName());
|
||||
props.getString(DataSourceWriteOptions.HIVE_PARTITION_EXTRACTOR_CLASS_OPT_KEY(),
|
||||
SlashEncodedDayPartitionValueExtractor.class.getName());
|
||||
return hiveSyncConfig;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -29,7 +29,7 @@ import org.apache.hudi.common.util.Option;
|
||||
*/
|
||||
public class EmptyHoodieRecordPayload implements HoodieRecordPayload<EmptyHoodieRecordPayload> {
|
||||
|
||||
public EmptyHoodieRecordPayload(GenericRecord record, Comparable orderingVal) { }
|
||||
public EmptyHoodieRecordPayload(GenericRecord record, Comparable orderingVal) {}
|
||||
|
||||
@Override
|
||||
public EmptyHoodieRecordPayload preCombine(EmptyHoodieRecordPayload another) {
|
||||
|
||||
@@ -29,14 +29,13 @@ import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
|
||||
import org.apache.hudi.common.table.timeline.HoodieInstant;
|
||||
|
||||
/**
|
||||
* List of helpers to aid, construction of instanttime for read and write operations using
|
||||
* datasource
|
||||
* List of helpers to aid, construction of instanttime for read and write operations using datasource
|
||||
*/
|
||||
public class HoodieDataSourceHelpers {
|
||||
|
||||
/**
|
||||
* Checks if the Hoodie dataset has new data since given timestamp. This can be subsequently fed
|
||||
* to an incremental view read, to perform incremental processing.
|
||||
* Checks if the Hoodie dataset has new data since given timestamp. This can be subsequently fed to an incremental
|
||||
* view read, to perform incremental processing.
|
||||
*/
|
||||
public static boolean hasNewCommits(FileSystem fs, String basePath, String commitTimestamp) {
|
||||
return listCommitsSince(fs, basePath, commitTimestamp).size() > 0;
|
||||
@@ -45,8 +44,7 @@ public class HoodieDataSourceHelpers {
|
||||
/**
|
||||
* Get a list of instant times that have occurred, from the given instant timestamp.
|
||||
*/
|
||||
public static List<String> listCommitsSince(FileSystem fs, String basePath,
|
||||
String instantTimestamp) {
|
||||
public static List<String> listCommitsSince(FileSystem fs, String basePath, String instantTimestamp) {
|
||||
HoodieTimeline timeline = allCompletedCommitsCompactions(fs, basePath);
|
||||
return timeline.findInstantsAfter(instantTimestamp, Integer.MAX_VALUE).getInstants()
|
||||
.map(HoodieInstant::getTimestamp).collect(Collectors.toList());
|
||||
@@ -61,15 +59,14 @@ public class HoodieDataSourceHelpers {
|
||||
}
|
||||
|
||||
/**
|
||||
* Obtain all the commits, compactions that have occurred on the timeline, whose instant times
|
||||
* could be fed into the datasource options.
|
||||
* Obtain all the commits, compactions that have occurred on the timeline, whose instant times could be fed into the
|
||||
* datasource options.
|
||||
*/
|
||||
public static HoodieTimeline allCompletedCommitsCompactions(FileSystem fs, String basePath) {
|
||||
HoodieTableMetaClient metaClient = new HoodieTableMetaClient(fs.getConf(), basePath, true);
|
||||
if (metaClient.getTableType().equals(HoodieTableType.MERGE_ON_READ)) {
|
||||
return metaClient.getActiveTimeline().getTimelineOfActions(
|
||||
Sets.newHashSet(HoodieActiveTimeline.COMMIT_ACTION,
|
||||
HoodieActiveTimeline.DELTA_COMMIT_ACTION));
|
||||
Sets.newHashSet(HoodieActiveTimeline.COMMIT_ACTION, HoodieActiveTimeline.DELTA_COMMIT_ACTION));
|
||||
} else {
|
||||
return metaClient.getCommitTimeline().filterCompletedInstants();
|
||||
}
|
||||
|
||||
@@ -24,9 +24,7 @@ import org.apache.hudi.common.model.HoodieKey;
|
||||
import org.apache.hudi.common.util.TypedProperties;
|
||||
|
||||
/**
|
||||
* Abstract class to extend for plugging in extraction of
|
||||
* {@link HoodieKey}
|
||||
* from an Avro record
|
||||
* Abstract class to extend for plugging in extraction of {@link HoodieKey} from an Avro record
|
||||
*/
|
||||
public abstract class KeyGenerator implements Serializable {
|
||||
|
||||
|
||||
@@ -32,8 +32,8 @@ import org.apache.hudi.common.util.Option;
|
||||
* 1. preCombine - Picks the latest delta record for a key, based on an ordering field 2.
|
||||
* combineAndGetUpdateValue/getInsertValue - Simply overwrites storage with latest delta record
|
||||
*/
|
||||
public class OverwriteWithLatestAvroPayload extends BaseAvroPayload implements
|
||||
HoodieRecordPayload<OverwriteWithLatestAvroPayload> {
|
||||
public class OverwriteWithLatestAvroPayload extends BaseAvroPayload
|
||||
implements HoodieRecordPayload<OverwriteWithLatestAvroPayload> {
|
||||
|
||||
/**
|
||||
* @param record
|
||||
@@ -58,8 +58,7 @@ public class OverwriteWithLatestAvroPayload extends BaseAvroPayload implements
|
||||
}
|
||||
|
||||
@Override
|
||||
public Option<IndexedRecord> combineAndGetUpdateValue(IndexedRecord currentValue, Schema schema)
|
||||
throws IOException {
|
||||
public Option<IndexedRecord> combineAndGetUpdateValue(IndexedRecord currentValue, Schema schema) throws IOException {
|
||||
// combining strategy here trivially ignores currentValue on disk and writes this record
|
||||
return getInsertValue(schema);
|
||||
}
|
||||
|
||||
@@ -39,8 +39,8 @@ import org.apache.hudi.common.util.Option;
|
||||
import org.apache.hudi.exception.HoodieIOException;
|
||||
|
||||
/**
|
||||
* Class to be used in quickstart guide for generating inserts and updates against a corpus.
|
||||
* Test data uses a toy Uber trips, data model.
|
||||
* Class to be used in quickstart guide for generating inserts and updates against a corpus. Test data uses a toy Uber
|
||||
* trips, data model.
|
||||
*/
|
||||
public class QuickstartUtils {
|
||||
|
||||
@@ -49,20 +49,13 @@ public class QuickstartUtils {
|
||||
private static final String DEFAULT_SECOND_PARTITION_PATH = "americas/brazil/sao_paulo";
|
||||
private static final String DEFAULT_THIRD_PARTITION_PATH = "asia/india/chennai";
|
||||
|
||||
private static final String[] DEFAULT_PARTITION_PATHS = {
|
||||
DEFAULT_FIRST_PARTITION_PATH,
|
||||
DEFAULT_SECOND_PARTITION_PATH,
|
||||
DEFAULT_THIRD_PARTITION_PATH
|
||||
};
|
||||
private static final String[] DEFAULT_PARTITION_PATHS =
|
||||
{DEFAULT_FIRST_PARTITION_PATH, DEFAULT_SECOND_PARTITION_PATH, DEFAULT_THIRD_PARTITION_PATH};
|
||||
static String TRIP_EXAMPLE_SCHEMA = "{\"type\": \"record\"," + "\"name\": \"triprec\"," + "\"fields\": [ "
|
||||
+ "{\"name\": \"ts\",\"type\": \"double\"},"
|
||||
+ "{\"name\": \"uuid\", \"type\": \"string\"},"
|
||||
+ "{\"name\": \"rider\", \"type\": \"string\"},"
|
||||
+ "{\"name\": \"driver\", \"type\": \"string\"},"
|
||||
+ "{\"name\": \"begin_lat\", \"type\": \"double\"},"
|
||||
+ "{\"name\": \"begin_lon\", \"type\": \"double\"},"
|
||||
+ "{\"name\": \"end_lat\", \"type\": \"double\"},"
|
||||
+ "{\"name\": \"end_lon\", \"type\": \"double\"},"
|
||||
+ "{\"name\": \"ts\",\"type\": \"double\"}," + "{\"name\": \"uuid\", \"type\": \"string\"},"
|
||||
+ "{\"name\": \"rider\", \"type\": \"string\"}," + "{\"name\": \"driver\", \"type\": \"string\"},"
|
||||
+ "{\"name\": \"begin_lat\", \"type\": \"double\"}," + "{\"name\": \"begin_lon\", \"type\": \"double\"},"
|
||||
+ "{\"name\": \"end_lat\", \"type\": \"double\"}," + "{\"name\": \"end_lon\", \"type\": \"double\"},"
|
||||
+ "{\"name\":\"fare\",\"type\": \"double\"}]}";
|
||||
static Schema avroSchema = new Schema.Parser().parse(TRIP_EXAMPLE_SCHEMA);
|
||||
|
||||
@@ -87,8 +80,7 @@ public class QuickstartUtils {
|
||||
int stringLength = 3;
|
||||
StringBuilder buffer = new StringBuilder(stringLength);
|
||||
for (int i = 0; i < stringLength; i++) {
|
||||
int randomLimitedInt = leftLimit + (int)
|
||||
(rand.nextFloat() * (rightLimit - leftLimit + 1));
|
||||
int randomLimitedInt = leftLimit + (int) (rand.nextFloat() * (rightLimit - leftLimit + 1));
|
||||
buffer.append((char) randomLimitedInt);
|
||||
}
|
||||
return buffer.toString();
|
||||
@@ -99,7 +91,7 @@ public class QuickstartUtils {
|
||||
}
|
||||
|
||||
public static GenericRecord generateGenericRecord(String rowKey, String riderName, String driverName,
|
||||
double timestamp) {
|
||||
double timestamp) {
|
||||
GenericRecord rec = new GenericData.Record(avroSchema);
|
||||
rec.put("uuid", rowKey);
|
||||
rec.put("ts", timestamp);
|
||||
@@ -114,15 +106,15 @@ public class QuickstartUtils {
|
||||
}
|
||||
|
||||
/**
|
||||
* Generates a new avro record of the above schema format, retaining the key if optionally provided.
|
||||
* The riderDriverSuffix string is a random String to simulate updates by changing the rider driver fields
|
||||
* for records belonging to the same commit. It is purely used for demo purposes. In real world, the actual
|
||||
* updates are assumed to be provided based on the application requirements.
|
||||
* Generates a new avro record of the above schema format, retaining the key if optionally provided. The
|
||||
* riderDriverSuffix string is a random String to simulate updates by changing the rider driver fields for records
|
||||
* belonging to the same commit. It is purely used for demo purposes. In real world, the actual updates are assumed
|
||||
* to be provided based on the application requirements.
|
||||
*/
|
||||
public static OverwriteWithLatestAvroPayload generateRandomValue(HoodieKey key, String riderDriverSuffix) throws
|
||||
IOException {
|
||||
GenericRecord rec = generateGenericRecord(key.getRecordKey(), "rider-" + riderDriverSuffix, "driver-"
|
||||
+ riderDriverSuffix, 0.0);
|
||||
public static OverwriteWithLatestAvroPayload generateRandomValue(HoodieKey key, String riderDriverSuffix)
|
||||
throws IOException {
|
||||
GenericRecord rec =
|
||||
generateGenericRecord(key.getRecordKey(), "rider-" + riderDriverSuffix, "driver-" + riderDriverSuffix, 0.0);
|
||||
return new OverwriteWithLatestAvroPayload(Option.of(rec));
|
||||
}
|
||||
|
||||
@@ -182,19 +174,19 @@ public class QuickstartUtils {
|
||||
|
||||
private static Option<String> convertToString(HoodieRecord record) {
|
||||
try {
|
||||
String str = HoodieAvroUtils.bytesToAvro(((OverwriteWithLatestAvroPayload) record.getData()).recordBytes,
|
||||
DataGenerator.avroSchema).toString();
|
||||
String str = HoodieAvroUtils
|
||||
.bytesToAvro(((OverwriteWithLatestAvroPayload) record.getData()).recordBytes, DataGenerator.avroSchema)
|
||||
.toString();
|
||||
str = "{" + str.substring(str.indexOf("\"ts\":"));
|
||||
return Option.of(str.replaceAll("}",
|
||||
", \"partitionpath\": \"" + record.getPartitionPath() + "\"}"));
|
||||
return Option.of(str.replaceAll("}", ", \"partitionpath\": \"" + record.getPartitionPath() + "\"}"));
|
||||
} catch (IOException e) {
|
||||
return Option.empty();
|
||||
}
|
||||
}
|
||||
|
||||
public static List<String> convertToStringList(List<HoodieRecord> records) {
|
||||
return records.stream().map(hr -> convertToString(hr)).filter(os -> os.isPresent())
|
||||
.map(os -> os.get()).collect(Collectors.toList());
|
||||
return records.stream().map(hr -> convertToString(hr)).filter(os -> os.isPresent()).map(os -> os.get())
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
public static Map<String, String> getQuickstartWriteConfigs() {
|
||||
|
||||
@@ -24,8 +24,7 @@ import org.apache.hudi.common.util.TypedProperties;
|
||||
import org.apache.hudi.exception.HoodieException;
|
||||
|
||||
/**
|
||||
* Simple key generator, which takes names of fields to be used for recordKey and partitionPath as
|
||||
* configs.
|
||||
* Simple key generator, which takes names of fields to be used for recordKey and partitionPath as configs.
|
||||
*/
|
||||
public class SimpleKeyGenerator extends KeyGenerator {
|
||||
|
||||
@@ -38,15 +37,13 @@ public class SimpleKeyGenerator extends KeyGenerator {
|
||||
public SimpleKeyGenerator(TypedProperties props) {
|
||||
super(props);
|
||||
this.recordKeyField = props.getString(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY());
|
||||
this.partitionPathField = props
|
||||
.getString(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY());
|
||||
this.partitionPathField = props.getString(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY());
|
||||
}
|
||||
|
||||
@Override
|
||||
public HoodieKey getKey(GenericRecord record) {
|
||||
if (recordKeyField == null || partitionPathField == null) {
|
||||
throw new HoodieException(
|
||||
"Unable to find field names for record key or partition path in cfg");
|
||||
throw new HoodieException("Unable to find field names for record key or partition path in cfg");
|
||||
}
|
||||
|
||||
String recordKey = DataSourceUtils.getNestedFieldValAsString(record, recordKeyField);
|
||||
|
||||
Reference in New Issue
Block a user