1
0

[HUDI-296] Explore use of spotless to auto fix formatting errors (#945)

- Add spotless format fixing to project
- One-time reformatting of the codebase for conformity
- Build fails on formatting violations; running mvn spotless:apply auto-fixes them
This commit is contained in:
leesf
2019-10-10 20:19:40 +08:00
committed by vinoth chandar
parent 834c591955
commit b19bed442d
381 changed files with 7350 additions and 9064 deletions

View File

@@ -33,7 +33,7 @@ public abstract class BaseAvroPayload implements Serializable {
/**
* Avro data extracted from the source converted to bytes
*/
protected final byte [] recordBytes;
protected final byte[] recordBytes;
/**
* For purposes of preCombining

View File

@@ -26,8 +26,7 @@ import org.apache.hudi.common.util.TypedProperties;
import org.apache.hudi.exception.HoodieException;
/**
* Complex key generator, which takes names of fields to be used for recordKey and partitionPath as
* configs.
* Complex key generator, which takes names of fields to be used for recordKey and partitionPath as configs.
*/
public class ComplexKeyGenerator extends KeyGenerator {
@@ -42,15 +41,14 @@ public class ComplexKeyGenerator extends KeyGenerator {
public ComplexKeyGenerator(TypedProperties props) {
super(props);
this.recordKeyFields = Arrays.asList(props.getString(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY()).split(","));
this.partitionPathFields = Arrays.asList(props
.getString(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY()).split(","));
this.partitionPathFields =
Arrays.asList(props.getString(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY()).split(","));
}
@Override
public HoodieKey getKey(GenericRecord record) {
if (recordKeyFields == null || partitionPathFields == null) {
throw new HoodieException(
"Unable to find field names for record key or partition path in cfg");
throw new HoodieException("Unable to find field names for record key or partition path in cfg");
}
StringBuilder recordKey = new StringBuilder();
for (String recordKeyField : recordKeyFields) {

View File

@@ -66,7 +66,7 @@ public class DataSourceUtils {
String[] parts = fieldName.split("\\.");
GenericRecord valueNode = record;
int i = 0;
for (;i < parts.length; i++) {
for (; i < parts.length; i++) {
String part = parts[i];
Object val = valueNode.get(part);
if (val == null) {
@@ -84,23 +84,21 @@ public class DataSourceUtils {
valueNode = (GenericRecord) val;
}
}
throw new HoodieException(fieldName + "(Part -" + parts[i] + ") field not found in record. "
+ "Acceptable fields were :" + valueNode.getSchema().getFields()
.stream().map(Field::name).collect(Collectors.toList()));
throw new HoodieException(
fieldName + "(Part -" + parts[i] + ") field not found in record. " + "Acceptable fields were :"
+ valueNode.getSchema().getFields().stream().map(Field::name).collect(Collectors.toList()));
}
/**
* Create a key generator class via reflection, passing in any configs needed.
*
* If the class name of key generator is configured through the properties file, i.e., {@code
* props}, use the corresponding key generator class; otherwise, use the default key generator
* class specified in {@code DataSourceWriteOptions}.
* props}, use the corresponding key generator class; otherwise, use the default key generator class specified in
* {@code DataSourceWriteOptions}.
*/
public static KeyGenerator createKeyGenerator(TypedProperties props) throws IOException {
String keyGeneratorClass = props.getString(
DataSourceWriteOptions.KEYGENERATOR_CLASS_OPT_KEY(),
DataSourceWriteOptions.DEFAULT_KEYGENERATOR_CLASS_OPT_VAL()
);
String keyGeneratorClass = props.getString(DataSourceWriteOptions.KEYGENERATOR_CLASS_OPT_KEY(),
DataSourceWriteOptions.DEFAULT_KEYGENERATOR_CLASS_OPT_VAL());
try {
return (KeyGenerator) ReflectionUtils.loadClass(keyGeneratorClass, props);
} catch (Throwable e) {
@@ -111,7 +109,7 @@ public class DataSourceUtils {
/**
* Create a partition value extractor class via reflection, passing in any configs needed
*/
public static PartitionValueExtractor createPartitionExtractor(String partitionExtractorClass) {
public static PartitionValueExtractor createPartitionExtractor(String partitionExtractorClass) {
try {
return (PartitionValueExtractor) ReflectionUtils.loadClass(partitionExtractorClass);
} catch (Throwable e) {
@@ -122,18 +120,17 @@ public class DataSourceUtils {
/**
* Create a payload class via reflection, passing in an ordering/precombine value.
*/
public static HoodieRecordPayload createPayload(String payloadClass, GenericRecord record,
Comparable orderingVal) throws IOException {
public static HoodieRecordPayload createPayload(String payloadClass, GenericRecord record, Comparable orderingVal)
throws IOException {
try {
return (HoodieRecordPayload) ReflectionUtils
.loadClass(payloadClass, new Class<?>[]{GenericRecord.class, Comparable.class}, record, orderingVal);
return (HoodieRecordPayload) ReflectionUtils.loadClass(payloadClass,
new Class<?>[] {GenericRecord.class, Comparable.class}, record, orderingVal);
} catch (Throwable e) {
throw new IOException("Could not create payload for class: " + payloadClass, e);
}
}
public static void checkRequiredProperties(TypedProperties props,
List<String> checkPropNames) {
public static void checkRequiredProperties(TypedProperties props, List<String> checkPropNames) {
checkPropNames.stream().forEach(prop -> {
if (!props.containsKey(prop)) {
throw new HoodieNotSupportedException("Required property " + prop + " is missing");
@@ -141,28 +138,22 @@ public class DataSourceUtils {
});
}
public static HoodieWriteClient createHoodieClient(JavaSparkContext jssc, String schemaStr,
String basePath, String tblName, Map<String, String> parameters) throws Exception {
public static HoodieWriteClient createHoodieClient(JavaSparkContext jssc, String schemaStr, String basePath,
String tblName, Map<String, String> parameters) throws Exception {
// inline compaction is on by default for MOR
boolean inlineCompact = parameters.get(DataSourceWriteOptions.STORAGE_TYPE_OPT_KEY())
.equals(DataSourceWriteOptions.MOR_STORAGE_TYPE_OPT_VAL());
// insert/bulk-insert combining to be true, if filtering for duplicates
boolean combineInserts = Boolean.parseBoolean(parameters.get(
DataSourceWriteOptions.INSERT_DROP_DUPS_OPT_KEY()));
boolean combineInserts = Boolean.parseBoolean(parameters.get(DataSourceWriteOptions.INSERT_DROP_DUPS_OPT_KEY()));
HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder()
.withPath(basePath).withAutoCommit(false)
.combineInput(combineInserts, true)
.withSchema(schemaStr).forTable(tblName).withIndexConfig(
HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build())
HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder().withPath(basePath).withAutoCommit(false)
.combineInput(combineInserts, true).withSchema(schemaStr).forTable(tblName)
.withIndexConfig(HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build())
.withCompactionConfig(HoodieCompactionConfig.newBuilder()
.withPayloadClass(parameters.get(
DataSourceWriteOptions
.PAYLOAD_CLASS_OPT_KEY()))
.withInlineCompaction(inlineCompact)
.build())
.withPayloadClass(parameters.get(DataSourceWriteOptions.PAYLOAD_CLASS_OPT_KEY()))
.withInlineCompaction(inlineCompact).build())
// override above with Hoodie configs specified as options.
.withProps(parameters).build();
@@ -170,27 +161,26 @@ public class DataSourceUtils {
}
public static JavaRDD<WriteStatus> doWriteOperation(HoodieWriteClient client,
JavaRDD<HoodieRecord> hoodieRecords, String commitTime, String operation) {
public static JavaRDD<WriteStatus> doWriteOperation(HoodieWriteClient client, JavaRDD<HoodieRecord> hoodieRecords,
String commitTime, String operation) {
if (operation.equals(DataSourceWriteOptions.BULK_INSERT_OPERATION_OPT_VAL())) {
return client.bulkInsert(hoodieRecords, commitTime);
} else if (operation.equals(DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL())) {
return client.insert(hoodieRecords, commitTime);
} else {
//default is upsert
// default is upsert
return client.upsert(hoodieRecords, commitTime);
}
}
public static HoodieRecord createHoodieRecord(GenericRecord gr, Comparable orderingVal,
HoodieKey hKey, String payloadClass) throws IOException {
public static HoodieRecord createHoodieRecord(GenericRecord gr, Comparable orderingVal, HoodieKey hKey,
String payloadClass) throws IOException {
HoodieRecordPayload payload = DataSourceUtils.createPayload(payloadClass, gr, orderingVal);
return new HoodieRecord<>(hKey, payload);
}
@SuppressWarnings("unchecked")
public static JavaRDD<HoodieRecord> dropDuplicates(JavaSparkContext jssc,
JavaRDD<HoodieRecord> incomingHoodieRecords,
public static JavaRDD<HoodieRecord> dropDuplicates(JavaSparkContext jssc, JavaRDD<HoodieRecord> incomingHoodieRecords,
HoodieWriteConfig writeConfig, Option<EmbeddedTimelineService> timelineService) throws Exception {
HoodieReadClient client = null;
try {
@@ -209,15 +199,10 @@ public class DataSourceUtils {
}
@SuppressWarnings("unchecked")
public static JavaRDD<HoodieRecord> dropDuplicates(JavaSparkContext jssc,
JavaRDD<HoodieRecord> incomingHoodieRecords,
Map<String, String> parameters,
Option<EmbeddedTimelineService> timelineService)
throws Exception {
HoodieWriteConfig writeConfig = HoodieWriteConfig
.newBuilder()
.withPath(parameters.get("path"))
.withProps(parameters).build();
public static JavaRDD<HoodieRecord> dropDuplicates(JavaSparkContext jssc, JavaRDD<HoodieRecord> incomingHoodieRecords,
Map<String, String> parameters, Option<EmbeddedTimelineService> timelineService) throws Exception {
HoodieWriteConfig writeConfig =
HoodieWriteConfig.newBuilder().withPath(parameters.get("path")).withProps(parameters).build();
return dropDuplicates(jssc, incomingHoodieRecords, writeConfig, timelineService);
}
@@ -234,17 +219,17 @@ public class DataSourceUtils {
hiveSyncConfig.databaseName = props.getString(DataSourceWriteOptions.HIVE_DATABASE_OPT_KEY(),
DataSourceWriteOptions.DEFAULT_HIVE_DATABASE_OPT_VAL());
hiveSyncConfig.tableName = props.getString(DataSourceWriteOptions.HIVE_TABLE_OPT_KEY());
hiveSyncConfig.hiveUser = props.getString(DataSourceWriteOptions.HIVE_USER_OPT_KEY(),
DataSourceWriteOptions.DEFAULT_HIVE_USER_OPT_VAL());
hiveSyncConfig.hivePass = props.getString(DataSourceWriteOptions.HIVE_PASS_OPT_KEY(),
DataSourceWriteOptions.DEFAULT_HIVE_PASS_OPT_VAL());
hiveSyncConfig.jdbcUrl = props.getString(DataSourceWriteOptions.HIVE_URL_OPT_KEY(),
DataSourceWriteOptions.DEFAULT_HIVE_URL_OPT_VAL());
hiveSyncConfig.hiveUser =
props.getString(DataSourceWriteOptions.HIVE_USER_OPT_KEY(), DataSourceWriteOptions.DEFAULT_HIVE_USER_OPT_VAL());
hiveSyncConfig.hivePass =
props.getString(DataSourceWriteOptions.HIVE_PASS_OPT_KEY(), DataSourceWriteOptions.DEFAULT_HIVE_PASS_OPT_VAL());
hiveSyncConfig.jdbcUrl =
props.getString(DataSourceWriteOptions.HIVE_URL_OPT_KEY(), DataSourceWriteOptions.DEFAULT_HIVE_URL_OPT_VAL());
hiveSyncConfig.partitionFields =
props.getStringList(DataSourceWriteOptions.HIVE_PARTITION_FIELDS_OPT_KEY(), ",", new ArrayList<>());
props.getStringList(DataSourceWriteOptions.HIVE_PARTITION_FIELDS_OPT_KEY(), ",", new ArrayList<>());
hiveSyncConfig.partitionValueExtractorClass =
props.getString(DataSourceWriteOptions.HIVE_PARTITION_EXTRACTOR_CLASS_OPT_KEY(),
SlashEncodedDayPartitionValueExtractor.class.getName());
props.getString(DataSourceWriteOptions.HIVE_PARTITION_EXTRACTOR_CLASS_OPT_KEY(),
SlashEncodedDayPartitionValueExtractor.class.getName());
return hiveSyncConfig;
}
}

View File

@@ -29,7 +29,7 @@ import org.apache.hudi.common.util.Option;
*/
public class EmptyHoodieRecordPayload implements HoodieRecordPayload<EmptyHoodieRecordPayload> {
public EmptyHoodieRecordPayload(GenericRecord record, Comparable orderingVal) { }
public EmptyHoodieRecordPayload(GenericRecord record, Comparable orderingVal) {}
@Override
public EmptyHoodieRecordPayload preCombine(EmptyHoodieRecordPayload another) {

View File

@@ -29,14 +29,13 @@ import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
import org.apache.hudi.common.table.timeline.HoodieInstant;
/**
* List of helpers to aid, construction of instanttime for read and write operations using
* datasource
* List of helpers to aid, construction of instanttime for read and write operations using datasource
*/
public class HoodieDataSourceHelpers {
/**
* Checks if the Hoodie dataset has new data since given timestamp. This can be subsequently fed
* to an incremental view read, to perform incremental processing.
* Checks if the Hoodie dataset has new data since given timestamp. This can be subsequently fed to an incremental
* view read, to perform incremental processing.
*/
public static boolean hasNewCommits(FileSystem fs, String basePath, String commitTimestamp) {
return listCommitsSince(fs, basePath, commitTimestamp).size() > 0;
@@ -45,8 +44,7 @@ public class HoodieDataSourceHelpers {
/**
* Get a list of instant times that have occurred, from the given instant timestamp.
*/
public static List<String> listCommitsSince(FileSystem fs, String basePath,
String instantTimestamp) {
public static List<String> listCommitsSince(FileSystem fs, String basePath, String instantTimestamp) {
HoodieTimeline timeline = allCompletedCommitsCompactions(fs, basePath);
return timeline.findInstantsAfter(instantTimestamp, Integer.MAX_VALUE).getInstants()
.map(HoodieInstant::getTimestamp).collect(Collectors.toList());
@@ -61,15 +59,14 @@ public class HoodieDataSourceHelpers {
}
/**
* Obtain all the commits, compactions that have occurred on the timeline, whose instant times
* could be fed into the datasource options.
* Obtain all the commits, compactions that have occurred on the timeline, whose instant times could be fed into the
* datasource options.
*/
public static HoodieTimeline allCompletedCommitsCompactions(FileSystem fs, String basePath) {
HoodieTableMetaClient metaClient = new HoodieTableMetaClient(fs.getConf(), basePath, true);
if (metaClient.getTableType().equals(HoodieTableType.MERGE_ON_READ)) {
return metaClient.getActiveTimeline().getTimelineOfActions(
Sets.newHashSet(HoodieActiveTimeline.COMMIT_ACTION,
HoodieActiveTimeline.DELTA_COMMIT_ACTION));
Sets.newHashSet(HoodieActiveTimeline.COMMIT_ACTION, HoodieActiveTimeline.DELTA_COMMIT_ACTION));
} else {
return metaClient.getCommitTimeline().filterCompletedInstants();
}

View File

@@ -24,9 +24,7 @@ import org.apache.hudi.common.model.HoodieKey;
import org.apache.hudi.common.util.TypedProperties;
/**
* Abstract class to extend for plugging in extraction of
* {@link HoodieKey}
* from an Avro record
* Abstract class to extend for plugging in extraction of {@link HoodieKey} from an Avro record
*/
public abstract class KeyGenerator implements Serializable {

View File

@@ -32,8 +32,8 @@ import org.apache.hudi.common.util.Option;
* 1. preCombine - Picks the latest delta record for a key, based on an ordering field 2.
* combineAndGetUpdateValue/getInsertValue - Simply overwrites storage with latest delta record
*/
public class OverwriteWithLatestAvroPayload extends BaseAvroPayload implements
HoodieRecordPayload<OverwriteWithLatestAvroPayload> {
public class OverwriteWithLatestAvroPayload extends BaseAvroPayload
implements HoodieRecordPayload<OverwriteWithLatestAvroPayload> {
/**
* @param record
@@ -58,8 +58,7 @@ public class OverwriteWithLatestAvroPayload extends BaseAvroPayload implements
}
@Override
public Option<IndexedRecord> combineAndGetUpdateValue(IndexedRecord currentValue, Schema schema)
throws IOException {
public Option<IndexedRecord> combineAndGetUpdateValue(IndexedRecord currentValue, Schema schema) throws IOException {
// combining strategy here trivially ignores currentValue on disk and writes this record
return getInsertValue(schema);
}

View File

@@ -39,8 +39,8 @@ import org.apache.hudi.common.util.Option;
import org.apache.hudi.exception.HoodieIOException;
/**
* Class to be used in quickstart guide for generating inserts and updates against a corpus.
* Test data uses a toy Uber trips, data model.
* Class to be used in quickstart guide for generating inserts and updates against a corpus. Test data uses a toy Uber
* trips, data model.
*/
public class QuickstartUtils {
@@ -49,20 +49,13 @@ public class QuickstartUtils {
private static final String DEFAULT_SECOND_PARTITION_PATH = "americas/brazil/sao_paulo";
private static final String DEFAULT_THIRD_PARTITION_PATH = "asia/india/chennai";
private static final String[] DEFAULT_PARTITION_PATHS = {
DEFAULT_FIRST_PARTITION_PATH,
DEFAULT_SECOND_PARTITION_PATH,
DEFAULT_THIRD_PARTITION_PATH
};
private static final String[] DEFAULT_PARTITION_PATHS =
{DEFAULT_FIRST_PARTITION_PATH, DEFAULT_SECOND_PARTITION_PATH, DEFAULT_THIRD_PARTITION_PATH};
static String TRIP_EXAMPLE_SCHEMA = "{\"type\": \"record\"," + "\"name\": \"triprec\"," + "\"fields\": [ "
+ "{\"name\": \"ts\",\"type\": \"double\"},"
+ "{\"name\": \"uuid\", \"type\": \"string\"},"
+ "{\"name\": \"rider\", \"type\": \"string\"},"
+ "{\"name\": \"driver\", \"type\": \"string\"},"
+ "{\"name\": \"begin_lat\", \"type\": \"double\"},"
+ "{\"name\": \"begin_lon\", \"type\": \"double\"},"
+ "{\"name\": \"end_lat\", \"type\": \"double\"},"
+ "{\"name\": \"end_lon\", \"type\": \"double\"},"
+ "{\"name\": \"ts\",\"type\": \"double\"}," + "{\"name\": \"uuid\", \"type\": \"string\"},"
+ "{\"name\": \"rider\", \"type\": \"string\"}," + "{\"name\": \"driver\", \"type\": \"string\"},"
+ "{\"name\": \"begin_lat\", \"type\": \"double\"}," + "{\"name\": \"begin_lon\", \"type\": \"double\"},"
+ "{\"name\": \"end_lat\", \"type\": \"double\"}," + "{\"name\": \"end_lon\", \"type\": \"double\"},"
+ "{\"name\":\"fare\",\"type\": \"double\"}]}";
static Schema avroSchema = new Schema.Parser().parse(TRIP_EXAMPLE_SCHEMA);
@@ -87,8 +80,7 @@ public class QuickstartUtils {
int stringLength = 3;
StringBuilder buffer = new StringBuilder(stringLength);
for (int i = 0; i < stringLength; i++) {
int randomLimitedInt = leftLimit + (int)
(rand.nextFloat() * (rightLimit - leftLimit + 1));
int randomLimitedInt = leftLimit + (int) (rand.nextFloat() * (rightLimit - leftLimit + 1));
buffer.append((char) randomLimitedInt);
}
return buffer.toString();
@@ -99,7 +91,7 @@ public class QuickstartUtils {
}
public static GenericRecord generateGenericRecord(String rowKey, String riderName, String driverName,
double timestamp) {
double timestamp) {
GenericRecord rec = new GenericData.Record(avroSchema);
rec.put("uuid", rowKey);
rec.put("ts", timestamp);
@@ -114,15 +106,15 @@ public class QuickstartUtils {
}
/**
* Generates a new avro record of the above schema format, retaining the key if optionally provided.
* The riderDriverSuffix string is a random String to simulate updates by changing the rider driver fields
* for records belonging to the same commit. It is purely used for demo purposes. In real world, the actual
* updates are assumed to be provided based on the application requirements.
* Generates a new avro record of the above schema format, retaining the key if optionally provided. The
* riderDriverSuffix string is a random String to simulate updates by changing the rider driver fields for records
* belonging to the same commit. It is purely used for demo purposes. In real world, the actual updates are assumed
* to be provided based on the application requirements.
*/
public static OverwriteWithLatestAvroPayload generateRandomValue(HoodieKey key, String riderDriverSuffix) throws
IOException {
GenericRecord rec = generateGenericRecord(key.getRecordKey(), "rider-" + riderDriverSuffix, "driver-"
+ riderDriverSuffix, 0.0);
public static OverwriteWithLatestAvroPayload generateRandomValue(HoodieKey key, String riderDriverSuffix)
throws IOException {
GenericRecord rec =
generateGenericRecord(key.getRecordKey(), "rider-" + riderDriverSuffix, "driver-" + riderDriverSuffix, 0.0);
return new OverwriteWithLatestAvroPayload(Option.of(rec));
}
@@ -182,19 +174,19 @@ public class QuickstartUtils {
private static Option<String> convertToString(HoodieRecord record) {
try {
String str = HoodieAvroUtils.bytesToAvro(((OverwriteWithLatestAvroPayload) record.getData()).recordBytes,
DataGenerator.avroSchema).toString();
String str = HoodieAvroUtils
.bytesToAvro(((OverwriteWithLatestAvroPayload) record.getData()).recordBytes, DataGenerator.avroSchema)
.toString();
str = "{" + str.substring(str.indexOf("\"ts\":"));
return Option.of(str.replaceAll("}",
", \"partitionpath\": \"" + record.getPartitionPath() + "\"}"));
return Option.of(str.replaceAll("}", ", \"partitionpath\": \"" + record.getPartitionPath() + "\"}"));
} catch (IOException e) {
return Option.empty();
}
}
public static List<String> convertToStringList(List<HoodieRecord> records) {
return records.stream().map(hr -> convertToString(hr)).filter(os -> os.isPresent())
.map(os -> os.get()).collect(Collectors.toList());
return records.stream().map(hr -> convertToString(hr)).filter(os -> os.isPresent()).map(os -> os.get())
.collect(Collectors.toList());
}
public static Map<String, String> getQuickstartWriteConfigs() {

View File

@@ -24,8 +24,7 @@ import org.apache.hudi.common.util.TypedProperties;
import org.apache.hudi.exception.HoodieException;
/**
* Simple key generator, which takes names of fields to be used for recordKey and partitionPath as
* configs.
* Simple key generator, which takes names of fields to be used for recordKey and partitionPath as configs.
*/
public class SimpleKeyGenerator extends KeyGenerator {
@@ -38,15 +37,13 @@ public class SimpleKeyGenerator extends KeyGenerator {
public SimpleKeyGenerator(TypedProperties props) {
super(props);
this.recordKeyField = props.getString(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY());
this.partitionPathField = props
.getString(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY());
this.partitionPathField = props.getString(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY());
}
@Override
public HoodieKey getKey(GenericRecord record) {
if (recordKeyField == null || partitionPathField == null) {
throw new HoodieException(
"Unable to find field names for record key or partition path in cfg");
throw new HoodieException("Unable to find field names for record key or partition path in cfg");
}
String recordKey = DataSourceUtils.getNestedFieldValAsString(record, recordKeyField);