diff --git a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/CompactionCommand.java b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/CompactionCommand.java
index 7802f1219..7a8d1eabd 100644
--- a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/CompactionCommand.java
+++ b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/CompactionCommand.java
@@ -472,8 +472,7 @@ public class CompactionCommand implements CommandMarker {
     if (result.get()) {
       System.out.println("All renames successfully completed to " + operation + " done !!");
     } else {
-      System.out
-          .println("Some renames failed. DataSet could be in inconsistent-state. " + "Try running compaction repair");
+      System.out.println("Some renames failed. DataSet could be in inconsistent-state. Try running compaction repair");
     }
 
     List rows = new ArrayList<>();
diff --git a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/RepairsCommand.java b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/RepairsCommand.java
index 7945a70d4..40bd5b583 100644
--- a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/RepairsCommand.java
+++ b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/RepairsCommand.java
@@ -53,7 +53,7 @@ public class RepairsCommand implements CommandMarker {
   }
 
   @CliCommand(value = "repair deduplicate",
-      help = "De-duplicate a partition path contains duplicates & produce " + "repaired files to replace with")
+      help = "De-duplicate a partition path contains duplicates & produce repaired files to replace with")
   public String deduplicate(
       @CliOption(key = {"duplicatedPartitionPath"}, help = "Partition Path containing the duplicates",
           mandatory = true) final String duplicatedPartitionPath,
diff --git a/hudi-client/src/main/java/org/apache/hudi/CompactionAdminClient.java b/hudi-client/src/main/java/org/apache/hudi/CompactionAdminClient.java
index 968bc69e5..7494993e3 100644
--- a/hudi-client/src/main/java/org/apache/hudi/CompactionAdminClient.java
+++ b/hudi-client/src/main/java/org/apache/hudi/CompactionAdminClient.java
@@ -333,7 +333,7 @@ public class CompactionAdminClient extends AbstractHoodieClient {
         }
       } else {
         throw new CompactionValidationException(
-            "Unable to find any committed instant. Compaction Operation may " + "be pointing to stale file-slices");
+            "Unable to find any committed instant. Compaction Operation may be pointing to stale file-slices");
       }
     } catch (CompactionValidationException | IllegalArgumentException e) {
       return new ValidationOpResult(operation, false, Option.of(e));
diff --git a/hudi-client/src/main/java/org/apache/hudi/HoodieWriteClient.java b/hudi-client/src/main/java/org/apache/hudi/HoodieWriteClient.java
index 652abe52b..2400ffd32 100644
--- a/hudi-client/src/main/java/org/apache/hudi/HoodieWriteClient.java
+++ b/hudi-client/src/main/java/org/apache/hudi/HoodieWriteClient.java
@@ -1017,7 +1017,7 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
     metaClient.getActiveTimeline().filterPendingCompactionTimeline().lastInstant().ifPresent(latestPending -> {
       Preconditions.checkArgument(
           HoodieTimeline.compareTimestamps(latestPending.getTimestamp(), instantTime, HoodieTimeline.LESSER),
-          "Latest pending compaction instant time must be earlier " + "than this instant time. Latest Compaction :"
+          "Latest pending compaction instant time must be earlier than this instant time. Latest Compaction :"
              + latestPending + ", Ingesting at " + instantTime);
     });
     HoodieTable table = HoodieTable.getHoodieTable(metaClient, config, jsc);
@@ -1049,7 +1049,7 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
     metaClient.getCommitsTimeline().filterInflightsExcludingCompaction().firstInstant().ifPresent(earliestInflight -> {
       Preconditions.checkArgument(
           HoodieTimeline.compareTimestamps(earliestInflight.getTimestamp(), instantTime, HoodieTimeline.GREATER),
-          "Earliest write inflight instant time must be later " + "than compaction time. Earliest :" + earliestInflight
+          "Earliest write inflight instant time must be later than compaction time. Earliest :" + earliestInflight
              + ", Compaction scheduled at " + instantTime);
     });
     // Committed and pending compaction instants should have strictly lower timestamps
diff --git a/hudi-client/src/main/java/org/apache/hudi/config/HoodieCompactionConfig.java b/hudi-client/src/main/java/org/apache/hudi/config/HoodieCompactionConfig.java
index 4ecc87fbc..a42cbe39b 100644
--- a/hudi-client/src/main/java/org/apache/hudi/config/HoodieCompactionConfig.java
+++ b/hudi-client/src/main/java/org/apache/hudi/config/HoodieCompactionConfig.java
@@ -43,8 +43,8 @@ public class HoodieCompactionConfig extends DefaultHoodieConfig {
   // Turn on inline compaction - after fw delta commits a inline compaction will be run
   public static final String INLINE_COMPACT_PROP = "hoodie.compact.inline";
   // Run a compaction every N delta commits
-  public static final String INLINE_COMPACT_NUM_DELTA_COMMITS_PROP = "hoodie.compact.inline.max" + ".delta.commits";
-  public static final String CLEANER_FILE_VERSIONS_RETAINED_PROP = "hoodie.cleaner.fileversions" + ".retained";
+  public static final String INLINE_COMPACT_NUM_DELTA_COMMITS_PROP = "hoodie.compact.inline.max.delta.commits";
+  public static final String CLEANER_FILE_VERSIONS_RETAINED_PROP = "hoodie.cleaner.fileversions.retained";
   public static final String CLEANER_COMMITS_RETAINED_PROP = "hoodie.cleaner.commits.retained";
   public static final String CLEANER_INCREMENTAL_MODE = "hoodie.cleaner.incremental.mode";
   public static final String MAX_COMMITS_TO_KEEP_PROP = "hoodie.keep.max.commits";
@@ -58,18 +58,18 @@ public class HoodieCompactionConfig extends DefaultHoodieConfig {
   /**
    * Configs related to specific table types.
    */
   // Number of inserts, that will be put each partition/bucket for writing
-  public static final String COPY_ON_WRITE_TABLE_INSERT_SPLIT_SIZE = "hoodie.copyonwrite.insert" + ".split.size";
+  public static final String COPY_ON_WRITE_TABLE_INSERT_SPLIT_SIZE = "hoodie.copyonwrite.insert.split.size";
   // The rationale to pick the insert parallelism is the following. Writing out 100MB files,
   // with atleast 1kb records, means 100K records per file. we just overprovision to 500K
   public static final String DEFAULT_COPY_ON_WRITE_TABLE_INSERT_SPLIT_SIZE = String.valueOf(500000);
   // Config to control whether we control insert split sizes automatically based on average
   // record sizes
-  public static final String COPY_ON_WRITE_TABLE_AUTO_SPLIT_INSERTS = "hoodie.copyonwrite.insert" + ".auto.split";
+  public static final String COPY_ON_WRITE_TABLE_AUTO_SPLIT_INSERTS = "hoodie.copyonwrite.insert.auto.split";
   // its off by default
   public static final String DEFAULT_COPY_ON_WRITE_TABLE_AUTO_SPLIT_INSERTS = String.valueOf(true);
   // This value is used as a guessimate for the record size, if we can't determine this from
   // previous commits
-  public static final String COPY_ON_WRITE_TABLE_RECORD_SIZE_ESTIMATE = "hoodie.copyonwrite" + ".record.size.estimate";
+  public static final String COPY_ON_WRITE_TABLE_RECORD_SIZE_ESTIMATE = "hoodie.copyonwrite.record.size.estimate";
   // Used to determine how much more can be packed into a small file, before it exceeds the size
   // limit.
   public static final String DEFAULT_COPY_ON_WRITE_TABLE_RECORD_SIZE_ESTIMATE = String.valueOf(1024);
@@ -88,10 +88,10 @@ public class HoodieCompactionConfig extends DefaultHoodieConfig {
   // used to choose a trade off between IO vs Memory when performing compaction process
   // Depending on outputfile_size and memory provided, choose true to avoid OOM for large file
   // size + small memory
-  public static final String COMPACTION_LAZY_BLOCK_READ_ENABLED_PROP = "hoodie.compaction.lazy" + ".block.read";
+  public static final String COMPACTION_LAZY_BLOCK_READ_ENABLED_PROP = "hoodie.compaction.lazy.block.read";
   public static final String DEFAULT_COMPACTION_LAZY_BLOCK_READ_ENABLED = "false";
   // used to choose whether to enable reverse log reading (reverse log traversal)
-  public static final String COMPACTION_REVERSE_LOG_READ_ENABLED_PROP = "hoodie.compaction" + ".reverse.log.read";
+  public static final String COMPACTION_REVERSE_LOG_READ_ENABLED_PROP = "hoodie.compaction.reverse.log.read";
   public static final String DEFAULT_COMPACTION_REVERSE_LOG_READ_ENABLED = "false";
   private static final String DEFAULT_CLEANER_POLICY = HoodieCleaningPolicy.KEEP_LATEST_COMMITS.name();
   private static final String DEFAULT_AUTO_CLEAN = "true";
@@ -104,7 +104,7 @@ public class HoodieCompactionConfig extends DefaultHoodieConfig {
   private static final String DEFAULT_MIN_COMMITS_TO_KEEP = "20";
   private static final String DEFAULT_COMMITS_ARCHIVAL_BATCH_SIZE = String.valueOf(10);
   public static final String TARGET_PARTITIONS_PER_DAYBASED_COMPACTION_PROP =
-      "hoodie.compaction.daybased.target" + ".partitions";
+      "hoodie.compaction.daybased.target.partitions";
   // 500GB of target IO per compaction (both read and write)
   public static final String DEFAULT_TARGET_PARTITIONS_PER_DAYBASED_COMPACTION = String.valueOf(10);
 
diff --git a/hudi-client/src/main/java/org/apache/hudi/config/HoodieIndexConfig.java b/hudi-client/src/main/java/org/apache/hudi/config/HoodieIndexConfig.java
index abba272d5..24e5949c9 100644
--- a/hudi-client/src/main/java/org/apache/hudi/config/HoodieIndexConfig.java
+++ b/hudi-client/src/main/java/org/apache/hudi/config/HoodieIndexConfig.java
@@ -45,7 +45,7 @@ public class HoodieIndexConfig extends DefaultHoodieConfig {
   public static final String BLOOM_INDEX_PARALLELISM_PROP = "hoodie.bloom.index.parallelism";
   // Disable explicit bloom index parallelism setting by default - hoodie auto computes
   public static final String DEFAULT_BLOOM_INDEX_PARALLELISM = "0";
-  public static final String BLOOM_INDEX_PRUNE_BY_RANGES_PROP = "hoodie.bloom.index.prune.by" + ".ranges";
+  public static final String BLOOM_INDEX_PRUNE_BY_RANGES_PROP = "hoodie.bloom.index.prune.by.ranges";
   public static final String DEFAULT_BLOOM_INDEX_PRUNE_BY_RANGES = "true";
   public static final String BLOOM_INDEX_USE_CACHING_PROP = "hoodie.bloom.index.use.caching";
   public static final String DEFAULT_BLOOM_INDEX_USE_CACHING = "true";
@@ -69,7 +69,7 @@ public class HoodieIndexConfig extends DefaultHoodieConfig {
 
   public static final String DEFAULT_HBASE_BATCH_SIZE = "100";
 
-  public static final String BLOOM_INDEX_INPUT_STORAGE_LEVEL = "hoodie.bloom.index.input.storage" + ".level";
+  public static final String BLOOM_INDEX_INPUT_STORAGE_LEVEL = "hoodie.bloom.index.input.storage.level";
   public static final String DEFAULT_BLOOM_INDEX_INPUT_STORAGE_LEVEL = "MEMORY_AND_DISK_SER";
 
   private HoodieIndexConfig(Properties props) {
diff --git a/hudi-client/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java b/hudi-client/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java
index 3c2563d95..4e9da41d0 100644
--- a/hudi-client/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java
+++ b/hudi-client/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java
@@ -69,7 +69,7 @@ public class HoodieWriteConfig extends DefaultHoodieConfig {
   private static final String DEFAULT_WRITE_STATUS_STORAGE_LEVEL = "MEMORY_AND_DISK_SER";
   private static final String HOODIE_AUTO_COMMIT_PROP = "hoodie.auto.commit";
   private static final String DEFAULT_HOODIE_AUTO_COMMIT = "true";
-  private static final String HOODIE_ASSUME_DATE_PARTITIONING_PROP = "hoodie.assume.date" + ".partitioning";
+  private static final String HOODIE_ASSUME_DATE_PARTITIONING_PROP = "hoodie.assume.date.partitioning";
   private static final String DEFAULT_ASSUME_DATE_PARTITIONING = "false";
   private static final String HOODIE_WRITE_STATUS_CLASS_PROP = "hoodie.writestatus.class";
   private static final String DEFAULT_HOODIE_WRITE_STATUS_CLASS = WriteStatus.class.getName();
diff --git a/hudi-client/src/main/java/org/apache/hudi/func/OperationResult.java b/hudi-client/src/main/java/org/apache/hudi/func/OperationResult.java
index ff1da05be..b64822bd1 100644
--- a/hudi-client/src/main/java/org/apache/hudi/func/OperationResult.java
+++ b/hudi-client/src/main/java/org/apache/hudi/func/OperationResult.java
@@ -67,7 +67,7 @@ public class OperationResult<T> implements Serializable {
 
   @Override
   public String toString() {
-    return "OperationResult{" + "operation=" + operation + ", executed=" + executed + ", success=" + success
+    return "OperationResult{operation=" + operation + ", executed=" + executed + ", success=" + success
         + ", exception=" + exception + '}';
   }
 }
diff --git a/hudi-client/src/main/java/org/apache/hudi/index/bloom/HoodieBloomIndex.java b/hudi-client/src/main/java/org/apache/hudi/index/bloom/HoodieBloomIndex.java
index 227788956..dacef2d28 100644
--- a/hudi-client/src/main/java/org/apache/hudi/index/bloom/HoodieBloomIndex.java
+++ b/hudi-client/src/main/java/org/apache/hudi/index/bloom/HoodieBloomIndex.java
@@ -207,7 +207,7 @@ public class HoodieBloomIndex<T extends HoodieRecordPayload> extends HoodieIndex
     long totalRecords = recordsPerPartition.values().stream().mapToLong(Long::longValue).sum();
     int parallelism = (int) (totalComparisons / MAX_ITEMS_PER_SHUFFLE_PARTITION + 1);
     LOG.info(String.format(
-        "TotalRecords %d, TotalFiles %d, TotalAffectedPartitions %d, TotalComparisons %d, " + "SafeParallelism %d",
+        "TotalRecords %d, TotalFiles %d, TotalAffectedPartitions %d, TotalComparisons %d, SafeParallelism %d",
        totalRecords, totalFiles, recordsPerPartition.size(), totalComparisons, parallelism));
     return parallelism;
   }
@@ -226,8 +226,8 @@ public class HoodieBloomIndex<T extends HoodieRecordPayload> extends HoodieIndex
     // take the max
     int indexParallelism = Math.max(inputParallelism, config.getBloomIndexParallelism());
     int joinParallelism = Math.max(totalSubPartitions, indexParallelism);
-    LOG.info("InputParallelism: ${" + inputParallelism + "}, " + "IndexParallelism: ${"
-        + config.getBloomIndexParallelism() + "}, " + "TotalSubParts: ${" + totalSubPartitions + "}, "
+    LOG.info("InputParallelism: ${" + inputParallelism + "}, IndexParallelism: ${"
+        + config.getBloomIndexParallelism() + "}, TotalSubParts: ${" + totalSubPartitions + "}, "
         + "Join Parallelism set to : " + joinParallelism);
     return joinParallelism;
   }
diff --git a/hudi-client/src/main/java/org/apache/hudi/index/bloom/KeyRangeNode.java b/hudi-client/src/main/java/org/apache/hudi/index/bloom/KeyRangeNode.java
index df2e97189..2c8971aaa 100644
--- a/hudi-client/src/main/java/org/apache/hudi/index/bloom/KeyRangeNode.java
+++ b/hudi-client/src/main/java/org/apache/hudi/index/bloom/KeyRangeNode.java
@@ -62,7 +62,7 @@ class KeyRangeNode implements Comparable<KeyRangeNode>, Serializable {
 
   @Override
   public String toString() {
-    return "KeyRangeNode{" + "minRecordKey='" + minRecordKey + '\'' + ", maxRecordKey='" + maxRecordKey + '\''
+    return "KeyRangeNode{minRecordKey='" + minRecordKey + '\'' + ", maxRecordKey='" + maxRecordKey + '\''
         + ", fileNameList=" + fileNameList + ", rightSubTreeMax='" + rightSubTreeMax + '\'' + ", leftSubTreeMax='"
         + leftSubTreeMax + '\'' + ", rightSubTreeMin='" + rightSubTreeMin + '\'' + ", leftSubTreeMin='"
         + leftSubTreeMin + '\'' + '}';
diff --git a/hudi-client/src/main/java/org/apache/hudi/io/compact/HoodieRealtimeTableCompactor.java b/hudi-client/src/main/java/org/apache/hudi/io/compact/HoodieRealtimeTableCompactor.java
index 8c4b009a4..6f976014a 100644
--- a/hudi-client/src/main/java/org/apache/hudi/io/compact/HoodieRealtimeTableCompactor.java
+++ b/hudi-client/src/main/java/org/apache/hudi/io/compact/HoodieRealtimeTableCompactor.java
@@ -218,7 +218,7 @@ public class HoodieRealtimeTableCompactor implements HoodieCompactor {
         compactionPlan.getOperations().stream().noneMatch(
             op -> fgIdsInPendingCompactions.contains(new HoodieFileGroupId(op.getPartitionPath(), op.getFileId()))),
         "Bad Compaction Plan. FileId MUST NOT have multiple pending compactions. "
-            + "Please fix your strategy implementation." + "FileIdsWithPendingCompactions :" + fgIdsInPendingCompactions
+            + "Please fix your strategy implementation. FileIdsWithPendingCompactions :" + fgIdsInPendingCompactions
             + ", Selected workload :" + compactionPlan);
     if (compactionPlan.getOperations().isEmpty()) {
       LOG.warn("After filtering, Nothing to compact for " + metaClient.getBasePath());
diff --git a/hudi-client/src/main/java/org/apache/hudi/table/HoodieCopyOnWriteTable.java b/hudi-client/src/main/java/org/apache/hudi/table/HoodieCopyOnWriteTable.java
index 016d3ceae..982cfd3eb 100644
--- a/hudi-client/src/main/java/org/apache/hudi/table/HoodieCopyOnWriteTable.java
+++ b/hudi-client/src/main/java/org/apache/hudi/table/HoodieCopyOnWriteTable.java
@@ -574,7 +574,7 @@ public class HoodieCopyOnWriteTable<T extends HoodieRecordPayload> extends Hoodi
       assignUpdates(profile);
       assignInserts(profile);
 
-      LOG.info("Total Buckets :" + totalBuckets + ", " + "buckets info => " + bucketInfoMap + ", \n"
+      LOG.info("Total Buckets :" + totalBuckets + ", buckets info => " + bucketInfoMap + ", \n"
           + "Partition to insert buckets => " + partitionPathToInsertBuckets + ", \n"
           + "UpdateLocations mapped to buckets =>" + updateLocationToBucket);
     }
diff --git a/hudi-client/src/main/java/org/apache/hudi/table/HoodieMergeOnReadTable.java b/hudi-client/src/main/java/org/apache/hudi/table/HoodieMergeOnReadTable.java
index b3fb47e5e..850ea63a6 100644
--- a/hudi-client/src/main/java/org/apache/hudi/table/HoodieMergeOnReadTable.java
+++ b/hudi-client/src/main/java/org/apache/hudi/table/HoodieMergeOnReadTable.java
@@ -220,7 +220,7 @@ public class HoodieMergeOnReadTable<T extends HoodieRecordPayload> extends Hoodi
         switch (instantToRollback.getAction()) {
           case HoodieTimeline.COMMIT_ACTION:
             LOG.info(
-                "Rolling back commit action. There are higher delta commits. So only rolling back this " + "instant");
+                "Rolling back commit action. There are higher delta commits. So only rolling back this instant");
             partitionRollbackRequests.add(
                 RollbackRequest.createRollbackRequestWithDeleteDataAndLogFilesAction(partitionPath, instantToRollback));
             break;
diff --git a/hudi-client/src/test/java/org/apache/hudi/common/HoodieTestDataGenerator.java b/hudi-client/src/test/java/org/apache/hudi/common/HoodieTestDataGenerator.java
index 5ac436037..fc008284b 100644
--- a/hudi-client/src/test/java/org/apache/hudi/common/HoodieTestDataGenerator.java
+++ b/hudi-client/src/test/java/org/apache/hudi/common/HoodieTestDataGenerator.java
@@ -74,11 +74,11 @@ public class HoodieTestDataGenerator {
   public static final String[] DEFAULT_PARTITION_PATHS =
       {DEFAULT_FIRST_PARTITION_PATH, DEFAULT_SECOND_PARTITION_PATH, DEFAULT_THIRD_PARTITION_PATH};
   public static final int DEFAULT_PARTITION_DEPTH = 3;
-  public static String TRIP_EXAMPLE_SCHEMA = "{\"type\": \"record\"," + "\"name\": \"triprec\"," + "\"fields\": [ "
-      + "{\"name\": \"timestamp\",\"type\": \"double\"}," + "{\"name\": \"_row_key\", \"type\": \"string\"},"
-      + "{\"name\": \"rider\", \"type\": \"string\"}," + "{\"name\": \"driver\", \"type\": \"string\"},"
-      + "{\"name\": \"begin_lat\", \"type\": \"double\"}," + "{\"name\": \"begin_lon\", \"type\": \"double\"},"
-      + "{\"name\": \"end_lat\", \"type\": \"double\"}," + "{\"name\": \"end_lon\", \"type\": \"double\"},"
+  public static String TRIP_EXAMPLE_SCHEMA = "{\"type\": \"record\",\"name\": \"triprec\",\"fields\": [ "
+      + "{\"name\": \"timestamp\",\"type\": \"double\"},{\"name\": \"_row_key\", \"type\": \"string\"},"
+      + "{\"name\": \"rider\", \"type\": \"string\"},{\"name\": \"driver\", \"type\": \"string\"},"
+      + "{\"name\": \"begin_lat\", \"type\": \"double\"},{\"name\": \"begin_lon\", \"type\": \"double\"},"
+      + "{\"name\": \"end_lat\", \"type\": \"double\"},{\"name\": \"end_lon\", \"type\": \"double\"},"
       + "{\"name\":\"fare\",\"type\": \"double\"}]}";
   public static String NULL_SCHEMA = Schema.create(Schema.Type.NULL).toString();
   public static String TRIP_HIVE_COLUMN_TYPES = "double,string,string,string,double,double,double,double,double";
diff --git a/hudi-client/src/test/java/org/apache/hudi/index/bloom/TestHoodieBloomIndex.java b/hudi-client/src/test/java/org/apache/hudi/index/bloom/TestHoodieBloomIndex.java
index 3029dd72b..8a4d163a3 100644
--- a/hudi-client/src/test/java/org/apache/hudi/index/bloom/TestHoodieBloomIndex.java
+++ b/hudi-client/src/test/java/org/apache/hudi/index/bloom/TestHoodieBloomIndex.java
@@ -290,7 +290,7 @@ public class TestHoodieBloomIndex extends HoodieClientTestHarness {
     try {
       bloomIndex.tagLocation(recordRDD, jsc, table);
     } catch (IllegalArgumentException e) {
-      fail("EmptyRDD should not result in IllegalArgumentException: Positive number of slices " + "required");
+      fail("EmptyRDD should not result in IllegalArgumentException: Positive number of slices required");
     }
   }
 
@@ -300,11 +300,11 @@ public class TestHoodieBloomIndex extends HoodieClientTestHarness {
     String rowKey1 = UUID.randomUUID().toString();
     String rowKey2 = UUID.randomUUID().toString();
     String rowKey3 = UUID.randomUUID().toString();
-    String recordStr1 = "{\"_row_key\":\"" + rowKey1 + "\"," + "\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}";
-    String recordStr2 = "{\"_row_key\":\"" + rowKey2 + "\"," + "\"time\":\"2016-01-31T03:20:41.415Z\",\"number\":100}";
-    String recordStr3 = "{\"_row_key\":\"" + rowKey3 + "\"," + "\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":15}";
+    String recordStr1 = "{\"_row_key\":\"" + rowKey1 + "\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}";
+    String recordStr2 = "{\"_row_key\":\"" + rowKey2 + "\",\"time\":\"2016-01-31T03:20:41.415Z\",\"number\":100}";
+    String recordStr3 = "{\"_row_key\":\"" + rowKey3 + "\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":15}";
     // place same row key under a different partition.
- String recordStr4 = "{\"_row_key\":\"" + rowKey1 + "\"," + "\"time\":\"2015-01-31T03:16:41.415Z\",\"number\":32}"; + String recordStr4 = "{\"_row_key\":\"" + rowKey1 + "\",\"time\":\"2015-01-31T03:16:41.415Z\",\"number\":32}"; TestRawTripPayload rowChange1 = new TestRawTripPayload(recordStr1); HoodieRecord record1 = new HoodieRecord(new HoodieKey(rowChange1.getRowKey(), rowChange1.getPartitionPath()), rowChange1); diff --git a/hudi-client/src/test/java/org/apache/hudi/io/TestHoodieCommitArchiveLog.java b/hudi-client/src/test/java/org/apache/hudi/io/TestHoodieCommitArchiveLog.java index 1f7443d14..47d5b322a 100644 --- a/hudi-client/src/test/java/org/apache/hudi/io/TestHoodieCommitArchiveLog.java +++ b/hudi-client/src/test/java/org/apache/hudi/io/TestHoodieCommitArchiveLog.java @@ -341,7 +341,7 @@ public class TestHoodieCommitArchiveLog extends HoodieClientTestHarness { assertTrue(result); timeline = metaClient.getActiveTimeline().reload().getCommitsTimeline().filterCompletedInstants(); assertEquals( - "Since we have a savepoint at 101, we should never archive any commit after 101 (we only " + "archive 100)", 5, + "Since we have a savepoint at 101, we should never archive any commit after 101 (we only archive 100)", 5, timeline.countInstants()); assertTrue("Archived commits should always be safe", timeline.containsInstant(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "101"))); @@ -380,7 +380,7 @@ public class TestHoodieCommitArchiveLog extends HoodieClientTestHarness { assertFalse("Instants before oldest pending compaction can be removed", timeline.containsInstant(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "100"))); assertEquals("Since we have a pending compaction at 101, we should never archive any commit " - + "after 101 (we only " + "archive 100)", 7, timeline.countInstants()); + + "after 101 (we only archive 100)", 7, timeline.countInstants()); assertTrue("Requested Compaction must still be present", timeline.containsInstant(new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "101"))); assertTrue("Instants greater than oldest pending compaction must be present", diff --git a/hudi-common/src/main/java/org/apache/hudi/common/model/CompactionOperation.java b/hudi-common/src/main/java/org/apache/hudi/common/model/CompactionOperation.java index 63f5da698..ea178a832 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/model/CompactionOperation.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/model/CompactionOperation.java @@ -135,7 +135,7 @@ public class CompactionOperation implements Serializable { @Override public String toString() { - return "CompactionOperation{" + "baseInstantTime='" + baseInstantTime + '\'' + ", dataFileCommitTime=" + return "CompactionOperation{baseInstantTime='" + baseInstantTime + '\'' + ", dataFileCommitTime=" + dataFileCommitTime + ", deltaFileNames=" + deltaFileNames + ", dataFileName=" + dataFileName + ", id='" + id + '\'' + ", metrics=" + metrics + '}'; } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieCommitMetadata.java b/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieCommitMetadata.java index fa47dd08c..475f75ccc 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieCommitMetadata.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieCommitMetadata.java @@ -341,7 +341,7 @@ public class HoodieCommitMetadata implements Serializable { @Override public String toString() { - return "HoodieCommitMetadata{" + 
"partitionToWriteStats=" + partitionToWriteStats + ", compacted=" + compacted + return "HoodieCommitMetadata{partitionToWriteStats=" + partitionToWriteStats + ", compacted=" + compacted + ", extraMetadataMap=" + extraMetadataMap + '}'; } } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieDataFile.java b/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieDataFile.java index 30f6b8a68..4983b74d8 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieDataFile.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieDataFile.java @@ -98,6 +98,6 @@ public class HoodieDataFile implements Serializable { @Override public String toString() { - return "HoodieDataFile{" + "fullPath=" + fullPath + ", fileLen=" + fileLen + '}'; + return "HoodieDataFile{fullPath=" + fullPath + ", fileLen=" + fileLen + '}'; } } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieFileGroupId.java b/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieFileGroupId.java index 659114d19..eb0fbd580 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieFileGroupId.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieFileGroupId.java @@ -62,6 +62,6 @@ public class HoodieFileGroupId implements Serializable { @Override public String toString() { - return "HoodieFileGroupId{" + "partitionPath='" + partitionPath + '\'' + ", fileId='" + fileId + '\'' + '}'; + return "HoodieFileGroupId{partitionPath='" + partitionPath + '\'' + ", fileId='" + fileId + '\'' + '}'; } } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieLogFile.java b/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieLogFile.java index e1f4bc592..6369b876b 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieLogFile.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieLogFile.java @@ -179,6 +179,6 @@ public class HoodieLogFile implements Serializable { @Override public String toString() { - return "HoodieLogFile{" + "pathStr='" + pathStr + '\'' + ", fileLen=" + fileLen + '}'; + return "HoodieLogFile{pathStr='" + pathStr + '\'' + ", fileLen=" + fileLen + '}'; } } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/model/HoodiePartitionMetadata.java b/hudi-common/src/main/java/org/apache/hudi/common/model/HoodiePartitionMetadata.java index 61ff1a14c..013869c52 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/model/HoodiePartitionMetadata.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/model/HoodiePartitionMetadata.java @@ -101,7 +101,7 @@ public class HoodiePartitionMetadata { fs.rename(tmpMetaPath, metaPath); } } catch (IOException ioe) { - LOG.warn("Error trying to save partition metadata (this is okay, as long as " + "atleast 1 of these succced), " + LOG.warn("Error trying to save partition metadata (this is okay, as long as atleast 1 of these succced), " + partitionPath, ioe); } finally { if (!metafileExists) { diff --git a/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieWriteStat.java b/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieWriteStat.java index 2a2d2cd16..0135dbe33 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieWriteStat.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieWriteStat.java @@ -320,7 +320,7 @@ public class HoodieWriteStat implements Serializable { @Override public String toString() { - return "HoodieWriteStat{" + "fileId='" + fileId 
+ '\'' + ", path='" + path + '\'' + ", prevCommit='" + prevCommit + return "HoodieWriteStat{fileId='" + fileId + '\'' + ", path='" + path + '\'' + ", prevCommit='" + prevCommit + '\'' + ", numWrites=" + numWrites + ", numDeletes=" + numDeletes + ", numUpdateWrites=" + numUpdateWrites + ", totalWriteBytes=" + totalWriteBytes + ", totalWriteErrors=" + totalWriteErrors + ", tempPath='" + tempPath + '\'' + ", partitionPath='" + partitionPath + '\'' + ", totalLogRecords=" + totalLogRecords diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/Option.java b/hudi-common/src/main/java/org/apache/hudi/common/util/Option.java index 60746c0c1..a67b6ab95 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/util/Option.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/util/Option.java @@ -140,6 +140,6 @@ public final class Option implements Serializable { @Override public String toString() { - return "Option{" + "val=" + val + '}'; + return "Option{val=" + val + '}'; } } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/ParquetUtils.java b/hudi-common/src/main/java/org/apache/hudi/common/util/ParquetUtils.java index 749c68d70..02add76e1 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/util/ParquetUtils.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/util/ParquetUtils.java @@ -129,7 +129,7 @@ public class ParquetUtils { footerVals.put(footerName, metadata.get(footerName)); } else if (required) { throw new MetadataNotFoundException( - "Could not find index in Parquet footer. " + "Looked for key " + footerName + " in " + parquetFilePath); + "Could not find index in Parquet footer. Looked for key " + footerName + " in " + parquetFilePath); } } return footerVals; diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/SpillableMapUtils.java b/hudi-common/src/main/java/org/apache/hudi/common/util/SpillableMapUtils.java index 289e1212b..55ecbdca8 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/util/SpillableMapUtils.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/util/SpillableMapUtils.java @@ -65,7 +65,7 @@ public class SpillableMapUtils { long crcOfReadValue = generateChecksum(value); if (crc != crcOfReadValue) { throw new HoodieCorruptedDataException( - "checksum of payload written to external disk does not match, " + "data may be corrupted"); + "checksum of payload written to external disk does not match, data may be corrupted"); } return new FileEntry(crc, keySize, valueSize, key, value, timestamp); } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/TimelineDiffHelper.java b/hudi-common/src/main/java/org/apache/hudi/common/util/TimelineDiffHelper.java index df88b0e92..7625bb572 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/util/TimelineDiffHelper.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/util/TimelineDiffHelper.java @@ -64,7 +64,7 @@ public class TimelineDiffHelper { if (!lostPendingCompactions.isEmpty()) { // If a compaction is unscheduled, fall back to complete refresh of fs view since some log files could have been // moved. Its unsafe to incrementally sync in that case. - LOG.warn("Some pending compactions are no longer in new timeline (unscheduled ?)." + "They are :" + LOG.warn("Some pending compactions are no longer in new timeline (unscheduled ?). 
They are :" + lostPendingCompactions); return TimelineDiffResult.UNSAFE_SYNC_RESULT; } @@ -132,7 +132,7 @@ public class TimelineDiffHelper { @Override public String toString() { - return "TimelineDiffResult{" + "newlySeenInstants=" + newlySeenInstants + ", finishedCompactionInstants=" + return "TimelineDiffResult{newlySeenInstants=" + newlySeenInstants + ", finishedCompactionInstants=" + finishedCompactionInstants + ", canSyncIncrementally=" + canSyncIncrementally + '}'; } } diff --git a/hudi-common/src/test/java/org/apache/hudi/common/table/log/TestHoodieLogFormatAppendFailure.java b/hudi-common/src/test/java/org/apache/hudi/common/table/log/TestHoodieLogFormatAppendFailure.java index ff255455c..66876a504 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/table/log/TestHoodieLogFormatAppendFailure.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/table/log/TestHoodieLogFormatAppendFailure.java @@ -103,7 +103,7 @@ public class TestHoodieLogFormatAppendFailure { HoodieAvroDataBlock dataBlock = new HoodieAvroDataBlock(records, header); Writer writer = HoodieLogFormat.newWriterBuilder().onParentPath(testPath) - .withFileExtension(HoodieArchivedLogFile.ARCHIVE_EXTENSION).withFileId("commits" + ".archive") + .withFileExtension(HoodieArchivedLogFile.ARCHIVE_EXTENSION).withFileId("commits.archive") .overBaseCommit("").withFs(fs).build(); writer = writer.appendBlock(dataBlock); @@ -134,7 +134,7 @@ public class TestHoodieLogFormatAppendFailure { // Opening a new Writer right now will throw IOException. The code should handle this, rollover the logfile and // return a new writer with a bumped up logVersion writer = HoodieLogFormat.newWriterBuilder().onParentPath(testPath) - .withFileExtension(HoodieArchivedLogFile.ARCHIVE_EXTENSION).withFileId("commits" + ".archive") + .withFileExtension(HoodieArchivedLogFile.ARCHIVE_EXTENSION).withFileId("commits.archive") .overBaseCommit("").withFs(fs).build(); // The log version should be different for this new writer Assert.assertFalse(writer.getLogFile().getLogVersion() == logFileVersion); diff --git a/hudi-common/src/test/java/org/apache/hudi/common/util/TestHoodieAvroUtils.java b/hudi-common/src/test/java/org/apache/hudi/common/util/TestHoodieAvroUtils.java index 296ccca98..ab3682162 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/util/TestHoodieAvroUtils.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/util/TestHoodieAvroUtils.java @@ -30,8 +30,8 @@ import java.util.Map; */ public class TestHoodieAvroUtils { - private static String EXAMPLE_SCHEMA = "{\"type\": \"record\"," + "\"name\": \"testrec\"," + "\"fields\": [ " - + "{\"name\": \"timestamp\",\"type\": \"double\"}," + "{\"name\": \"_row_key\", \"type\": \"string\"}," + private static String EXAMPLE_SCHEMA = "{\"type\": \"record\",\"name\": \"testrec\",\"fields\": [ " + + "{\"name\": \"timestamp\",\"type\": \"double\"},{\"name\": \"_row_key\", \"type\": \"string\"}," + "{\"name\": \"non_pii_col\", \"type\": \"string\"}," + "{\"name\": \"pii_col\", \"type\": \"string\", \"column_category\": \"user_profile\"}]}"; diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/AbstractRealtimeRecordReader.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/AbstractRealtimeRecordReader.java index a15ed76e6..d7b50d4f1 100644 --- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/AbstractRealtimeRecordReader.java +++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/AbstractRealtimeRecordReader.java @@ -144,11 
+144,11 @@ public abstract class AbstractRealtimeRecordReader { if (w instanceof ArrayWritable) { builder.append(arrayWritableToString((ArrayWritable) w)).append(","); } else { - builder.append("\"value" + i + "\":" + "\"" + w + "\"").append(","); + builder.append("\"value" + i + "\":\"" + w + "\"").append(","); if (w == null) { - builder.append("\"type" + i + "\":" + "\"unknown\"").append(","); + builder.append("\"type" + i + "\":\"unknown\"").append(","); } else { - builder.append("\"type" + i + "\":" + "\"" + w.getClass().getSimpleName() + "\"").append(","); + builder.append("\"type" + i + "\":\"" + w.getClass().getSimpleName() + "\"").append(","); } } i++; diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieRealtimeFileSplit.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieRealtimeFileSplit.java index 0a050beb4..c955bf64b 100644 --- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieRealtimeFileSplit.java +++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieRealtimeFileSplit.java @@ -99,7 +99,7 @@ public class HoodieRealtimeFileSplit extends FileSplit { @Override public String toString() { - return "HoodieRealtimeFileSplit{" + "DataPath=" + getPath() + ", deltaFilePaths=" + deltaFilePaths + return "HoodieRealtimeFileSplit{DataPath=" + getPath() + ", deltaFilePaths=" + deltaFilePaths + ", maxCommitTime='" + maxCommitTime + '\'' + ", basePath='" + basePath + '\'' + '}'; } } diff --git a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestHoodieInputFormat.java b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestHoodieInputFormat.java index 0c3a00222..ab5b4fc74 100644 --- a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestHoodieInputFormat.java +++ b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestHoodieInputFormat.java @@ -108,7 +108,7 @@ public class TestHoodieInputFormat { InputFormatTestUtil.setupIncremental(jobConf, "100", 1); FileStatus[] files = inputFormat.listStatus(jobConf); - assertEquals("We should exclude commit 100 when returning incremental pull with start commit time as " + "100", 0, + assertEquals("We should exclude commit 100 when returning incremental pull with start commit time as 100", 0, files.length); } @@ -152,7 +152,7 @@ public class TestHoodieInputFormat { InputFormatTestUtil.setupIncremental(jobConf, "100", HoodieHiveUtil.MAX_COMMIT_ALL); files = inputFormat.listStatus(jobConf); - assertEquals("Pulling all commits from 100, should get us the 1 file from each of 200,300,400,500,400 " + "commits", + assertEquals("Pulling all commits from 100, should get us the 1 file from each of 200,300,400,500,400 commits", 5, files.length); ensureFilesInCommit("Pulling all commits from 100, should get us the 1 files from 600 commit", files, "600", 1); ensureFilesInCommit("Pulling all commits from 100, should get us the 1 files from 500 commit", files, "500", 1); diff --git a/hudi-hive/src/main/java/org/apache/hudi/hive/HiveSyncConfig.java b/hudi-hive/src/main/java/org/apache/hudi/hive/HiveSyncConfig.java index 8e32aebdd..f85d9a898 100644 --- a/hudi-hive/src/main/java/org/apache/hudi/hive/HiveSyncConfig.java +++ b/hudi-hive/src/main/java/org/apache/hudi/hive/HiveSyncConfig.java @@ -50,12 +50,12 @@ public class HiveSyncConfig implements Serializable { @Parameter(names = "--partitioned-by", description = "Fields in the schema partitioned by") public List partitionFields = new ArrayList<>(); - @Parameter(names = "--partition-value-extractor", description = "Class 
which implements " + "PartitionValueExtractor " - + "to extract the partition " + "values from HDFS path") + @Parameter(names = "--partition-value-extractor", description = "Class which implements PartitionValueExtractor " + + "to extract the partition values from HDFS path") public String partitionValueExtractorClass = SlashEncodedDayPartitionValueExtractor.class.getName(); @Parameter(names = {"--assume-date-partitioning"}, description = "Assume standard yyyy/mm/dd partitioning, this" - + " exists to support " + "backward compatibility. If" + " you use hoodie 0.3.x, do " + "not set this parameter") + + " exists to support backward compatibility. If you use hoodie 0.3.x, do not set this parameter") public Boolean assumeDatePartitioning = false; @Parameter(names = {"--use-pre-apache-input-format"}, @@ -88,7 +88,7 @@ public class HiveSyncConfig implements Serializable { @Override public String toString() { - return "HiveSyncConfig{" + "databaseName='" + databaseName + '\'' + ", tableName='" + tableName + '\'' + return "HiveSyncConfig{databaseName='" + databaseName + '\'' + ", tableName='" + tableName + '\'' + ", hiveUser='" + hiveUser + '\'' + ", hivePass='" + hivePass + '\'' + ", jdbcUrl='" + jdbcUrl + '\'' + ", basePath='" + basePath + '\'' + ", partitionFields=" + partitionFields + ", partitionValueExtractorClass='" + partitionValueExtractorClass + '\'' + ", assumeDatePartitioning=" + assumeDatePartitioning diff --git a/hudi-hive/src/main/java/org/apache/hudi/hive/HoodieHiveClient.java b/hudi-hive/src/main/java/org/apache/hudi/hive/HoodieHiveClient.java index 84caf734b..d176500fe 100644 --- a/hudi-hive/src/main/java/org/apache/hudi/hive/HoodieHiveClient.java +++ b/hudi-hive/src/main/java/org/apache/hudi/hive/HoodieHiveClient.java @@ -187,7 +187,7 @@ public class HoodieHiveClient { + ". Check partition strategy. 
"); List partBuilder = new ArrayList<>(); for (int i = 0; i < syncConfig.partitionFields.size(); i++) { - partBuilder.add("`" + syncConfig.partitionFields.get(i) + "`=" + "'" + partitionValues.get(i) + "'"); + partBuilder.add("`" + syncConfig.partitionFields.get(i) + "`='" + partitionValues.get(i) + "'"); } return partBuilder.stream().collect(Collectors.joining(",")); } diff --git a/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestHoodieDemo.java b/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestHoodieDemo.java index 99082ecba..99a8d010e 100644 --- a/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestHoodieDemo.java +++ b/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestHoodieDemo.java @@ -34,11 +34,11 @@ import java.util.List; public class ITTestHoodieDemo extends ITTestBase { private static String HDFS_DATA_DIR = "/usr/hive/data/input"; - private static String HDFS_BATCH_PATH1 = HDFS_DATA_DIR + "/" + "batch_1.json"; - private static String HDFS_BATCH_PATH2 = HDFS_DATA_DIR + "/" + "batch_2.json"; - private static String HDFS_PRESTO_INPUT_TABLE_CHECK_PATH = HDFS_DATA_DIR + "/" + "presto-table-check.commands"; - private static String HDFS_PRESTO_INPUT_BATCH1_PATH = HDFS_DATA_DIR + "/" + "presto-batch1.commands"; - private static String HDFS_PRESTO_INPUT_BATCH2_PATH = HDFS_DATA_DIR + "/" + "presto-batch2-after-compaction.commands"; + private static String HDFS_BATCH_PATH1 = HDFS_DATA_DIR + "/batch_1.json"; + private static String HDFS_BATCH_PATH2 = HDFS_DATA_DIR + "/batch_2.json"; + private static String HDFS_PRESTO_INPUT_TABLE_CHECK_PATH = HDFS_DATA_DIR + "/presto-table-check.commands"; + private static String HDFS_PRESTO_INPUT_BATCH1_PATH = HDFS_DATA_DIR + "/presto-batch1.commands"; + private static String HDFS_PRESTO_INPUT_BATCH2_PATH = HDFS_DATA_DIR + "/presto-batch2-after-compaction.commands"; private static String INPUT_BATCH_PATH1 = HOODIE_WS_ROOT + "/docker/demo/data/batch_1.json"; private static String PRESTO_INPUT_TABLE_CHECK_RELATIVE_PATH = "/docker/demo/presto-table-check.commands"; @@ -64,7 +64,7 @@ public class ITTestHoodieDemo extends ITTestBase { private static String HIVE_INCREMENTAL_COMMANDS = HOODIE_WS_ROOT + "/docker/demo/hive-incremental.commands"; private static String HIVE_SYNC_CMD_FMT = - " --enable-hive-sync " + " --hoodie-conf hoodie.datasource.hive_sync.jdbcurl=jdbc:hive2://hiveserver:10000 " + " --enable-hive-sync --hoodie-conf hoodie.datasource.hive_sync.jdbcurl=jdbc:hive2://hiveserver:10000 " + " --hoodie-conf hoodie.datasource.hive_sync.username=hive " + " --hoodie-conf hoodie.datasource.hive_sync.password=hive " + " --hoodie-conf hoodie.datasource.hive_sync.partition_fields=%s " @@ -143,10 +143,10 @@ public class ITTestHoodieDemo extends ITTestBase { assertStdOutContains(stdOutErrPair, "| stock_ticks_mor_rt |"); assertStdOutContains(stdOutErrPair, - "| partition |\n" + "+----------------+\n" + "| dt=2018-08-31 |\n" + "+----------------+\n", 3); + "| partition |\n+----------------+\n| dt=2018-08-31 |\n+----------------+\n", 3); stdOutErrPair = executeHiveCommandFile(HIVE_BATCH1_COMMANDS); - assertStdOutContains(stdOutErrPair, "| symbol | _c1 |\n" + "+---------+----------------------+\n" + assertStdOutContains(stdOutErrPair, "| symbol | _c1 |\n+---------+----------------------+\n" + "| GOOG | 2018-08-31 10:29:00 |\n", 3); assertStdOutContains(stdOutErrPair, "| symbol | ts | volume | open | close |\n" @@ -159,9 +159,9 @@ public class ITTestHoodieDemo extends ITTestBase { private void testSparkSQLAfterFirstBatch() throws 
Exception { Pair stdOutErrPair = executeSparkSQLCommand(SPARKSQL_BATCH1_COMMANDS, true); assertStdOutContains(stdOutErrPair, "|default |stock_ticks_cow |false |\n" - + "|default |stock_ticks_mor |false |\n" + "|default |stock_ticks_mor_rt |false |"); + + "|default |stock_ticks_mor |false |\n|default |stock_ticks_mor_rt |false |"); assertStdOutContains(stdOutErrPair, - "+------+-------------------+\n" + "|GOOG |2018-08-31 10:29:00|\n" + "+------+-------------------+", 3); + "+------+-------------------+\n|GOOG |2018-08-31 10:29:00|\n+------+-------------------+", 3); assertStdOutContains(stdOutErrPair, "|GOOG |2018-08-31 09:59:00|6330 |1230.5 |1230.02 |", 3); assertStdOutContains(stdOutErrPair, "|GOOG |2018-08-31 10:29:00|3391 |1230.1899|1230.085|", 3); } @@ -203,9 +203,9 @@ public class ITTestHoodieDemo extends ITTestBase { private void testHiveAfterSecondBatch() throws Exception { Pair stdOutErrPair = executeHiveCommandFile(HIVE_BATCH1_COMMANDS); - assertStdOutContains(stdOutErrPair, "| symbol | _c1 |\n" + "+---------+----------------------+\n" + assertStdOutContains(stdOutErrPair, "| symbol | _c1 |\n+---------+----------------------+\n" + "| GOOG | 2018-08-31 10:29:00 |\n"); - assertStdOutContains(stdOutErrPair, "| symbol | _c1 |\n" + "+---------+----------------------+\n" + assertStdOutContains(stdOutErrPair, "| symbol | _c1 |\n+---------+----------------------+\n" + "| GOOG | 2018-08-31 10:59:00 |\n", 2); assertStdOutContains(stdOutErrPair, "| symbol | ts | volume | open | close |\n" @@ -236,7 +236,7 @@ public class ITTestHoodieDemo extends ITTestBase { private void testHiveAfterSecondBatchAfterCompaction() throws Exception { Pair stdOutErrPair = executeHiveCommandFile(HIVE_BATCH2_COMMANDS); - assertStdOutContains(stdOutErrPair, "| symbol | _c1 |\n" + "+---------+----------------------+\n" + assertStdOutContains(stdOutErrPair, "| symbol | _c1 |\n+---------+----------------------+\n" + "| GOOG | 2018-08-31 10:59:00 |", 2); assertStdOutContains(stdOutErrPair, "| symbol | ts | volume | open | close |\n" @@ -259,12 +259,12 @@ public class ITTestHoodieDemo extends ITTestBase { private void testSparkSQLAfterSecondBatch() throws Exception { Pair stdOutErrPair = executeSparkSQLCommand(SPARKSQL_BATCH2_COMMANDS, true); assertStdOutContains(stdOutErrPair, - "+------+-------------------+\n" + "|GOOG |2018-08-31 10:59:00|\n" + "+------+-------------------+", 2); + "+------+-------------------+\n|GOOG |2018-08-31 10:59:00|\n+------+-------------------+", 2); assertStdOutContains(stdOutErrPair, "|GOOG |2018-08-31 09:59:00|6330 |1230.5 |1230.02 |", 3); assertStdOutContains(stdOutErrPair, "|GOOG |2018-08-31 10:59:00|9021 |1227.1993|1227.215|", 2); assertStdOutContains(stdOutErrPair, - "+------+-------------------+\n" + "|GOOG |2018-08-31 10:29:00|\n" + "+------+-------------------+"); + "+------+-------------------+\n|GOOG |2018-08-31 10:29:00|\n+------+-------------------+"); assertStdOutContains(stdOutErrPair, "|GOOG |2018-08-31 10:29:00|3391 |1230.1899|1230.085|"); } @@ -291,10 +291,10 @@ public class ITTestHoodieDemo extends ITTestBase { Pair stdOutErrPair = executeSparkSQLCommand(SPARKSQL_INCREMENTAL_COMMANDS, true); assertStdOutContains(stdOutErrPair, "|GOOG |2018-08-31 10:59:00|9021 |1227.1993|1227.215|"); assertStdOutContains(stdOutErrPair, "|default |stock_ticks_cow |false |\n" - + "|default |stock_ticks_derived_mor |false |\n" + "|default |stock_ticks_derived_mor_rt|false |\n" - + "|default |stock_ticks_mor |false |\n" + "|default |stock_ticks_mor_rt |false |\n" + + "|default 
|stock_ticks_derived_mor |false |\n|default |stock_ticks_derived_mor_rt|false |\n" + + "|default |stock_ticks_mor |false |\n|default |stock_ticks_mor_rt |false |\n" + "| |stock_ticks_cow_incr |true |"); - assertStdOutContains(stdOutErrPair, "|count(1)|\n" + "+--------+\n" + "|99 |", 2); + assertStdOutContains(stdOutErrPair, "|count(1)|\n+--------+\n|99 |", 2); } private void scheduleAndRunCompaction() throws Exception { diff --git a/hudi-spark/src/main/java/org/apache/hudi/DataSourceUtils.java b/hudi-spark/src/main/java/org/apache/hudi/DataSourceUtils.java index 2a8551a56..8e2cd234c 100644 --- a/hudi-spark/src/main/java/org/apache/hudi/DataSourceUtils.java +++ b/hudi-spark/src/main/java/org/apache/hudi/DataSourceUtils.java @@ -98,7 +98,7 @@ public class DataSourceUtils { } } throw new HoodieException( - fieldName + "(Part -" + parts[i] + ") field not found in record. " + "Acceptable fields were :" + fieldName + "(Part -" + parts[i] + ") field not found in record. Acceptable fields were :" + valueNode.getSchema().getFields().stream().map(Field::name).collect(Collectors.toList())); } diff --git a/hudi-spark/src/main/java/org/apache/hudi/QuickstartUtils.java b/hudi-spark/src/main/java/org/apache/hudi/QuickstartUtils.java index 45e922f0a..22276e254 100644 --- a/hudi-spark/src/main/java/org/apache/hudi/QuickstartUtils.java +++ b/hudi-spark/src/main/java/org/apache/hudi/QuickstartUtils.java @@ -53,11 +53,11 @@ public class QuickstartUtils { private static final String[] DEFAULT_PARTITION_PATHS = {DEFAULT_FIRST_PARTITION_PATH, DEFAULT_SECOND_PARTITION_PATH, DEFAULT_THIRD_PARTITION_PATH}; - static String TRIP_EXAMPLE_SCHEMA = "{\"type\": \"record\"," + "\"name\": \"triprec\"," + "\"fields\": [ " - + "{\"name\": \"ts\",\"type\": \"double\"}," + "{\"name\": \"uuid\", \"type\": \"string\"}," - + "{\"name\": \"rider\", \"type\": \"string\"}," + "{\"name\": \"driver\", \"type\": \"string\"}," - + "{\"name\": \"begin_lat\", \"type\": \"double\"}," + "{\"name\": \"begin_lon\", \"type\": \"double\"}," - + "{\"name\": \"end_lat\", \"type\": \"double\"}," + "{\"name\": \"end_lon\", \"type\": \"double\"}," + static String TRIP_EXAMPLE_SCHEMA = "{\"type\": \"record\",\"name\": \"triprec\",\"fields\": [ " + + "{\"name\": \"ts\",\"type\": \"double\"},{\"name\": \"uuid\", \"type\": \"string\"}," + + "{\"name\": \"rider\", \"type\": \"string\"},{\"name\": \"driver\", \"type\": \"string\"}," + + "{\"name\": \"begin_lat\", \"type\": \"double\"},{\"name\": \"begin_lon\", \"type\": \"double\"}," + + "{\"name\": \"end_lat\", \"type\": \"double\"},{\"name\": \"end_lon\", \"type\": \"double\"}," + "{\"name\":\"fare\",\"type\": \"double\"}]}"; static Schema avroSchema = new Schema.Parser().parse(TRIP_EXAMPLE_SCHEMA); diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HiveIncrementalPuller.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HiveIncrementalPuller.java index cdd9e044e..963bc7d10 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HiveIncrementalPuller.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HiveIncrementalPuller.java @@ -181,7 +181,7 @@ public class HiveIncrementalPuller { String incrementalSQL = new Scanner(new File(config.incrementalSQLFile)).useDelimiter("\\Z").next(); if (!incrementalSQL.contains(config.sourceDb + "." + config.sourceTable)) { LOG.info("Incremental SQL does not have " + config.sourceDb + "." + config.sourceTable - + ", which means its pulling from a different table. 
Fencing this from " + "happening."); + + ", which means its pulling from a different table. Fencing this from happening."); throw new HoodieIncrementalPullSQLException( "Incremental SQL does not have " + config.sourceDb + "." + config.sourceTable); } diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/AbstractDeltaStreamerService.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/AbstractDeltaStreamerService.java index 294b634e2..5d36e8d16 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/AbstractDeltaStreamerService.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/AbstractDeltaStreamerService.java @@ -135,7 +135,7 @@ public abstract class AbstractDeltaStreamerService implements Serializable { LOG.info("Monitoring thread(s) !!"); future.get(); } catch (ExecutionException ex) { - LOG.error("Monitor noticed one or more threads failed." + " Requesting graceful shutdown of other threads", ex); + LOG.error("Monitor noticed one or more threads failed. Requesting graceful shutdown of other threads", ex); error = true; shutdown(false); } catch (InterruptedException ie) { diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/Compactor.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/Compactor.java index aa5d89233..eb3212fa0 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/Compactor.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/Compactor.java @@ -53,9 +53,9 @@ public class Compactor implements Serializable { long numWriteErrors = res.collect().stream().filter(r -> r.hasErrors()).count(); if (numWriteErrors != 0) { // We treat even a single error in compaction as fatal - LOG.error("Compaction for instant (" + instant + ") failed with write errors. " + "Errors :" + numWriteErrors); + LOG.error("Compaction for instant (" + instant + ") failed with write errors. Errors :" + numWriteErrors); throw new HoodieException( - "Compaction for instant (" + instant + ") failed with write errors. " + "Errors :" + numWriteErrors); + "Compaction for instant (" + instant + ") failed with write errors. Errors :" + numWriteErrors); } // Commit compaction compactionClient.commitCompaction(instant.getTimestamp(), res, Option.empty()); diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/DeltaSync.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/DeltaSync.java index d142fa48c..7dfb015ef 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/DeltaSync.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/DeltaSync.java @@ -300,7 +300,7 @@ public class DeltaSync implements Serializable { } if (Objects.equals(checkpointStr, resumeCheckpointStr.orElse(null))) { - LOG.info("No new data, source checkpoint has not changed. Nothing to commit." + "Old checkpoint=(" + LOG.info("No new data, source checkpoint has not changed. Nothing to commit. Old checkpoint=(" + resumeCheckpointStr + "). 
New Checkpoint=(" + checkpointStr + ")"); return null; } diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/HoodieDeltaStreamer.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/HoodieDeltaStreamer.java index 480956a9a..d4f3ec42e 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/HoodieDeltaStreamer.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/HoodieDeltaStreamer.java @@ -160,7 +160,7 @@ public class HoodieDeltaStreamer implements Serializable { @Parameter(names = {"--target-table"}, description = "name of the target table in Hive", required = true) public String targetTableName; - @Parameter(names = {"--storage-type"}, description = "Type of Storage. " + "COPY_ON_WRITE (or) MERGE_ON_READ", + @Parameter(names = {"--storage-type"}, description = "Type of Storage. COPY_ON_WRITE (or) MERGE_ON_READ", required = true) public String storageType; @@ -213,7 +213,7 @@ public class HoodieDeltaStreamer implements Serializable { public Operation operation = Operation.UPSERT; @Parameter(names = {"--filter-dupes"}, - description = "Should duplicate records from source be dropped/filtered out" + "before insert/bulk-insert") + description = "Should duplicate records from source be dropped/filtered out before insert/bulk-insert") public Boolean filterDupes = false; @Parameter(names = {"--enable-hive-sync"}, description = "Enable syncing to hive") @@ -229,7 +229,7 @@ public class HoodieDeltaStreamer implements Serializable { public Boolean continuousMode = false; @Parameter(names = {"--min-sync-interval-seconds"}, - description = "the min sync interval of each sync in " + "continuous mode") + description = "the min sync interval of each sync in continuous mode") public Integer minSyncIntervalSeconds = 0; @Parameter(names = {"--spark-master"}, description = "spark master to use.") @@ -259,7 +259,7 @@ public class HoodieDeltaStreamer implements Serializable { * Compaction is enabled for MoR table by default. This flag disables it */ @Parameter(names = {"--disable-compaction"}, - description = "Compaction is enabled for MoR table by default." + "This flag disables it ") + description = "Compaction is enabled for MoR table by default. 
This flag disables it ") public Boolean forceDisableCompaction = false; /** diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/SchedulerConfGenerator.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/SchedulerConfGenerator.java index c2abe668a..09c4da021 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/SchedulerConfGenerator.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/SchedulerConfGenerator.java @@ -47,10 +47,10 @@ public class SchedulerConfGenerator { public static final String SPARK_SCHEDULER_ALLOCATION_FILE_KEY = "spark.scheduler.allocation.file"; private static String SPARK_SCHEDULING_PATTERN = - "\n" + "\n" + " \n" - + " %s\n" + " %s\n" + " %s\n" - + " \n" + " \n" + " %s\n" - + " %s\n" + " %s\n" + " \n" + ""; + "\n\n \n" + + " %s\n %s\n %s\n" + + " \n \n %s\n" + + " %s\n %s\n \n"; private static String generateConfig(Integer deltaSyncWeight, Integer compactionWeight, Integer deltaSyncMinShare, Integer compactionMinShare) { diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/keygen/TimestampBasedKeyGenerator.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/keygen/TimestampBasedKeyGenerator.java index 4e997549e..7964a45da 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/keygen/TimestampBasedKeyGenerator.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/keygen/TimestampBasedKeyGenerator.java @@ -57,11 +57,11 @@ public class TimestampBasedKeyGenerator extends SimpleKeyGenerator { static class Config { // One value from TimestampType above - private static final String TIMESTAMP_TYPE_FIELD_PROP = "hoodie.deltastreamer.keygen" + ".timebased.timestamp.type"; + private static final String TIMESTAMP_TYPE_FIELD_PROP = "hoodie.deltastreamer.keygen.timebased.timestamp.type"; private static final String TIMESTAMP_INPUT_DATE_FORMAT_PROP = - "hoodie.deltastreamer.keygen" + ".timebased.input" + ".dateformat"; + "hoodie.deltastreamer.keygen.timebased.input.dateformat"; private static final String TIMESTAMP_OUTPUT_DATE_FORMAT_PROP = - "hoodie.deltastreamer.keygen" + ".timebased.output" + ".dateformat"; + "hoodie.deltastreamer.keygen.timebased.output.dateformat"; } public TimestampBasedKeyGenerator(TypedProperties config) { diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/FilebasedSchemaProvider.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/FilebasedSchemaProvider.java index 219948a46..0ce8f0b11 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/FilebasedSchemaProvider.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/FilebasedSchemaProvider.java @@ -40,8 +40,8 @@ public class FilebasedSchemaProvider extends SchemaProvider { * Configs supported. 
   */
  public static class Config {
-    private static final String SOURCE_SCHEMA_FILE_PROP = "hoodie.deltastreamer.schemaprovider" + ".source.schema.file";
-    private static final String TARGET_SCHEMA_FILE_PROP = "hoodie.deltastreamer.schemaprovider" + ".target.schema.file";
+    private static final String SOURCE_SCHEMA_FILE_PROP = "hoodie.deltastreamer.schemaprovider.source.schema.file";
+    private static final String TARGET_SCHEMA_FILE_PROP = "hoodie.deltastreamer.schemaprovider.target.schema.file";
   }
 
   private final FileSystem fs;
diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/helpers/IncrSourceHelper.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/helpers/IncrSourceHelper.java
index 19a85eb23..54ea0f3af 100644
--- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/helpers/IncrSourceHelper.java
+++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/helpers/IncrSourceHelper.java
@@ -55,7 +55,7 @@ public class IncrSourceHelper {
   public static Pair<String, String> calculateBeginAndEndInstants(JavaSparkContext jssc, String srcBasePath,
       int numInstantsPerFetch, Option<String> beginInstant, boolean readLatestOnMissingBeginInstant) {
     Preconditions.checkArgument(numInstantsPerFetch > 0,
-        "Make sure the config" + " hoodie.deltastreamer.source.hoodieincr.num_instants is set to a positive value");
+        "Make sure the config hoodie.deltastreamer.source.hoodieincr.num_instants is set to a positive value");
     HoodieTableMetaClient srcMetaClient = new HoodieTableMetaClient(jssc.hadoopConfiguration(), srcBasePath, true);
     final HoodieTimeline activeCommitTimeline =