diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/collection/DiskBasedMap.java b/hudi-common/src/main/java/org/apache/hudi/common/util/collection/DiskBasedMap.java index 4481770dc..46cac4a16 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/util/collection/DiskBasedMap.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/util/collection/DiskBasedMap.java @@ -26,7 +26,6 @@ import java.io.Serializable; import java.net.InetAddress; import java.util.AbstractMap; import java.util.Collection; -import java.util.Date; import java.util.HashSet; import java.util.Iterator; import java.util.Map; @@ -213,7 +212,7 @@ public final class DiskBasedMap try { byte[] val = SerializationUtils.serialize(value); Integer valueSize = val.length; - Long timestamp = new Date().getTime(); + Long timestamp = System.currentTimeMillis(); this.valueMetadataMap.put(key, new DiskBasedMap.ValueMetadata(this.filePath, valueSize, filePosition.get(), timestamp)); byte[] serializedKey = SerializationUtils.serialize(key); diff --git a/hudi-hive/src/main/java/org/apache/hudi/hive/HiveSyncConfig.java b/hudi-hive/src/main/java/org/apache/hudi/hive/HiveSyncConfig.java index cd700c3a0..8e32aebdd 100644 --- a/hudi-hive/src/main/java/org/apache/hudi/hive/HiveSyncConfig.java +++ b/hudi-hive/src/main/java/org/apache/hudi/hive/HiveSyncConfig.java @@ -19,6 +19,7 @@ package org.apache.hudi.hive; import com.beust.jcommander.Parameter; + import java.io.Serializable; import java.util.ArrayList; import java.util.List; diff --git a/hudi-hive/src/main/java/org/apache/hudi/hive/HiveSyncTool.java b/hudi-hive/src/main/java/org/apache/hudi/hive/HiveSyncTool.java index 315f575ac..ebf1c0f69 100644 --- a/hudi-hive/src/main/java/org/apache/hudi/hive/HiveSyncTool.java +++ b/hudi-hive/src/main/java/org/apache/hudi/hive/HiveSyncTool.java @@ -18,16 +18,6 @@ package org.apache.hudi.hive; -import com.beust.jcommander.JCommander; -import java.util.List; -import java.util.Map; -import java.util.stream.Collectors; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.metastore.api.Partition; -import org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat; -import org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe; import org.apache.hudi.common.util.FSUtils; import org.apache.hudi.common.util.Option; import org.apache.hudi.exception.InvalidDatasetException; @@ -36,10 +26,21 @@ import org.apache.hudi.hadoop.realtime.HoodieParquetRealtimeInputFormat; import org.apache.hudi.hive.HoodieHiveClient.PartitionEvent; import org.apache.hudi.hive.HoodieHiveClient.PartitionEvent.PartitionEventType; import org.apache.hudi.hive.util.SchemaUtil; + +import com.beust.jcommander.JCommander; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.api.Partition; +import org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat; +import org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; import org.apache.parquet.schema.MessageType; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; /** * Tool to sync a hoodie HDFS dataset with a hive metastore table. Either use it as a api diff --git a/hudi-hive/src/main/java/org/apache/hudi/hive/HoodieHiveClient.java b/hudi-hive/src/main/java/org/apache/hudi/hive/HoodieHiveClient.java index 0b9e261dd..cf222dba3 100644 --- a/hudi-hive/src/main/java/org/apache/hudi/hive/HoodieHiveClient.java +++ b/hudi-hive/src/main/java/org/apache/hudi/hive/HoodieHiveClient.java @@ -171,7 +171,7 @@ public class HoodieHiveClient { /** * Generate Hive Partition from partition values - * + * * @param partition Partition path * @return */ @@ -685,4 +685,4 @@ public class HoodieHiveClient { return new PartitionEvent(PartitionEventType.UPDATE, storagePartition); } } -} +} \ No newline at end of file diff --git a/hudi-hive/src/main/java/org/apache/hudi/hive/MultiPartKeysValueExtractor.java b/hudi-hive/src/main/java/org/apache/hudi/hive/MultiPartKeysValueExtractor.java index d23250a21..aa6ec307d 100644 --- a/hudi-hive/src/main/java/org/apache/hudi/hive/MultiPartKeysValueExtractor.java +++ b/hudi-hive/src/main/java/org/apache/hudi/hive/MultiPartKeysValueExtractor.java @@ -19,6 +19,7 @@ package org.apache.hudi.hive; import com.google.common.base.Preconditions; + import java.util.Arrays; import java.util.List; import java.util.stream.Collectors; diff --git a/hudi-hive/src/main/java/org/apache/hudi/hive/SchemaDifference.java b/hudi-hive/src/main/java/org/apache/hudi/hive/SchemaDifference.java index a6594e720..752c09946 100644 --- a/hudi-hive/src/main/java/org/apache/hudi/hive/SchemaDifference.java +++ b/hudi-hive/src/main/java/org/apache/hudi/hive/SchemaDifference.java @@ -23,9 +23,10 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; import com.google.common.collect.Maps; +import org.apache.parquet.schema.MessageType; + import java.util.List; import java.util.Map; -import org.apache.parquet.schema.MessageType; /** * Represents the schema difference between the storage schema and hive table schema diff --git a/hudi-hive/src/main/java/org/apache/hudi/hive/SlashEncodedDayPartitionValueExtractor.java b/hudi-hive/src/main/java/org/apache/hudi/hive/SlashEncodedDayPartitionValueExtractor.java index 36e3f1bc0..334bb7932 100644 --- a/hudi-hive/src/main/java/org/apache/hudi/hive/SlashEncodedDayPartitionValueExtractor.java +++ b/hudi-hive/src/main/java/org/apache/hudi/hive/SlashEncodedDayPartitionValueExtractor.java @@ -19,11 +19,12 @@ package org.apache.hudi.hive; import com.beust.jcommander.internal.Lists; -import java.util.List; import org.joda.time.DateTime; import org.joda.time.format.DateTimeFormat; import org.joda.time.format.DateTimeFormatter; +import java.util.List; + /** * HDFS Path contain hive partition values for the keys it is partitioned on. This mapping is not straight forward and * requires a pluggable implementation to extract the partition value from HDFS path. diff --git a/hudi-hive/src/main/java/org/apache/hudi/hive/util/ColumnNameXLator.java b/hudi-hive/src/main/java/org/apache/hudi/hive/util/ColumnNameXLator.java index 35a9dcfb8..fdf242e92 100644 --- a/hudi-hive/src/main/java/org/apache/hudi/hive/util/ColumnNameXLator.java +++ b/hudi-hive/src/main/java/org/apache/hudi/hive/util/ColumnNameXLator.java @@ -19,6 +19,7 @@ package org.apache.hudi.hive.util; import com.google.common.collect.Maps; + import java.util.Iterator; import java.util.Map; diff --git a/hudi-hive/src/main/java/org/apache/hudi/hive/util/SchemaUtil.java b/hudi-hive/src/main/java/org/apache/hudi/hive/util/SchemaUtil.java index 2d4c5b5fc..95cc55086 100644 --- a/hudi-hive/src/main/java/org/apache/hudi/hive/util/SchemaUtil.java +++ b/hudi-hive/src/main/java/org/apache/hudi/hive/util/SchemaUtil.java @@ -18,16 +18,6 @@ package org.apache.hudi.hive.util; -import com.google.common.collect.Maps; -import com.google.common.collect.Sets; -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.stream.Collectors; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; import org.apache.hudi.common.model.HoodieLogFile; import org.apache.hudi.common.table.log.HoodieLogFormat; import org.apache.hudi.common.table.log.HoodieLogFormat.Reader; @@ -36,6 +26,11 @@ import org.apache.hudi.common.table.log.block.HoodieLogBlock; import org.apache.hudi.hive.HiveSyncConfig; import org.apache.hudi.hive.HoodieHiveSyncException; import org.apache.hudi.hive.SchemaDifference; + +import com.google.common.collect.Maps; +import com.google.common.collect.Sets; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; import org.apache.parquet.avro.AvroSchemaConverter; @@ -46,6 +41,13 @@ import org.apache.parquet.schema.OriginalType; import org.apache.parquet.schema.PrimitiveType; import org.apache.parquet.schema.Type; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; + /** * Schema Utilities */ diff --git a/hudi-hive/src/test/java/org/apache/hudi/hive/TestHiveSyncTool.java b/hudi-hive/src/test/java/org/apache/hudi/hive/TestHiveSyncTool.java index 5183d679d..826653127 100644 --- a/hudi-hive/src/test/java/org/apache/hudi/hive/TestHiveSyncTool.java +++ b/hudi-hive/src/test/java/org/apache/hudi/hive/TestHiveSyncTool.java @@ -18,22 +18,14 @@ package org.apache.hudi.hive; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; - -import com.google.common.collect.Lists; -import java.io.IOException; -import java.net.URISyntaxException; -import java.util.Arrays; -import java.util.Collection; -import java.util.List; -import org.apache.hadoop.hive.metastore.api.Partition; import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.SchemaTestUtil; import org.apache.hudi.hive.HoodieHiveClient.PartitionEvent; import org.apache.hudi.hive.HoodieHiveClient.PartitionEvent.PartitionEventType; import org.apache.hudi.hive.util.SchemaUtil; + +import com.google.common.collect.Lists; +import org.apache.hadoop.hive.metastore.api.Partition; import org.apache.parquet.schema.MessageType; import org.apache.parquet.schema.OriginalType; import org.apache.parquet.schema.PrimitiveType; @@ -45,6 +37,16 @@ import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; +import java.io.IOException; +import java.net.URISyntaxException; +import java.util.Arrays; +import java.util.Collection; +import java.util.List; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + @SuppressWarnings("ConstantConditions") @RunWith(Parameterized.class) public class TestHiveSyncTool { diff --git a/hudi-hive/src/test/java/org/apache/hudi/hive/TestUtil.java b/hudi-hive/src/test/java/org/apache/hudi/hive/TestUtil.java index 1414acdd2..92624661c 100644 --- a/hudi-hive/src/test/java/org/apache/hudi/hive/TestUtil.java +++ b/hudi-hive/src/test/java/org/apache/hudi/hive/TestUtil.java @@ -18,29 +18,6 @@ package org.apache.hudi.hive; -import static org.junit.Assert.fail; - -import com.google.common.collect.Lists; -import com.google.common.collect.Maps; -import com.google.common.collect.Sets; -import java.io.File; -import java.io.IOException; -import java.net.URISyntaxException; -import java.nio.charset.StandardCharsets; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; -import java.util.Set; -import java.util.UUID; -import org.apache.avro.Schema; -import org.apache.avro.generic.IndexedRecord; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hdfs.MiniDFSCluster; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hive.service.server.HiveServer2; import org.apache.hudi.avro.HoodieAvroWriteSupport; import org.apache.hudi.common.BloomFilter; import org.apache.hudi.common.minicluster.HdfsTestService; @@ -63,6 +40,19 @@ import org.apache.hudi.common.util.FSUtils; import org.apache.hudi.common.util.FileIOUtils; import org.apache.hudi.common.util.SchemaTestUtil; import org.apache.hudi.hive.util.HiveTestService; + +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import com.google.common.collect.Sets; +import org.apache.avro.Schema; +import org.apache.avro.generic.IndexedRecord; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hive.service.server.HiveServer2; import org.apache.parquet.avro.AvroSchemaConverter; import org.apache.parquet.hadoop.ParquetWriter; import org.apache.parquet.hadoop.metadata.CompressionCodecName; @@ -72,6 +62,18 @@ import org.joda.time.format.DateTimeFormat; import org.joda.time.format.DateTimeFormatter; import org.junit.runners.model.InitializationError; +import java.io.File; +import java.io.IOException; +import java.net.URISyntaxException; +import java.nio.charset.StandardCharsets; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Set; +import java.util.UUID; + +import static org.junit.Assert.fail; + @SuppressWarnings("SameParameterValue") public class TestUtil { diff --git a/hudi-hive/src/test/java/org/apache/hudi/hive/util/HiveTestService.java b/hudi-hive/src/test/java/org/apache/hudi/hive/util/HiveTestService.java index 4ecf28060..b83eddcb7 100644 --- a/hudi-hive/src/test/java/org/apache/hudi/hive/util/HiveTestService.java +++ b/hudi-hive/src/test/java/org/apache/hudi/hive/util/HiveTestService.java @@ -18,16 +18,12 @@ package org.apache.hudi.hive.util; +import org.apache.hudi.common.model.HoodieTestUtils; +import org.apache.hudi.common.util.FileIOUtils; + import com.google.common.base.Preconditions; import com.google.common.collect.Maps; import com.google.common.io.Files; -import java.io.File; -import java.io.IOException; -import java.net.InetSocketAddress; -import java.net.SocketException; -import java.util.Map; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.conf.HiveConf; @@ -39,8 +35,6 @@ import org.apache.hadoop.hive.metastore.TUGIBasedProcessor; import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.hadoop.hive.thrift.TUGIContainingTransport; import org.apache.hive.service.server.HiveServer2; -import org.apache.hudi.common.model.HoodieTestUtils; -import org.apache.hudi.common.util.FileIOUtils; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; import org.apache.thrift.TProcessor; @@ -55,6 +49,14 @@ import org.apache.thrift.transport.TTransport; import org.apache.thrift.transport.TTransportException; import org.apache.thrift.transport.TTransportFactory; +import java.io.File; +import java.io.IOException; +import java.net.InetSocketAddress; +import java.net.SocketException; +import java.util.Map; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; + public class HiveTestService { private static Logger LOG = LogManager.getLogger(HiveTestService.class); diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/adhoc/UpgradePayloadFromUberToApache.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/adhoc/UpgradePayloadFromUberToApache.java index 679694090..8c04f673b 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/adhoc/UpgradePayloadFromUberToApache.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/adhoc/UpgradePayloadFromUberToApache.java @@ -80,7 +80,7 @@ public class UpgradePayloadFromUberToApache implements Serializable { newPropsMap.put(HoodieCompactionConfig.PAYLOAD_CLASS_PROP, newPayloadClass); Properties props = new Properties(); props.putAll(newPropsMap); - tableConfig.createHoodieProperties(metaClient.getFs(), new Path(metaPath), props); + HoodieTableConfig.createHoodieProperties(metaClient.getFs(), new Path(metaPath), props); logger.info("Finished upgrade for " + basePath); } } diff --git a/style/checkstyle.xml b/style/checkstyle.xml index 91f51c373..ebfb569af 100644 --- a/style/checkstyle.xml +++ b/style/checkstyle.xml @@ -34,7 +34,7 @@ - +