[HUDI-624]: Split some of the code from PR for HUDI-479 (#1344)

2020-02-21 01:22:21 -05:00
parent 185ff646ad
commit 078d4825d9
31 changed files with 130 additions and 141 deletions
--- a/hudi-hive/src/main/java/org/apache/hudi/hive/SchemaDifference.java
+++ b/hudi-hive/src/main/java/org/apache/hudi/hive/SchemaDifference.java
@@ -18,15 +18,14 @@

 package org.apache.hudi.hive;

-import com.google.common.base.Objects;
-import com.google.common.collect.ImmutableList;
-import com.google.common.collect.ImmutableMap;
 import org.apache.parquet.schema.MessageType;

 import java.util.ArrayList;
+import java.util.Collections;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.StringJoiner;

 /**
 * Represents the schema difference between the storage schema and hive table schema.
@@ -43,9 +42,9 @@ public class SchemaDifference {
      Map<String, String> updateColumnTypes, Map<String, String> addColumnTypes) {
    this.storageSchema = storageSchema;
    this.tableSchema = tableSchema;
-    this.deleteColumns = ImmutableList.copyOf(deleteColumns);
-    this.updateColumnTypes = ImmutableMap.copyOf(updateColumnTypes);
-    this.addColumnTypes = ImmutableMap.copyOf(addColumnTypes);
+    this.deleteColumns = Collections.unmodifiableList(new ArrayList<>(deleteColumns)); // snapshot, not a live view
+    this.updateColumnTypes = Collections.unmodifiableMap(new HashMap<>(updateColumnTypes)); // snapshot
+    this.addColumnTypes = Collections.unmodifiableMap(new HashMap<>(addColumnTypes)); // snapshot
  }

  public List<String> getDeleteColumns() {
@@ -60,12 +59,6 @@ public class SchemaDifference {
    return addColumnTypes;
  }

-  @Override
-  public String toString() {
-    return Objects.toStringHelper(this).add("deleteColumns", deleteColumns).add("updateColumnTypes", updateColumnTypes)
-        .add("addColumnTypes", addColumnTypes).toString();
-  }
-
  public static Builder newBuilder(MessageType storageSchema, Map<String, String> tableSchema) {
    return new Builder(storageSchema, tableSchema);
  }
@@ -74,6 +67,17 @@ public class SchemaDifference {
    return deleteColumns.isEmpty() && updateColumnTypes.isEmpty() && addColumnTypes.isEmpty();
  }

+  @Override
+  public String toString() {
+    StringJoiner fields = new StringJoiner(", ", SchemaDifference.class.getSimpleName() + "[", "]");
+    fields.add("storageSchema=" + storageSchema);
+    fields.add("tableSchema=" + tableSchema);
+    fields.add("deleteColumns=" + deleteColumns);
+    fields.add("updateColumnTypes=" + updateColumnTypes);
+    fields.add("addColumnTypes=" + addColumnTypes);
+    return fields.toString(); // SchemaDifference[storageSchema=..., ..., addColumnTypes=...]
+  }
+
  public static class Builder {

    private final MessageType storageSchema;
--- a/hudi-hive/src/main/java/org/apache/hudi/hive/util/SchemaUtil.java
+++ b/hudi-hive/src/main/java/org/apache/hudi/hive/util/SchemaUtil.java
@@ -18,8 +18,6 @@

 package org.apache.hudi.hive.util;

-import com.google.common.collect.Maps;
-import com.google.common.collect.Sets;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hudi.common.model.HoodieLogFile;
@@ -42,6 +40,8 @@ import org.apache.parquet.schema.Type;

 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
@@ -67,7 +67,7 @@ public class SchemaUtil {
    }
    LOG.info("Getting schema difference for " + tableSchema + "\r\n\r\n" + newTableSchema);
    SchemaDifference.Builder schemaDiffBuilder = SchemaDifference.newBuilder(storageSchema, tableSchema);
-    Set<String> tableColumns = Sets.newHashSet();
+    Set<String> tableColumns = new HashSet<>();

    for (Map.Entry<String, String> field : tableSchema.entrySet()) {
      String fieldName = field.getKey().toLowerCase();
@@ -140,7 +140,7 @@ public class SchemaUtil {
   * @return : Hive Table schema read from parquet file MAP[String,String]
   */
  public static Map<String, String> convertParquetSchemaToHiveSchema(MessageType messageType) throws IOException {
-    Map<String, String> schema = Maps.newLinkedHashMap();
+    Map<String, String> schema = new LinkedHashMap<>();
    List<Type> parquetFields = messageType.getFields();
    for (Type parquetType : parquetFields) {
      StringBuilder result = new StringBuilder();
--- a/hudi-hive/src/test/java/org/apache/hudi/hive/TestHiveSyncTool.java
+++ b/hudi-hive/src/test/java/org/apache/hudi/hive/TestHiveSyncTool.java
@@ -24,7 +24,6 @@ import org.apache.hudi.hive.HoodieHiveClient.PartitionEvent;
 import org.apache.hudi.hive.HoodieHiveClient.PartitionEvent.PartitionEventType;
 import org.apache.hudi.hive.util.SchemaUtil;

-import com.google.common.collect.Lists;
 import org.apache.hadoop.hive.metastore.api.Partition;
 import org.apache.parquet.schema.MessageType;
 import org.apache.parquet.schema.OriginalType;
@@ -345,7 +344,7 @@ public class TestHiveSyncTool {
    HiveSyncConfig hiveSyncConfig = HiveSyncConfig.copy(TestUtil.hiveSyncConfig);
    hiveSyncConfig.partitionValueExtractorClass = MultiPartKeysValueExtractor.class.getCanonicalName();
    hiveSyncConfig.tableName = "multi_part_key";
-    hiveSyncConfig.partitionFields = Lists.newArrayList("year", "month", "day");
+    hiveSyncConfig.partitionFields = Arrays.asList("year", "month", "day");
    TestUtil.getCreatedTablesSet().add(hiveSyncConfig.databaseName + "." + hiveSyncConfig.tableName);

    HoodieHiveClient hiveClient = new HoodieHiveClient(hiveSyncConfig, TestUtil.getHiveConf(), TestUtil.fileSystem);
--- a/hudi-hive/src/test/java/org/apache/hudi/hive/TestUtil.java
+++ b/hudi-hive/src/test/java/org/apache/hudi/hive/TestUtil.java
@@ -43,9 +43,6 @@ import org.apache.hudi.common.util.FileIOUtils;
 import org.apache.hudi.common.util.SchemaTestUtil;
 import org.apache.hudi.hive.util.HiveTestService;

-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
-import com.google.common.collect.Sets;
 import org.apache.avro.Schema;
 import org.apache.avro.generic.IndexedRecord;
 import org.apache.hadoop.conf.Configuration;
@@ -68,6 +65,10 @@ import java.io.File;
 import java.io.IOException;
 import java.net.URISyntaxException;
 import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;
@@ -86,7 +87,7 @@ public class TestUtil {
  static HiveSyncConfig hiveSyncConfig;
  private static DateTimeFormatter dtfOut;
  static FileSystem fileSystem;
-  private static Set<String> createdTablesSet = Sets.newHashSet();
+  private static Set<String> createdTablesSet = new HashSet<>();

  public static void setUp() throws IOException, InterruptedException {
    if (dfsCluster == null) {
@@ -114,7 +115,7 @@ public class TestUtil {
    hiveSyncConfig.basePath = "/tmp/hdfs/TestHiveSyncTool/";
    hiveSyncConfig.assumeDatePartitioning = true;
    hiveSyncConfig.usePreApacheInputFormat = false;
-    hiveSyncConfig.partitionFields = Lists.newArrayList("datestr");
+    hiveSyncConfig.partitionFields = Collections.singletonList("datestr");

    dtfOut = DateTimeFormat.forPattern("yyyy/MM/dd");

@@ -249,7 +250,7 @@ public class TestUtil {

  private static List<HoodieWriteStat> createTestData(Path partPath, boolean isParquetSchemaSimple, String commitTime)
      throws IOException, URISyntaxException {
-    List<HoodieWriteStat> writeStats = Lists.newArrayList();
+    List<HoodieWriteStat> writeStats = new ArrayList<>();
    for (int i = 0; i < 5; i++) {
      // Create 5 files
      String fileId = UUID.randomUUID().toString();
@@ -297,7 +298,7 @@ public class TestUtil {
        .overBaseCommit(dataFile.getCommitTime()).withFs(fileSystem).build();
    List<IndexedRecord> records = (isLogSchemaSimple ? SchemaTestUtil.generateTestRecords(0, 100)
        : SchemaTestUtil.generateEvolvedTestRecords(100, 100));
-    Map<HeaderMetadataType, String> header = Maps.newHashMap();
+    Map<HeaderMetadataType, String> header = new HashMap<>(2);
    header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, dataFile.getCommitTime());
    header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, schema.toString());
    HoodieAvroDataBlock dataBlock = new HoodieAvroDataBlock(records, header);