[HUDI-624]: Split some of the code from PR for HUDI-479 (#1344)
SchemaDifference.java

@@ -18,15 +18,14 @@
 
 package org.apache.hudi.hive;
 
-import com.google.common.base.Objects;
-import com.google.common.collect.ImmutableList;
-import com.google.common.collect.ImmutableMap;
 import org.apache.parquet.schema.MessageType;
 
 import java.util.ArrayList;
+import java.util.Collections;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.StringJoiner;
 
 /**
  * Represents the schema difference between the storage schema and hive table schema.
@@ -43,9 +42,9 @@ public class SchemaDifference {
       Map<String, String> updateColumnTypes, Map<String, String> addColumnTypes) {
     this.storageSchema = storageSchema;
     this.tableSchema = tableSchema;
-    this.deleteColumns = ImmutableList.copyOf(deleteColumns);
-    this.updateColumnTypes = ImmutableMap.copyOf(updateColumnTypes);
-    this.addColumnTypes = ImmutableMap.copyOf(addColumnTypes);
+    this.deleteColumns = Collections.unmodifiableList(deleteColumns);
+    this.updateColumnTypes = Collections.unmodifiableMap(updateColumnTypes);
+    this.addColumnTypes = Collections.unmodifiableMap(addColumnTypes);
   }
 
   public List<String> getDeleteColumns() {
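
One reviewer-level caveat in this hunk: Guava's ImmutableList.copyOf takes a defensive snapshot of its argument, while Collections.unmodifiableList only wraps it, so later mutations of the caller's list remain visible through the "unmodifiable" view. A minimal, self-contained sketch of the difference (hypothetical class name, not part of this commit):

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

public class UnmodifiableVsCopy {
  public static void main(String[] args) {
    List<String> source = new ArrayList<>();
    source.add("col_a");

    // Collections.unmodifiableList returns a read-only *view* over `source`:
    // callers cannot mutate it, but changes to `source` still show through.
    List<String> view = Collections.unmodifiableList(source);
    source.add("col_b");
    System.out.println(view.size()); // 2 -- the view reflects the later add

    // ImmutableList.copyOf took a snapshot instead; with the standard
    // library, an explicit copy gives the same isolation.
    List<String> snapshot = Collections.unmodifiableList(new ArrayList<>(source));
    source.add("col_c");
    System.out.println(snapshot.size()); // 2 -- unaffected by the later add
  }
}

This is safe here as long as the lists and maps handed to the SchemaDifference constructor are not mutated afterwards by the caller.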
@@ -60,12 +59,6 @@ public class SchemaDifference {
     return addColumnTypes;
   }
 
-  @Override
-  public String toString() {
-    return Objects.toStringHelper(this).add("deleteColumns", deleteColumns).add("updateColumnTypes", updateColumnTypes)
-        .add("addColumnTypes", addColumnTypes).toString();
-  }
-
   public static Builder newBuilder(MessageType storageSchema, Map<String, String> tableSchema) {
     return new Builder(storageSchema, tableSchema);
   }
@@ -74,6 +67,17 @@
     return deleteColumns.isEmpty() && updateColumnTypes.isEmpty() && addColumnTypes.isEmpty();
   }
 
+  @Override
+  public String toString() {
+    return new StringJoiner(", ", SchemaDifference.class.getSimpleName() + "[", "]")
+        .add("storageSchema=" + storageSchema)
+        .add("tableSchema=" + tableSchema)
+        .add("deleteColumns=" + deleteColumns)
+        .add("updateColumnTypes=" + updateColumnTypes)
+        .add("addColumnTypes=" + addColumnTypes)
+        .toString();
+  }
+
   public static class Builder {
 
     private final MessageType storageSchema;
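
The replacement toString drops the Guava helper (Objects.toStringHelper, which Guava itself later deprecated in favor of MoreObjects.toStringHelper) for java.util.StringJoiner. The output carries the same fields, with brackets instead of braces. A small stand-alone sketch of the pattern (hypothetical Pair class, not from this commit):

import java.util.StringJoiner;

public class Pair {
  private final String left = "a";
  private final String right = "b";

  // Mirrors the pattern adopted above: StringJoiner handles the separator,
  // prefix and suffix; each field is appended as a "name=value" string.
  @Override
  public String toString() {
    return new StringJoiner(", ", Pair.class.getSimpleName() + "[", "]")
        .add("left=" + left)
        .add("right=" + right)
        .toString();
  }

  public static void main(String[] args) {
    System.out.println(new Pair()); // prints: Pair[left=a, right=b]
  }
}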
SchemaUtil.java

@@ -18,8 +18,6 @@
 
 package org.apache.hudi.hive.util;
 
-import com.google.common.collect.Maps;
-import com.google.common.collect.Sets;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hudi.common.model.HoodieLogFile;
@@ -42,6 +40,8 @@ import org.apache.parquet.schema.Type;
 
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
@@ -67,7 +67,7 @@ public class SchemaUtil {
     }
     LOG.info("Getting schema difference for " + tableSchema + "\r\n\r\n" + newTableSchema);
     SchemaDifference.Builder schemaDiffBuilder = SchemaDifference.newBuilder(storageSchema, tableSchema);
-    Set<String> tableColumns = Sets.newHashSet();
+    Set<String> tableColumns = new HashSet<>();
 
     for (Map.Entry<String, String> field : tableSchema.entrySet()) {
       String fieldName = field.getKey().toLowerCase();
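
The Guava collection factories being dropped throughout this commit existed mainly to infer generic type arguments before Java 7; with the diamond operator the plain constructors are equivalent. A trivial stand-alone illustration (not from this commit):

import java.util.HashSet;
import java.util.Set;

public class DiamondDemo {
  public static void main(String[] args) {
    // The diamond operator infers <String> from the declared type,
    // which is all Guava's Sets.newHashSet() ever did here.
    Set<String> tableColumns = new HashSet<>(); // was: Sets.newHashSet()
    tableColumns.add("_row_key");
    System.out.println(tableColumns); // [_row_key]
  }
}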
@@ -140,7 +140,7 @@ public class SchemaUtil {
    * @return : Hive Table schema read from parquet file MAP[String,String]
    */
   public static Map<String, String> convertParquetSchemaToHiveSchema(MessageType messageType) throws IOException {
-    Map<String, String> schema = Maps.newLinkedHashMap();
+    Map<String, String> schema = new LinkedHashMap<>();
     List<Type> parquetFields = messageType.getFields();
     for (Type parquetType : parquetFields) {
       StringBuilder result = new StringBuilder();
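
One behavioral detail worth confirming in review: this schema map must preserve column order, and new LinkedHashMap<>() keeps the same insertion-order iteration that Maps.newLinkedHashMap() provided, so Hive columns still come out in parquet field order. A small sketch (the column names are illustrative only):

import java.util.LinkedHashMap;
import java.util.Map;

public class InsertionOrderDemo {
  public static void main(String[] args) {
    // LinkedHashMap iterates in insertion order, unlike HashMap, so the
    // Hive schema mirrors the order of the parquet fields it was built from.
    Map<String, String> schema = new LinkedHashMap<>();
    schema.put("_row_key", "string");
    schema.put("ts", "bigint");
    schema.put("rider", "string");
    schema.forEach((col, type) -> System.out.println(col + " : " + type));
    // _row_key : string
    // ts : bigint
    // rider : string
  }
}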
TestHiveSyncTool.java

@@ -24,7 +24,6 @@ import org.apache.hudi.hive.HoodieHiveClient.PartitionEvent;
 import org.apache.hudi.hive.HoodieHiveClient.PartitionEvent.PartitionEventType;
 import org.apache.hudi.hive.util.SchemaUtil;
 
-import com.google.common.collect.Lists;
 import org.apache.hadoop.hive.metastore.api.Partition;
 import org.apache.parquet.schema.MessageType;
 import org.apache.parquet.schema.OriginalType;
@@ -345,7 +344,7 @@ public class TestHiveSyncTool {
     HiveSyncConfig hiveSyncConfig = HiveSyncConfig.copy(TestUtil.hiveSyncConfig);
     hiveSyncConfig.partitionValueExtractorClass = MultiPartKeysValueExtractor.class.getCanonicalName();
     hiveSyncConfig.tableName = "multi_part_key";
-    hiveSyncConfig.partitionFields = Lists.newArrayList("year", "month", "day");
+    hiveSyncConfig.partitionFields = Arrays.asList("year", "month", "day");
     TestUtil.getCreatedTablesSet().add(hiveSyncConfig.databaseName + "." + hiveSyncConfig.tableName);
 
     HoodieHiveClient hiveClient = new HoodieHiveClient(hiveSyncConfig, TestUtil.getHiveConf(), TestUtil.fileSystem);
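
Unlike Lists.newArrayList(...), Arrays.asList returns a fixed-size list backed by the given array, so this swap is only safe while nothing appends to partitionFields. A stand-alone illustration of the caveat (not from this commit):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public class AsListCaveat {
  public static void main(String[] args) {
    List<String> fields = Arrays.asList("year", "month", "day");
    fields.set(0, "yr");              // allowed: element replacement
    try {
      fields.add("hour");             // disallowed: the size is fixed
    } catch (UnsupportedOperationException e) {
      System.out.println("Arrays.asList lists cannot grow");
    }
    // Copy first when appends are needed later:
    List<String> growable = new ArrayList<>(fields);
    growable.add("hour");
    System.out.println(growable);     // [yr, month, day, hour]
  }
}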
TestUtil.java

@@ -43,9 +43,6 @@ import org.apache.hudi.common.util.FileIOUtils;
 import org.apache.hudi.common.util.SchemaTestUtil;
 import org.apache.hudi.hive.util.HiveTestService;
 
-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
-import com.google.common.collect.Sets;
 import org.apache.avro.Schema;
 import org.apache.avro.generic.IndexedRecord;
 import org.apache.hadoop.conf.Configuration;
@@ -68,6 +65,10 @@ import java.io.File;
 import java.io.IOException;
 import java.net.URISyntaxException;
 import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;
@@ -86,7 +87,7 @@ public class TestUtil {
   static HiveSyncConfig hiveSyncConfig;
   private static DateTimeFormatter dtfOut;
   static FileSystem fileSystem;
-  private static Set<String> createdTablesSet = Sets.newHashSet();
+  private static Set<String> createdTablesSet = new HashSet<>();
 
   public static void setUp() throws IOException, InterruptedException {
     if (dfsCluster == null) {
@@ -114,7 +115,7 @@ public class TestUtil {
     hiveSyncConfig.basePath = "/tmp/hdfs/TestHiveSyncTool/";
     hiveSyncConfig.assumeDatePartitioning = true;
     hiveSyncConfig.usePreApacheInputFormat = false;
-    hiveSyncConfig.partitionFields = Lists.newArrayList("datestr");
+    hiveSyncConfig.partitionFields = Collections.singletonList("datestr");
 
     dtfOut = DateTimeFormat.forPattern("yyyy/MM/dd");
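
Collections.singletonList goes one step further than Arrays.asList: it allocates a single-element, fully immutable list, on which even set() throws. A quick stand-alone check (not from this commit):

import java.util.Collections;
import java.util.List;

public class SingletonListDemo {
  public static void main(String[] args) {
    List<String> fields = Collections.singletonList("datestr");
    System.out.println(fields.get(0)); // datestr
    try {
      fields.set(0, "ds");             // rejected: no mutation at all
    } catch (UnsupportedOperationException e) {
      System.out.println("singletonList is fully immutable");
    }
  }
}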
@@ -249,7 +250,7 @@ public class TestUtil {
 
   private static List<HoodieWriteStat> createTestData(Path partPath, boolean isParquetSchemaSimple, String commitTime)
       throws IOException, URISyntaxException {
-    List<HoodieWriteStat> writeStats = Lists.newArrayList();
+    List<HoodieWriteStat> writeStats = new ArrayList<>();
     for (int i = 0; i < 5; i++) {
       // Create 5 files
       String fileId = UUID.randomUUID().toString();
@@ -297,7 +298,7 @@ public class TestUtil {
         .overBaseCommit(dataFile.getCommitTime()).withFs(fileSystem).build();
     List<IndexedRecord> records = (isLogSchemaSimple ? SchemaTestUtil.generateTestRecords(0, 100)
         : SchemaTestUtil.generateEvolvedTestRecords(100, 100));
-    Map<HeaderMetadataType, String> header = Maps.newHashMap();
+    Map<HeaderMetadataType, String> header = new HashMap<>(2);
     header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, dataFile.getCommitTime());
     header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, schema.toString());
     HoodieAvroDataBlock dataBlock = new HoodieAvroDataBlock(records, header);
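
The new HashMap<>(2) passes an initial-capacity hint for the two header entries. One subtlety, assuming the default 0.75 load factor: a hint of 2 still triggers one resize when the second entry goes in, so for exactly two entries a hint of 4 (or simply the no-arg constructor) would avoid it. A stand-alone sketch with hypothetical values (not from this commit):

import java.util.HashMap;
import java.util.Map;

public class CapacityHintDemo {
  public static void main(String[] args) {
    // HashMap resizes once size exceeds capacity * loadFactor (0.75 by
    // default), so capacity 2 holds only one entry before growing.
    Map<String, String> header = new HashMap<>(2);
    header.put("INSTANT_TIME", "20200215000000");   // hypothetical value
    header.put("SCHEMA", "{\"type\":\"record\"}");  // hypothetical value
    System.out.println(header.size()); // 2
  }
}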