1
0

Hive Sync handling must work for datasets with multi-partition keys

This commit is contained in:
Balaji Varadarajan
2018-09-16 08:06:30 -07:00
committed by vinoth chandar
parent 5cb28e7b1f
commit 460e24e84b
5 changed files with 106 additions and 27 deletions

View File

@@ -22,9 +22,11 @@ import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import com.google.common.collect.Lists;
import com.uber.hoodie.common.util.SchemaTestUtil;
import com.uber.hoodie.hive.HoodieHiveClient.PartitionEvent;
import com.uber.hoodie.hive.HoodieHiveClient.PartitionEvent.PartitionEventType;
import com.uber.hoodie.hive.util.MultiPartKeysValueExtractor;
import com.uber.hoodie.hive.util.SchemaUtil;
import java.io.IOException;
import java.net.URISyntaxException;
@@ -356,4 +358,33 @@ public class HiveSyncToolTest {
TestUtil.hiveSyncConfig.tableName = roTablename;
}
@Test
public void testMultiPartitionKeySync()
    throws IOException, InitializationError, URISyntaxException, TException,
    InterruptedException {
  // Verifies that Hive sync works for datasets whose partition path is made of
  // multiple slash-delimited keys (year/month/day) when configured with the
  // MultiPartKeysValueExtractor.
  String commitTime = "100";
  TestUtil.createCOWDataset(commitTime, 5);
  HiveSyncConfig hiveSyncConfig = HiveSyncConfig.copy(TestUtil.hiveSyncConfig);
  hiveSyncConfig.partitionValueExtractorClass = MultiPartKeysValueExtractor.class.getCanonicalName();
  hiveSyncConfig.tableName = "multi_part_key";
  hiveSyncConfig.partitionFields = Lists.newArrayList("year", "month", "day");
  // Register the table so TestUtil can clean it up after the test run.
  TestUtil.getCreatedTablesSet().add(hiveSyncConfig.databaseName + "." + hiveSyncConfig.tableName);
  HoodieHiveClient hiveClient = new HoodieHiveClient(hiveSyncConfig,
      TestUtil.getHiveConf(), TestUtil.fileSystem);
  assertFalse("Table " + hiveSyncConfig.tableName + " should not exist initially",
      hiveClient.doesTableExist());
  // Lets do the sync
  HiveSyncTool tool = new HiveSyncTool(hiveSyncConfig, TestUtil.getHiveConf(), TestUtil.fileSystem);
  tool.syncHoodieTable();
  assertTrue("Table " + hiveSyncConfig.tableName + " should exist after sync completes",
      hiveClient.doesTableExist());
  // The Hive schema carries the data columns plus the 3 partition columns.
  assertEquals("Hive Schema should match the dataset schema + partition fields",
      hiveClient.getTableSchema().size(), hiveClient.getDataSchema().getColumns().size() + 3);
  assertEquals("Table partitions should match the number of partitions we wrote", 5,
      hiveClient.scanTablePartitions().size());
  // Fix: assertion message previously misspelled "synced" as "sycned".
  assertEquals("The last commit that was synced should be updated in the TBLPROPERTIES",
      commitTime, hiveClient.getLastCommitTimeSynced().get());
}
}

View File

@@ -358,4 +358,8 @@ public class TestUtil {
fsout.write(bytes);
fsout.close();
}
/**
 * Exposes the set of fully-qualified table names ("db.table") created by tests,
 * so individual tests can register tables for cleanup.
 *
 * @return the mutable set of created table names
 */
public static Set<String> getCreatedTablesSet() {
  return TestUtil.createdTablesSet;
}
}

View File

@@ -0,0 +1,33 @@
/*
* Copyright (c) 2017 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.uber.hoodie.hive.util;
import com.uber.hoodie.hive.PartitionValueExtractor;
import java.util.Arrays;
import java.util.List;
/**
* Partition Key extractor treating each value delimited by slash as separate key.
*/
/**
 * Extracts one partition value per slash-separated segment of the partition path,
 * e.g. "2018/09/16" yields ["2018", "09", "16"].
 */
public class MultiPartKeysValueExtractor implements PartitionValueExtractor {

  @Override
  public List<String> extractPartitionValuesInPath(String partitionPath) {
    // Each path segment corresponds to one partition key value.
    return Arrays.asList(partitionPath.split("/"));
  }
}