Add hoodie-hive module to support hive registration of hoodie datasets
This commit is contained in:
@@ -0,0 +1,98 @@
/*
 * Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *          http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
|
||||
package com.uber.hoodie.hive;

import com.uber.hoodie.hive.client.HoodieHiveClient;
import com.uber.hoodie.hive.model.HoodieDatasetReference;
import com.uber.hoodie.hive.util.TestUtil;
import org.joda.time.DateTime;
import org.junit.Before;
import org.junit.Test;
import org.junit.runners.model.InitializationError;
import parquet.schema.MessageType;

import java.io.IOException;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;

/**
 * Tests registration of a Hoodie dataset into the Hive metastore via
 * {@link HoodieHiveDatasetSyncTask}: initial table creation with partitions, and a subsequent
 * schema + partition evolution.
 *
 * <p>Relies on {@link TestUtil} to stand up the test Hive/HDFS environment and to create/evolve
 * datasets on storage.
 */
public class HDroneDatasetTest {

    private HoodieHiveClient hiveClient;

    @Before
    public void setUp() throws IOException, InterruptedException {
        TestUtil.setUp();
        hiveClient = new HoodieHiveClient(TestUtil.hDroneConfiguration);
    }

    @Test
    public void testDatasetCreation() throws IOException, InitializationError {
        HoodieDatasetReference metadata = TestUtil
            .createDataset("test1", "/tmp/hdfs/DatasetSchemaTest/testSchema/", 5, "/nation.schema");
        HoodieHiveDatasetSyncTask dataset =
            HoodieHiveDatasetSyncTask.newBuilder().withReference(metadata)
                .withConfiguration(TestUtil.hDroneConfiguration).build();
        // Before the first sync: every storage partition is "new" and the Hive table is absent.
        assertEquals("There should be 5 new partitions", 5, dataset.getNewPartitions().size());
        assertEquals("There should not be any changed partitions", 0,
            dataset.getChangedPartitions().size());
        assertFalse("Table should not exist", hiveClient.checkTableExists(metadata));
        dataset.sync();

        // Rebuild the task so it re-reads metastore state after the sync.
        dataset = HoodieHiveDatasetSyncTask.newBuilder().withReference(metadata)
            .withConfiguration(TestUtil.hDroneConfiguration).build();
        assertTrue("Table should exist after sync", hiveClient.checkTableExists(metadata));
        assertEquals("After sync, There should not be any new partitions to sync", 0,
            dataset.getNewPartitions().size());
        assertEquals("After sync, There should not be any modified partitions to sync", 0,
            dataset.getChangedPartitions().size());

        assertEquals("Table Schema should have 5 fields", 5,
            hiveClient.getTableSchema(metadata).size());
    }

    @Test
    public void testDatasetEvolution() throws IOException, InitializationError {
        int initialPartitionsCount = 5;
        HoodieDatasetReference metadata = TestUtil
            .createDataset("test1", "/tmp/hdfs/DatasetSchemaTest/testSchema/",
                initialPartitionsCount, "/nation.schema");
        HoodieHiveDatasetSyncTask dataset =
            HoodieHiveDatasetSyncTask.newBuilder().withReference(metadata)
                .withConfiguration(TestUtil.hDroneConfiguration).build();
        dataset.sync();

        // Evolve the dataset on storage: a new schema version plus additional partitions.
        dataset = HoodieHiveDatasetSyncTask.newBuilder(dataset).build();
        int newSchemaVersion = 2;
        int newPartitionsCount = 2;
        TestUtil.evolveDataset(metadata, newPartitionsCount, "/nation_evolved.schema",
            DateTime.now().getMillis(), newSchemaVersion);
        dataset = HoodieHiveDatasetSyncTask.newBuilder(dataset).build();
        assertEquals("There should be " + newPartitionsCount + " partitions to be added",
            newPartitionsCount, dataset.getNewPartitions().size());
        dataset.sync();

        // After syncing the evolution, the Hive table schema should match the evolved schema.
        dataset = HoodieHiveDatasetSyncTask.newBuilder(dataset).build();
        MessageType newDatasetSchema = dataset.getSchemaSyncTask().getStorageSchema();
        MessageType expectedSchema = TestUtil.readSchema("/nation_evolved.schema");
        assertEquals("Table schema should be evolved schema", expectedSchema, newDatasetSchema);
        assertEquals("Table schema should have 6 fields", 6,
            hiveClient.getTableSchema(metadata).size());
        assertEquals("Evolved column region_key should be registered as BIGINT",
            "BIGINT", hiveClient.getTableSchema(metadata).get("region_key"));
    }

}
|
||||
Reference in New Issue
Block a user