100 lines
4.4 KiB
Java
100 lines
4.4 KiB
Java
/*
 * Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *          http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
|
|
|
|
package com.uber.hoodie.hive;
|
|
|
|
import com.uber.hoodie.hive.client.HoodieHiveClient;
|
|
import com.uber.hoodie.hive.model.HoodieDatasetReference;
|
|
import com.uber.hoodie.hive.util.TestUtil;
|
|
import org.joda.time.DateTime;
|
|
import org.junit.Before;
|
|
import org.junit.Test;
|
|
import org.junit.runners.model.InitializationError;
|
|
import parquet.schema.MessageType;
|
|
|
|
import java.io.IOException;
|
|
|
|
import static org.junit.Assert.assertEquals;
|
|
import static org.junit.Assert.assertFalse;
|
|
import static org.junit.Assert.assertTrue;
|
|
|
|
public class HDroneDatasetTest {
|
|
private HoodieHiveClient hiveClient;
|
|
|
|
@Before
|
|
public void setUp() throws IOException, InterruptedException {
|
|
TestUtil.setUp();
|
|
hiveClient = new HoodieHiveClient(TestUtil.hDroneConfiguration);
|
|
}
|
|
|
|
@Test
|
|
public void testDatasetCreation() throws IOException, InitializationError {
|
|
HoodieDatasetReference metadata = TestUtil
|
|
.createDataset("test1", "/tmp/hdfs/DatasetSchemaTest/testSchema/", 5, "/nation.schema");
|
|
HoodieHiveDatasetSyncTask dataset =
|
|
HoodieHiveDatasetSyncTask.newBuilder().withReference(metadata)
|
|
.withConfiguration(TestUtil.hDroneConfiguration).build();
|
|
assertEquals("There should be 5 new partitions", 5, dataset.getNewPartitions().size());
|
|
assertEquals("There should not be any changed partitions", 0,
|
|
dataset.getChangedPartitions().size());
|
|
assertFalse("Table should not exist", hiveClient.checkTableExists(metadata));
|
|
dataset.sync();
|
|
|
|
dataset = HoodieHiveDatasetSyncTask.newBuilder().withReference(metadata)
|
|
.withConfiguration(TestUtil.hDroneConfiguration).build();
|
|
assertTrue("Table should exist after flush", hiveClient.checkTableExists(metadata));
|
|
assertEquals("After flush, There should not be any new partitions to flush", 0,
|
|
dataset.getNewPartitions().size());
|
|
assertEquals("After flush, There should not be any modified partitions to flush", 0,
|
|
dataset.getChangedPartitions().size());
|
|
|
|
assertEquals("Table Schema should have 5 fields", 5,
|
|
hiveClient.getTableSchema(metadata).size());
|
|
}
|
|
|
|
@Test
|
|
public void testDatasetEvolution() throws IOException, InitializationError {
|
|
int initialPartitionsCount = 5;
|
|
HoodieDatasetReference metadata = TestUtil
|
|
.createDataset("test1", "/tmp/hdfs/DatasetSchemaTest/testSchema/",
|
|
initialPartitionsCount, "/nation.schema");
|
|
HoodieHiveDatasetSyncTask dataset =
|
|
HoodieHiveDatasetSyncTask.newBuilder().withReference(metadata)
|
|
.withConfiguration(TestUtil.hDroneConfiguration).build();
|
|
dataset.sync();
|
|
|
|
dataset = HoodieHiveDatasetSyncTask.newBuilder(dataset).build();
|
|
int newSchemaversion = 2;
|
|
int newPartitionsCount = 2;
|
|
TestUtil.evolveDataset(metadata, newPartitionsCount, "/nation_evolved.schema",
|
|
DateTime.now().getMillis(), newSchemaversion);
|
|
dataset = HoodieHiveDatasetSyncTask.newBuilder(dataset).build();
|
|
assertEquals("There should be " + newPartitionsCount + " partitions to be added",
|
|
newPartitionsCount, dataset.getNewPartitions().size());
|
|
dataset.sync();
|
|
|
|
dataset = HoodieHiveDatasetSyncTask.newBuilder(dataset).build();
|
|
MessageType newDatasetSchema = dataset.getSchemaSyncTask().getStorageSchema();
|
|
MessageType expectedSchema = TestUtil.readSchema("/nation_evolved.schema");
|
|
assertEquals("Table schema should be evolved schema", expectedSchema, newDatasetSchema);
|
|
assertEquals("Table schema should have 6 fields", 6,
|
|
hiveClient.getTableSchema(metadata).size());
|
|
assertEquals("Valid Evolution should be reflected", "BIGINT",
|
|
hiveClient.getTableSchema(metadata).get("region_key"));
|
|
}
|
|
|
|
}
|