[HUDI-780] Migrate test cases to Junit 5 (#1504)
This commit is contained in:
@@ -30,55 +30,47 @@ import org.apache.parquet.schema.OriginalType;
|
||||
import org.apache.parquet.schema.PrimitiveType;
|
||||
import org.apache.parquet.schema.Types;
|
||||
import org.joda.time.DateTime;
|
||||
import org.junit.After;
|
||||
import org.junit.AfterClass;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
import org.junit.runner.RunWith;
|
||||
import org.junit.runners.Parameterized;
|
||||
import org.junit.jupiter.api.AfterAll;
|
||||
import org.junit.jupiter.api.AfterEach;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.params.ParameterizedTest;
|
||||
import org.junit.jupiter.params.provider.MethodSource;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertFalse;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertFalse;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
|
||||
@RunWith(Parameterized.class)
|
||||
public class TestHiveSyncTool {
|
||||
|
||||
// Test sync tool using both jdbc and metastore client
|
||||
private boolean useJdbc;
|
||||
|
||||
public TestHiveSyncTool(Boolean useJdbc) {
|
||||
this.useJdbc = useJdbc;
|
||||
private static Stream<Boolean> useJdbc() {
|
||||
return Stream.of(false, true);
|
||||
}
|
||||
|
||||
@Parameterized.Parameters(name = "UseJdbc")
|
||||
public static Collection<Boolean[]> data() {
|
||||
return Arrays.asList(new Boolean[][] {{false}, {true}});
|
||||
}
|
||||
|
||||
@Before
|
||||
@BeforeEach
|
||||
public void setUp() throws IOException, InterruptedException {
|
||||
TestUtil.setUp();
|
||||
}
|
||||
|
||||
@After
|
||||
@AfterEach
|
||||
public void teardown() throws IOException {
|
||||
TestUtil.clear();
|
||||
}
|
||||
|
||||
@AfterClass
|
||||
@AfterAll
|
||||
public static void cleanUpClass() {
|
||||
TestUtil.shutdown();
|
||||
}
|
||||
|
||||
/**
|
||||
* Testing converting array types to Hive field declaration strings. According to the Parquet-113 spec:
|
||||
* https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#lists
|
||||
* Testing converting array types to Hive field declaration strings.
|
||||
* <p>
|
||||
* Refer to the Parquet-113 spec: https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#lists
|
||||
*/
|
||||
@Test
|
||||
public void testSchemaConvertArray() throws IOException {
|
||||
@@ -153,44 +145,45 @@ public class TestHiveSyncTool {
|
||||
assertEquals("`map_list` ARRAY< MAP< string, int>>", schemaString);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBasicSync() throws Exception {
|
||||
TestUtil.hiveSyncConfig.useJdbc = this.useJdbc;
|
||||
@ParameterizedTest
|
||||
@MethodSource("useJdbc")
|
||||
public void testBasicSync(boolean useJdbc) throws Exception {
|
||||
TestUtil.hiveSyncConfig.useJdbc = useJdbc;
|
||||
String instantTime = "100";
|
||||
TestUtil.createCOWTable(instantTime, 5);
|
||||
HoodieHiveClient hiveClient =
|
||||
new HoodieHiveClient(TestUtil.hiveSyncConfig, TestUtil.getHiveConf(), TestUtil.fileSystem);
|
||||
assertFalse("Table " + TestUtil.hiveSyncConfig.tableName + " should not exist initially",
|
||||
hiveClient.doesTableExist(TestUtil.hiveSyncConfig.tableName));
|
||||
assertFalse(hiveClient.doesTableExist(TestUtil.hiveSyncConfig.tableName),
|
||||
"Table " + TestUtil.hiveSyncConfig.tableName + " should not exist initially");
|
||||
// Lets do the sync
|
||||
HiveSyncTool tool = new HiveSyncTool(TestUtil.hiveSyncConfig, TestUtil.getHiveConf(), TestUtil.fileSystem);
|
||||
tool.syncHoodieTable();
|
||||
assertTrue("Table " + TestUtil.hiveSyncConfig.tableName + " should exist after sync completes",
|
||||
hiveClient.doesTableExist(TestUtil.hiveSyncConfig.tableName));
|
||||
assertEquals("Hive Schema should match the table schema + partition field",
|
||||
hiveClient.getTableSchema(TestUtil.hiveSyncConfig.tableName).size(),
|
||||
hiveClient.getDataSchema().getColumns().size() + 1);
|
||||
assertEquals("Table partitions should match the number of partitions we wrote", 5,
|
||||
hiveClient.scanTablePartitions(TestUtil.hiveSyncConfig.tableName).size());
|
||||
assertEquals("The last commit that was sycned should be updated in the TBLPROPERTIES", instantTime,
|
||||
hiveClient.getLastCommitTimeSynced(TestUtil.hiveSyncConfig.tableName).get());
|
||||
assertTrue(hiveClient.doesTableExist(TestUtil.hiveSyncConfig.tableName),
|
||||
"Table " + TestUtil.hiveSyncConfig.tableName + " should exist after sync completes");
|
||||
assertEquals(hiveClient.getTableSchema(TestUtil.hiveSyncConfig.tableName).size(),
|
||||
hiveClient.getDataSchema().getColumns().size() + 1,
|
||||
"Hive Schema should match the table schema + partition field");
|
||||
assertEquals(5, hiveClient.scanTablePartitions(TestUtil.hiveSyncConfig.tableName).size(),
|
||||
"Table partitions should match the number of partitions we wrote");
|
||||
assertEquals(instantTime, hiveClient.getLastCommitTimeSynced(TestUtil.hiveSyncConfig.tableName).get(),
|
||||
"The last commit that was sycned should be updated in the TBLPROPERTIES");
|
||||
|
||||
// Adding of new partitions
|
||||
List<String> newPartition = Arrays.asList("2050/01/01");
|
||||
hiveClient.addPartitionsToTable(TestUtil.hiveSyncConfig.tableName, Arrays.asList());
|
||||
assertEquals("No new partition should be added", 5,
|
||||
hiveClient.scanTablePartitions(TestUtil.hiveSyncConfig.tableName).size());
|
||||
assertEquals(5, hiveClient.scanTablePartitions(TestUtil.hiveSyncConfig.tableName).size(),
|
||||
"No new partition should be added");
|
||||
hiveClient.addPartitionsToTable(TestUtil.hiveSyncConfig.tableName, newPartition);
|
||||
assertEquals("New partition should be added", 6,
|
||||
hiveClient.scanTablePartitions(TestUtil.hiveSyncConfig.tableName).size());
|
||||
assertEquals(6, hiveClient.scanTablePartitions(TestUtil.hiveSyncConfig.tableName).size(),
|
||||
"New partition should be added");
|
||||
|
||||
// Update partitions
|
||||
hiveClient.updatePartitionsToTable(TestUtil.hiveSyncConfig.tableName, Arrays.asList());
|
||||
assertEquals("Partition count should remain the same", 6,
|
||||
hiveClient.scanTablePartitions(TestUtil.hiveSyncConfig.tableName).size());
|
||||
hiveClient.updatePartitionsToTable(TestUtil.hiveSyncConfig.tableName, newPartition);
|
||||
assertEquals("Partition count should remain the same", 6,
|
||||
hiveClient.scanTablePartitions(TestUtil.hiveSyncConfig.tableName).size());
|
||||
assertEquals(6, hiveClient.scanTablePartitions(TestUtil.hiveSyncConfig.tableName).size(),
|
||||
"Partition count should remain the same");
|
||||
hiveClient.updatePartitionsToTable(TestUtil.hiveSyncConfig.tableName, newPartition);
|
||||
assertEquals(6, hiveClient.scanTablePartitions(TestUtil.hiveSyncConfig.tableName).size(),
|
||||
"Partition count should remain the same");
|
||||
|
||||
// Alter partitions
|
||||
// Manually change a hive partition location to check if the sync will detect
|
||||
@@ -203,23 +196,23 @@ public class TestHiveSyncTool {
|
||||
List<String> writtenPartitionsSince = hiveClient.getPartitionsWrittenToSince(Option.empty());
|
||||
writtenPartitionsSince.add(newPartition.get(0));
|
||||
List<PartitionEvent> partitionEvents = hiveClient.getPartitionEvents(hivePartitions, writtenPartitionsSince);
|
||||
assertEquals("There should be only one paritition event", 1, partitionEvents.size());
|
||||
assertEquals("The one partition event must of type UPDATE", PartitionEventType.UPDATE,
|
||||
partitionEvents.iterator().next().eventType);
|
||||
assertEquals(1, partitionEvents.size(), "There should be only one paritition event");
|
||||
assertEquals(PartitionEventType.UPDATE, partitionEvents.iterator().next().eventType,
|
||||
"The one partition event must of type UPDATE");
|
||||
|
||||
tool = new HiveSyncTool(TestUtil.hiveSyncConfig, TestUtil.getHiveConf(), TestUtil.fileSystem);
|
||||
tool.syncHoodieTable();
|
||||
// Sync should update the changed partition to correct path
|
||||
List<Partition> tablePartitions = hiveClient.scanTablePartitions(TestUtil.hiveSyncConfig.tableName);
|
||||
assertEquals("The one partition we wrote should be added to hive", 6, tablePartitions.size());
|
||||
assertEquals("The last commit that was sycned should be 100", instantTime,
|
||||
hiveClient.getLastCommitTimeSynced(TestUtil.hiveSyncConfig.tableName).get());
|
||||
|
||||
assertEquals(6, tablePartitions.size(), "The one partition we wrote should be added to hive");
|
||||
assertEquals(instantTime, hiveClient.getLastCommitTimeSynced(TestUtil.hiveSyncConfig.tableName).get(),
|
||||
"The last commit that was sycned should be 100");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSyncIncremental() throws Exception {
|
||||
TestUtil.hiveSyncConfig.useJdbc = this.useJdbc;
|
||||
@ParameterizedTest
|
||||
@MethodSource("useJdbc")
|
||||
public void testSyncIncremental(boolean useJdbc) throws Exception {
|
||||
TestUtil.hiveSyncConfig.useJdbc = useJdbc;
|
||||
String commitTime1 = "100";
|
||||
TestUtil.createCOWTable(commitTime1, 5);
|
||||
HoodieHiveClient hiveClient =
|
||||
@@ -227,10 +220,10 @@ public class TestHiveSyncTool {
|
||||
// Lets do the sync
|
||||
HiveSyncTool tool = new HiveSyncTool(TestUtil.hiveSyncConfig, TestUtil.getHiveConf(), TestUtil.fileSystem);
|
||||
tool.syncHoodieTable();
|
||||
assertEquals("Table partitions should match the number of partitions we wrote", 5,
|
||||
hiveClient.scanTablePartitions(TestUtil.hiveSyncConfig.tableName).size());
|
||||
assertEquals("The last commit that was sycned should be updated in the TBLPROPERTIES", commitTime1,
|
||||
hiveClient.getLastCommitTimeSynced(TestUtil.hiveSyncConfig.tableName).get());
|
||||
assertEquals(5, hiveClient.scanTablePartitions(TestUtil.hiveSyncConfig.tableName).size(),
|
||||
"Table partitions should match the number of partitions we wrote");
|
||||
assertEquals(commitTime1, hiveClient.getLastCommitTimeSynced(TestUtil.hiveSyncConfig.tableName).get(),
|
||||
"The last commit that was sycned should be updated in the TBLPROPERTIES");
|
||||
|
||||
// Now lets create more parititions and these are the only ones which needs to be synced
|
||||
DateTime dateTime = DateTime.now().plusDays(6);
|
||||
@@ -240,25 +233,25 @@ public class TestHiveSyncTool {
|
||||
// Lets do the sync
|
||||
hiveClient = new HoodieHiveClient(TestUtil.hiveSyncConfig, TestUtil.getHiveConf(), TestUtil.fileSystem);
|
||||
List<String> writtenPartitionsSince = hiveClient.getPartitionsWrittenToSince(Option.of(commitTime1));
|
||||
assertEquals("We should have one partition written after 100 commit", 1, writtenPartitionsSince.size());
|
||||
assertEquals(1, writtenPartitionsSince.size(), "We should have one partition written after 100 commit");
|
||||
List<Partition> hivePartitions = hiveClient.scanTablePartitions(TestUtil.hiveSyncConfig.tableName);
|
||||
List<PartitionEvent> partitionEvents = hiveClient.getPartitionEvents(hivePartitions, writtenPartitionsSince);
|
||||
assertEquals("There should be only one paritition event", 1, partitionEvents.size());
|
||||
assertEquals("The one partition event must of type ADD", PartitionEventType.ADD,
|
||||
partitionEvents.iterator().next().eventType);
|
||||
assertEquals(1, partitionEvents.size(), "There should be only one paritition event");
|
||||
assertEquals(PartitionEventType.ADD, partitionEvents.iterator().next().eventType, "The one partition event must of type ADD");
|
||||
|
||||
tool = new HiveSyncTool(TestUtil.hiveSyncConfig, TestUtil.getHiveConf(), TestUtil.fileSystem);
|
||||
tool.syncHoodieTable();
|
||||
// Sync should add the one partition
|
||||
assertEquals("The one partition we wrote should be added to hive", 6,
|
||||
hiveClient.scanTablePartitions(TestUtil.hiveSyncConfig.tableName).size());
|
||||
assertEquals("The last commit that was sycned should be 101", commitTime2,
|
||||
hiveClient.getLastCommitTimeSynced(TestUtil.hiveSyncConfig.tableName).get());
|
||||
assertEquals(6, hiveClient.scanTablePartitions(TestUtil.hiveSyncConfig.tableName).size(),
|
||||
"The one partition we wrote should be added to hive");
|
||||
assertEquals(commitTime2, hiveClient.getLastCommitTimeSynced(TestUtil.hiveSyncConfig.tableName).get(),
|
||||
"The last commit that was sycned should be 101");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSyncIncrementalWithSchemaEvolution() throws Exception {
|
||||
TestUtil.hiveSyncConfig.useJdbc = this.useJdbc;
|
||||
@ParameterizedTest
|
||||
@MethodSource("useJdbc")
|
||||
public void testSyncIncrementalWithSchemaEvolution(boolean useJdbc) throws Exception {
|
||||
TestUtil.hiveSyncConfig.useJdbc = useJdbc;
|
||||
String commitTime1 = "100";
|
||||
TestUtil.createCOWTable(commitTime1, 5);
|
||||
HoodieHiveClient hiveClient =
|
||||
@@ -278,42 +271,42 @@ public class TestHiveSyncTool {
|
||||
tool = new HiveSyncTool(TestUtil.hiveSyncConfig, TestUtil.getHiveConf(), TestUtil.fileSystem);
|
||||
tool.syncHoodieTable();
|
||||
|
||||
assertEquals("Hive Schema has evolved and should not be 3 more field", fields + 3,
|
||||
hiveClient.getTableSchema(TestUtil.hiveSyncConfig.tableName).size());
|
||||
assertEquals("Hive Schema has evolved - Field favorite_number has evolved from int to long", "BIGINT",
|
||||
hiveClient.getTableSchema(TestUtil.hiveSyncConfig.tableName).get("favorite_number"));
|
||||
assertTrue("Hive Schema has evolved - Field favorite_movie was added",
|
||||
hiveClient.getTableSchema(TestUtil.hiveSyncConfig.tableName).containsKey("favorite_movie"));
|
||||
assertEquals(fields + 3, hiveClient.getTableSchema(TestUtil.hiveSyncConfig.tableName).size(),
|
||||
"Hive Schema has evolved and should not be 3 more field");
|
||||
assertEquals("BIGINT", hiveClient.getTableSchema(TestUtil.hiveSyncConfig.tableName).get("favorite_number"),
|
||||
"Hive Schema has evolved - Field favorite_number has evolved from int to long");
|
||||
assertTrue(hiveClient.getTableSchema(TestUtil.hiveSyncConfig.tableName).containsKey("favorite_movie"),
|
||||
"Hive Schema has evolved - Field favorite_movie was added");
|
||||
|
||||
// Sync should add the one partition
|
||||
assertEquals("The one partition we wrote should be added to hive", 6,
|
||||
hiveClient.scanTablePartitions(TestUtil.hiveSyncConfig.tableName).size());
|
||||
assertEquals("The last commit that was sycned should be 101", commitTime2,
|
||||
hiveClient.getLastCommitTimeSynced(TestUtil.hiveSyncConfig.tableName).get());
|
||||
assertEquals(6, hiveClient.scanTablePartitions(TestUtil.hiveSyncConfig.tableName).size(),
|
||||
"The one partition we wrote should be added to hive");
|
||||
assertEquals(commitTime2, hiveClient.getLastCommitTimeSynced(TestUtil.hiveSyncConfig.tableName).get(),
|
||||
"The last commit that was sycned should be 101");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSyncMergeOnRead() throws Exception {
|
||||
TestUtil.hiveSyncConfig.useJdbc = this.useJdbc;
|
||||
@ParameterizedTest
|
||||
@MethodSource("useJdbc")
|
||||
public void testSyncMergeOnRead(boolean useJdbc) throws Exception {
|
||||
TestUtil.hiveSyncConfig.useJdbc = useJdbc;
|
||||
String instantTime = "100";
|
||||
String deltaCommitTime = "101";
|
||||
TestUtil.createMORTable(instantTime, deltaCommitTime, 5, true);
|
||||
|
||||
String roTableName = TestUtil.hiveSyncConfig.tableName + HiveSyncTool.SUFFIX_READ_OPTIMIZED_TABLE;
|
||||
HoodieHiveClient hiveClient = new HoodieHiveClient(TestUtil.hiveSyncConfig, TestUtil.getHiveConf(), TestUtil.fileSystem);
|
||||
assertFalse("Table " + TestUtil.hiveSyncConfig.tableName + " should not exist initially", hiveClient.doesTableExist(roTableName));
|
||||
assertFalse(hiveClient.doesTableExist(roTableName), "Table " + TestUtil.hiveSyncConfig.tableName + " should not exist initially");
|
||||
// Lets do the sync
|
||||
HiveSyncTool tool = new HiveSyncTool(TestUtil.hiveSyncConfig, TestUtil.getHiveConf(), TestUtil.fileSystem);
|
||||
tool.syncHoodieTable();
|
||||
|
||||
assertTrue("Table " + roTableName + " should exist after sync completes",
|
||||
hiveClient.doesTableExist(roTableName));
|
||||
assertEquals("Hive Schema should match the table schema + partition field", hiveClient.getTableSchema(roTableName).size(),
|
||||
SchemaTestUtil.getSimpleSchema().getFields().size() + 1);
|
||||
assertEquals("Table partitions should match the number of partitions we wrote", 5,
|
||||
hiveClient.scanTablePartitions(roTableName).size());
|
||||
assertEquals("The last commit that was sycned should be updated in the TBLPROPERTIES", deltaCommitTime,
|
||||
hiveClient.getLastCommitTimeSynced(roTableName).get());
|
||||
assertTrue(hiveClient.doesTableExist(roTableName), "Table " + roTableName + " should exist after sync completes");
|
||||
assertEquals(hiveClient.getTableSchema(roTableName).size(), SchemaTestUtil.getSimpleSchema().getFields().size() + 1,
|
||||
"Hive Schema should match the table schema + partition field");
|
||||
assertEquals(5, hiveClient.scanTablePartitions(roTableName).size(),
|
||||
"Table partitions should match the number of partitions we wrote");
|
||||
assertEquals(deltaCommitTime, hiveClient.getLastCommitTimeSynced(roTableName).get(),
|
||||
"The last commit that was sycned should be updated in the TBLPROPERTIES");
|
||||
|
||||
// Now lets create more partitions and these are the only ones which needs to be synced
|
||||
DateTime dateTime = DateTime.now().plusDays(6);
|
||||
@@ -327,17 +320,19 @@ public class TestHiveSyncTool {
|
||||
tool.syncHoodieTable();
|
||||
hiveClient = new HoodieHiveClient(TestUtil.hiveSyncConfig, TestUtil.getHiveConf(), TestUtil.fileSystem);
|
||||
|
||||
assertEquals("Hive Schema should match the evolved table schema + partition field",
|
||||
hiveClient.getTableSchema(roTableName).size(), SchemaTestUtil.getEvolvedSchema().getFields().size() + 1);
|
||||
assertEquals(hiveClient.getTableSchema(roTableName).size(), SchemaTestUtil.getEvolvedSchema().getFields().size() + 1,
|
||||
"Hive Schema should match the evolved table schema + partition field");
|
||||
// Sync should add the one partition
|
||||
assertEquals("The 2 partitions we wrote should be added to hive", 6, hiveClient.scanTablePartitions(roTableName).size());
|
||||
assertEquals("The last commit that was synced should be 103", deltaCommitTime2,
|
||||
hiveClient.getLastCommitTimeSynced(roTableName).get());
|
||||
assertEquals(6, hiveClient.scanTablePartitions(roTableName).size(),
|
||||
"The 2 partitions we wrote should be added to hive");
|
||||
assertEquals(deltaCommitTime2, hiveClient.getLastCommitTimeSynced(roTableName).get(),
|
||||
"The last commit that was synced should be 103");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSyncMergeOnReadRT() throws Exception {
|
||||
TestUtil.hiveSyncConfig.useJdbc = this.useJdbc;
|
||||
@ParameterizedTest
|
||||
@MethodSource("useJdbc")
|
||||
public void testSyncMergeOnReadRT(boolean useJdbc) throws Exception {
|
||||
TestUtil.hiveSyncConfig.useJdbc = useJdbc;
|
||||
String instantTime = "100";
|
||||
String deltaCommitTime = "101";
|
||||
String snapshotTableName = TestUtil.hiveSyncConfig.tableName + HiveSyncTool.SUFFIX_SNAPSHOT_TABLE;
|
||||
@@ -345,22 +340,24 @@ public class TestHiveSyncTool {
|
||||
HoodieHiveClient hiveClientRT =
|
||||
new HoodieHiveClient(TestUtil.hiveSyncConfig, TestUtil.getHiveConf(), TestUtil.fileSystem);
|
||||
|
||||
assertFalse("Table " + TestUtil.hiveSyncConfig.tableName + HiveSyncTool.SUFFIX_SNAPSHOT_TABLE
|
||||
+ " should not exist initially", hiveClientRT.doesTableExist(snapshotTableName));
|
||||
assertFalse(hiveClientRT.doesTableExist(snapshotTableName),
|
||||
"Table " + TestUtil.hiveSyncConfig.tableName + HiveSyncTool.SUFFIX_SNAPSHOT_TABLE
|
||||
+ " should not exist initially");
|
||||
|
||||
// Lets do the sync
|
||||
HiveSyncTool tool = new HiveSyncTool(TestUtil.hiveSyncConfig, TestUtil.getHiveConf(), TestUtil.fileSystem);
|
||||
tool.syncHoodieTable();
|
||||
|
||||
assertTrue("Table " + TestUtil.hiveSyncConfig.tableName + HiveSyncTool.SUFFIX_SNAPSHOT_TABLE
|
||||
+ " should exist after sync completes", hiveClientRT.doesTableExist(snapshotTableName));
|
||||
assertTrue(hiveClientRT.doesTableExist(snapshotTableName),
|
||||
"Table " + TestUtil.hiveSyncConfig.tableName + HiveSyncTool.SUFFIX_SNAPSHOT_TABLE
|
||||
+ " should exist after sync completes");
|
||||
|
||||
assertEquals("Hive Schema should match the table schema + partition field", hiveClientRT.getTableSchema(snapshotTableName).size(),
|
||||
SchemaTestUtil.getSimpleSchema().getFields().size() + 1);
|
||||
assertEquals("Table partitions should match the number of partitions we wrote", 5,
|
||||
hiveClientRT.scanTablePartitions(snapshotTableName).size());
|
||||
assertEquals("The last commit that was synced should be updated in the TBLPROPERTIES", deltaCommitTime,
|
||||
hiveClientRT.getLastCommitTimeSynced(snapshotTableName).get());
|
||||
assertEquals(hiveClientRT.getTableSchema(snapshotTableName).size(), SchemaTestUtil.getSimpleSchema().getFields().size() + 1,
|
||||
"Hive Schema should match the table schema + partition field");
|
||||
assertEquals(5, hiveClientRT.scanTablePartitions(snapshotTableName).size(),
|
||||
"Table partitions should match the number of partitions we wrote");
|
||||
assertEquals(deltaCommitTime, hiveClientRT.getLastCommitTimeSynced(snapshotTableName).get(),
|
||||
"The last commit that was synced should be updated in the TBLPROPERTIES");
|
||||
|
||||
// Now lets create more parititions and these are the only ones which needs to be synced
|
||||
DateTime dateTime = DateTime.now().plusDays(6);
|
||||
@@ -374,17 +371,19 @@ public class TestHiveSyncTool {
|
||||
tool.syncHoodieTable();
|
||||
hiveClientRT = new HoodieHiveClient(TestUtil.hiveSyncConfig, TestUtil.getHiveConf(), TestUtil.fileSystem);
|
||||
|
||||
assertEquals("Hive Schema should match the evolved table schema + partition field",
|
||||
hiveClientRT.getTableSchema(snapshotTableName).size(), SchemaTestUtil.getEvolvedSchema().getFields().size() + 1);
|
||||
assertEquals(hiveClientRT.getTableSchema(snapshotTableName).size(), SchemaTestUtil.getEvolvedSchema().getFields().size() + 1,
|
||||
"Hive Schema should match the evolved table schema + partition field");
|
||||
// Sync should add the one partition
|
||||
assertEquals("The 2 partitions we wrote should be added to hive", 6, hiveClientRT.scanTablePartitions(snapshotTableName).size());
|
||||
assertEquals("The last commit that was sycned should be 103", deltaCommitTime2,
|
||||
hiveClientRT.getLastCommitTimeSynced(snapshotTableName).get());
|
||||
assertEquals(6, hiveClientRT.scanTablePartitions(snapshotTableName).size(),
|
||||
"The 2 partitions we wrote should be added to hive");
|
||||
assertEquals(deltaCommitTime2, hiveClientRT.getLastCommitTimeSynced(snapshotTableName).get(),
|
||||
"The last commit that was sycned should be 103");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMultiPartitionKeySync() throws Exception {
|
||||
TestUtil.hiveSyncConfig.useJdbc = this.useJdbc;
|
||||
@ParameterizedTest
|
||||
@MethodSource("useJdbc")
|
||||
public void testMultiPartitionKeySync(boolean useJdbc) throws Exception {
|
||||
TestUtil.hiveSyncConfig.useJdbc = useJdbc;
|
||||
String instantTime = "100";
|
||||
TestUtil.createCOWTable(instantTime, 5);
|
||||
|
||||
@@ -395,46 +394,46 @@ public class TestHiveSyncTool {
|
||||
TestUtil.getCreatedTablesSet().add(hiveSyncConfig.databaseName + "." + hiveSyncConfig.tableName);
|
||||
|
||||
HoodieHiveClient hiveClient = new HoodieHiveClient(hiveSyncConfig, TestUtil.getHiveConf(), TestUtil.fileSystem);
|
||||
assertFalse("Table " + hiveSyncConfig.tableName + " should not exist initially",
|
||||
hiveClient.doesTableExist(hiveSyncConfig.tableName));
|
||||
assertFalse(hiveClient.doesTableExist(hiveSyncConfig.tableName),
|
||||
"Table " + hiveSyncConfig.tableName + " should not exist initially");
|
||||
// Lets do the sync
|
||||
HiveSyncTool tool = new HiveSyncTool(hiveSyncConfig, TestUtil.getHiveConf(), TestUtil.fileSystem);
|
||||
tool.syncHoodieTable();
|
||||
assertTrue("Table " + hiveSyncConfig.tableName + " should exist after sync completes",
|
||||
hiveClient.doesTableExist(hiveSyncConfig.tableName));
|
||||
assertEquals("Hive Schema should match the table schema + partition fields",
|
||||
hiveClient.getTableSchema(hiveSyncConfig.tableName).size(),
|
||||
hiveClient.getDataSchema().getColumns().size() + 3);
|
||||
assertEquals("Table partitions should match the number of partitions we wrote", 5,
|
||||
hiveClient.scanTablePartitions(hiveSyncConfig.tableName).size());
|
||||
assertEquals("The last commit that was sycned should be updated in the TBLPROPERTIES", instantTime,
|
||||
hiveClient.getLastCommitTimeSynced(hiveSyncConfig.tableName).get());
|
||||
assertTrue(hiveClient.doesTableExist(hiveSyncConfig.tableName),
|
||||
"Table " + hiveSyncConfig.tableName + " should exist after sync completes");
|
||||
assertEquals(hiveClient.getTableSchema(hiveSyncConfig.tableName).size(),
|
||||
hiveClient.getDataSchema().getColumns().size() + 3,
|
||||
"Hive Schema should match the table schema + partition fields");
|
||||
assertEquals(5, hiveClient.scanTablePartitions(hiveSyncConfig.tableName).size(),
|
||||
"Table partitions should match the number of partitions we wrote");
|
||||
assertEquals(instantTime, hiveClient.getLastCommitTimeSynced(hiveSyncConfig.tableName).get(),
|
||||
"The last commit that was sycned should be updated in the TBLPROPERTIES");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testReadSchemaForMOR() throws Exception {
|
||||
TestUtil.hiveSyncConfig.useJdbc = this.useJdbc;
|
||||
@ParameterizedTest
|
||||
@MethodSource("useJdbc")
|
||||
public void testReadSchemaForMOR(boolean useJdbc) throws Exception {
|
||||
TestUtil.hiveSyncConfig.useJdbc = useJdbc;
|
||||
String commitTime = "100";
|
||||
String snapshotTableName = TestUtil.hiveSyncConfig.tableName + HiveSyncTool.SUFFIX_SNAPSHOT_TABLE;
|
||||
TestUtil.createMORTable(commitTime, "", 5, false);
|
||||
HoodieHiveClient hiveClientRT =
|
||||
new HoodieHiveClient(TestUtil.hiveSyncConfig, TestUtil.getHiveConf(), TestUtil.fileSystem);
|
||||
|
||||
assertFalse("Table " + TestUtil.hiveSyncConfig.tableName + HiveSyncTool.SUFFIX_SNAPSHOT_TABLE
|
||||
+ " should not exist initially", hiveClientRT.doesTableExist(snapshotTableName));
|
||||
assertFalse(hiveClientRT.doesTableExist(snapshotTableName), "Table " + TestUtil.hiveSyncConfig.tableName + HiveSyncTool.SUFFIX_SNAPSHOT_TABLE
|
||||
+ " should not exist initially");
|
||||
|
||||
// Lets do the sync
|
||||
HiveSyncTool tool = new HiveSyncTool(TestUtil.hiveSyncConfig, TestUtil.getHiveConf(), TestUtil.fileSystem);
|
||||
tool.syncHoodieTable();
|
||||
|
||||
assertTrue("Table " + TestUtil.hiveSyncConfig.tableName + HiveSyncTool.SUFFIX_SNAPSHOT_TABLE
|
||||
+ " should exist after sync completes", hiveClientRT.doesTableExist(snapshotTableName));
|
||||
assertTrue(hiveClientRT.doesTableExist(snapshotTableName), "Table " + TestUtil.hiveSyncConfig.tableName + HiveSyncTool.SUFFIX_SNAPSHOT_TABLE
|
||||
+ " should exist after sync completes");
|
||||
|
||||
// Schema being read from compacted base files
|
||||
assertEquals("Hive Schema should match the table schema + partition field", hiveClientRT.getTableSchema(snapshotTableName).size(),
|
||||
SchemaTestUtil.getSimpleSchema().getFields().size() + 1);
|
||||
assertEquals("Table partitions should match the number of partitions we wrote", 5,
|
||||
hiveClientRT.scanTablePartitions(snapshotTableName).size());
|
||||
assertEquals(hiveClientRT.getTableSchema(snapshotTableName).size(), SchemaTestUtil.getSimpleSchema().getFields().size() + 1,
|
||||
"Hive Schema should match the table schema + partition field");
|
||||
assertEquals(5, hiveClientRT.scanTablePartitions(snapshotTableName).size(), "Table partitions should match the number of partitions we wrote");
|
||||
|
||||
// Now lets create more partitions and these are the only ones which needs to be synced
|
||||
DateTime dateTime = DateTime.now().plusDays(6);
|
||||
@@ -448,12 +447,12 @@ public class TestHiveSyncTool {
|
||||
hiveClientRT = new HoodieHiveClient(TestUtil.hiveSyncConfig, TestUtil.getHiveConf(), TestUtil.fileSystem);
|
||||
|
||||
// Schema being read from the log files
|
||||
assertEquals("Hive Schema should match the evolved table schema + partition field",
|
||||
hiveClientRT.getTableSchema(snapshotTableName).size(), SchemaTestUtil.getEvolvedSchema().getFields().size() + 1);
|
||||
assertEquals(hiveClientRT.getTableSchema(snapshotTableName).size(), SchemaTestUtil.getEvolvedSchema().getFields().size() + 1,
|
||||
"Hive Schema should match the evolved table schema + partition field");
|
||||
// Sync should add the one partition
|
||||
assertEquals("The 1 partition we wrote should be added to hive", 6, hiveClientRT.scanTablePartitions(snapshotTableName).size());
|
||||
assertEquals("The last commit that was sycned should be 103", deltaCommitTime2,
|
||||
hiveClientRT.getLastCommitTimeSynced(snapshotTableName).get());
|
||||
assertEquals(6, hiveClientRT.scanTablePartitions(snapshotTableName).size(), "The 1 partition we wrote should be added to hive");
|
||||
assertEquals(deltaCommitTime2, hiveClientRT.getLastCommitTimeSynced(snapshotTableName).get(),
|
||||
"The last commit that was sycned should be 103");
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -75,7 +75,7 @@ import java.util.Map.Entry;
|
||||
import java.util.Set;
|
||||
import java.util.UUID;
|
||||
|
||||
import static org.junit.Assert.fail;
|
||||
import static org.junit.jupiter.api.Assertions.fail;
|
||||
|
||||
@SuppressWarnings("SameParameterValue")
|
||||
public class TestUtil {
|
||||
|
||||
Reference in New Issue
Block a user