1
0

[HUDI-780] Migrate test cases to Junit 5 (#1504)

This commit is contained in:
Raymond Xu
2020-04-15 12:35:01 -07:00
committed by GitHub
parent 14d4fea833
commit d65efe659d
26 changed files with 522 additions and 282 deletions

View File

@@ -30,55 +30,47 @@ import org.apache.parquet.schema.OriginalType;
import org.apache.parquet.schema.PrimitiveType;
import org.apache.parquet.schema.Types;
import org.joda.time.DateTime;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.MethodSource;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import java.util.stream.Stream;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;
@RunWith(Parameterized.class)
public class TestHiveSyncTool {
// Test sync tool using both jdbc and metastore client
private boolean useJdbc;
public TestHiveSyncTool(Boolean useJdbc) {
this.useJdbc = useJdbc;
private static Stream<Boolean> useJdbc() {
return Stream.of(false, true);
}
@Parameterized.Parameters(name = "UseJdbc")
public static Collection<Boolean[]> data() {
return Arrays.asList(new Boolean[][] {{false}, {true}});
}
@Before
@BeforeEach
public void setUp() throws IOException, InterruptedException {
TestUtil.setUp();
}
@After
@AfterEach
public void teardown() throws IOException {
TestUtil.clear();
}
@AfterClass
@AfterAll
public static void cleanUpClass() {
TestUtil.shutdown();
}
/**
* Testing converting array types to Hive field declaration strings. According to the Parquet-113 spec:
* https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#lists
* Testing converting array types to Hive field declaration strings.
* <p>
* Refer to the Parquet-113 spec: https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#lists
*/
@Test
public void testSchemaConvertArray() throws IOException {
@@ -153,44 +145,45 @@ public class TestHiveSyncTool {
assertEquals("`map_list` ARRAY< MAP< string, int>>", schemaString);
}
@Test
public void testBasicSync() throws Exception {
TestUtil.hiveSyncConfig.useJdbc = this.useJdbc;
@ParameterizedTest
@MethodSource("useJdbc")
public void testBasicSync(boolean useJdbc) throws Exception {
TestUtil.hiveSyncConfig.useJdbc = useJdbc;
String instantTime = "100";
TestUtil.createCOWTable(instantTime, 5);
HoodieHiveClient hiveClient =
new HoodieHiveClient(TestUtil.hiveSyncConfig, TestUtil.getHiveConf(), TestUtil.fileSystem);
assertFalse("Table " + TestUtil.hiveSyncConfig.tableName + " should not exist initially",
hiveClient.doesTableExist(TestUtil.hiveSyncConfig.tableName));
assertFalse(hiveClient.doesTableExist(TestUtil.hiveSyncConfig.tableName),
"Table " + TestUtil.hiveSyncConfig.tableName + " should not exist initially");
// Lets do the sync
HiveSyncTool tool = new HiveSyncTool(TestUtil.hiveSyncConfig, TestUtil.getHiveConf(), TestUtil.fileSystem);
tool.syncHoodieTable();
assertTrue("Table " + TestUtil.hiveSyncConfig.tableName + " should exist after sync completes",
hiveClient.doesTableExist(TestUtil.hiveSyncConfig.tableName));
assertEquals("Hive Schema should match the table schema + partition field",
hiveClient.getTableSchema(TestUtil.hiveSyncConfig.tableName).size(),
hiveClient.getDataSchema().getColumns().size() + 1);
assertEquals("Table partitions should match the number of partitions we wrote", 5,
hiveClient.scanTablePartitions(TestUtil.hiveSyncConfig.tableName).size());
assertEquals("The last commit that was sycned should be updated in the TBLPROPERTIES", instantTime,
hiveClient.getLastCommitTimeSynced(TestUtil.hiveSyncConfig.tableName).get());
assertTrue(hiveClient.doesTableExist(TestUtil.hiveSyncConfig.tableName),
"Table " + TestUtil.hiveSyncConfig.tableName + " should exist after sync completes");
assertEquals(hiveClient.getTableSchema(TestUtil.hiveSyncConfig.tableName).size(),
hiveClient.getDataSchema().getColumns().size() + 1,
"Hive Schema should match the table schema + partition field");
assertEquals(5, hiveClient.scanTablePartitions(TestUtil.hiveSyncConfig.tableName).size(),
"Table partitions should match the number of partitions we wrote");
assertEquals(instantTime, hiveClient.getLastCommitTimeSynced(TestUtil.hiveSyncConfig.tableName).get(),
"The last commit that was sycned should be updated in the TBLPROPERTIES");
// Adding of new partitions
List<String> newPartition = Arrays.asList("2050/01/01");
hiveClient.addPartitionsToTable(TestUtil.hiveSyncConfig.tableName, Arrays.asList());
assertEquals("No new partition should be added", 5,
hiveClient.scanTablePartitions(TestUtil.hiveSyncConfig.tableName).size());
assertEquals(5, hiveClient.scanTablePartitions(TestUtil.hiveSyncConfig.tableName).size(),
"No new partition should be added");
hiveClient.addPartitionsToTable(TestUtil.hiveSyncConfig.tableName, newPartition);
assertEquals("New partition should be added", 6,
hiveClient.scanTablePartitions(TestUtil.hiveSyncConfig.tableName).size());
assertEquals(6, hiveClient.scanTablePartitions(TestUtil.hiveSyncConfig.tableName).size(),
"New partition should be added");
// Update partitions
hiveClient.updatePartitionsToTable(TestUtil.hiveSyncConfig.tableName, Arrays.asList());
assertEquals("Partition count should remain the same", 6,
hiveClient.scanTablePartitions(TestUtil.hiveSyncConfig.tableName).size());
hiveClient.updatePartitionsToTable(TestUtil.hiveSyncConfig.tableName, newPartition);
assertEquals("Partition count should remain the same", 6,
hiveClient.scanTablePartitions(TestUtil.hiveSyncConfig.tableName).size());
assertEquals(6, hiveClient.scanTablePartitions(TestUtil.hiveSyncConfig.tableName).size(),
"Partition count should remain the same");
hiveClient.updatePartitionsToTable(TestUtil.hiveSyncConfig.tableName, newPartition);
assertEquals(6, hiveClient.scanTablePartitions(TestUtil.hiveSyncConfig.tableName).size(),
"Partition count should remain the same");
// Alter partitions
// Manually change a hive partition location to check if the sync will detect
@@ -203,23 +196,23 @@ public class TestHiveSyncTool {
List<String> writtenPartitionsSince = hiveClient.getPartitionsWrittenToSince(Option.empty());
writtenPartitionsSince.add(newPartition.get(0));
List<PartitionEvent> partitionEvents = hiveClient.getPartitionEvents(hivePartitions, writtenPartitionsSince);
assertEquals("There should be only one paritition event", 1, partitionEvents.size());
assertEquals("The one partition event must of type UPDATE", PartitionEventType.UPDATE,
partitionEvents.iterator().next().eventType);
assertEquals(1, partitionEvents.size(), "There should be only one paritition event");
assertEquals(PartitionEventType.UPDATE, partitionEvents.iterator().next().eventType,
"The one partition event must of type UPDATE");
tool = new HiveSyncTool(TestUtil.hiveSyncConfig, TestUtil.getHiveConf(), TestUtil.fileSystem);
tool.syncHoodieTable();
// Sync should update the changed partition to correct path
List<Partition> tablePartitions = hiveClient.scanTablePartitions(TestUtil.hiveSyncConfig.tableName);
assertEquals("The one partition we wrote should be added to hive", 6, tablePartitions.size());
assertEquals("The last commit that was sycned should be 100", instantTime,
hiveClient.getLastCommitTimeSynced(TestUtil.hiveSyncConfig.tableName).get());
assertEquals(6, tablePartitions.size(), "The one partition we wrote should be added to hive");
assertEquals(instantTime, hiveClient.getLastCommitTimeSynced(TestUtil.hiveSyncConfig.tableName).get(),
"The last commit that was sycned should be 100");
}
@Test
public void testSyncIncremental() throws Exception {
TestUtil.hiveSyncConfig.useJdbc = this.useJdbc;
@ParameterizedTest
@MethodSource("useJdbc")
public void testSyncIncremental(boolean useJdbc) throws Exception {
TestUtil.hiveSyncConfig.useJdbc = useJdbc;
String commitTime1 = "100";
TestUtil.createCOWTable(commitTime1, 5);
HoodieHiveClient hiveClient =
@@ -227,10 +220,10 @@ public class TestHiveSyncTool {
// Lets do the sync
HiveSyncTool tool = new HiveSyncTool(TestUtil.hiveSyncConfig, TestUtil.getHiveConf(), TestUtil.fileSystem);
tool.syncHoodieTable();
assertEquals("Table partitions should match the number of partitions we wrote", 5,
hiveClient.scanTablePartitions(TestUtil.hiveSyncConfig.tableName).size());
assertEquals("The last commit that was sycned should be updated in the TBLPROPERTIES", commitTime1,
hiveClient.getLastCommitTimeSynced(TestUtil.hiveSyncConfig.tableName).get());
assertEquals(5, hiveClient.scanTablePartitions(TestUtil.hiveSyncConfig.tableName).size(),
"Table partitions should match the number of partitions we wrote");
assertEquals(commitTime1, hiveClient.getLastCommitTimeSynced(TestUtil.hiveSyncConfig.tableName).get(),
"The last commit that was sycned should be updated in the TBLPROPERTIES");
// Now lets create more parititions and these are the only ones which needs to be synced
DateTime dateTime = DateTime.now().plusDays(6);
@@ -240,25 +233,25 @@ public class TestHiveSyncTool {
// Lets do the sync
hiveClient = new HoodieHiveClient(TestUtil.hiveSyncConfig, TestUtil.getHiveConf(), TestUtil.fileSystem);
List<String> writtenPartitionsSince = hiveClient.getPartitionsWrittenToSince(Option.of(commitTime1));
assertEquals("We should have one partition written after 100 commit", 1, writtenPartitionsSince.size());
assertEquals(1, writtenPartitionsSince.size(), "We should have one partition written after 100 commit");
List<Partition> hivePartitions = hiveClient.scanTablePartitions(TestUtil.hiveSyncConfig.tableName);
List<PartitionEvent> partitionEvents = hiveClient.getPartitionEvents(hivePartitions, writtenPartitionsSince);
assertEquals("There should be only one paritition event", 1, partitionEvents.size());
assertEquals("The one partition event must of type ADD", PartitionEventType.ADD,
partitionEvents.iterator().next().eventType);
assertEquals(1, partitionEvents.size(), "There should be only one paritition event");
assertEquals(PartitionEventType.ADD, partitionEvents.iterator().next().eventType, "The one partition event must of type ADD");
tool = new HiveSyncTool(TestUtil.hiveSyncConfig, TestUtil.getHiveConf(), TestUtil.fileSystem);
tool.syncHoodieTable();
// Sync should add the one partition
assertEquals("The one partition we wrote should be added to hive", 6,
hiveClient.scanTablePartitions(TestUtil.hiveSyncConfig.tableName).size());
assertEquals("The last commit that was sycned should be 101", commitTime2,
hiveClient.getLastCommitTimeSynced(TestUtil.hiveSyncConfig.tableName).get());
assertEquals(6, hiveClient.scanTablePartitions(TestUtil.hiveSyncConfig.tableName).size(),
"The one partition we wrote should be added to hive");
assertEquals(commitTime2, hiveClient.getLastCommitTimeSynced(TestUtil.hiveSyncConfig.tableName).get(),
"The last commit that was sycned should be 101");
}
@Test
public void testSyncIncrementalWithSchemaEvolution() throws Exception {
TestUtil.hiveSyncConfig.useJdbc = this.useJdbc;
@ParameterizedTest
@MethodSource("useJdbc")
public void testSyncIncrementalWithSchemaEvolution(boolean useJdbc) throws Exception {
TestUtil.hiveSyncConfig.useJdbc = useJdbc;
String commitTime1 = "100";
TestUtil.createCOWTable(commitTime1, 5);
HoodieHiveClient hiveClient =
@@ -278,42 +271,42 @@ public class TestHiveSyncTool {
tool = new HiveSyncTool(TestUtil.hiveSyncConfig, TestUtil.getHiveConf(), TestUtil.fileSystem);
tool.syncHoodieTable();
assertEquals("Hive Schema has evolved and should not be 3 more field", fields + 3,
hiveClient.getTableSchema(TestUtil.hiveSyncConfig.tableName).size());
assertEquals("Hive Schema has evolved - Field favorite_number has evolved from int to long", "BIGINT",
hiveClient.getTableSchema(TestUtil.hiveSyncConfig.tableName).get("favorite_number"));
assertTrue("Hive Schema has evolved - Field favorite_movie was added",
hiveClient.getTableSchema(TestUtil.hiveSyncConfig.tableName).containsKey("favorite_movie"));
assertEquals(fields + 3, hiveClient.getTableSchema(TestUtil.hiveSyncConfig.tableName).size(),
"Hive Schema has evolved and should not be 3 more field");
assertEquals("BIGINT", hiveClient.getTableSchema(TestUtil.hiveSyncConfig.tableName).get("favorite_number"),
"Hive Schema has evolved - Field favorite_number has evolved from int to long");
assertTrue(hiveClient.getTableSchema(TestUtil.hiveSyncConfig.tableName).containsKey("favorite_movie"),
"Hive Schema has evolved - Field favorite_movie was added");
// Sync should add the one partition
assertEquals("The one partition we wrote should be added to hive", 6,
hiveClient.scanTablePartitions(TestUtil.hiveSyncConfig.tableName).size());
assertEquals("The last commit that was sycned should be 101", commitTime2,
hiveClient.getLastCommitTimeSynced(TestUtil.hiveSyncConfig.tableName).get());
assertEquals(6, hiveClient.scanTablePartitions(TestUtil.hiveSyncConfig.tableName).size(),
"The one partition we wrote should be added to hive");
assertEquals(commitTime2, hiveClient.getLastCommitTimeSynced(TestUtil.hiveSyncConfig.tableName).get(),
"The last commit that was sycned should be 101");
}
@Test
public void testSyncMergeOnRead() throws Exception {
TestUtil.hiveSyncConfig.useJdbc = this.useJdbc;
@ParameterizedTest
@MethodSource("useJdbc")
public void testSyncMergeOnRead(boolean useJdbc) throws Exception {
TestUtil.hiveSyncConfig.useJdbc = useJdbc;
String instantTime = "100";
String deltaCommitTime = "101";
TestUtil.createMORTable(instantTime, deltaCommitTime, 5, true);
String roTableName = TestUtil.hiveSyncConfig.tableName + HiveSyncTool.SUFFIX_READ_OPTIMIZED_TABLE;
HoodieHiveClient hiveClient = new HoodieHiveClient(TestUtil.hiveSyncConfig, TestUtil.getHiveConf(), TestUtil.fileSystem);
assertFalse("Table " + TestUtil.hiveSyncConfig.tableName + " should not exist initially", hiveClient.doesTableExist(roTableName));
assertFalse(hiveClient.doesTableExist(roTableName), "Table " + TestUtil.hiveSyncConfig.tableName + " should not exist initially");
// Lets do the sync
HiveSyncTool tool = new HiveSyncTool(TestUtil.hiveSyncConfig, TestUtil.getHiveConf(), TestUtil.fileSystem);
tool.syncHoodieTable();
assertTrue("Table " + roTableName + " should exist after sync completes",
hiveClient.doesTableExist(roTableName));
assertEquals("Hive Schema should match the table schema + partition field", hiveClient.getTableSchema(roTableName).size(),
SchemaTestUtil.getSimpleSchema().getFields().size() + 1);
assertEquals("Table partitions should match the number of partitions we wrote", 5,
hiveClient.scanTablePartitions(roTableName).size());
assertEquals("The last commit that was sycned should be updated in the TBLPROPERTIES", deltaCommitTime,
hiveClient.getLastCommitTimeSynced(roTableName).get());
assertTrue(hiveClient.doesTableExist(roTableName), "Table " + roTableName + " should exist after sync completes");
assertEquals(hiveClient.getTableSchema(roTableName).size(), SchemaTestUtil.getSimpleSchema().getFields().size() + 1,
"Hive Schema should match the table schema + partition field");
assertEquals(5, hiveClient.scanTablePartitions(roTableName).size(),
"Table partitions should match the number of partitions we wrote");
assertEquals(deltaCommitTime, hiveClient.getLastCommitTimeSynced(roTableName).get(),
"The last commit that was sycned should be updated in the TBLPROPERTIES");
// Now lets create more partitions and these are the only ones which needs to be synced
DateTime dateTime = DateTime.now().plusDays(6);
@@ -327,17 +320,19 @@ public class TestHiveSyncTool {
tool.syncHoodieTable();
hiveClient = new HoodieHiveClient(TestUtil.hiveSyncConfig, TestUtil.getHiveConf(), TestUtil.fileSystem);
assertEquals("Hive Schema should match the evolved table schema + partition field",
hiveClient.getTableSchema(roTableName).size(), SchemaTestUtil.getEvolvedSchema().getFields().size() + 1);
assertEquals(hiveClient.getTableSchema(roTableName).size(), SchemaTestUtil.getEvolvedSchema().getFields().size() + 1,
"Hive Schema should match the evolved table schema + partition field");
// Sync should add the one partition
assertEquals("The 2 partitions we wrote should be added to hive", 6, hiveClient.scanTablePartitions(roTableName).size());
assertEquals("The last commit that was synced should be 103", deltaCommitTime2,
hiveClient.getLastCommitTimeSynced(roTableName).get());
assertEquals(6, hiveClient.scanTablePartitions(roTableName).size(),
"The 2 partitions we wrote should be added to hive");
assertEquals(deltaCommitTime2, hiveClient.getLastCommitTimeSynced(roTableName).get(),
"The last commit that was synced should be 103");
}
@Test
public void testSyncMergeOnReadRT() throws Exception {
TestUtil.hiveSyncConfig.useJdbc = this.useJdbc;
@ParameterizedTest
@MethodSource("useJdbc")
public void testSyncMergeOnReadRT(boolean useJdbc) throws Exception {
TestUtil.hiveSyncConfig.useJdbc = useJdbc;
String instantTime = "100";
String deltaCommitTime = "101";
String snapshotTableName = TestUtil.hiveSyncConfig.tableName + HiveSyncTool.SUFFIX_SNAPSHOT_TABLE;
@@ -345,22 +340,24 @@ public class TestHiveSyncTool {
HoodieHiveClient hiveClientRT =
new HoodieHiveClient(TestUtil.hiveSyncConfig, TestUtil.getHiveConf(), TestUtil.fileSystem);
assertFalse("Table " + TestUtil.hiveSyncConfig.tableName + HiveSyncTool.SUFFIX_SNAPSHOT_TABLE
+ " should not exist initially", hiveClientRT.doesTableExist(snapshotTableName));
assertFalse(hiveClientRT.doesTableExist(snapshotTableName),
"Table " + TestUtil.hiveSyncConfig.tableName + HiveSyncTool.SUFFIX_SNAPSHOT_TABLE
+ " should not exist initially");
// Lets do the sync
HiveSyncTool tool = new HiveSyncTool(TestUtil.hiveSyncConfig, TestUtil.getHiveConf(), TestUtil.fileSystem);
tool.syncHoodieTable();
assertTrue("Table " + TestUtil.hiveSyncConfig.tableName + HiveSyncTool.SUFFIX_SNAPSHOT_TABLE
+ " should exist after sync completes", hiveClientRT.doesTableExist(snapshotTableName));
assertTrue(hiveClientRT.doesTableExist(snapshotTableName),
"Table " + TestUtil.hiveSyncConfig.tableName + HiveSyncTool.SUFFIX_SNAPSHOT_TABLE
+ " should exist after sync completes");
assertEquals("Hive Schema should match the table schema + partition field", hiveClientRT.getTableSchema(snapshotTableName).size(),
SchemaTestUtil.getSimpleSchema().getFields().size() + 1);
assertEquals("Table partitions should match the number of partitions we wrote", 5,
hiveClientRT.scanTablePartitions(snapshotTableName).size());
assertEquals("The last commit that was synced should be updated in the TBLPROPERTIES", deltaCommitTime,
hiveClientRT.getLastCommitTimeSynced(snapshotTableName).get());
assertEquals(hiveClientRT.getTableSchema(snapshotTableName).size(), SchemaTestUtil.getSimpleSchema().getFields().size() + 1,
"Hive Schema should match the table schema + partition field");
assertEquals(5, hiveClientRT.scanTablePartitions(snapshotTableName).size(),
"Table partitions should match the number of partitions we wrote");
assertEquals(deltaCommitTime, hiveClientRT.getLastCommitTimeSynced(snapshotTableName).get(),
"The last commit that was synced should be updated in the TBLPROPERTIES");
// Now lets create more parititions and these are the only ones which needs to be synced
DateTime dateTime = DateTime.now().plusDays(6);
@@ -374,17 +371,19 @@ public class TestHiveSyncTool {
tool.syncHoodieTable();
hiveClientRT = new HoodieHiveClient(TestUtil.hiveSyncConfig, TestUtil.getHiveConf(), TestUtil.fileSystem);
assertEquals("Hive Schema should match the evolved table schema + partition field",
hiveClientRT.getTableSchema(snapshotTableName).size(), SchemaTestUtil.getEvolvedSchema().getFields().size() + 1);
assertEquals(hiveClientRT.getTableSchema(snapshotTableName).size(), SchemaTestUtil.getEvolvedSchema().getFields().size() + 1,
"Hive Schema should match the evolved table schema + partition field");
// Sync should add the one partition
assertEquals("The 2 partitions we wrote should be added to hive", 6, hiveClientRT.scanTablePartitions(snapshotTableName).size());
assertEquals("The last commit that was sycned should be 103", deltaCommitTime2,
hiveClientRT.getLastCommitTimeSynced(snapshotTableName).get());
assertEquals(6, hiveClientRT.scanTablePartitions(snapshotTableName).size(),
"The 2 partitions we wrote should be added to hive");
assertEquals(deltaCommitTime2, hiveClientRT.getLastCommitTimeSynced(snapshotTableName).get(),
"The last commit that was sycned should be 103");
}
@Test
public void testMultiPartitionKeySync() throws Exception {
TestUtil.hiveSyncConfig.useJdbc = this.useJdbc;
@ParameterizedTest
@MethodSource("useJdbc")
public void testMultiPartitionKeySync(boolean useJdbc) throws Exception {
TestUtil.hiveSyncConfig.useJdbc = useJdbc;
String instantTime = "100";
TestUtil.createCOWTable(instantTime, 5);
@@ -395,46 +394,46 @@ public class TestHiveSyncTool {
TestUtil.getCreatedTablesSet().add(hiveSyncConfig.databaseName + "." + hiveSyncConfig.tableName);
HoodieHiveClient hiveClient = new HoodieHiveClient(hiveSyncConfig, TestUtil.getHiveConf(), TestUtil.fileSystem);
assertFalse("Table " + hiveSyncConfig.tableName + " should not exist initially",
hiveClient.doesTableExist(hiveSyncConfig.tableName));
assertFalse(hiveClient.doesTableExist(hiveSyncConfig.tableName),
"Table " + hiveSyncConfig.tableName + " should not exist initially");
// Lets do the sync
HiveSyncTool tool = new HiveSyncTool(hiveSyncConfig, TestUtil.getHiveConf(), TestUtil.fileSystem);
tool.syncHoodieTable();
assertTrue("Table " + hiveSyncConfig.tableName + " should exist after sync completes",
hiveClient.doesTableExist(hiveSyncConfig.tableName));
assertEquals("Hive Schema should match the table schema + partition fields",
hiveClient.getTableSchema(hiveSyncConfig.tableName).size(),
hiveClient.getDataSchema().getColumns().size() + 3);
assertEquals("Table partitions should match the number of partitions we wrote", 5,
hiveClient.scanTablePartitions(hiveSyncConfig.tableName).size());
assertEquals("The last commit that was sycned should be updated in the TBLPROPERTIES", instantTime,
hiveClient.getLastCommitTimeSynced(hiveSyncConfig.tableName).get());
assertTrue(hiveClient.doesTableExist(hiveSyncConfig.tableName),
"Table " + hiveSyncConfig.tableName + " should exist after sync completes");
assertEquals(hiveClient.getTableSchema(hiveSyncConfig.tableName).size(),
hiveClient.getDataSchema().getColumns().size() + 3,
"Hive Schema should match the table schema + partition fields");
assertEquals(5, hiveClient.scanTablePartitions(hiveSyncConfig.tableName).size(),
"Table partitions should match the number of partitions we wrote");
assertEquals(instantTime, hiveClient.getLastCommitTimeSynced(hiveSyncConfig.tableName).get(),
"The last commit that was sycned should be updated in the TBLPROPERTIES");
}
@Test
public void testReadSchemaForMOR() throws Exception {
TestUtil.hiveSyncConfig.useJdbc = this.useJdbc;
@ParameterizedTest
@MethodSource("useJdbc")
public void testReadSchemaForMOR(boolean useJdbc) throws Exception {
TestUtil.hiveSyncConfig.useJdbc = useJdbc;
String commitTime = "100";
String snapshotTableName = TestUtil.hiveSyncConfig.tableName + HiveSyncTool.SUFFIX_SNAPSHOT_TABLE;
TestUtil.createMORTable(commitTime, "", 5, false);
HoodieHiveClient hiveClientRT =
new HoodieHiveClient(TestUtil.hiveSyncConfig, TestUtil.getHiveConf(), TestUtil.fileSystem);
assertFalse("Table " + TestUtil.hiveSyncConfig.tableName + HiveSyncTool.SUFFIX_SNAPSHOT_TABLE
+ " should not exist initially", hiveClientRT.doesTableExist(snapshotTableName));
assertFalse(hiveClientRT.doesTableExist(snapshotTableName), "Table " + TestUtil.hiveSyncConfig.tableName + HiveSyncTool.SUFFIX_SNAPSHOT_TABLE
+ " should not exist initially");
// Lets do the sync
HiveSyncTool tool = new HiveSyncTool(TestUtil.hiveSyncConfig, TestUtil.getHiveConf(), TestUtil.fileSystem);
tool.syncHoodieTable();
assertTrue("Table " + TestUtil.hiveSyncConfig.tableName + HiveSyncTool.SUFFIX_SNAPSHOT_TABLE
+ " should exist after sync completes", hiveClientRT.doesTableExist(snapshotTableName));
assertTrue(hiveClientRT.doesTableExist(snapshotTableName), "Table " + TestUtil.hiveSyncConfig.tableName + HiveSyncTool.SUFFIX_SNAPSHOT_TABLE
+ " should exist after sync completes");
// Schema being read from compacted base files
assertEquals("Hive Schema should match the table schema + partition field", hiveClientRT.getTableSchema(snapshotTableName).size(),
SchemaTestUtil.getSimpleSchema().getFields().size() + 1);
assertEquals("Table partitions should match the number of partitions we wrote", 5,
hiveClientRT.scanTablePartitions(snapshotTableName).size());
assertEquals(hiveClientRT.getTableSchema(snapshotTableName).size(), SchemaTestUtil.getSimpleSchema().getFields().size() + 1,
"Hive Schema should match the table schema + partition field");
assertEquals(5, hiveClientRT.scanTablePartitions(snapshotTableName).size(), "Table partitions should match the number of partitions we wrote");
// Now lets create more partitions and these are the only ones which needs to be synced
DateTime dateTime = DateTime.now().plusDays(6);
@@ -448,12 +447,12 @@ public class TestHiveSyncTool {
hiveClientRT = new HoodieHiveClient(TestUtil.hiveSyncConfig, TestUtil.getHiveConf(), TestUtil.fileSystem);
// Schema being read from the log files
assertEquals("Hive Schema should match the evolved table schema + partition field",
hiveClientRT.getTableSchema(snapshotTableName).size(), SchemaTestUtil.getEvolvedSchema().getFields().size() + 1);
assertEquals(hiveClientRT.getTableSchema(snapshotTableName).size(), SchemaTestUtil.getEvolvedSchema().getFields().size() + 1,
"Hive Schema should match the evolved table schema + partition field");
// Sync should add the one partition
assertEquals("The 1 partition we wrote should be added to hive", 6, hiveClientRT.scanTablePartitions(snapshotTableName).size());
assertEquals("The last commit that was sycned should be 103", deltaCommitTime2,
hiveClientRT.getLastCommitTimeSynced(snapshotTableName).get());
assertEquals(6, hiveClientRT.scanTablePartitions(snapshotTableName).size(), "The 1 partition we wrote should be added to hive");
assertEquals(deltaCommitTime2, hiveClientRT.getLastCommitTimeSynced(snapshotTableName).get(),
"The last commit that was sycned should be 103");
}
}

View File

@@ -75,7 +75,7 @@ import java.util.Map.Entry;
import java.util.Set;
import java.util.UUID;
import static org.junit.Assert.fail;
import static org.junit.jupiter.api.Assertions.fail;
@SuppressWarnings("SameParameterValue")
public class TestUtil {