[HUDI-1003] Handle partitions correctly for syncing hudi non-partitioned table to hive (#1720)
This commit is contained in:
@@ -38,6 +38,7 @@ import org.apache.log4j.LogManager;
|
|||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
import org.apache.parquet.schema.MessageType;
|
import org.apache.parquet.schema.MessageType;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
@@ -64,6 +65,11 @@ public class HiveSyncTool {
|
|||||||
public HiveSyncTool(HiveSyncConfig cfg, HiveConf configuration, FileSystem fs) {
|
public HiveSyncTool(HiveSyncConfig cfg, HiveConf configuration, FileSystem fs) {
|
||||||
this.hoodieHiveClient = new HoodieHiveClient(cfg, configuration, fs);
|
this.hoodieHiveClient = new HoodieHiveClient(cfg, configuration, fs);
|
||||||
this.cfg = cfg;
|
this.cfg = cfg;
|
||||||
|
// Set partitionFields to empty, when the NonPartitionedExtractor is used
|
||||||
|
if (NonPartitionedExtractor.class.getName().equals(cfg.partitionValueExtractorClass)) {
|
||||||
|
LOG.warn("Set partitionFields to empty, since the NonPartitionedExtractor is used");
|
||||||
|
cfg.partitionFields = new ArrayList<>();
|
||||||
|
}
|
||||||
switch (hoodieHiveClient.getTableType()) {
|
switch (hoodieHiveClient.getTableType()) {
|
||||||
case COPY_ON_WRITE:
|
case COPY_ON_WRITE:
|
||||||
this.snapshotTableName = cfg.tableName;
|
this.snapshotTableName = cfg.tableName;
|
||||||
|
|||||||
@@ -457,6 +457,35 @@ public class TestHiveSyncTool {
|
|||||||
"The last commit that was sycned should be updated in the TBLPROPERTIES");
|
"The last commit that was sycned should be updated in the TBLPROPERTIES");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ParameterizedTest
|
||||||
|
@MethodSource("useJdbc")
|
||||||
|
public void testNonPartitionedSync(boolean useJdbc) throws Exception {
|
||||||
|
HiveTestUtil.hiveSyncConfig.useJdbc = useJdbc;
|
||||||
|
String instantTime = "100";
|
||||||
|
HiveTestUtil.createCOWTable(instantTime, 5, true);
|
||||||
|
|
||||||
|
HiveSyncConfig hiveSyncConfig = HiveSyncConfig.copy(HiveTestUtil.hiveSyncConfig);
|
||||||
|
// Set partition value extractor to NonPartitionedExtractor
|
||||||
|
hiveSyncConfig.partitionValueExtractorClass = NonPartitionedExtractor.class.getCanonicalName();
|
||||||
|
hiveSyncConfig.tableName = "non_partitioned";
|
||||||
|
hiveSyncConfig.partitionFields = Arrays.asList("year", "month", "day");
|
||||||
|
HiveTestUtil.getCreatedTablesSet().add(hiveSyncConfig.databaseName + "." + hiveSyncConfig.tableName);
|
||||||
|
|
||||||
|
HoodieHiveClient hiveClient = new HoodieHiveClient(hiveSyncConfig, HiveTestUtil.getHiveConf(), HiveTestUtil.fileSystem);
|
||||||
|
assertFalse(hiveClient.doesTableExist(hiveSyncConfig.tableName),
|
||||||
|
"Table " + hiveSyncConfig.tableName + " should not exist initially");
|
||||||
|
// Let's do the sync
|
||||||
|
HiveSyncTool tool = new HiveSyncTool(hiveSyncConfig, HiveTestUtil.getHiveConf(), HiveTestUtil.fileSystem);
|
||||||
|
tool.syncHoodieTable();
|
||||||
|
assertTrue(hiveClient.doesTableExist(hiveSyncConfig.tableName),
|
||||||
|
"Table " + hiveSyncConfig.tableName + " should exist after sync completes");
|
||||||
|
assertEquals(hiveClient.getTableSchema(hiveSyncConfig.tableName).size(),
|
||||||
|
hiveClient.getDataSchema().getColumns().size(),
|
||||||
|
"Hive Schema should match the table schema,ignoring the partition fields");
|
||||||
|
assertEquals(0, hiveClient.scanTablePartitions(hiveSyncConfig.tableName).size(),
|
||||||
|
"Table should not have partitions because of the NonPartitionedExtractor");
|
||||||
|
}
|
||||||
|
|
||||||
@ParameterizedTest
|
@ParameterizedTest
|
||||||
@MethodSource("useJdbc")
|
@MethodSource("useJdbc")
|
||||||
public void testReadSchemaForMOR(boolean useJdbc) throws Exception {
|
public void testReadSchemaForMOR(boolean useJdbc) throws Exception {
|
||||||
|
|||||||
Reference in New Issue
Block a user