1
0

[HUDI-1269] Make whether the failure of connect hive affects hudi ingest process configurable (#2443)

Co-authored-by: Sivabalan Narayanan <sivabala@uber.com>
This commit is contained in:
liujinhui
2021-02-25 23:09:32 +08:00
committed by GitHub
parent 617cc24ad1
commit 8c2197ae5e
6 changed files with 78 additions and 31 deletions

View File

@@ -76,6 +76,9 @@ public class HiveSyncConfig implements Serializable {
@Parameter(names = {"--auto-create-database"}, description = "Auto create hive database")
public Boolean autoCreateDatabase = true;
@Parameter(names = {"--ignore-exceptions"}, description = "Ignore hive exceptions")
public Boolean ignoreExceptions = false;
@Parameter(names = {"--skip-ro-suffix"}, description = "Skip the `_ro` suffix for Read optimized table, when registering")
public Boolean skipROSuffix = false;
@@ -130,6 +133,7 @@ public class HiveSyncConfig implements Serializable {
+ ", usePreApacheInputFormat=" + usePreApacheInputFormat
+ ", useJdbc=" + useJdbc
+ ", autoCreateDatabase=" + autoCreateDatabase
+ ", ignoreExceptions=" + ignoreExceptions
+ ", skipROSuffix=" + skipROSuffix
+ ", help=" + help
+ ", supportTimestamp=" + supportTimestamp

View File

@@ -58,56 +58,72 @@ public class HiveSyncTool extends AbstractSyncTool {
public static final String SUFFIX_READ_OPTIMIZED_TABLE = "_ro";
private final HiveSyncConfig cfg;
private final HoodieHiveClient hoodieHiveClient;
private final String snapshotTableName;
private final Option<String> roTableTableName;
private HoodieHiveClient hoodieHiveClient = null;
private String snapshotTableName = null;
private Option<String> roTableName = null;
public HiveSyncTool(HiveSyncConfig cfg, HiveConf configuration, FileSystem fs) {
super(configuration.getAllProperties(), fs);
this.hoodieHiveClient = new HoodieHiveClient(cfg, configuration, fs);
try {
this.hoodieHiveClient = new HoodieHiveClient(cfg, configuration, fs);
} catch (RuntimeException e) {
if (cfg.ignoreExceptions) {
LOG.error("Got runtime exception when hive syncing, but continuing as ignoreExceptions config is set ", e);
} else {
throw new HoodieHiveSyncException("Got runtime exception when hive syncing", e);
}
}
this.cfg = cfg;
// Set partitionFields to empty, when the NonPartitionedExtractor is used
if (NonPartitionedExtractor.class.getName().equals(cfg.partitionValueExtractorClass)) {
LOG.warn("Set partitionFields to empty, since the NonPartitionedExtractor is used");
cfg.partitionFields = new ArrayList<>();
}
switch (hoodieHiveClient.getTableType()) {
case COPY_ON_WRITE:
this.snapshotTableName = cfg.tableName;
this.roTableTableName = Option.empty();
break;
case MERGE_ON_READ:
this.snapshotTableName = cfg.tableName + SUFFIX_SNAPSHOT_TABLE;
this.roTableTableName = cfg.skipROSuffix ? Option.of(cfg.tableName) :
Option.of(cfg.tableName + SUFFIX_READ_OPTIMIZED_TABLE);
break;
default:
LOG.error("Unknown table type " + hoodieHiveClient.getTableType());
throw new InvalidTableException(hoodieHiveClient.getBasePath());
if (hoodieHiveClient != null) {
switch (hoodieHiveClient.getTableType()) {
case COPY_ON_WRITE:
this.snapshotTableName = cfg.tableName;
this.roTableName = Option.empty();
break;
case MERGE_ON_READ:
this.snapshotTableName = cfg.tableName + SUFFIX_SNAPSHOT_TABLE;
this.roTableName = cfg.skipROSuffix ? Option.of(cfg.tableName) :
Option.of(cfg.tableName + SUFFIX_READ_OPTIMIZED_TABLE);
break;
default:
LOG.error("Unknown table type " + hoodieHiveClient.getTableType());
throw new InvalidTableException(hoodieHiveClient.getBasePath());
}
}
}
@Override
public void syncHoodieTable() {
try {
switch (hoodieHiveClient.getTableType()) {
case COPY_ON_WRITE:
syncHoodieTable(snapshotTableName, false);
break;
case MERGE_ON_READ:
// sync a RO table for MOR
syncHoodieTable(roTableTableName.get(), false);
// sync a RT table for MOR
syncHoodieTable(snapshotTableName, true);
break;
default:
LOG.error("Unknown table type " + hoodieHiveClient.getTableType());
throw new InvalidTableException(hoodieHiveClient.getBasePath());
if (hoodieHiveClient != null) {
switch (hoodieHiveClient.getTableType()) {
case COPY_ON_WRITE:
syncHoodieTable(snapshotTableName, false);
break;
case MERGE_ON_READ:
// sync a RO table for MOR
syncHoodieTable(roTableName.get(), false);
// sync a RT table for MOR
syncHoodieTable(snapshotTableName, true);
break;
default:
LOG.error("Unknown table type " + hoodieHiveClient.getTableType());
throw new InvalidTableException(hoodieHiveClient.getBasePath());
}
}
} catch (RuntimeException re) {
throw new HoodieException("Got runtime exception when hive syncing " + cfg.tableName, re);
} finally {
hoodieHiveClient.close();
if (hoodieHiveClient != null) {
hoodieHiveClient.close();
}
}
}

View File

@@ -40,6 +40,7 @@ import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.MethodSource;
import java.io.IOException;
import java.net.URISyntaxException;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
@@ -613,4 +614,25 @@ public class TestHiveSyncTool {
"The last commit that was sycned should be 103");
}
@Test
public void testConnectExceptionIgnoreConfigSet() throws IOException, URISyntaxException {
HiveTestUtil.hiveSyncConfig.useJdbc = true;
String instantTime = "100";
HiveTestUtil.createCOWTable(instantTime, 5, false);
HoodieHiveClient hiveClient =
new HoodieHiveClient(HiveTestUtil.hiveSyncConfig, HiveTestUtil.getHiveConf(), HiveTestUtil.fileSystem);
assertFalse(hiveClient.doesTableExist(HiveTestUtil.hiveSyncConfig.tableName),
"Table " + HiveTestUtil.hiveSyncConfig.tableName + " should not exist initially");
// Lets do the sync
HiveSyncConfig syncToolConfig = HiveSyncConfig.copy(HiveTestUtil.hiveSyncConfig);
syncToolConfig.ignoreExceptions = true;
syncToolConfig.jdbcUrl = HiveTestUtil.hiveSyncConfig.jdbcUrl.replace("9999","9031");
HiveSyncTool tool = new HiveSyncTool(syncToolConfig, HiveTestUtil.getHiveConf(), HiveTestUtil.fileSystem);
tool.syncHoodieTable();
assertFalse(hiveClient.doesTableExist(HiveTestUtil.hiveSyncConfig.tableName),
"Table " + HiveTestUtil.hiveSyncConfig.tableName + " should not exist initially");
}
}