1
0

[HUDI-2795] Add mechanism to safely update, delete and recover table properties (#4038)

* [HUDI-2795] Add mechanism to safely update, delete and recover table properties

  - Fail-safe mechanism that lets queries succeed off a backup file
  - Readers who are not upgraded to this version of code will just fail until recovery is done.
  - Added unit tests that exercise all these scenarios.
  - Adding CLI support for recovery and update to the table command.
  - [Pending] Add some hash-based verification to detect any rare partial writes on HDFS

* Fixing upgrade/downgrade infrastructure to use the new update method
This commit is contained in:
vinoth chandar
2021-11-20 08:07:40 -08:00
committed by GitHub
parent f4b974ac7b
commit ae0c67d9fc
7 changed files with 337 additions and 73 deletions

View File

@@ -23,21 +23,16 @@ import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.common.table.HoodieTableConfig;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.HoodieTableVersion;
import org.apache.hudi.common.util.FileIOUtils;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.exception.HoodieUpgradeDowngradeException;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import java.io.IOException;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
import java.util.Properties;
/**
* Helper class to assist in upgrading/downgrading Hoodie when there is a version change.
@@ -107,69 +102,38 @@ public class UpgradeDowngrade {
* @param instantTime current instant time that should not be touched.
*/
public void run(HoodieTableVersion toVersion, String instantTime) {
try {
// Fetch version from property file and current version
HoodieTableVersion fromVersion = metaClient.getTableConfig().getTableVersion();
if (!needsUpgradeOrDowngrade(toVersion)) {
return;
}
if (fs.exists(updatedPropsFilePath)) {
// this can be left over .updated file from a failed attempt before. Many cases exist here.
// a) We failed while writing the .updated file and its content is partial (e.g. hdfs)
// b) We failed without renaming the file to hoodie.properties. We will re-attempt everything now anyway
// c) rename() is not atomic in cloud stores. so hoodie.properties is fine, but we failed before deleting the .updated file
// All cases, it simply suffices to delete the file and proceed.
LOG.info("Deleting existing .updated file with content :" + FileIOUtils.readAsUTFString(fs.open(updatedPropsFilePath)));
fs.delete(updatedPropsFilePath, false);
}
// Perform the actual upgrade/downgrade; this has to be idempotent, for now.
LOG.info("Attempting to move table from version " + fromVersion + " to " + toVersion);
Map<ConfigProperty, String> tableProps = new HashMap<>();
if (fromVersion.versionCode() < toVersion.versionCode()) {
// upgrade
while (fromVersion.versionCode() < toVersion.versionCode()) {
HoodieTableVersion nextVersion = HoodieTableVersion.versionFromCode(fromVersion.versionCode() + 1);
tableProps.putAll(upgrade(fromVersion, nextVersion, instantTime));
fromVersion = nextVersion;
}
} else {
// downgrade
while (fromVersion.versionCode() > toVersion.versionCode()) {
HoodieTableVersion prevVersion = HoodieTableVersion.versionFromCode(fromVersion.versionCode() - 1);
tableProps.putAll(downgrade(fromVersion, prevVersion, instantTime));
fromVersion = prevVersion;
}
}
// Write out the current version in hoodie.properties.updated file
for (Map.Entry<ConfigProperty, String> entry : tableProps.entrySet()) {
metaClient.getTableConfig().setValue(entry.getKey(), entry.getValue());
}
metaClient.getTableConfig().setTableVersion(toVersion);
createUpdatedFile(metaClient.getTableConfig().getProps());
// fs.rename behaves differently across file systems when propsFilePath already exists:
// a) for hdfs: fs.rename will not replace propsFilePath, but just returns false
// b) for localfs: fs.rename will replace propsFilePath, and returns true
// c) for aliyun ossfs: fs.rename will throw FileAlreadyExistsException
// so we should delete the old propsFilePath first. Also, upgrade and downgrade are idempotent.
if (fs.exists(propsFilePath)) {
fs.delete(propsFilePath, false);
}
// Rename the .updated file to hoodie.properties. This is atomic in hdfs, but not in cloud stores.
// But as long as this does not leave a partial hoodie.properties file, we are okay.
fs.rename(updatedPropsFilePath, propsFilePath);
} catch (IOException e) {
throw new HoodieUpgradeDowngradeException("Error during upgrade/downgrade to version:" + toVersion, e);
// Fetch version from property file and current version
HoodieTableVersion fromVersion = metaClient.getTableConfig().getTableVersion();
if (!needsUpgradeOrDowngrade(toVersion)) {
return;
}
}
private void createUpdatedFile(Properties props) throws IOException {
try (FSDataOutputStream outputStream = fs.create(updatedPropsFilePath)) {
props.store(outputStream, "Properties saved on " + new Date(System.currentTimeMillis()));
// Perform the actual upgrade/downgrade; this has to be idempotent, for now.
LOG.info("Attempting to move table from version " + fromVersion + " to " + toVersion);
Map<ConfigProperty, String> tableProps = new HashMap<>();
if (fromVersion.versionCode() < toVersion.versionCode()) {
// upgrade
while (fromVersion.versionCode() < toVersion.versionCode()) {
HoodieTableVersion nextVersion = HoodieTableVersion.versionFromCode(fromVersion.versionCode() + 1);
tableProps.putAll(upgrade(fromVersion, nextVersion, instantTime));
fromVersion = nextVersion;
}
} else {
// downgrade
while (fromVersion.versionCode() > toVersion.versionCode()) {
HoodieTableVersion prevVersion = HoodieTableVersion.versionFromCode(fromVersion.versionCode() - 1);
tableProps.putAll(downgrade(fromVersion, prevVersion, instantTime));
fromVersion = prevVersion;
}
}
// Write out the current version in hoodie.properties.updated file
for (Map.Entry<ConfigProperty, String> entry : tableProps.entrySet()) {
metaClient.getTableConfig().setValue(entry.getKey(), entry.getValue());
}
metaClient.getTableConfig().setTableVersion(toVersion);
HoodieTableConfig.update(metaClient.getFs(), new Path(metaClient.getMetaPath()), metaClient.getTableConfig().getProps());
}
protected Map<ConfigProperty, String> upgrade(HoodieTableVersion fromVersion, HoodieTableVersion toVersion, String instantTime) {