1
0

[HUDI-1014] Adding Upgrade and downgrade infra for smooth transitioning from list based rollback to marker based rollback (#1858)

- This pull request adds upgrade/downgrade infra for smooth transition from list based rollback to marker based rollback*
 - A new property called hoodie.table.version is added to hoodie.properties file as part of this. Whenever hoodie is launched with newer table version i.e 1(or moving from pre 0.6.0 to 0.6.0), an upgrade step will be executed automatically to adhere to marker based rollback.*
 - This automatic upgrade step will happen just once per dataset as the hoodie.table.version will be updated in property file after upgrade is completed once*
 - Similarly, a command line tool for Downgrading is added if incase some user wants to downgrade hoodie from table version 1 to 0 or move from hoodie 0.6.0 to pre 0.6.0*
 - *Added UpgradeDowngrade to assist in upgrading or downgrading hoodie table*
 - *Added Interfaces for upgrade and downgrade and concrete implementations for upgrading from 0 to 1 and downgrading from 1 to 0.*
 - *Made some changes to ListingBasedRollbackHelper to expose just rollback stats w/o performing actual rollback, which will be consumed by Upgrade infra*
- Reworking failure handling for upgrade/downgrade
 - Changed tests accordingly, added one test around left over cleanup
 - New tables now write table version into hoodie.properties
 - Clean up code naming, abstractions.

Co-authored-by: Vinoth Chandar <vinoth@apache.org>
This commit is contained in:
Sivabalan Narayanan
2020-08-09 18:32:43 -04:00
committed by GitHub
parent e4a2d98f79
commit ff53e8f0b6
22 changed files with 1521 additions and 264 deletions

View File

@@ -23,7 +23,10 @@ import org.apache.hudi.cli.DedupeSparkJob;
import org.apache.hudi.cli.utils.SparkUtil;
import org.apache.hudi.client.HoodieWriteClient;
import org.apache.hudi.common.config.TypedProperties;
import org.apache.hudi.client.utils.ClientUtils;
import org.apache.hudi.common.fs.FSUtils;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.HoodieTableVersion;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.StringUtils;
import org.apache.hudi.config.HoodieBootstrapConfig;
@@ -31,6 +34,7 @@ import org.apache.hudi.config.HoodieIndexConfig;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.exception.HoodieSavepointException;
import org.apache.hudi.index.HoodieIndex;
import org.apache.hudi.table.upgrade.UpgradeDowngrade;
import org.apache.hudi.table.action.compact.strategy.UnBoundedCompactionStrategy;
import org.apache.hudi.utilities.HDFSParquetImporter;
import org.apache.hudi.utilities.HDFSParquetImporter.Config;
@@ -64,7 +68,7 @@ public class SparkMain {
*/
enum SparkCommand {
BOOTSTRAP, ROLLBACK, DEDUPLICATE, ROLLBACK_TO_SAVEPOINT, SAVEPOINT, IMPORT, UPSERT, COMPACT_SCHEDULE, COMPACT_RUN,
COMPACT_UNSCHEDULE_PLAN, COMPACT_UNSCHEDULE_FILE, COMPACT_VALIDATE, COMPACT_REPAIR, CLEAN, DELETE_SAVEPOINT
COMPACT_UNSCHEDULE_PLAN, COMPACT_UNSCHEDULE_FILE, COMPACT_VALIDATE, COMPACT_REPAIR, CLEAN, DELETE_SAVEPOINT, UPGRADE, DOWNGRADE
}
public static void main(String[] args) throws Exception {
@@ -185,6 +189,11 @@ public class SparkMain {
returnCode = doBootstrap(jsc, args[3], args[4], args[5], args[6], args[7], args[8], args[9], args[10],
args[11], args[12], args[13], args[14], args[15], args[16], propsFilePath, configs);
break;
case UPGRADE:
case DOWNGRADE:
assert (args.length == 5);
returnCode = upgradeOrDowngradeTable(jsc, args[3], args[4]);
break;
default:
break;
}
@@ -380,9 +389,35 @@ public class SparkMain {
}
}
/**
* Upgrade or downgrade table.
*
* @param jsc instance of {@link JavaSparkContext} to use.
* @param basePath base path of the dataset.
* @param toVersion version to which upgrade/downgrade to be done.
* @return 0 if success, else -1.
* @throws Exception
*/
protected static int upgradeOrDowngradeTable(JavaSparkContext jsc, String basePath, String toVersion) {
HoodieWriteConfig config = getWriteConfig(basePath);
HoodieTableMetaClient metaClient = ClientUtils.createMetaClient(jsc.hadoopConfiguration(), config, false);
try {
UpgradeDowngrade.run(metaClient, HoodieTableVersion.valueOf(toVersion), config, jsc, null);
LOG.info(String.format("Table at \"%s\" upgraded / downgraded to version \"%s\".", basePath, toVersion));
return 0;
} catch (Exception e) {
LOG.warn(String.format("Failed: Could not upgrade/downgrade table at \"%s\" to version \"%s\".", basePath, toVersion), e);
return -1;
}
}
private static HoodieWriteClient createHoodieClient(JavaSparkContext jsc, String basePath) throws Exception {
HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath)
.withIndexConfig(HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build()).build();
HoodieWriteConfig config = getWriteConfig(basePath);
return new HoodieWriteClient(jsc, config);
}
private static HoodieWriteConfig getWriteConfig(String basePath) {
return HoodieWriteConfig.newBuilder().withPath(basePath)
.withIndexConfig(HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build()).build();
}
}

View File

@@ -0,0 +1,81 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hudi.cli.commands;
import org.apache.hudi.cli.HoodieCLI;
import org.apache.hudi.cli.commands.SparkMain.SparkCommand;
import org.apache.hudi.cli.utils.InputStreamConsumer;
import org.apache.hudi.cli.utils.SparkUtil;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.spark.launcher.SparkLauncher;
import org.springframework.shell.core.CommandMarker;
import org.springframework.shell.core.annotation.CliCommand;
import org.springframework.shell.core.annotation.CliOption;
/**
* CLI command to assist in upgrading/downgrading Hoodie dataset to a different version.
*/
public class UpgradeOrDowngradeCommand implements CommandMarker {
@CliCommand(value = "upgrade hoodie dataset ", help = "Upgrades hoodie dataset")
public String upgradeHoodieDataset(
@CliOption(key = {"toVersion"}, help = "To version of Hoodie dataset to be upgraded/downgraded to", unspecifiedDefaultValue = "") final String toVersion,
@CliOption(key = {"sparkProperties"}, help = "Spark Properties File Path") final String sparkPropertiesPath,
@CliOption(key = "sparkMaster", unspecifiedDefaultValue = "", help = "Spark Master") String master,
@CliOption(key = "sparkMemory", unspecifiedDefaultValue = "4G",
help = "Spark executor memory") final String sparkMemory)
throws Exception {
HoodieTableMetaClient metaClient = HoodieCLI.getTableMetaClient();
SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath);
sparkLauncher.addAppArgs(SparkCommand.UPGRADE.toString(), master, sparkMemory, metaClient.getBasePath(), toVersion);
Process process = sparkLauncher.launch();
InputStreamConsumer.captureOutput(process);
int exitCode = process.waitFor();
HoodieCLI.refreshTableMetadata();
if (exitCode != 0) {
return String.format("Failed: Could not Upgrade/Downgrade Hoodie dataset to \"%s\".", toVersion);
}
return String.format("Hoodie dataset upgraded/downgraded to ", toVersion);
}
@CliCommand(value = "downgrade hoodie dataset ", help = "Upgrades hoodie dataset")
public String downgradeHoodieDataset(
@CliOption(key = {"toVersion"}, help = "To version of Hoodie dataset to be upgraded/downgraded to", unspecifiedDefaultValue = "") final String toVersion,
@CliOption(key = {"sparkProperties"}, help = "Spark Properties File Path") final String sparkPropertiesPath,
@CliOption(key = "sparkMaster", unspecifiedDefaultValue = "", help = "Spark Master") String master,
@CliOption(key = "sparkMemory", unspecifiedDefaultValue = "4G",
help = "Spark executor memory") final String sparkMemory)
throws Exception {
HoodieTableMetaClient metaClient = HoodieCLI.getTableMetaClient();
SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath);
sparkLauncher.addAppArgs(SparkCommand.DOWNGRADE.toString(), master, sparkMemory, metaClient.getBasePath(), toVersion);
Process process = sparkLauncher.launch();
InputStreamConsumer.captureOutput(process);
int exitCode = process.waitFor();
HoodieCLI.refreshTableMetadata();
if (exitCode != 0) {
return String.format("Failed: Could not Upgrade/Downgrade Hoodie dataset to \"%s\".", toVersion);
}
return String.format("Hoodie dataset upgraded/downgraded to ", toVersion);
}
}