[HUDI-2268] Add upgrade and downgrade to and from 0.9.0 (#3470)

- Added upgrade and downgrade steps to and from 0.9.0. The upgrade adds a few table properties; the downgrade recreates timeline-server-based marker files, if any.
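For illustration, the upgrade/downgrade entry point exercised by this change looks roughly like the sketch below. This is a minimal sketch based on the SparkUpgradeDowngrade calls visible in the test diff that follows, not code from this commit verbatim; metaClient, cfg, and the engine context are assumed to be initialized as in the HoodieClientTestBase test harness.

import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.HoodieTableVersion;
import org.apache.hudi.table.upgrade.SparkUpgradeDowngrade;

// cfg (HoodieWriteConfig) and context (engine context) are assumed
// to come from the test base class setup.
HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder()
    .setConf(context.getHadoopConf().get())
    .setBasePath(cfg.getBasePath())
    .build();

// Upgrade to table version TWO (0.9.0): adds the new table properties
// (record key fields, partition fields, table name, base file format)
// to hoodie.properties.
new SparkUpgradeDowngrade(metaClient, cfg, context)
    .run(metaClient, HoodieTableVersion.TWO, cfg, context, null);

// Downgrade from 0.9.0 back to ONE: recreates timeline-server-based
// marker files, if any, and rewrites hoodie.table.version.
new SparkUpgradeDowngrade(metaClient, cfg, context)
    .run(metaClient, HoodieTableVersion.ONE, cfg, context, null);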
Y Ethan Guo
2021-08-14 17:20:23 -07:00
committed by GitHub
parent 18e6b79947
commit 23dca6c237
27 changed files with 851 additions and 326 deletions

@@ -18,10 +18,6 @@
package org.apache.hudi.table.upgrade;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hudi.client.SparkRDDWriteClient;
import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.common.config.HoodieConfig;
@@ -31,19 +27,31 @@ import org.apache.hudi.common.model.HoodieLogFile;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.model.HoodieTableType;
import org.apache.hudi.common.table.HoodieTableConfig;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.HoodieTableVersion;
import org.apache.hudi.common.table.marker.MarkerType;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion;
import org.apache.hudi.common.table.view.SyncableFileSystemView;
import org.apache.hudi.common.testutils.HoodieTestDataGenerator;
import org.apache.hudi.common.testutils.HoodieTestUtils;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.collection.Pair;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.keygen.constant.KeyGeneratorOptions;
import org.apache.hudi.table.HoodieTable;
import org.apache.hudi.table.marker.WriteMarkers;
import org.apache.hudi.table.marker.WriteMarkersFactory;
import org.apache.hudi.testutils.Assertions;
import org.apache.hudi.testutils.HoodieClientTestBase;
import org.apache.hudi.testutils.HoodieClientTestUtils;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
@@ -52,20 +60,26 @@ import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.EnumSource;
import org.junit.jupiter.params.provider.MethodSource;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.function.Predicate;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import static org.apache.hudi.common.table.HoodieTableConfig.HOODIE_BASE_FILE_FORMAT_PROP;
import static org.apache.hudi.common.table.HoodieTableConfig.HOODIE_TABLE_TYPE_PROP;
import static org.apache.hudi.common.testutils.HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH;
import static org.apache.hudi.common.testutils.HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH;
import static org.apache.hudi.common.util.MarkerUtils.MARKERS_FILENAME_PREFIX;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;
@@ -76,11 +90,29 @@ import static org.junit.jupiter.api.Assertions.assertTrue;
public class TestUpgradeDowngrade extends HoodieClientTestBase {
private static final String TEST_NAME_WITH_PARAMS = "[{index}] Test with deletePartialMarkerFiles={0} and TableType = {1}";
private static final String TEST_NAME_WITH_DOWNGRADE_PARAMS = "[{index}] Test with deletePartialMarkerFiles={0} and TableType = {1} and "
+ "From version = {2}";
public static Stream<Arguments> configParams() {
Object[][] data = new Object[][] {
{true, HoodieTableType.COPY_ON_WRITE}, {false, HoodieTableType.COPY_ON_WRITE},
{true, HoodieTableType.MERGE_ON_READ}, {false, HoodieTableType.MERGE_ON_READ}
{true, HoodieTableType.COPY_ON_WRITE},
{false, HoodieTableType.COPY_ON_WRITE},
{true, HoodieTableType.MERGE_ON_READ},
{false, HoodieTableType.MERGE_ON_READ}
};
return Stream.of(data).map(Arguments::of);
}
public static Stream<Arguments> downGradeConfigParams() {
Object[][] data = new Object[][] {
{true, HoodieTableType.COPY_ON_WRITE, HoodieTableVersion.TWO},
{false, HoodieTableType.COPY_ON_WRITE, HoodieTableVersion.TWO},
{true, HoodieTableType.MERGE_ON_READ, HoodieTableVersion.TWO},
{false, HoodieTableType.MERGE_ON_READ, HoodieTableVersion.TWO},
{true, HoodieTableType.COPY_ON_WRITE, HoodieTableVersion.ONE},
{false, HoodieTableType.COPY_ON_WRITE, HoodieTableVersion.ONE},
{true, HoodieTableType.MERGE_ON_READ, HoodieTableVersion.ONE},
{false, HoodieTableType.MERGE_ON_READ, HoodieTableVersion.ONE}
};
return Stream.of(data).map(Arguments::of);
}
@@ -88,9 +120,9 @@ public class TestUpgradeDowngrade extends HoodieClientTestBase {
@BeforeEach
public void setUp() throws Exception {
initSparkContexts();
initDFS();
initPath();
initTestDataGenerator();
initDFSMetaClient();
initMetaClient();
}
@AfterEach
@@ -100,21 +132,21 @@ public class TestUpgradeDowngrade extends HoodieClientTestBase {
@Test
public void testLeftOverUpdatedPropFileCleanup() throws IOException {
testUpgradeInternal(true, true, HoodieTableType.MERGE_ON_READ);
testUpgradeZeroToOneInternal(true, true, HoodieTableType.MERGE_ON_READ);
}
@ParameterizedTest(name = TEST_NAME_WITH_PARAMS)
@MethodSource("configParams")
public void testUpgrade(boolean deletePartialMarkerFiles, HoodieTableType tableType) throws IOException {
testUpgradeInternal(false, deletePartialMarkerFiles, tableType);
public void testUpgradeZeroToOne(boolean deletePartialMarkerFiles, HoodieTableType tableType) throws IOException {
testUpgradeZeroToOneInternal(false, deletePartialMarkerFiles, tableType);
}
public void testUpgradeInternal(boolean induceResiduesFromPrevUpgrade, boolean deletePartialMarkerFiles, HoodieTableType tableType) throws IOException {
public void testUpgradeZeroToOneInternal(boolean induceResiduesFromPrevUpgrade, boolean deletePartialMarkerFiles, HoodieTableType tableType) throws IOException {
// init config, table and client.
Map<String, String> params = new HashMap<>();
if (tableType == HoodieTableType.MERGE_ON_READ) {
params.put(HOODIE_TABLE_TYPE_PROP.key(), HoodieTableType.MERGE_ON_READ.name());
metaClient = HoodieTestUtils.init(dfs.getConf(), dfsBasePath, HoodieTableType.MERGE_ON_READ);
metaClient = HoodieTestUtils.init(hadoopConf, basePath, HoodieTableType.MERGE_ON_READ);
}
HoodieWriteConfig cfg = getConfigBuilder().withAutoCommit(false).withRollbackUsingMarkers(false).withProps(params).build();
SparkRDDWriteClient client = getHoodieWriteClient(cfg);
@@ -151,30 +183,116 @@ public class TestUpgradeDowngrade extends HoodieClientTestBase {
assertMarkerFilesForUpgrade(table, commitInstant, firstPartitionCommit2FileSlices, secondPartitionCommit2FileSlices);
// verify hoodie.table.version got upgraded
metaClient = HoodieTableMetaClient.builder().setConf(context.getHadoopConf().get()).setBasePath(cfg.getBasePath())
.setLayoutVersion(Option.of(new TimelineLayoutVersion(cfg.getTimelineLayoutVersion()))).build();
assertEquals(metaClient.getTableConfig().getTableVersion().versionCode(), HoodieTableVersion.ONE.versionCode());
assertTableVersionFromPropertyFile(HoodieTableVersion.ONE);
// trigger 3rd commit with marker based rollback enabled.
/* HUDI-2310
List<HoodieRecord> thirdBatch = triggerCommit("003", tableType, true);
// Check the entire dataset has all records only from 1st commit and 3rd commit since 2nd is expected to be rolledback.
assertRows(inputRecords.getKey(), thirdBatch);
if (induceResiduesFromPrevUpgrade) {
assertFalse(dfs.exists(new Path(metaClient.getMetaPath(), SparkUpgradeDowngrade.HOODIE_UPDATED_PROPERTY_FILE)));
}
}*/
}
@ParameterizedTest(name = TEST_NAME_WITH_PARAMS)
@MethodSource("configParams")
public void testDowngrade(boolean deletePartialMarkerFiles, HoodieTableType tableType) throws IOException {
@ParameterizedTest
@EnumSource(value = HoodieTableType.class)
public void testUpgradeOneToTwo(HoodieTableType tableType) throws IOException {
// init config, table and client.
Map<String, String> params = new HashMap<>();
addNewTableParamsToProps(params);
if (tableType == HoodieTableType.MERGE_ON_READ) {
params.put(HOODIE_TABLE_TYPE_PROP.key(), HoodieTableType.MERGE_ON_READ.name());
metaClient = HoodieTestUtils.init(hadoopConf, basePath, HoodieTableType.MERGE_ON_READ);
}
HoodieWriteConfig cfg = getConfigBuilder().withAutoCommit(false).withRollbackUsingMarkers(false).withProps(params).build();
SparkRDDWriteClient client = getHoodieWriteClient(cfg);
// Write inserts
doInsert(client);
// downgrade table props
downgradeTableConfigsFromTwoToOne(cfg);
// perform upgrade
new SparkUpgradeDowngrade(metaClient, cfg, context).run(metaClient, HoodieTableVersion.TWO, cfg, context, null);
// verify hoodie.table.version got upgraded
metaClient = HoodieTableMetaClient.builder().setConf(context.getHadoopConf().get()).setBasePath(cfg.getBasePath())
.setLayoutVersion(Option.of(new TimelineLayoutVersion(cfg.getTimelineLayoutVersion()))).build();
assertEquals(metaClient.getTableConfig().getTableVersion().versionCode(), HoodieTableVersion.TWO.versionCode());
assertTableVersionFromPropertyFile(HoodieTableVersion.TWO);
// verify table props
assertTableProps(cfg);
}
private void addNewTableParamsToProps(Map<String, String> params) {
params.put(KeyGeneratorOptions.RECORDKEY_FIELD.key(), "uuid");
params.put(KeyGeneratorOptions.PARTITIONPATH_FIELD.key(), "partition_path");
params.put(HoodieTableConfig.HOODIE_TABLE_NAME_PROP.key(), metaClient.getTableConfig().getTableName());
params.put(HOODIE_BASE_FILE_FORMAT_PROP.key(), HOODIE_BASE_FILE_FORMAT_PROP.defaultValue().name());
}
private void doInsert(SparkRDDWriteClient client) {
// Write 1 (only inserts)
String commit1 = "000";
client.startCommitWithTime(commit1);
List<HoodieRecord> records = dataGen.generateInserts(commit1, 100);
JavaRDD<HoodieRecord> writeRecords = jsc.parallelize(records, 1);
client.insert(writeRecords, commit1).collect();
}
private void downgradeTableConfigsFromTwoToOne(HoodieWriteConfig cfg) throws IOException {
Properties properties = new Properties(cfg.getProps());
properties.remove(HoodieTableConfig.HOODIE_TABLE_RECORDKEY_FIELDS.key());
properties.remove(HoodieTableConfig.HOODIE_TABLE_PARTITION_FIELDS_PROP.key());
properties.remove(HoodieTableConfig.HOODIE_TABLE_NAME_PROP.key());
properties.remove(HOODIE_BASE_FILE_FORMAT_PROP.key());
properties.setProperty(HoodieTableConfig.HOODIE_TABLE_VERSION_PROP.key(), "1");
metaClient = HoodieTestUtils.init(hadoopConf, basePath, getTableType(), properties);
// set hoodie.table.version to 1 in hoodie.properties file
metaClient.getTableConfig().setTableVersion(HoodieTableVersion.ONE);
}
private void assertTableProps(HoodieWriteConfig cfg) {
HoodieTableConfig tableConfig = metaClient.getTableConfig();
Properties originalProps = cfg.getProps();
assertEquals(tableConfig.getPartitionFieldProp(), originalProps.getProperty(KeyGeneratorOptions.PARTITIONPATH_FIELD.key()));
assertEquals(tableConfig.getRecordKeyFieldProp(), originalProps.getProperty(KeyGeneratorOptions.RECORDKEY_FIELD.key()));
assertEquals(tableConfig.getTableName(), cfg.getTableName());
assertEquals(tableConfig.getBaseFileFormat().name(), originalProps.getProperty(HOODIE_BASE_FILE_FORMAT_PROP.key()));
}
@ParameterizedTest(name = TEST_NAME_WITH_DOWNGRADE_PARAMS)
@MethodSource("downGradeConfigParams")
public void testDowngrade(boolean deletePartialMarkerFiles, HoodieTableType tableType, HoodieTableVersion fromVersion) throws IOException {
MarkerType markerType = fromVersion == HoodieTableVersion.TWO ? MarkerType.TIMELINE_SERVER_BASED : MarkerType.DIRECT;
// init config, table and client.
Map<String, String> params = new HashMap<>();
if (fromVersion == HoodieTableVersion.TWO) {
addNewTableParamsToProps(params);
}
if (tableType == HoodieTableType.MERGE_ON_READ) {
params.put(HOODIE_TABLE_TYPE_PROP.key(), HoodieTableType.MERGE_ON_READ.name());
metaClient = HoodieTestUtils.init(hadoopConf, basePath, HoodieTableType.MERGE_ON_READ);
}
HoodieWriteConfig cfg = getConfigBuilder().withAutoCommit(false).withRollbackUsingMarkers(true)
.withMarkersType(markerType.name()).withProps(params).build();
SparkRDDWriteClient client = getHoodieWriteClient(cfg);
if (fromVersion == HoodieTableVersion.TWO) {
// set table configs
HoodieTableConfig tableConfig = metaClient.getTableConfig();
tableConfig.setValue(HoodieTableConfig.HOODIE_TABLE_NAME_PROP, cfg.getTableName());
tableConfig.setValue(HoodieTableConfig.HOODIE_TABLE_PARTITION_FIELDS_PROP, cfg.getString(KeyGeneratorOptions.PARTITIONPATH_FIELD.key()));
tableConfig.setValue(HoodieTableConfig.HOODIE_TABLE_RECORDKEY_FIELDS, cfg.getString(KeyGeneratorOptions.RECORDKEY_FIELD.key()));
tableConfig.setValue(HOODIE_BASE_FILE_FORMAT_PROP, cfg.getString(HOODIE_BASE_FILE_FORMAT_PROP));
}
// prepare data. Make 2 commits, in which 2nd is not committed.
List<FileSlice> firstPartitionCommit2FileSlices = new ArrayList<>();
@@ -185,7 +303,7 @@ public class TestUpgradeDowngrade extends HoodieClientTestBase {
HoodieInstant commitInstant = table.getPendingCommitTimeline().lastInstant().get();
// delete one of the marker files in 2nd commit if need be.
WriteMarkers writeMarkers = WriteMarkersFactory.get(getConfig().getMarkersType(), table, commitInstant.getTimestamp());
WriteMarkers writeMarkers = WriteMarkersFactory.get(markerType, table, commitInstant.getTimestamp());
List<String> markerPaths = new ArrayList<>(writeMarkers.allMarkerFilePaths());
if (deletePartialMarkerFiles) {
String toDeleteMarkerFile = markerPaths.get(0);
@@ -193,30 +311,55 @@ public class TestUpgradeDowngrade extends HoodieClientTestBase {
markerPaths.remove(toDeleteMarkerFile);
}
// set hoodie.table.version to 1 in hoodie.properties file
prepForDowngrade();
// set hoodie.table.version to fromVersion in hoodie.properties file
HoodieTableVersion toVersion = HoodieTableVersion.ZERO;
if (fromVersion == HoodieTableVersion.TWO) {
prepForDowngradeFromTwoToOne();
toVersion = HoodieTableVersion.ONE;
} else {
prepForDowngradeFromOneToZero();
}
// downgrade should be performed. all marker files should be deleted
new SparkUpgradeDowngrade(metaClient, cfg, context).run(metaClient, HoodieTableVersion.ZERO, cfg, context, null);
new SparkUpgradeDowngrade(metaClient, cfg, context).run(metaClient, toVersion, cfg, context, null);
// assert marker files
assertMarkerFilesForDowngrade(table, commitInstant);
assertMarkerFilesForDowngrade(table, commitInstant, toVersion == HoodieTableVersion.ONE);
// verify hoodie.table.version got downgraded
assertEquals(metaClient.getTableConfig().getTableVersion().versionCode(), HoodieTableVersion.ZERO.versionCode());
assertTableVersionFromPropertyFile(HoodieTableVersion.ZERO);
metaClient = HoodieTableMetaClient.builder().setConf(context.getHadoopConf().get()).setBasePath(cfg.getBasePath())
.setLayoutVersion(Option.of(new TimelineLayoutVersion(cfg.getTimelineLayoutVersion()))).build();
assertEquals(metaClient.getTableConfig().getTableVersion().versionCode(), toVersion.versionCode());
assertTableVersionFromPropertyFile(toVersion);
// trigger 3rd commit with marker based rollback disabled.
/* HUDI-2310
List<HoodieRecord> thirdBatch = triggerCommit("003", tableType, false);
// Check the entire dataset has all records only from 1st commit and 3rd commit since 2nd is expected to be rolledback.
assertRows(inputRecords.getKey(), thirdBatch);
*/
}
private void assertMarkerFilesForDowngrade(HoodieTable table, HoodieInstant commitInstant) throws IOException {
private void assertMarkerFilesForDowngrade(HoodieTable table, HoodieInstant commitInstant, boolean assertExists) throws IOException {
// Verify recreated marker files are as expected
WriteMarkers writeMarkers = WriteMarkersFactory.get(getConfig().getMarkersType(), table, commitInstant.getTimestamp());
assertFalse(writeMarkers.doesMarkerDirExist());
if (assertExists) {
assertTrue(writeMarkers.doesMarkerDirExist());
assertEquals(0, getTimelineServerBasedMarkerFileCount(table.getMetaClient().getMarkerFolderPath(commitInstant.getTimestamp()),
table.getMetaClient().getFs()));
} else {
assertFalse(writeMarkers.doesMarkerDirExist());
}
}
private long getTimelineServerBasedMarkerFileCount(String markerDir, FileSystem fileSystem) throws IOException {
FileStatus[] fileStatuses = fileSystem.listStatus(new Path(markerDir));
Predicate<String> prefixFilter = pathStr -> pathStr.contains(MARKERS_FILENAME_PREFIX);
return Arrays.stream(fileStatuses)
.map(fileStatus -> fileStatus.getPath().toString())
.filter(prefixFilter)
.collect(Collectors.toList()).stream().count();
}
private void assertMarkerFilesForUpgrade(HoodieTable table, HoodieInstant commitInstant, List<FileSlice> firstPartitionCommit2FileSlices,
@@ -349,7 +492,7 @@ public class TestUpgradeDowngrade extends HoodieClientTestBase {
//just generate two partitions
dataGen = new HoodieTestDataGenerator(new String[] {DEFAULT_FIRST_PARTITION_PATH, DEFAULT_SECOND_PARTITION_PATH});
//1. prepare data
HoodieTestDataGenerator.writePartitionMetadata(dfs, new String[] {DEFAULT_FIRST_PARTITION_PATH, DEFAULT_SECOND_PARTITION_PATH}, dfsBasePath);
HoodieTestDataGenerator.writePartitionMetadata(metaClient.getFs(), new String[] {DEFAULT_FIRST_PARTITION_PATH, DEFAULT_SECOND_PARTITION_PATH}, basePath);
/**
* Write 1 (only inserts)
*/
@@ -396,7 +539,7 @@ public class TestUpgradeDowngrade extends HoodieClientTestBase {
return Pair.of(records, records2);
}
private void prepForDowngrade() throws IOException {
private void prepForDowngradeFromOneToZero() throws IOException {
metaClient.getTableConfig().setTableVersion(HoodieTableVersion.ONE);
Path propertyFile = new Path(metaClient.getMetaPath() + "/" + HoodieTableConfig.HOODIE_PROPERTIES_FILE);
try (FSDataOutputStream os = metaClient.getFs().create(propertyFile)) {
@@ -404,13 +547,21 @@ public class TestUpgradeDowngrade extends HoodieClientTestBase {
}
}
private void prepForDowngradeFromTwoToOne() throws IOException {
metaClient.getTableConfig().setTableVersion(HoodieTableVersion.TWO);
Path propertyFile = new Path(metaClient.getMetaPath() + "/" + HoodieTableConfig.HOODIE_PROPERTIES_FILE);
try (FSDataOutputStream os = metaClient.getFs().create(propertyFile)) {
metaClient.getTableConfig().getProps().store(os, "");
}
}
private void createResidualFile() throws IOException {
Path propertyFile = new Path(metaClient.getMetaPath() + "/" + HoodieTableConfig.HOODIE_PROPERTIES_FILE);
Path updatedPropertyFile = new Path(metaClient.getMetaPath() + "/" + SparkUpgradeDowngrade.HOODIE_UPDATED_PROPERTY_FILE);
// Step1: Copy hoodie.properties to hoodie.properties.orig
FileUtil.copy(metaClient.getFs(), propertyFile, metaClient.getFs(), updatedPropertyFile,
false, metaClient.getHadoopConf());
false, hadoopConf);
}
private void assertTableVersionFromPropertyFile(HoodieTableVersion expectedVersion) throws IOException {