[HUDI-2268] Add upgrade and downgrade to and from 0.9.0 (#3470)
- Added upgrade and downgrade step to and from 0.9.0. Upgrade adds few table properties. Downgrade recreates timeline server based marker files if any.
This commit is contained in:
@@ -0,0 +1,33 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi.table.upgrade;
|
||||
|
||||
import org.apache.hudi.HoodieSparkUtils;
|
||||
import org.apache.hudi.config.HoodieWriteConfig;
|
||||
|
||||
/**
|
||||
* Upgrade handle to assist in upgrading hoodie table from version 1 to 2.
|
||||
*/
|
||||
public class OneToTwoUpgradeHandler extends BaseOneToTwoUpgradeHandler {
|
||||
|
||||
@Override
|
||||
String getPartitionColumns(HoodieWriteConfig config) {
|
||||
return HoodieSparkUtils.getPartitionColumns(config.getProps());
|
||||
}
|
||||
}
|
||||
@@ -19,31 +19,17 @@
|
||||
package org.apache.hudi.table.upgrade;
|
||||
|
||||
import org.apache.hudi.common.engine.HoodieEngineContext;
|
||||
import org.apache.hudi.common.table.timeline.HoodieInstant;
|
||||
import org.apache.hudi.common.table.timeline.HoodieTimeline;
|
||||
import org.apache.hudi.config.HoodieWriteConfig;
|
||||
import org.apache.hudi.table.HoodieSparkTable;
|
||||
import org.apache.hudi.table.marker.WriteMarkers;
|
||||
import org.apache.hudi.table.marker.WriteMarkersFactory;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
import org.apache.hudi.table.HoodieTable;
|
||||
|
||||
/**
|
||||
* Downgrade handle to assist in downgrading hoodie table from version 1 to 0.
|
||||
*/
|
||||
public class OneToZeroDowngradeHandler implements DowngradeHandler {
|
||||
public class OneToZeroDowngradeHandler extends BaseOneToZeroDowngradeHandler {
|
||||
|
||||
@Override
|
||||
public void downgrade(HoodieWriteConfig config, HoodieEngineContext context, String instantTime) {
|
||||
// fetch pending commit info
|
||||
HoodieSparkTable table = HoodieSparkTable.create(config, context);
|
||||
HoodieTimeline inflightTimeline = table.getMetaClient().getCommitsTimeline().filterPendingExcludingCompaction();
|
||||
List<HoodieInstant> commits = inflightTimeline.getReverseOrderedInstants().collect(Collectors.toList());
|
||||
for (HoodieInstant commitInstant : commits) {
|
||||
// delete existing markers
|
||||
WriteMarkers writeMarkers = WriteMarkersFactory.get(config.getMarkersType(), table, commitInstant.getTimestamp());
|
||||
writeMarkers.quietDeleteMarkerDir(context, config.getMarkersDeleteParallelism());
|
||||
}
|
||||
HoodieTable getTable(HoodieWriteConfig config, HoodieEngineContext context) {
|
||||
return HoodieSparkTable.create(config, context);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -18,6 +18,7 @@
|
||||
|
||||
package org.apache.hudi.table.upgrade;
|
||||
|
||||
import org.apache.hudi.common.config.ConfigProperty;
|
||||
import org.apache.hudi.common.engine.HoodieEngineContext;
|
||||
import org.apache.hudi.common.table.HoodieTableMetaClient;
|
||||
import org.apache.hudi.common.table.HoodieTableVersion;
|
||||
@@ -25,6 +26,7 @@ import org.apache.hudi.config.HoodieWriteConfig;
|
||||
import org.apache.hudi.exception.HoodieUpgradeDowngradeException;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
|
||||
public class SparkUpgradeDowngrade extends AbstractUpgradeDowngrade {
|
||||
|
||||
@@ -43,22 +45,25 @@ public class SparkUpgradeDowngrade extends AbstractUpgradeDowngrade {
|
||||
} catch (IOException e) {
|
||||
throw new HoodieUpgradeDowngradeException("Error during upgrade/downgrade to version:" + toVersion, e);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void upgrade(HoodieTableVersion fromVersion, HoodieTableVersion toVersion, String instantTime) {
|
||||
protected Map<ConfigProperty, String> upgrade(HoodieTableVersion fromVersion, HoodieTableVersion toVersion, String instantTime) {
|
||||
if (fromVersion == HoodieTableVersion.ZERO && toVersion == HoodieTableVersion.ONE) {
|
||||
new ZeroToOneUpgradeHandler().upgrade(config, context, instantTime);
|
||||
return new ZeroToOneUpgradeHandler().upgrade(config, context, instantTime);
|
||||
} else if (fromVersion == HoodieTableVersion.ONE && toVersion == HoodieTableVersion.TWO) {
|
||||
return new OneToTwoUpgradeHandler().upgrade(config, context, instantTime);
|
||||
} else {
|
||||
throw new HoodieUpgradeDowngradeException(fromVersion.versionCode(), toVersion.versionCode(), true);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void downgrade(HoodieTableVersion fromVersion, HoodieTableVersion toVersion, String instantTime) {
|
||||
protected Map<ConfigProperty, String> downgrade(HoodieTableVersion fromVersion, HoodieTableVersion toVersion, String instantTime) {
|
||||
if (fromVersion == HoodieTableVersion.ONE && toVersion == HoodieTableVersion.ZERO) {
|
||||
new OneToZeroDowngradeHandler().downgrade(config, context, instantTime);
|
||||
return new OneToZeroDowngradeHandler().downgrade(config, context, instantTime);
|
||||
} else if (fromVersion == HoodieTableVersion.TWO && toVersion == HoodieTableVersion.ONE) {
|
||||
return new TwoToOneDowngradeHandler().downgrade(config, context, instantTime);
|
||||
} else {
|
||||
throw new HoodieUpgradeDowngradeException(fromVersion.versionCode(), toVersion.versionCode(), false);
|
||||
}
|
||||
|
||||
@@ -0,0 +1,35 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi.table.upgrade;
|
||||
|
||||
import org.apache.hudi.common.engine.HoodieEngineContext;
|
||||
import org.apache.hudi.config.HoodieWriteConfig;
|
||||
import org.apache.hudi.table.HoodieSparkTable;
|
||||
import org.apache.hudi.table.HoodieTable;
|
||||
|
||||
/**
|
||||
* Downgrade handle to assist in downgrading hoodie table from version 2 to 1.
|
||||
*/
|
||||
public class TwoToOneDowngradeHandler extends BaseTwoToOneDowngradeHandler {
|
||||
|
||||
@Override
|
||||
HoodieTable getTable(HoodieWriteConfig config, HoodieEngineContext context) {
|
||||
return HoodieSparkTable.create(config, context);
|
||||
}
|
||||
}
|
||||
@@ -20,120 +20,31 @@ package org.apache.hudi.table.upgrade;
|
||||
|
||||
import org.apache.hudi.common.HoodieRollbackStat;
|
||||
import org.apache.hudi.common.engine.HoodieEngineContext;
|
||||
import org.apache.hudi.common.fs.FSUtils;
|
||||
import org.apache.hudi.common.model.HoodieTableType;
|
||||
import org.apache.hudi.common.model.IOType;
|
||||
import org.apache.hudi.common.table.marker.MarkerType;
|
||||
import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
|
||||
import org.apache.hudi.common.table.HoodieTableMetaClient;
|
||||
import org.apache.hudi.common.table.timeline.HoodieInstant;
|
||||
import org.apache.hudi.common.table.timeline.HoodieTimeline;
|
||||
import org.apache.hudi.common.util.Option;
|
||||
import org.apache.hudi.config.HoodieWriteConfig;
|
||||
import org.apache.hudi.exception.HoodieRollbackException;
|
||||
import org.apache.hudi.table.HoodieSparkTable;
|
||||
import org.apache.hudi.table.HoodieTable;
|
||||
import org.apache.hudi.table.action.rollback.ListingBasedRollbackHelper;
|
||||
import org.apache.hudi.table.action.rollback.ListingBasedRollbackRequest;
|
||||
import org.apache.hudi.table.action.rollback.RollbackUtils;
|
||||
import org.apache.hudi.table.marker.WriteMarkers;
|
||||
import org.apache.hudi.table.marker.WriteMarkersFactory;
|
||||
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
/**
|
||||
* Upgrade handle to assist in upgrading hoodie table from version 0 to 1.
|
||||
*/
|
||||
public class ZeroToOneUpgradeHandler implements UpgradeHandler {
|
||||
public class ZeroToOneUpgradeHandler extends BaseZeroToOneUpgradeHandler {
|
||||
|
||||
@Override
|
||||
public void upgrade(HoodieWriteConfig config, HoodieEngineContext context, String instantTime) {
|
||||
// fetch pending commit info
|
||||
HoodieSparkTable table = HoodieSparkTable.create(config, context);
|
||||
HoodieTimeline inflightTimeline = table.getMetaClient().getCommitsTimeline().filterPendingExcludingCompaction();
|
||||
List<String> commits = inflightTimeline.getReverseOrderedInstants().map(HoodieInstant::getTimestamp)
|
||||
.collect(Collectors.toList());
|
||||
if (commits.size() > 0 && instantTime != null) {
|
||||
// ignore the latest inflight commit since a new commit would have been started and we need to fix any pending commits from previous launch
|
||||
commits.remove(instantTime);
|
||||
}
|
||||
for (String commit : commits) {
|
||||
// for every pending commit, delete old markers and re-create markers in new format
|
||||
recreateMarkers(commit, table, context, config.getMarkersType(), config.getMarkersDeleteParallelism());
|
||||
}
|
||||
HoodieTable getTable(HoodieWriteConfig config, HoodieEngineContext context) {
|
||||
return HoodieSparkTable.create(config, context);
|
||||
}
|
||||
|
||||
/**
|
||||
* Recreate markers in new format.
|
||||
* Step1: Delete existing markers
|
||||
* Step2: Collect all rollback file info.
|
||||
* Step3: recreate markers for all interested files.
|
||||
*
|
||||
* @param commitInstantTime instant of interest for which markers need to be recreated.
|
||||
* @param table instance of {@link HoodieSparkTable} to use
|
||||
* @param context instance of {@link HoodieEngineContext} to use
|
||||
* @param markerType marker type to use
|
||||
* @throws HoodieRollbackException on any exception during upgrade.
|
||||
*/
|
||||
private static void recreateMarkers(final String commitInstantTime,
|
||||
HoodieSparkTable table,
|
||||
HoodieEngineContext context,
|
||||
MarkerType markerType,
|
||||
int parallelism) throws HoodieRollbackException {
|
||||
try {
|
||||
// fetch hoodie instant
|
||||
Option<HoodieInstant> commitInstantOpt = Option.fromJavaOptional(table.getActiveTimeline().getCommitsTimeline().getInstants()
|
||||
.filter(instant -> HoodieActiveTimeline.EQUALS.test(instant.getTimestamp(), commitInstantTime))
|
||||
.findFirst());
|
||||
if (commitInstantOpt.isPresent()) {
|
||||
// delete existing markers
|
||||
WriteMarkers writeMarkers = WriteMarkersFactory.get(markerType, table, commitInstantTime);
|
||||
writeMarkers.quietDeleteMarkerDir(context, parallelism);
|
||||
|
||||
// generate rollback stats
|
||||
List<ListingBasedRollbackRequest> rollbackRequests;
|
||||
if (table.getMetaClient().getTableType() == HoodieTableType.COPY_ON_WRITE) {
|
||||
rollbackRequests = RollbackUtils.generateRollbackRequestsByListingCOW(context, table.getMetaClient().getBasePath(), table.getConfig());
|
||||
} else {
|
||||
rollbackRequests = RollbackUtils.generateRollbackRequestsUsingFileListingMOR(commitInstantOpt.get(), table, context);
|
||||
}
|
||||
List<HoodieRollbackStat> rollbackStats = new ListingBasedRollbackHelper(table.getMetaClient(), table.getConfig())
|
||||
.collectRollbackStats(context, commitInstantOpt.get(), rollbackRequests);
|
||||
|
||||
// recreate markers adhering to marker based rollback
|
||||
for (HoodieRollbackStat rollbackStat : rollbackStats) {
|
||||
for (String path : rollbackStat.getSuccessDeleteFiles()) {
|
||||
String dataFileName = path.substring(path.lastIndexOf("/") + 1);
|
||||
// not feasible to differentiate MERGE from CREATE. hence creating with MERGE IOType for all base files.
|
||||
writeMarkers.create(rollbackStat.getPartitionPath(), dataFileName, IOType.MERGE);
|
||||
}
|
||||
for (FileStatus fileStatus : rollbackStat.getCommandBlocksCount().keySet()) {
|
||||
writeMarkers.create(rollbackStat.getPartitionPath(), getFileNameForMarkerFromLogFile(fileStatus.getPath().toString(), table), IOType.APPEND);
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (Exception e) {
|
||||
throw new HoodieRollbackException("Exception thrown while upgrading Hoodie Table from version 0 to 1", e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Curates file name for marker from existing log file path.
|
||||
* log file format : partitionpath/.fileid_baseInstant.log.writetoken
|
||||
* marker file format : partitionpath/fileId_writetoken_baseinstant.basefileExtn.marker.APPEND
|
||||
*
|
||||
* @param logFilePath log file path for which marker file name needs to be generated.
|
||||
* @return the marker file name thus curated.
|
||||
*/
|
||||
private static String getFileNameForMarkerFromLogFile(String logFilePath, HoodieTable table) {
|
||||
Path logPath = new Path(table.getMetaClient().getBasePath(), logFilePath);
|
||||
String fileId = FSUtils.getFileIdFromLogPath(logPath);
|
||||
String baseInstant = FSUtils.getBaseCommitTimeFromLogPath(logPath);
|
||||
String writeToken = FSUtils.getWriteTokenFromLogPath(logPath);
|
||||
|
||||
return FSUtils.makeDataFileName(baseInstant, writeToken, fileId, table.getBaseFileFormat().getFileExtension());
|
||||
@Override
|
||||
List<HoodieRollbackStat> getListBasedRollBackStats(HoodieTableMetaClient metaClient, HoodieWriteConfig config, HoodieEngineContext context, Option<HoodieInstant> commitInstantOpt,
|
||||
List<ListingBasedRollbackRequest> rollbackRequests) {
|
||||
return new ListingBasedRollbackHelper(metaClient, config)
|
||||
.collectRollbackStats(context, commitInstantOpt.get(), rollbackRequests);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -18,11 +18,17 @@
|
||||
|
||||
package org.apache.hudi
|
||||
|
||||
import java.util.Properties
|
||||
|
||||
import org.apache.avro.Schema
|
||||
import org.apache.avro.generic.GenericRecord
|
||||
import org.apache.hadoop.fs.{FileSystem, Path}
|
||||
import org.apache.hudi.client.utils.SparkRowSerDe
|
||||
import org.apache.hudi.common.config.TypedProperties
|
||||
import org.apache.hudi.common.model.HoodieRecord
|
||||
import org.apache.hudi.keygen.constant.KeyGeneratorOptions
|
||||
import org.apache.hudi.keygen.factory.HoodieSparkKeyGeneratorFactory
|
||||
import org.apache.hudi.keygen.{BaseKeyGenerator, CustomAvroKeyGenerator, CustomKeyGenerator, KeyGenerator}
|
||||
import org.apache.spark.SPARK_VERSION
|
||||
import org.apache.spark.rdd.RDD
|
||||
import org.apache.spark.sql.avro.SchemaConverters
|
||||
@@ -221,11 +227,41 @@ object HoodieSparkUtils extends SparkAdapterSupport {
|
||||
val leftExp = toAttribute(attribute, tableSchema)
|
||||
val rightExp = Literal.create(s"%$value%")
|
||||
sparkAdapter.createLike(leftExp, rightExp)
|
||||
case _=> null
|
||||
case _ => null
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* @param properties config properties
|
||||
* @return partition columns
|
||||
*/
|
||||
def getPartitionColumns(properties: Properties): String = {
|
||||
val props = new TypedProperties(properties)
|
||||
val keyGenerator = HoodieSparkKeyGeneratorFactory.createKeyGenerator(props)
|
||||
getPartitionColumns(keyGenerator, props)
|
||||
}
|
||||
|
||||
/**
|
||||
* @param keyGen key generator
|
||||
* @return partition columns
|
||||
*/
|
||||
def getPartitionColumns(keyGen: KeyGenerator, typedProperties: TypedProperties): String = {
|
||||
keyGen match {
|
||||
// For CustomKeyGenerator and CustomAvroKeyGenerator, the partition path filed format
|
||||
// is: "field_name: field_type", we extract the field_name from the partition path field.
|
||||
case c: BaseKeyGenerator
|
||||
if c.isInstanceOf[CustomKeyGenerator] || c.isInstanceOf[CustomAvroKeyGenerator] =>
|
||||
c.getPartitionPathFields.asScala.map(pathField =>
|
||||
pathField.split(CustomAvroKeyGenerator.SPLIT_REGEX)
|
||||
.headOption.getOrElse(s"Illegal partition path field format: '$pathField' for ${c.getClass.getSimpleName}"))
|
||||
.mkString(",")
|
||||
|
||||
case b: BaseKeyGenerator => b.getPartitionPathFields.asScala.mkString(",")
|
||||
case _ => typedProperties.getString(KeyGeneratorOptions.PARTITIONPATH_FIELD.key())
|
||||
}
|
||||
}
|
||||
|
||||
private def toAttribute(columnName: String, tableSchema: StructType): AttributeReference = {
|
||||
val field = tableSchema.find(p => p.name == columnName)
|
||||
assert(field.isDefined, s"Cannot find column: $columnName, Table Columns are: " +
|
||||
|
||||
@@ -18,10 +18,6 @@
|
||||
|
||||
package org.apache.hudi.table.upgrade;
|
||||
|
||||
import org.apache.hadoop.fs.FSDataInputStream;
|
||||
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||
import org.apache.hadoop.fs.FileUtil;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hudi.client.SparkRDDWriteClient;
|
||||
import org.apache.hudi.client.WriteStatus;
|
||||
import org.apache.hudi.common.config.HoodieConfig;
|
||||
@@ -31,19 +27,31 @@ import org.apache.hudi.common.model.HoodieLogFile;
|
||||
import org.apache.hudi.common.model.HoodieRecord;
|
||||
import org.apache.hudi.common.model.HoodieTableType;
|
||||
import org.apache.hudi.common.table.HoodieTableConfig;
|
||||
import org.apache.hudi.common.table.HoodieTableMetaClient;
|
||||
import org.apache.hudi.common.table.HoodieTableVersion;
|
||||
import org.apache.hudi.common.table.marker.MarkerType;
|
||||
import org.apache.hudi.common.table.timeline.HoodieInstant;
|
||||
import org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion;
|
||||
import org.apache.hudi.common.table.view.SyncableFileSystemView;
|
||||
import org.apache.hudi.common.testutils.HoodieTestDataGenerator;
|
||||
import org.apache.hudi.common.testutils.HoodieTestUtils;
|
||||
import org.apache.hudi.common.util.Option;
|
||||
import org.apache.hudi.common.util.collection.Pair;
|
||||
import org.apache.hudi.config.HoodieWriteConfig;
|
||||
import org.apache.hudi.keygen.constant.KeyGeneratorOptions;
|
||||
import org.apache.hudi.table.HoodieTable;
|
||||
import org.apache.hudi.table.marker.WriteMarkers;
|
||||
import org.apache.hudi.table.marker.WriteMarkersFactory;
|
||||
import org.apache.hudi.testutils.Assertions;
|
||||
import org.apache.hudi.testutils.HoodieClientTestBase;
|
||||
import org.apache.hudi.testutils.HoodieClientTestUtils;
|
||||
|
||||
import org.apache.hadoop.fs.FSDataInputStream;
|
||||
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.FileUtil;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.spark.api.java.JavaRDD;
|
||||
import org.apache.spark.sql.Dataset;
|
||||
import org.apache.spark.sql.Row;
|
||||
@@ -52,20 +60,26 @@ import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.params.ParameterizedTest;
|
||||
import org.junit.jupiter.params.provider.Arguments;
|
||||
import org.junit.jupiter.params.provider.EnumSource;
|
||||
import org.junit.jupiter.params.provider.MethodSource;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Properties;
|
||||
import java.util.Set;
|
||||
import java.util.function.Predicate;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import static org.apache.hudi.common.table.HoodieTableConfig.HOODIE_BASE_FILE_FORMAT_PROP;
|
||||
import static org.apache.hudi.common.table.HoodieTableConfig.HOODIE_TABLE_TYPE_PROP;
|
||||
import static org.apache.hudi.common.testutils.HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH;
|
||||
import static org.apache.hudi.common.testutils.HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH;
|
||||
import static org.apache.hudi.common.util.MarkerUtils.MARKERS_FILENAME_PREFIX;
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertFalse;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
@@ -76,11 +90,29 @@ import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
public class TestUpgradeDowngrade extends HoodieClientTestBase {
|
||||
|
||||
private static final String TEST_NAME_WITH_PARAMS = "[{index}] Test with deletePartialMarkerFiles={0} and TableType = {1}";
|
||||
private static final String TEST_NAME_WITH_DOWNGRADE_PARAMS = "[{index}] Test with deletePartialMarkerFiles={0} and TableType = {1} and "
|
||||
+ "From version = {2}";
|
||||
|
||||
public static Stream<Arguments> configParams() {
|
||||
Object[][] data = new Object[][] {
|
||||
{true, HoodieTableType.COPY_ON_WRITE}, {false, HoodieTableType.COPY_ON_WRITE},
|
||||
{true, HoodieTableType.MERGE_ON_READ}, {false, HoodieTableType.MERGE_ON_READ}
|
||||
{true, HoodieTableType.COPY_ON_WRITE},
|
||||
{false, HoodieTableType.COPY_ON_WRITE},
|
||||
{true, HoodieTableType.MERGE_ON_READ},
|
||||
{false, HoodieTableType.MERGE_ON_READ}
|
||||
};
|
||||
return Stream.of(data).map(Arguments::of);
|
||||
}
|
||||
|
||||
public static Stream<Arguments> downGradeConfigParams() {
|
||||
Object[][] data = new Object[][] {
|
||||
{true, HoodieTableType.COPY_ON_WRITE, HoodieTableVersion.TWO},
|
||||
{false, HoodieTableType.COPY_ON_WRITE, HoodieTableVersion.TWO},
|
||||
{true, HoodieTableType.MERGE_ON_READ, HoodieTableVersion.TWO},
|
||||
{false, HoodieTableType.MERGE_ON_READ, HoodieTableVersion.TWO},
|
||||
{true, HoodieTableType.COPY_ON_WRITE, HoodieTableVersion.ONE},
|
||||
{false, HoodieTableType.COPY_ON_WRITE, HoodieTableVersion.ONE},
|
||||
{true, HoodieTableType.MERGE_ON_READ, HoodieTableVersion.ONE},
|
||||
{false, HoodieTableType.MERGE_ON_READ, HoodieTableVersion.ONE}
|
||||
};
|
||||
return Stream.of(data).map(Arguments::of);
|
||||
}
|
||||
@@ -88,9 +120,9 @@ public class TestUpgradeDowngrade extends HoodieClientTestBase {
|
||||
@BeforeEach
|
||||
public void setUp() throws Exception {
|
||||
initSparkContexts();
|
||||
initDFS();
|
||||
initPath();
|
||||
initTestDataGenerator();
|
||||
initDFSMetaClient();
|
||||
initMetaClient();
|
||||
}
|
||||
|
||||
@AfterEach
|
||||
@@ -100,21 +132,21 @@ public class TestUpgradeDowngrade extends HoodieClientTestBase {
|
||||
|
||||
@Test
|
||||
public void testLeftOverUpdatedPropFileCleanup() throws IOException {
|
||||
testUpgradeInternal(true, true, HoodieTableType.MERGE_ON_READ);
|
||||
testUpgradeZeroToOneInternal(true, true, HoodieTableType.MERGE_ON_READ);
|
||||
}
|
||||
|
||||
@ParameterizedTest(name = TEST_NAME_WITH_PARAMS)
|
||||
@MethodSource("configParams")
|
||||
public void testUpgrade(boolean deletePartialMarkerFiles, HoodieTableType tableType) throws IOException {
|
||||
testUpgradeInternal(false, deletePartialMarkerFiles, tableType);
|
||||
public void testUpgradeZeroToOne(boolean deletePartialMarkerFiles, HoodieTableType tableType) throws IOException {
|
||||
testUpgradeZeroToOneInternal(false, deletePartialMarkerFiles, tableType);
|
||||
}
|
||||
|
||||
public void testUpgradeInternal(boolean induceResiduesFromPrevUpgrade, boolean deletePartialMarkerFiles, HoodieTableType tableType) throws IOException {
|
||||
public void testUpgradeZeroToOneInternal(boolean induceResiduesFromPrevUpgrade, boolean deletePartialMarkerFiles, HoodieTableType tableType) throws IOException {
|
||||
// init config, table and client.
|
||||
Map<String, String> params = new HashMap<>();
|
||||
if (tableType == HoodieTableType.MERGE_ON_READ) {
|
||||
params.put(HOODIE_TABLE_TYPE_PROP.key(), HoodieTableType.MERGE_ON_READ.name());
|
||||
metaClient = HoodieTestUtils.init(dfs.getConf(), dfsBasePath, HoodieTableType.MERGE_ON_READ);
|
||||
metaClient = HoodieTestUtils.init(hadoopConf, basePath, HoodieTableType.MERGE_ON_READ);
|
||||
}
|
||||
HoodieWriteConfig cfg = getConfigBuilder().withAutoCommit(false).withRollbackUsingMarkers(false).withProps(params).build();
|
||||
SparkRDDWriteClient client = getHoodieWriteClient(cfg);
|
||||
@@ -151,30 +183,116 @@ public class TestUpgradeDowngrade extends HoodieClientTestBase {
|
||||
assertMarkerFilesForUpgrade(table, commitInstant, firstPartitionCommit2FileSlices, secondPartitionCommit2FileSlices);
|
||||
|
||||
// verify hoodie.table.version got upgraded
|
||||
metaClient = HoodieTableMetaClient.builder().setConf(context.getHadoopConf().get()).setBasePath(cfg.getBasePath())
|
||||
.setLayoutVersion(Option.of(new TimelineLayoutVersion(cfg.getTimelineLayoutVersion()))).build();
|
||||
assertEquals(metaClient.getTableConfig().getTableVersion().versionCode(), HoodieTableVersion.ONE.versionCode());
|
||||
assertTableVersionFromPropertyFile(HoodieTableVersion.ONE);
|
||||
|
||||
// trigger 3rd commit with marker based rollback enabled.
|
||||
/* HUDI-2310
|
||||
List<HoodieRecord> thirdBatch = triggerCommit("003", tableType, true);
|
||||
|
||||
// Check the entire dataset has all records only from 1st commit and 3rd commit since 2nd is expected to be rolledback.
|
||||
assertRows(inputRecords.getKey(), thirdBatch);
|
||||
if (induceResiduesFromPrevUpgrade) {
|
||||
assertFalse(dfs.exists(new Path(metaClient.getMetaPath(), SparkUpgradeDowngrade.HOODIE_UPDATED_PROPERTY_FILE)));
|
||||
}
|
||||
}*/
|
||||
}
|
||||
|
||||
@ParameterizedTest(name = TEST_NAME_WITH_PARAMS)
|
||||
@MethodSource("configParams")
|
||||
public void testDowngrade(boolean deletePartialMarkerFiles, HoodieTableType tableType) throws IOException {
|
||||
@ParameterizedTest
|
||||
@EnumSource(value = HoodieTableType.class)
|
||||
public void testUpgradeOneToTwo(HoodieTableType tableType) throws IOException {
|
||||
// init config, table and client.
|
||||
Map<String, String> params = new HashMap<>();
|
||||
addNewTableParamsToProps(params);
|
||||
if (tableType == HoodieTableType.MERGE_ON_READ) {
|
||||
params.put(HOODIE_TABLE_TYPE_PROP.key(), HoodieTableType.MERGE_ON_READ.name());
|
||||
metaClient = HoodieTestUtils.init(hadoopConf, basePath, HoodieTableType.MERGE_ON_READ);
|
||||
}
|
||||
HoodieWriteConfig cfg = getConfigBuilder().withAutoCommit(false).withRollbackUsingMarkers(false).withProps(params).build();
|
||||
SparkRDDWriteClient client = getHoodieWriteClient(cfg);
|
||||
// Write inserts
|
||||
doInsert(client);
|
||||
|
||||
// downgrade table props
|
||||
downgradeTableConfigsFromTwoToOne(cfg);
|
||||
|
||||
// perform upgrade
|
||||
new SparkUpgradeDowngrade(metaClient, cfg, context).run(metaClient, HoodieTableVersion.TWO, cfg, context, null);
|
||||
|
||||
// verify hoodie.table.version got upgraded
|
||||
metaClient = HoodieTableMetaClient.builder().setConf(context.getHadoopConf().get()).setBasePath(cfg.getBasePath())
|
||||
.setLayoutVersion(Option.of(new TimelineLayoutVersion(cfg.getTimelineLayoutVersion()))).build();
|
||||
assertEquals(metaClient.getTableConfig().getTableVersion().versionCode(), HoodieTableVersion.TWO.versionCode());
|
||||
assertTableVersionFromPropertyFile(HoodieTableVersion.TWO);
|
||||
|
||||
// verify table props
|
||||
assertTableProps(cfg);
|
||||
}
|
||||
|
||||
private void addNewTableParamsToProps(Map<String, String> params) {
|
||||
params.put(KeyGeneratorOptions.RECORDKEY_FIELD.key(), "uuid");
|
||||
params.put(KeyGeneratorOptions.PARTITIONPATH_FIELD.key(), "partition_path");
|
||||
params.put(HoodieTableConfig.HOODIE_TABLE_NAME_PROP.key(), metaClient.getTableConfig().getTableName());
|
||||
params.put(HOODIE_BASE_FILE_FORMAT_PROP.key(), HOODIE_BASE_FILE_FORMAT_PROP.defaultValue().name());
|
||||
}
|
||||
|
||||
private void doInsert(SparkRDDWriteClient client) {
|
||||
// Write 1 (only inserts)
|
||||
String commit1 = "000";
|
||||
client.startCommitWithTime(commit1);
|
||||
List<HoodieRecord> records = dataGen.generateInserts(commit1, 100);
|
||||
JavaRDD<HoodieRecord> writeRecords = jsc.parallelize(records, 1);
|
||||
client.insert(writeRecords, commit1).collect();
|
||||
}
|
||||
|
||||
private void downgradeTableConfigsFromTwoToOne(HoodieWriteConfig cfg) throws IOException {
|
||||
Properties properties = new Properties(cfg.getProps());
|
||||
properties.remove(HoodieTableConfig.HOODIE_TABLE_RECORDKEY_FIELDS.key());
|
||||
properties.remove(HoodieTableConfig.HOODIE_TABLE_PARTITION_FIELDS_PROP.key());
|
||||
properties.remove(HoodieTableConfig.HOODIE_TABLE_NAME_PROP.key());
|
||||
properties.remove(HOODIE_BASE_FILE_FORMAT_PROP.key());
|
||||
properties.setProperty(HoodieTableConfig.HOODIE_TABLE_VERSION_PROP.key(), "1");
|
||||
|
||||
metaClient = HoodieTestUtils.init(hadoopConf, basePath, getTableType(), properties);
|
||||
// set hoodie.table.version to 1 in hoodie.properties file
|
||||
metaClient.getTableConfig().setTableVersion(HoodieTableVersion.ONE);
|
||||
}
|
||||
|
||||
private void assertTableProps(HoodieWriteConfig cfg) {
|
||||
HoodieTableConfig tableConfig = metaClient.getTableConfig();
|
||||
Properties originalProps = cfg.getProps();
|
||||
assertEquals(tableConfig.getPartitionFieldProp(), originalProps.getProperty(KeyGeneratorOptions.PARTITIONPATH_FIELD.key()));
|
||||
assertEquals(tableConfig.getRecordKeyFieldProp(), originalProps.getProperty(KeyGeneratorOptions.RECORDKEY_FIELD.key()));
|
||||
assertEquals(tableConfig.getTableName(), cfg.getTableName());
|
||||
assertEquals(tableConfig.getBaseFileFormat().name(), originalProps.getProperty(HOODIE_BASE_FILE_FORMAT_PROP.key()));
|
||||
}
|
||||
|
||||
@ParameterizedTest(name = TEST_NAME_WITH_DOWNGRADE_PARAMS)
|
||||
@MethodSource("downGradeConfigParams")
|
||||
public void testDowngrade(boolean deletePartialMarkerFiles, HoodieTableType tableType, HoodieTableVersion fromVersion) throws IOException {
|
||||
MarkerType markerType = fromVersion == HoodieTableVersion.TWO ? MarkerType.TIMELINE_SERVER_BASED : MarkerType.DIRECT;
|
||||
// init config, table and client.
|
||||
Map<String, String> params = new HashMap<>();
|
||||
if (fromVersion == HoodieTableVersion.TWO) {
|
||||
addNewTableParamsToProps(params);
|
||||
}
|
||||
if (tableType == HoodieTableType.MERGE_ON_READ) {
|
||||
params.put(HOODIE_TABLE_TYPE_PROP.key(), HoodieTableType.MERGE_ON_READ.name());
|
||||
metaClient = HoodieTestUtils.init(hadoopConf, basePath, HoodieTableType.MERGE_ON_READ);
|
||||
}
|
||||
HoodieWriteConfig cfg = getConfigBuilder().withAutoCommit(false).withRollbackUsingMarkers(true)
|
||||
.withMarkersType(markerType.name()).withProps(params).build();
|
||||
SparkRDDWriteClient client = getHoodieWriteClient(cfg);
|
||||
|
||||
if (fromVersion == HoodieTableVersion.TWO) {
|
||||
// set table configs
|
||||
HoodieTableConfig tableConfig = metaClient.getTableConfig();
|
||||
tableConfig.setValue(HoodieTableConfig.HOODIE_TABLE_NAME_PROP, cfg.getTableName());
|
||||
tableConfig.setValue(HoodieTableConfig.HOODIE_TABLE_PARTITION_FIELDS_PROP, cfg.getString(KeyGeneratorOptions.PARTITIONPATH_FIELD.key()));
|
||||
tableConfig.setValue(HoodieTableConfig.HOODIE_TABLE_RECORDKEY_FIELDS, cfg.getString(KeyGeneratorOptions.RECORDKEY_FIELD.key()));
|
||||
tableConfig.setValue(HOODIE_BASE_FILE_FORMAT_PROP, cfg.getString(HOODIE_BASE_FILE_FORMAT_PROP));
|
||||
}
|
||||
|
||||
// prepare data. Make 2 commits, in which 2nd is not committed.
|
||||
List<FileSlice> firstPartitionCommit2FileSlices = new ArrayList<>();
|
||||
@@ -185,7 +303,7 @@ public class TestUpgradeDowngrade extends HoodieClientTestBase {
|
||||
HoodieInstant commitInstant = table.getPendingCommitTimeline().lastInstant().get();
|
||||
|
||||
// delete one of the marker files in 2nd commit if need be.
|
||||
WriteMarkers writeMarkers = WriteMarkersFactory.get(getConfig().getMarkersType(), table, commitInstant.getTimestamp());
|
||||
WriteMarkers writeMarkers = WriteMarkersFactory.get(markerType, table, commitInstant.getTimestamp());
|
||||
List<String> markerPaths = new ArrayList<>(writeMarkers.allMarkerFilePaths());
|
||||
if (deletePartialMarkerFiles) {
|
||||
String toDeleteMarkerFile = markerPaths.get(0);
|
||||
@@ -193,30 +311,55 @@ public class TestUpgradeDowngrade extends HoodieClientTestBase {
|
||||
markerPaths.remove(toDeleteMarkerFile);
|
||||
}
|
||||
|
||||
// set hoodie.table.version to 1 in hoodie.properties file
|
||||
prepForDowngrade();
|
||||
// set hoodie.table.version to fromVersion in hoodie.properties file
|
||||
HoodieTableVersion toVersion = HoodieTableVersion.ZERO;
|
||||
if (fromVersion == HoodieTableVersion.TWO) {
|
||||
prepForDowngradeFromTwoToOne();
|
||||
toVersion = HoodieTableVersion.ONE;
|
||||
} else {
|
||||
prepForDowngradeFromOneToZero();
|
||||
}
|
||||
|
||||
// downgrade should be performed. all marker files should be deleted
|
||||
new SparkUpgradeDowngrade(metaClient, cfg, context).run(metaClient, HoodieTableVersion.ZERO, cfg, context, null);
|
||||
new SparkUpgradeDowngrade(metaClient, cfg, context).run(metaClient, toVersion, cfg, context, null);
|
||||
|
||||
// assert marker files
|
||||
assertMarkerFilesForDowngrade(table, commitInstant);
|
||||
assertMarkerFilesForDowngrade(table, commitInstant, toVersion == HoodieTableVersion.ONE);
|
||||
|
||||
// verify hoodie.table.version got downgraded
|
||||
assertEquals(metaClient.getTableConfig().getTableVersion().versionCode(), HoodieTableVersion.ZERO.versionCode());
|
||||
assertTableVersionFromPropertyFile(HoodieTableVersion.ZERO);
|
||||
metaClient = HoodieTableMetaClient.builder().setConf(context.getHadoopConf().get()).setBasePath(cfg.getBasePath())
|
||||
.setLayoutVersion(Option.of(new TimelineLayoutVersion(cfg.getTimelineLayoutVersion()))).build();
|
||||
assertEquals(metaClient.getTableConfig().getTableVersion().versionCode(), toVersion.versionCode());
|
||||
assertTableVersionFromPropertyFile(toVersion);
|
||||
|
||||
// trigger 3rd commit with marker based rollback disabled.
|
||||
/* HUDI-2310
|
||||
List<HoodieRecord> thirdBatch = triggerCommit("003", tableType, false);
|
||||
|
||||
// Check the entire dataset has all records only from 1st commit and 3rd commit since 2nd is expected to be rolledback.
|
||||
assertRows(inputRecords.getKey(), thirdBatch);
|
||||
*/
|
||||
}
|
||||
|
||||
private void assertMarkerFilesForDowngrade(HoodieTable table, HoodieInstant commitInstant) throws IOException {
|
||||
private void assertMarkerFilesForDowngrade(HoodieTable table, HoodieInstant commitInstant, boolean assertExists) throws IOException {
|
||||
// Verify recreated marker files are as expected
|
||||
WriteMarkers writeMarkers = WriteMarkersFactory.get(getConfig().getMarkersType(), table, commitInstant.getTimestamp());
|
||||
assertFalse(writeMarkers.doesMarkerDirExist());
|
||||
if (assertExists) {
|
||||
assertTrue(writeMarkers.doesMarkerDirExist());
|
||||
assertEquals(0, getTimelineServerBasedMarkerFileCount(table.getMetaClient().getMarkerFolderPath(commitInstant.getTimestamp()),
|
||||
table.getMetaClient().getFs()));
|
||||
} else {
|
||||
assertFalse(writeMarkers.doesMarkerDirExist());
|
||||
}
|
||||
}
|
||||
|
||||
private long getTimelineServerBasedMarkerFileCount(String markerDir, FileSystem fileSystem) throws IOException {
|
||||
FileStatus[] fileStatuses = fileSystem.listStatus(new Path(markerDir));
|
||||
Predicate<String> prefixFilter = pathStr -> pathStr.contains(MARKERS_FILENAME_PREFIX);
|
||||
return Arrays.stream(fileStatuses)
|
||||
.map(fileStatus -> fileStatus.getPath().toString())
|
||||
.filter(prefixFilter)
|
||||
.collect(Collectors.toList()).stream().count();
|
||||
}
|
||||
|
||||
private void assertMarkerFilesForUpgrade(HoodieTable table, HoodieInstant commitInstant, List<FileSlice> firstPartitionCommit2FileSlices,
|
||||
@@ -349,7 +492,7 @@ public class TestUpgradeDowngrade extends HoodieClientTestBase {
|
||||
//just generate two partitions
|
||||
dataGen = new HoodieTestDataGenerator(new String[] {DEFAULT_FIRST_PARTITION_PATH, DEFAULT_SECOND_PARTITION_PATH});
|
||||
//1. prepare data
|
||||
HoodieTestDataGenerator.writePartitionMetadata(dfs, new String[] {DEFAULT_FIRST_PARTITION_PATH, DEFAULT_SECOND_PARTITION_PATH}, dfsBasePath);
|
||||
HoodieTestDataGenerator.writePartitionMetadata(metaClient.getFs(), new String[] {DEFAULT_FIRST_PARTITION_PATH, DEFAULT_SECOND_PARTITION_PATH}, basePath);
|
||||
/**
|
||||
* Write 1 (only inserts)
|
||||
*/
|
||||
@@ -396,7 +539,7 @@ public class TestUpgradeDowngrade extends HoodieClientTestBase {
|
||||
return Pair.of(records, records2);
|
||||
}
|
||||
|
||||
private void prepForDowngrade() throws IOException {
|
||||
private void prepForDowngradeFromOneToZero() throws IOException {
|
||||
metaClient.getTableConfig().setTableVersion(HoodieTableVersion.ONE);
|
||||
Path propertyFile = new Path(metaClient.getMetaPath() + "/" + HoodieTableConfig.HOODIE_PROPERTIES_FILE);
|
||||
try (FSDataOutputStream os = metaClient.getFs().create(propertyFile)) {
|
||||
@@ -404,13 +547,21 @@ public class TestUpgradeDowngrade extends HoodieClientTestBase {
|
||||
}
|
||||
}
|
||||
|
||||
private void prepForDowngradeFromTwoToOne() throws IOException {
|
||||
metaClient.getTableConfig().setTableVersion(HoodieTableVersion.TWO);
|
||||
Path propertyFile = new Path(metaClient.getMetaPath() + "/" + HoodieTableConfig.HOODIE_PROPERTIES_FILE);
|
||||
try (FSDataOutputStream os = metaClient.getFs().create(propertyFile)) {
|
||||
metaClient.getTableConfig().getProps().store(os, "");
|
||||
}
|
||||
}
|
||||
|
||||
private void createResidualFile() throws IOException {
|
||||
Path propertyFile = new Path(metaClient.getMetaPath() + "/" + HoodieTableConfig.HOODIE_PROPERTIES_FILE);
|
||||
Path updatedPropertyFile = new Path(metaClient.getMetaPath() + "/" + SparkUpgradeDowngrade.HOODIE_UPDATED_PROPERTY_FILE);
|
||||
|
||||
// Step1: Copy hoodie.properties to hoodie.properties.orig
|
||||
FileUtil.copy(metaClient.getFs(), propertyFile, metaClient.getFs(), updatedPropertyFile,
|
||||
false, metaClient.getHadoopConf());
|
||||
false, hadoopConf);
|
||||
}
|
||||
|
||||
private void assertTableVersionFromPropertyFile(HoodieTableVersion expectedVersion) throws IOException {
|
||||
|
||||
Reference in New Issue
Block a user