1
0

[HUDI-2268] Add upgrade and downgrade to and from 0.9.0 (#3470)

- Added upgrade and downgrade step to and from 0.9.0. Upgrade adds few table properties. Downgrade recreates timeline server based marker files if any.
This commit is contained in:
Y Ethan Guo
2021-08-14 17:20:23 -07:00
committed by GitHub
parent 18e6b79947
commit 23dca6c237
27 changed files with 851 additions and 326 deletions

View File

@@ -166,10 +166,23 @@ public class DirectWriteMarkers extends WriteMarkers {
return markerFiles;
}
/**
* Creates a marker file based on the full marker name excluding the base path and instant.
*
* @param markerName the full marker name, e.g., "2021/08/13/file1.marker.CREATE"
* @return path of the marker file
*/
public Option<Path> create(String markerName) {
return create(new Path(markerDirPath, markerName), true);
}
@Override
protected Option<Path> create(String partitionPath, String dataFileName, IOType type, boolean checkIfExists) {
return create(getMarkerPath(partitionPath, dataFileName, type), checkIfExists);
}
private Option<Path> create(Path markerPath, boolean checkIfExists) {
HoodieTimer timer = new HoodieTimer().startTimer();
Path markerPath = getMarkerPath(partitionPath, dataFileName, type);
Path dirPath = markerPath.getParent();
try {
if (!fs.exists(dirPath)) {

View File

@@ -18,6 +18,7 @@
package org.apache.hudi.table.upgrade;
import org.apache.hudi.common.config.ConfigProperty;
import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.common.table.HoodieTableConfig;
import org.apache.hudi.common.table.HoodieTableMetaClient;
@@ -33,6 +34,8 @@ import org.apache.log4j.Logger;
import java.io.IOException;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
import java.util.Properties;
/**
@@ -112,15 +115,27 @@ public abstract class AbstractUpgradeDowngrade {
// Perform the actual upgrade/downgrade; this has to be idempotent, for now.
LOG.info("Attempting to move table from version " + fromVersion + " to " + toVersion);
Map<ConfigProperty, String> tableProps = new HashMap<>();
if (fromVersion.versionCode() < toVersion.versionCode()) {
// upgrade
upgrade(fromVersion, toVersion, instantTime);
while (fromVersion.versionCode() < toVersion.versionCode()) {
HoodieTableVersion nextVersion = HoodieTableVersion.versionFromCode(fromVersion.versionCode() + 1);
tableProps.putAll(upgrade(fromVersion, nextVersion, instantTime));
fromVersion = nextVersion;
}
} else {
// downgrade
downgrade(fromVersion, toVersion, instantTime);
while (fromVersion.versionCode() > toVersion.versionCode()) {
HoodieTableVersion prevVersion = HoodieTableVersion.versionFromCode(fromVersion.versionCode() - 1);
tableProps.putAll(downgrade(fromVersion, prevVersion, instantTime));
fromVersion = prevVersion;
}
}
// Write out the current version in hoodie.properties.updated file
for (Map.Entry<ConfigProperty, String> entry: tableProps.entrySet()) {
metaClient.getTableConfig().setValue(entry.getKey(), entry.getValue());
}
metaClient.getTableConfig().setTableVersion(toVersion);
createUpdatedFile(metaClient.getTableConfig().getProps());
@@ -143,7 +158,7 @@ public abstract class AbstractUpgradeDowngrade {
}
}
protected abstract void upgrade(HoodieTableVersion fromVersion, HoodieTableVersion toVersion, String instantTime);
protected abstract Map<ConfigProperty, String> upgrade(HoodieTableVersion fromVersion, HoodieTableVersion toVersion, String instantTime);
protected abstract void downgrade(HoodieTableVersion fromVersion, HoodieTableVersion toVersion, String instantTime);
protected abstract Map<ConfigProperty, String> downgrade(HoodieTableVersion fromVersion, HoodieTableVersion toVersion, String instantTime);
}

View File

@@ -0,0 +1,42 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hudi.table.upgrade;
import org.apache.hudi.common.config.ConfigProperty;
import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.common.table.HoodieTableConfig;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.keygen.constant.KeyGeneratorOptions;
import java.util.HashMap;
import java.util.Map;
public abstract class BaseOneToTwoUpgradeHandler implements UpgradeHandler {
@Override
public Map<ConfigProperty, String> upgrade(HoodieWriteConfig config, HoodieEngineContext context, String instantTime) {
Map<ConfigProperty, String> tablePropsToAdd = new HashMap<>();
tablePropsToAdd.put(HoodieTableConfig.HOODIE_TABLE_PARTITION_FIELDS_PROP, getPartitionColumns(config));
tablePropsToAdd.put(HoodieTableConfig.HOODIE_TABLE_RECORDKEY_FIELDS, config.getString(KeyGeneratorOptions.RECORDKEY_FIELD.key()));
tablePropsToAdd.put(HoodieTableConfig.HOODIE_BASE_FILE_FORMAT_PROP, config.getString(HoodieTableConfig.HOODIE_BASE_FILE_FORMAT_PROP));
return tablePropsToAdd;
}
abstract String getPartitionColumns(HoodieWriteConfig config);
}

View File

@@ -0,0 +1,52 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hudi.table.upgrade;
import org.apache.hudi.common.config.ConfigProperty;
import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.table.HoodieTable;
import org.apache.hudi.table.marker.WriteMarkers;
import org.apache.hudi.table.marker.WriteMarkersFactory;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
public abstract class BaseOneToZeroDowngradeHandler implements DowngradeHandler {
@Override
public Map<ConfigProperty, String> downgrade(HoodieWriteConfig config, HoodieEngineContext context, String instantTime) {
// fetch pending commit info
HoodieTable table = getTable(config, context);
HoodieTimeline inflightTimeline = table.getMetaClient().getCommitsTimeline().filterPendingExcludingCompaction();
List<HoodieInstant> commits = inflightTimeline.getReverseOrderedInstants().collect(Collectors.toList());
for (HoodieInstant inflightInstant : commits) {
// delete existing markers
WriteMarkers writeMarkers = WriteMarkersFactory.get(config.getMarkersType(), table, inflightInstant.getTimestamp());
writeMarkers.quietDeleteMarkerDir(context, config.getMarkersDeleteParallelism());
}
return Collections.EMPTY_MAP;
}
abstract HoodieTable getTable(HoodieWriteConfig config, HoodieEngineContext context);
}

View File

@@ -0,0 +1,142 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hudi.table.upgrade;
import org.apache.hudi.common.config.ConfigProperty;
import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.common.fs.FSUtils;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.marker.MarkerType;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
import org.apache.hudi.common.util.MarkerUtils;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.exception.HoodieException;
import org.apache.hudi.table.HoodieTable;
import org.apache.hudi.table.marker.DirectWriteMarkers;
import com.esotericsoftware.minlog.Log;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.function.Predicate;
import java.util.stream.Collectors;
import static org.apache.hudi.common.util.MarkerUtils.MARKERS_FILENAME_PREFIX;
public abstract class BaseTwoToOneDowngradeHandler implements DowngradeHandler {
@Override
public Map<ConfigProperty, String> downgrade(HoodieWriteConfig config, HoodieEngineContext context, String instantTime) {
HoodieTable table = getTable(config, context);
HoodieTableMetaClient metaClient = table.getMetaClient();
// re-create marker files if any partial timeline server based markers are found
HoodieTimeline inflightTimeline = metaClient.getCommitsTimeline().filterPendingExcludingCompaction();
List<HoodieInstant> commits = inflightTimeline.getReverseOrderedInstants().collect(Collectors.toList());
for (HoodieInstant inflightInstant : commits) {
// Converts the markers in new format to old format of direct markers
try {
convertToDirectMarkers(
inflightInstant.getTimestamp(), table, context, config.getMarkersDeleteParallelism());
} catch (IOException e) {
throw new HoodieException("Converting marker files to DIRECT style failed during downgrade", e);
}
}
return Collections.EMPTY_MAP;
}
abstract HoodieTable getTable(HoodieWriteConfig config, HoodieEngineContext context);
/**
* Converts the markers in new format(timeline server based) to old format of direct markers,
* i.e., one marker file per data file, without MARKERS.type file.
* This needs to be idempotent.
* 1. read all markers from timeline server based marker files
* 2. create direct style markers
* 3. delete marker type file
* 4. delete timeline server based marker files
*
* @param commitInstantTime instant of interest for marker conversion.
* @param table instance of {@link HoodieTable} to use
* @param context instance of {@link HoodieEngineContext} to use
* @param parallelism parallelism to use
*/
private void convertToDirectMarkers(final String commitInstantTime,
HoodieTable table,
HoodieEngineContext context,
int parallelism) throws IOException {
String markerDir = table.getMetaClient().getMarkerFolderPath(commitInstantTime);
FileSystem fileSystem = FSUtils.getFs(markerDir, context.getHadoopConf().newCopy());
Option<MarkerType> markerTypeOption = MarkerUtils.readMarkerType(fileSystem, markerDir);
if (markerTypeOption.isPresent()) {
switch (markerTypeOption.get()) {
case TIMELINE_SERVER_BASED:
// Reads all markers written by the timeline server
Map<String, Set<String>> markersMap =
MarkerUtils.readTimelineServerBasedMarkersFromFileSystem(
markerDir, fileSystem, context, parallelism);
DirectWriteMarkers directWriteMarkers = new DirectWriteMarkers(table, commitInstantTime);
// Recreates the markers in the direct format
markersMap.values().stream().flatMap(Collection::stream)
.forEach(directWriteMarkers::create);
// Deletes marker type file
MarkerUtils.deleteMarkerTypeFile(fileSystem, markerDir);
// Deletes timeline server based markers
deleteTimelineBasedMarkerFiles(markerDir, fileSystem);
break;
default:
throw new HoodieException("The marker type \"" + markerTypeOption.get().name()
+ "\" is not supported for rollback.");
}
} else {
// In case of partial failures during downgrade, there is a chance that marker type file was deleted,
// but timeline server based marker files are left. So deletes them if any
deleteTimelineBasedMarkerFiles(markerDir, fileSystem);
}
}
private void deleteTimelineBasedMarkerFiles(String markerDir, FileSystem fileSystem) throws IOException {
// Deletes timeline based marker files if any.
Path dirPath = new Path(markerDir);
FileStatus[] fileStatuses = fileSystem.listStatus(dirPath);
Predicate<FileStatus> prefixFilter = fileStatus ->
fileStatus.getPath().getName().startsWith(MARKERS_FILENAME_PREFIX);
List<String> markerDirSubPaths = Arrays.stream(fileStatuses)
.filter(prefixFilter)
.map(fileStatus -> fileStatus.getPath().toString())
.collect(Collectors.toList());
markerDirSubPaths.forEach(fileToDelete -> {
try {
fileSystem.delete(new Path(fileToDelete), false);
} catch (IOException e) {
Log.warn("Deleting Timeline based marker files failed ", e);
}
});
}
}

View File

@@ -0,0 +1,144 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hudi.table.upgrade;
import org.apache.hudi.common.HoodieRollbackStat;
import org.apache.hudi.common.config.ConfigProperty;
import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.common.fs.FSUtils;
import org.apache.hudi.common.model.HoodieTableType;
import org.apache.hudi.common.model.IOType;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.marker.MarkerType;
import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.exception.HoodieRollbackException;
import org.apache.hudi.table.HoodieTable;
import org.apache.hudi.table.action.rollback.ListingBasedRollbackRequest;
import org.apache.hudi.table.action.rollback.RollbackUtils;
import org.apache.hudi.table.marker.WriteMarkers;
import org.apache.hudi.table.marker.WriteMarkersFactory;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
public abstract class BaseZeroToOneUpgradeHandler implements UpgradeHandler {
@Override
public Map<ConfigProperty, String> upgrade(HoodieWriteConfig config, HoodieEngineContext context, String instantTime) {
// fetch pending commit info
//HoodieSparkTable table = HoodieSparkTable.create(config, context);
HoodieTable table = getTable(config, context);
HoodieTimeline inflightTimeline = table.getMetaClient().getCommitsTimeline().filterPendingExcludingCompaction();
List<String> commits = inflightTimeline.getReverseOrderedInstants().map(HoodieInstant::getTimestamp)
.collect(Collectors.toList());
if (commits.size() > 0 && instantTime != null) {
// ignore the latest inflight commit since a new commit would have been started and we need to fix any pending commits from previous launch
commits.remove(instantTime);
}
for (String commit : commits) {
// for every pending commit, delete old markers and re-create markers in new format
recreateMarkers(commit, table, context, config.getMarkersDeleteParallelism());
}
return Collections.EMPTY_MAP;
}
abstract HoodieTable getTable(HoodieWriteConfig config, HoodieEngineContext context);
/**
* Recreate markers in new format.
* Step1: Delete existing markers
* Step2: Collect all rollback file info.
* Step3: recreate markers for all interested files.
*
* @param commitInstantTime instant of interest for which markers need to be recreated.
* @param table instance of {@link HoodieTable} to use
* @param context instance of {@link HoodieEngineContext} to use
* @throws HoodieRollbackException on any exception during upgrade.
*/
protected void recreateMarkers(final String commitInstantTime,
HoodieTable table,
HoodieEngineContext context,
int parallelism) throws HoodieRollbackException {
try {
// fetch hoodie instant
Option<HoodieInstant> commitInstantOpt = Option.fromJavaOptional(table.getActiveTimeline().getCommitsTimeline().getInstants()
.filter(instant -> HoodieActiveTimeline.EQUALS.test(instant.getTimestamp(), commitInstantTime))
.findFirst());
if (commitInstantOpt.isPresent()) {
// delete existing markers
WriteMarkers writeMarkers = WriteMarkersFactory.get(MarkerType.DIRECT, table, commitInstantTime);
writeMarkers.quietDeleteMarkerDir(context, parallelism);
// generate rollback stats
List<ListingBasedRollbackRequest> rollbackRequests;
if (table.getMetaClient().getTableType() == HoodieTableType.COPY_ON_WRITE) {
rollbackRequests = RollbackUtils.generateRollbackRequestsByListingCOW(context, table.getMetaClient().getBasePath(), table.getConfig());
} else {
rollbackRequests = RollbackUtils.generateRollbackRequestsUsingFileListingMOR(commitInstantOpt.get(), table, context);
}
List<HoodieRollbackStat> rollbackStats = getListBasedRollBackStats(table.getMetaClient(), table.getConfig(),
context, commitInstantOpt, rollbackRequests);
// recreate markers adhering to marker based rollback
for (HoodieRollbackStat rollbackStat : rollbackStats) {
for (String path : rollbackStat.getSuccessDeleteFiles()) {
String dataFileName = path.substring(path.lastIndexOf("/") + 1);
// not feasible to differentiate MERGE from CREATE. hence creating with MERGE IOType for all base files.
writeMarkers.create(rollbackStat.getPartitionPath(), dataFileName, IOType.MERGE);
}
for (FileStatus fileStatus : rollbackStat.getCommandBlocksCount().keySet()) {
writeMarkers.create(rollbackStat.getPartitionPath(), getFileNameForMarkerFromLogFile(fileStatus.getPath().toString(), table), IOType.APPEND);
}
}
}
} catch (Exception e) {
throw new HoodieRollbackException("Exception thrown while upgrading Hoodie Table from version 0 to 1", e);
}
}
abstract List<HoodieRollbackStat> getListBasedRollBackStats(HoodieTableMetaClient metaClient, HoodieWriteConfig config,
HoodieEngineContext context, Option<HoodieInstant> commitInstantOpt,
List<ListingBasedRollbackRequest> rollbackRequests);
/**
* Curates file name for marker from existing log file path.
* log file format : partitionpath/.fileid_baseInstant.log.writetoken
* marker file format : partitionpath/fileId_writetoken_baseinstant.basefileExtn.marker.APPEND
*
* @param logFilePath log file path for which marker file name needs to be generated.
* @return the marker file name thus curated.
*/
private static String getFileNameForMarkerFromLogFile(String logFilePath, HoodieTable table) {
Path logPath = new Path(table.getMetaClient().getBasePath(), logFilePath);
String fileId = FSUtils.getFileIdFromLogPath(logPath);
String baseInstant = FSUtils.getBaseCommitTimeFromLogPath(logPath);
String writeToken = FSUtils.getWriteTokenFromLogPath(logPath);
return FSUtils.makeDataFileName(baseInstant, writeToken, fileId, table.getBaseFileFormat().getFileExtension());
}
}

View File

@@ -18,9 +18,12 @@
package org.apache.hudi.table.upgrade;
import org.apache.hudi.common.config.ConfigProperty;
import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.config.HoodieWriteConfig;
import java.util.Map;
/**
* Interface to assist in downgrading Hoodie table.
*/
@@ -32,6 +35,7 @@ public interface DowngradeHandler {
* @param config instance of {@link HoodieWriteConfig} to be used.
* @param context instance of {@link HoodieEngineContext} to be used.
* @param instantTime current instant time that should not touched.
* @return Map of config properties and its values to be added to table properties.
*/
void downgrade(HoodieWriteConfig config, HoodieEngineContext context, String instantTime);
Map<ConfigProperty, String> downgrade(HoodieWriteConfig config, HoodieEngineContext context, String instantTime);
}

View File

@@ -18,9 +18,12 @@
package org.apache.hudi.table.upgrade;
import org.apache.hudi.common.config.ConfigProperty;
import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.config.HoodieWriteConfig;
import java.util.Map;
/**
* Interface to assist in upgrading Hoodie table.
*/
@@ -32,6 +35,7 @@ public interface UpgradeHandler {
* @param config instance of {@link HoodieWriteConfig} to be used.
* @param context instance of {@link HoodieEngineContext} to be used.
* @param instantTime current instant time that should not be touched.
* @return Map of config properties and its values to be added to table properties.
*/
void upgrade(HoodieWriteConfig config, HoodieEngineContext context, String instantTime);
Map<ConfigProperty, String> upgrade(HoodieWriteConfig config, HoodieEngineContext context, String instantTime);
}