[HUDI-2268] Add upgrade and downgrade to and from 0.9.0 (#3470)
- Added upgrade and downgrade step to and from 0.9.0. Upgrade adds few table properties. Downgrade recreates timeline server based marker files if any.
This commit is contained in:
@@ -28,15 +28,17 @@ import org.apache.spark.launcher.SparkLauncher;
|
||||
import org.springframework.shell.core.CommandMarker;
|
||||
import org.springframework.shell.core.annotation.CliCommand;
|
||||
import org.springframework.shell.core.annotation.CliOption;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
/**
|
||||
* CLI command to assist in upgrading/downgrading Hoodie dataset to a different version.
|
||||
* CLI command to assist in upgrading/downgrading Hoodie table to a different version.
|
||||
*/
|
||||
@Component
|
||||
public class UpgradeOrDowngradeCommand implements CommandMarker {
|
||||
|
||||
@CliCommand(value = "upgrade hoodie dataset ", help = "Upgrades hoodie dataset")
|
||||
public String upgradeHoodieDataset(
|
||||
@CliOption(key = {"toVersion"}, help = "To version of Hoodie dataset to be upgraded/downgraded to", unspecifiedDefaultValue = "") final String toVersion,
|
||||
@CliCommand(value = "upgrade table", help = "Upgrades a table")
|
||||
public String upgradeHoodieTable(
|
||||
@CliOption(key = {"toVersion"}, help = "To version of Hoodie table to be upgraded/downgraded to", unspecifiedDefaultValue = "") final String toVersion,
|
||||
@CliOption(key = {"sparkProperties"}, help = "Spark Properties File Path") final String sparkPropertiesPath,
|
||||
@CliOption(key = "sparkMaster", unspecifiedDefaultValue = "", help = "Spark Master") String master,
|
||||
@CliOption(key = "sparkMemory", unspecifiedDefaultValue = "4G",
|
||||
@@ -52,14 +54,14 @@ public class UpgradeOrDowngradeCommand implements CommandMarker {
|
||||
int exitCode = process.waitFor();
|
||||
HoodieCLI.refreshTableMetadata();
|
||||
if (exitCode != 0) {
|
||||
return String.format("Failed: Could not Upgrade/Downgrade Hoodie dataset to \"%s\".", toVersion);
|
||||
return String.format("Failed: Could not Upgrade/Downgrade Hoodie table to \"%s\".", toVersion);
|
||||
}
|
||||
return String.format("Hoodie dataset upgraded/downgraded to ", toVersion);
|
||||
return String.format("Hoodie table upgraded/downgraded to ", toVersion);
|
||||
}
|
||||
|
||||
@CliCommand(value = "downgrade hoodie dataset ", help = "Upgrades hoodie dataset")
|
||||
public String downgradeHoodieDataset(
|
||||
@CliOption(key = {"toVersion"}, help = "To version of Hoodie dataset to be upgraded/downgraded to", unspecifiedDefaultValue = "") final String toVersion,
|
||||
@CliCommand(value = "downgrade table", help = "Downgrades a table")
|
||||
public String downgradeHoodieTable(
|
||||
@CliOption(key = {"toVersion"}, help = "To version of Hoodie table to be upgraded/downgraded to", unspecifiedDefaultValue = "") final String toVersion,
|
||||
@CliOption(key = {"sparkProperties"}, help = "Spark Properties File Path") final String sparkPropertiesPath,
|
||||
@CliOption(key = "sparkMaster", unspecifiedDefaultValue = "", help = "Spark Master") String master,
|
||||
@CliOption(key = "sparkMemory", unspecifiedDefaultValue = "4G",
|
||||
@@ -74,8 +76,8 @@ public class UpgradeOrDowngradeCommand implements CommandMarker {
|
||||
int exitCode = process.waitFor();
|
||||
HoodieCLI.refreshTableMetadata();
|
||||
if (exitCode != 0) {
|
||||
return String.format("Failed: Could not Upgrade/Downgrade Hoodie dataset to \"%s\".", toVersion);
|
||||
return String.format("Failed: Could not Upgrade/Downgrade Hoodie table to \"%s\".", toVersion);
|
||||
}
|
||||
return String.format("Hoodie dataset upgraded/downgraded to ", toVersion);
|
||||
return String.format("Hoodie table upgraded/downgraded to ", toVersion);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -166,10 +166,23 @@ public class DirectWriteMarkers extends WriteMarkers {
|
||||
return markerFiles;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a marker file based on the full marker name excluding the base path and instant.
|
||||
*
|
||||
* @param markerName the full marker name, e.g., "2021/08/13/file1.marker.CREATE"
|
||||
* @return path of the marker file
|
||||
*/
|
||||
public Option<Path> create(String markerName) {
|
||||
return create(new Path(markerDirPath, markerName), true);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Option<Path> create(String partitionPath, String dataFileName, IOType type, boolean checkIfExists) {
|
||||
return create(getMarkerPath(partitionPath, dataFileName, type), checkIfExists);
|
||||
}
|
||||
|
||||
private Option<Path> create(Path markerPath, boolean checkIfExists) {
|
||||
HoodieTimer timer = new HoodieTimer().startTimer();
|
||||
Path markerPath = getMarkerPath(partitionPath, dataFileName, type);
|
||||
Path dirPath = markerPath.getParent();
|
||||
try {
|
||||
if (!fs.exists(dirPath)) {
|
||||
|
||||
@@ -18,6 +18,7 @@
|
||||
|
||||
package org.apache.hudi.table.upgrade;
|
||||
|
||||
import org.apache.hudi.common.config.ConfigProperty;
|
||||
import org.apache.hudi.common.engine.HoodieEngineContext;
|
||||
import org.apache.hudi.common.table.HoodieTableConfig;
|
||||
import org.apache.hudi.common.table.HoodieTableMetaClient;
|
||||
@@ -33,6 +34,8 @@ import org.apache.log4j.Logger;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Date;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.Properties;
|
||||
|
||||
/**
|
||||
@@ -112,15 +115,27 @@ public abstract class AbstractUpgradeDowngrade {
|
||||
|
||||
// Perform the actual upgrade/downgrade; this has to be idempotent, for now.
|
||||
LOG.info("Attempting to move table from version " + fromVersion + " to " + toVersion);
|
||||
Map<ConfigProperty, String> tableProps = new HashMap<>();
|
||||
if (fromVersion.versionCode() < toVersion.versionCode()) {
|
||||
// upgrade
|
||||
upgrade(fromVersion, toVersion, instantTime);
|
||||
while (fromVersion.versionCode() < toVersion.versionCode()) {
|
||||
HoodieTableVersion nextVersion = HoodieTableVersion.versionFromCode(fromVersion.versionCode() + 1);
|
||||
tableProps.putAll(upgrade(fromVersion, nextVersion, instantTime));
|
||||
fromVersion = nextVersion;
|
||||
}
|
||||
} else {
|
||||
// downgrade
|
||||
downgrade(fromVersion, toVersion, instantTime);
|
||||
while (fromVersion.versionCode() > toVersion.versionCode()) {
|
||||
HoodieTableVersion prevVersion = HoodieTableVersion.versionFromCode(fromVersion.versionCode() - 1);
|
||||
tableProps.putAll(downgrade(fromVersion, prevVersion, instantTime));
|
||||
fromVersion = prevVersion;
|
||||
}
|
||||
}
|
||||
|
||||
// Write out the current version in hoodie.properties.updated file
|
||||
for (Map.Entry<ConfigProperty, String> entry: tableProps.entrySet()) {
|
||||
metaClient.getTableConfig().setValue(entry.getKey(), entry.getValue());
|
||||
}
|
||||
metaClient.getTableConfig().setTableVersion(toVersion);
|
||||
createUpdatedFile(metaClient.getTableConfig().getProps());
|
||||
|
||||
@@ -143,7 +158,7 @@ public abstract class AbstractUpgradeDowngrade {
|
||||
}
|
||||
}
|
||||
|
||||
protected abstract void upgrade(HoodieTableVersion fromVersion, HoodieTableVersion toVersion, String instantTime);
|
||||
protected abstract Map<ConfigProperty, String> upgrade(HoodieTableVersion fromVersion, HoodieTableVersion toVersion, String instantTime);
|
||||
|
||||
protected abstract void downgrade(HoodieTableVersion fromVersion, HoodieTableVersion toVersion, String instantTime);
|
||||
protected abstract Map<ConfigProperty, String> downgrade(HoodieTableVersion fromVersion, HoodieTableVersion toVersion, String instantTime);
|
||||
}
|
||||
|
||||
@@ -0,0 +1,42 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi.table.upgrade;
|
||||
|
||||
import org.apache.hudi.common.config.ConfigProperty;
|
||||
import org.apache.hudi.common.engine.HoodieEngineContext;
|
||||
import org.apache.hudi.common.table.HoodieTableConfig;
|
||||
import org.apache.hudi.config.HoodieWriteConfig;
|
||||
import org.apache.hudi.keygen.constant.KeyGeneratorOptions;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
public abstract class BaseOneToTwoUpgradeHandler implements UpgradeHandler {
|
||||
|
||||
@Override
|
||||
public Map<ConfigProperty, String> upgrade(HoodieWriteConfig config, HoodieEngineContext context, String instantTime) {
|
||||
Map<ConfigProperty, String> tablePropsToAdd = new HashMap<>();
|
||||
tablePropsToAdd.put(HoodieTableConfig.HOODIE_TABLE_PARTITION_FIELDS_PROP, getPartitionColumns(config));
|
||||
tablePropsToAdd.put(HoodieTableConfig.HOODIE_TABLE_RECORDKEY_FIELDS, config.getString(KeyGeneratorOptions.RECORDKEY_FIELD.key()));
|
||||
tablePropsToAdd.put(HoodieTableConfig.HOODIE_BASE_FILE_FORMAT_PROP, config.getString(HoodieTableConfig.HOODIE_BASE_FILE_FORMAT_PROP));
|
||||
return tablePropsToAdd;
|
||||
}
|
||||
|
||||
abstract String getPartitionColumns(HoodieWriteConfig config);
|
||||
}
|
||||
@@ -0,0 +1,52 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi.table.upgrade;
|
||||
|
||||
import org.apache.hudi.common.config.ConfigProperty;
|
||||
import org.apache.hudi.common.engine.HoodieEngineContext;
|
||||
import org.apache.hudi.common.table.timeline.HoodieInstant;
|
||||
import org.apache.hudi.common.table.timeline.HoodieTimeline;
|
||||
import org.apache.hudi.config.HoodieWriteConfig;
|
||||
import org.apache.hudi.table.HoodieTable;
|
||||
import org.apache.hudi.table.marker.WriteMarkers;
|
||||
import org.apache.hudi.table.marker.WriteMarkersFactory;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
public abstract class BaseOneToZeroDowngradeHandler implements DowngradeHandler {
|
||||
|
||||
@Override
|
||||
public Map<ConfigProperty, String> downgrade(HoodieWriteConfig config, HoodieEngineContext context, String instantTime) {
|
||||
// fetch pending commit info
|
||||
HoodieTable table = getTable(config, context);
|
||||
HoodieTimeline inflightTimeline = table.getMetaClient().getCommitsTimeline().filterPendingExcludingCompaction();
|
||||
List<HoodieInstant> commits = inflightTimeline.getReverseOrderedInstants().collect(Collectors.toList());
|
||||
for (HoodieInstant inflightInstant : commits) {
|
||||
// delete existing markers
|
||||
WriteMarkers writeMarkers = WriteMarkersFactory.get(config.getMarkersType(), table, inflightInstant.getTimestamp());
|
||||
writeMarkers.quietDeleteMarkerDir(context, config.getMarkersDeleteParallelism());
|
||||
}
|
||||
return Collections.EMPTY_MAP;
|
||||
}
|
||||
|
||||
abstract HoodieTable getTable(HoodieWriteConfig config, HoodieEngineContext context);
|
||||
}
|
||||
@@ -0,0 +1,142 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi.table.upgrade;
|
||||
|
||||
import org.apache.hudi.common.config.ConfigProperty;
|
||||
import org.apache.hudi.common.engine.HoodieEngineContext;
|
||||
import org.apache.hudi.common.fs.FSUtils;
|
||||
import org.apache.hudi.common.table.HoodieTableMetaClient;
|
||||
import org.apache.hudi.common.table.marker.MarkerType;
|
||||
import org.apache.hudi.common.table.timeline.HoodieInstant;
|
||||
import org.apache.hudi.common.table.timeline.HoodieTimeline;
|
||||
import org.apache.hudi.common.util.MarkerUtils;
|
||||
import org.apache.hudi.common.util.Option;
|
||||
import org.apache.hudi.config.HoodieWriteConfig;
|
||||
import org.apache.hudi.exception.HoodieException;
|
||||
import org.apache.hudi.table.HoodieTable;
|
||||
import org.apache.hudi.table.marker.DirectWriteMarkers;
|
||||
|
||||
import com.esotericsoftware.minlog.Log;
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.function.Predicate;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import static org.apache.hudi.common.util.MarkerUtils.MARKERS_FILENAME_PREFIX;
|
||||
|
||||
public abstract class BaseTwoToOneDowngradeHandler implements DowngradeHandler {
|
||||
|
||||
@Override
|
||||
public Map<ConfigProperty, String> downgrade(HoodieWriteConfig config, HoodieEngineContext context, String instantTime) {
|
||||
HoodieTable table = getTable(config, context);
|
||||
HoodieTableMetaClient metaClient = table.getMetaClient();
|
||||
|
||||
// re-create marker files if any partial timeline server based markers are found
|
||||
HoodieTimeline inflightTimeline = metaClient.getCommitsTimeline().filterPendingExcludingCompaction();
|
||||
List<HoodieInstant> commits = inflightTimeline.getReverseOrderedInstants().collect(Collectors.toList());
|
||||
for (HoodieInstant inflightInstant : commits) {
|
||||
// Converts the markers in new format to old format of direct markers
|
||||
try {
|
||||
convertToDirectMarkers(
|
||||
inflightInstant.getTimestamp(), table, context, config.getMarkersDeleteParallelism());
|
||||
} catch (IOException e) {
|
||||
throw new HoodieException("Converting marker files to DIRECT style failed during downgrade", e);
|
||||
}
|
||||
}
|
||||
return Collections.EMPTY_MAP;
|
||||
}
|
||||
|
||||
abstract HoodieTable getTable(HoodieWriteConfig config, HoodieEngineContext context);
|
||||
|
||||
/**
|
||||
* Converts the markers in new format(timeline server based) to old format of direct markers,
|
||||
* i.e., one marker file per data file, without MARKERS.type file.
|
||||
* This needs to be idempotent.
|
||||
* 1. read all markers from timeline server based marker files
|
||||
* 2. create direct style markers
|
||||
* 3. delete marker type file
|
||||
* 4. delete timeline server based marker files
|
||||
*
|
||||
* @param commitInstantTime instant of interest for marker conversion.
|
||||
* @param table instance of {@link HoodieTable} to use
|
||||
* @param context instance of {@link HoodieEngineContext} to use
|
||||
* @param parallelism parallelism to use
|
||||
*/
|
||||
private void convertToDirectMarkers(final String commitInstantTime,
|
||||
HoodieTable table,
|
||||
HoodieEngineContext context,
|
||||
int parallelism) throws IOException {
|
||||
String markerDir = table.getMetaClient().getMarkerFolderPath(commitInstantTime);
|
||||
FileSystem fileSystem = FSUtils.getFs(markerDir, context.getHadoopConf().newCopy());
|
||||
Option<MarkerType> markerTypeOption = MarkerUtils.readMarkerType(fileSystem, markerDir);
|
||||
if (markerTypeOption.isPresent()) {
|
||||
switch (markerTypeOption.get()) {
|
||||
case TIMELINE_SERVER_BASED:
|
||||
// Reads all markers written by the timeline server
|
||||
Map<String, Set<String>> markersMap =
|
||||
MarkerUtils.readTimelineServerBasedMarkersFromFileSystem(
|
||||
markerDir, fileSystem, context, parallelism);
|
||||
DirectWriteMarkers directWriteMarkers = new DirectWriteMarkers(table, commitInstantTime);
|
||||
// Recreates the markers in the direct format
|
||||
markersMap.values().stream().flatMap(Collection::stream)
|
||||
.forEach(directWriteMarkers::create);
|
||||
// Deletes marker type file
|
||||
MarkerUtils.deleteMarkerTypeFile(fileSystem, markerDir);
|
||||
// Deletes timeline server based markers
|
||||
deleteTimelineBasedMarkerFiles(markerDir, fileSystem);
|
||||
break;
|
||||
default:
|
||||
throw new HoodieException("The marker type \"" + markerTypeOption.get().name()
|
||||
+ "\" is not supported for rollback.");
|
||||
}
|
||||
} else {
|
||||
// In case of partial failures during downgrade, there is a chance that marker type file was deleted,
|
||||
// but timeline server based marker files are left. So deletes them if any
|
||||
deleteTimelineBasedMarkerFiles(markerDir, fileSystem);
|
||||
}
|
||||
}
|
||||
|
||||
private void deleteTimelineBasedMarkerFiles(String markerDir, FileSystem fileSystem) throws IOException {
|
||||
// Deletes timeline based marker files if any.
|
||||
Path dirPath = new Path(markerDir);
|
||||
FileStatus[] fileStatuses = fileSystem.listStatus(dirPath);
|
||||
Predicate<FileStatus> prefixFilter = fileStatus ->
|
||||
fileStatus.getPath().getName().startsWith(MARKERS_FILENAME_PREFIX);
|
||||
List<String> markerDirSubPaths = Arrays.stream(fileStatuses)
|
||||
.filter(prefixFilter)
|
||||
.map(fileStatus -> fileStatus.getPath().toString())
|
||||
.collect(Collectors.toList());
|
||||
markerDirSubPaths.forEach(fileToDelete -> {
|
||||
try {
|
||||
fileSystem.delete(new Path(fileToDelete), false);
|
||||
} catch (IOException e) {
|
||||
Log.warn("Deleting Timeline based marker files failed ", e);
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,144 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi.table.upgrade;
|
||||
|
||||
import org.apache.hudi.common.HoodieRollbackStat;
|
||||
import org.apache.hudi.common.config.ConfigProperty;
|
||||
import org.apache.hudi.common.engine.HoodieEngineContext;
|
||||
import org.apache.hudi.common.fs.FSUtils;
|
||||
import org.apache.hudi.common.model.HoodieTableType;
|
||||
import org.apache.hudi.common.model.IOType;
|
||||
import org.apache.hudi.common.table.HoodieTableMetaClient;
|
||||
import org.apache.hudi.common.table.marker.MarkerType;
|
||||
import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
|
||||
import org.apache.hudi.common.table.timeline.HoodieInstant;
|
||||
import org.apache.hudi.common.table.timeline.HoodieTimeline;
|
||||
import org.apache.hudi.common.util.Option;
|
||||
import org.apache.hudi.config.HoodieWriteConfig;
|
||||
import org.apache.hudi.exception.HoodieRollbackException;
|
||||
import org.apache.hudi.table.HoodieTable;
|
||||
import org.apache.hudi.table.action.rollback.ListingBasedRollbackRequest;
|
||||
import org.apache.hudi.table.action.rollback.RollbackUtils;
|
||||
import org.apache.hudi.table.marker.WriteMarkers;
|
||||
import org.apache.hudi.table.marker.WriteMarkersFactory;
|
||||
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
public abstract class BaseZeroToOneUpgradeHandler implements UpgradeHandler {
|
||||
|
||||
@Override
|
||||
public Map<ConfigProperty, String> upgrade(HoodieWriteConfig config, HoodieEngineContext context, String instantTime) {
|
||||
// fetch pending commit info
|
||||
//HoodieSparkTable table = HoodieSparkTable.create(config, context);
|
||||
HoodieTable table = getTable(config, context);
|
||||
HoodieTimeline inflightTimeline = table.getMetaClient().getCommitsTimeline().filterPendingExcludingCompaction();
|
||||
List<String> commits = inflightTimeline.getReverseOrderedInstants().map(HoodieInstant::getTimestamp)
|
||||
.collect(Collectors.toList());
|
||||
if (commits.size() > 0 && instantTime != null) {
|
||||
// ignore the latest inflight commit since a new commit would have been started and we need to fix any pending commits from previous launch
|
||||
commits.remove(instantTime);
|
||||
}
|
||||
for (String commit : commits) {
|
||||
// for every pending commit, delete old markers and re-create markers in new format
|
||||
recreateMarkers(commit, table, context, config.getMarkersDeleteParallelism());
|
||||
}
|
||||
return Collections.EMPTY_MAP;
|
||||
}
|
||||
|
||||
abstract HoodieTable getTable(HoodieWriteConfig config, HoodieEngineContext context);
|
||||
|
||||
/**
|
||||
* Recreate markers in new format.
|
||||
* Step1: Delete existing markers
|
||||
* Step2: Collect all rollback file info.
|
||||
* Step3: recreate markers for all interested files.
|
||||
*
|
||||
* @param commitInstantTime instant of interest for which markers need to be recreated.
|
||||
* @param table instance of {@link HoodieTable} to use
|
||||
* @param context instance of {@link HoodieEngineContext} to use
|
||||
* @throws HoodieRollbackException on any exception during upgrade.
|
||||
*/
|
||||
protected void recreateMarkers(final String commitInstantTime,
|
||||
HoodieTable table,
|
||||
HoodieEngineContext context,
|
||||
int parallelism) throws HoodieRollbackException {
|
||||
try {
|
||||
// fetch hoodie instant
|
||||
Option<HoodieInstant> commitInstantOpt = Option.fromJavaOptional(table.getActiveTimeline().getCommitsTimeline().getInstants()
|
||||
.filter(instant -> HoodieActiveTimeline.EQUALS.test(instant.getTimestamp(), commitInstantTime))
|
||||
.findFirst());
|
||||
if (commitInstantOpt.isPresent()) {
|
||||
// delete existing markers
|
||||
WriteMarkers writeMarkers = WriteMarkersFactory.get(MarkerType.DIRECT, table, commitInstantTime);
|
||||
writeMarkers.quietDeleteMarkerDir(context, parallelism);
|
||||
|
||||
// generate rollback stats
|
||||
List<ListingBasedRollbackRequest> rollbackRequests;
|
||||
if (table.getMetaClient().getTableType() == HoodieTableType.COPY_ON_WRITE) {
|
||||
rollbackRequests = RollbackUtils.generateRollbackRequestsByListingCOW(context, table.getMetaClient().getBasePath(), table.getConfig());
|
||||
} else {
|
||||
rollbackRequests = RollbackUtils.generateRollbackRequestsUsingFileListingMOR(commitInstantOpt.get(), table, context);
|
||||
}
|
||||
List<HoodieRollbackStat> rollbackStats = getListBasedRollBackStats(table.getMetaClient(), table.getConfig(),
|
||||
context, commitInstantOpt, rollbackRequests);
|
||||
|
||||
// recreate markers adhering to marker based rollback
|
||||
for (HoodieRollbackStat rollbackStat : rollbackStats) {
|
||||
for (String path : rollbackStat.getSuccessDeleteFiles()) {
|
||||
String dataFileName = path.substring(path.lastIndexOf("/") + 1);
|
||||
// not feasible to differentiate MERGE from CREATE. hence creating with MERGE IOType for all base files.
|
||||
writeMarkers.create(rollbackStat.getPartitionPath(), dataFileName, IOType.MERGE);
|
||||
}
|
||||
for (FileStatus fileStatus : rollbackStat.getCommandBlocksCount().keySet()) {
|
||||
writeMarkers.create(rollbackStat.getPartitionPath(), getFileNameForMarkerFromLogFile(fileStatus.getPath().toString(), table), IOType.APPEND);
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (Exception e) {
|
||||
throw new HoodieRollbackException("Exception thrown while upgrading Hoodie Table from version 0 to 1", e);
|
||||
}
|
||||
}
|
||||
|
||||
abstract List<HoodieRollbackStat> getListBasedRollBackStats(HoodieTableMetaClient metaClient, HoodieWriteConfig config,
|
||||
HoodieEngineContext context, Option<HoodieInstant> commitInstantOpt,
|
||||
List<ListingBasedRollbackRequest> rollbackRequests);
|
||||
|
||||
/**
|
||||
* Curates file name for marker from existing log file path.
|
||||
* log file format : partitionpath/.fileid_baseInstant.log.writetoken
|
||||
* marker file format : partitionpath/fileId_writetoken_baseinstant.basefileExtn.marker.APPEND
|
||||
*
|
||||
* @param logFilePath log file path for which marker file name needs to be generated.
|
||||
* @return the marker file name thus curated.
|
||||
*/
|
||||
private static String getFileNameForMarkerFromLogFile(String logFilePath, HoodieTable table) {
|
||||
Path logPath = new Path(table.getMetaClient().getBasePath(), logFilePath);
|
||||
String fileId = FSUtils.getFileIdFromLogPath(logPath);
|
||||
String baseInstant = FSUtils.getBaseCommitTimeFromLogPath(logPath);
|
||||
String writeToken = FSUtils.getWriteTokenFromLogPath(logPath);
|
||||
|
||||
return FSUtils.makeDataFileName(baseInstant, writeToken, fileId, table.getBaseFileFormat().getFileExtension());
|
||||
}
|
||||
}
|
||||
@@ -18,9 +18,12 @@
|
||||
|
||||
package org.apache.hudi.table.upgrade;
|
||||
|
||||
import org.apache.hudi.common.config.ConfigProperty;
|
||||
import org.apache.hudi.common.engine.HoodieEngineContext;
|
||||
import org.apache.hudi.config.HoodieWriteConfig;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Interface to assist in downgrading Hoodie table.
|
||||
*/
|
||||
@@ -32,6 +35,7 @@ public interface DowngradeHandler {
|
||||
* @param config instance of {@link HoodieWriteConfig} to be used.
|
||||
* @param context instance of {@link HoodieEngineContext} to be used.
|
||||
* @param instantTime current instant time that should not touched.
|
||||
* @return Map of config properties and its values to be added to table properties.
|
||||
*/
|
||||
void downgrade(HoodieWriteConfig config, HoodieEngineContext context, String instantTime);
|
||||
Map<ConfigProperty, String> downgrade(HoodieWriteConfig config, HoodieEngineContext context, String instantTime);
|
||||
}
|
||||
|
||||
@@ -18,9 +18,12 @@
|
||||
|
||||
package org.apache.hudi.table.upgrade;
|
||||
|
||||
import org.apache.hudi.common.config.ConfigProperty;
|
||||
import org.apache.hudi.common.engine.HoodieEngineContext;
|
||||
import org.apache.hudi.config.HoodieWriteConfig;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Interface to assist in upgrading Hoodie table.
|
||||
*/
|
||||
@@ -32,6 +35,7 @@ public interface UpgradeHandler {
|
||||
* @param config instance of {@link HoodieWriteConfig} to be used.
|
||||
* @param context instance of {@link HoodieEngineContext} to be used.
|
||||
* @param instantTime current instant time that should not be touched.
|
||||
* @return Map of config properties and its values to be added to table properties.
|
||||
*/
|
||||
void upgrade(HoodieWriteConfig config, HoodieEngineContext context, String instantTime);
|
||||
Map<ConfigProperty, String> upgrade(HoodieWriteConfig config, HoodieEngineContext context, String instantTime);
|
||||
}
|
||||
|
||||
@@ -18,6 +18,7 @@
|
||||
|
||||
package org.apache.hudi.table.upgrade;
|
||||
|
||||
import org.apache.hudi.common.config.ConfigProperty;
|
||||
import org.apache.hudi.common.engine.HoodieEngineContext;
|
||||
import org.apache.hudi.common.table.HoodieTableMetaClient;
|
||||
import org.apache.hudi.common.table.HoodieTableVersion;
|
||||
@@ -25,6 +26,7 @@ import org.apache.hudi.config.HoodieWriteConfig;
|
||||
import org.apache.hudi.exception.HoodieUpgradeDowngradeException;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
|
||||
public class FlinkUpgradeDowngrade extends AbstractUpgradeDowngrade {
|
||||
public FlinkUpgradeDowngrade(HoodieTableMetaClient metaClient, HoodieWriteConfig config, HoodieEngineContext context) {
|
||||
@@ -42,18 +44,22 @@ public class FlinkUpgradeDowngrade extends AbstractUpgradeDowngrade {
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void upgrade(HoodieTableVersion fromVersion, HoodieTableVersion toVersion, String instantTime) {
|
||||
protected Map<ConfigProperty, String> upgrade(HoodieTableVersion fromVersion, HoodieTableVersion toVersion, String instantTime) {
|
||||
if (fromVersion == HoodieTableVersion.ZERO && toVersion == HoodieTableVersion.ONE) {
|
||||
new ZeroToOneUpgradeHandler().upgrade(config, context, instantTime);
|
||||
return new ZeroToOneUpgradeHandler().upgrade(config, context, instantTime);
|
||||
} else if (fromVersion == HoodieTableVersion.ONE && toVersion == HoodieTableVersion.TWO) {
|
||||
return new OneToTwoUpgradeHandler().upgrade(config, context, instantTime);
|
||||
} else {
|
||||
throw new HoodieUpgradeDowngradeException(fromVersion.versionCode(), toVersion.versionCode(), true);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void downgrade(HoodieTableVersion fromVersion, HoodieTableVersion toVersion, String instantTime) {
|
||||
protected Map<ConfigProperty, String> downgrade(HoodieTableVersion fromVersion, HoodieTableVersion toVersion, String instantTime) {
|
||||
if (fromVersion == HoodieTableVersion.ONE && toVersion == HoodieTableVersion.ZERO) {
|
||||
new OneToZeroDowngradeHandler().downgrade(config, context, instantTime);
|
||||
return new OneToZeroDowngradeHandler().downgrade(config, context, instantTime);
|
||||
} else if (fromVersion == HoodieTableVersion.TWO && toVersion == HoodieTableVersion.ONE) {
|
||||
return new TwoToOneDowngradeHandler().downgrade(config, context, instantTime);
|
||||
} else {
|
||||
throw new HoodieUpgradeDowngradeException(fromVersion.versionCode(), toVersion.versionCode(), false);
|
||||
}
|
||||
|
||||
@@ -0,0 +1,30 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi.table.upgrade;
|
||||
|
||||
import org.apache.hudi.config.HoodieWriteConfig;
|
||||
import org.apache.hudi.keygen.constant.KeyGeneratorOptions;
|
||||
|
||||
public class OneToTwoUpgradeHandler extends BaseOneToTwoUpgradeHandler {
|
||||
|
||||
@Override
|
||||
String getPartitionColumns(HoodieWriteConfig config) {
|
||||
return config.getProps().getProperty(KeyGeneratorOptions.PARTITIONPATH_FIELD.key());
|
||||
}
|
||||
}
|
||||
@@ -20,31 +20,17 @@ package org.apache.hudi.table.upgrade;
|
||||
|
||||
import org.apache.hudi.client.common.HoodieFlinkEngineContext;
|
||||
import org.apache.hudi.common.engine.HoodieEngineContext;
|
||||
import org.apache.hudi.common.table.timeline.HoodieInstant;
|
||||
import org.apache.hudi.common.table.timeline.HoodieTimeline;
|
||||
import org.apache.hudi.config.HoodieWriteConfig;
|
||||
import org.apache.hudi.table.HoodieFlinkTable;
|
||||
import org.apache.hudi.table.marker.WriteMarkers;
|
||||
import org.apache.hudi.table.marker.WriteMarkersFactory;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
import org.apache.hudi.table.HoodieTable;
|
||||
|
||||
/**
|
||||
* Downgrade handle to assist in downgrading hoodie table from version 1 to 0.
|
||||
*/
|
||||
public class OneToZeroDowngradeHandler implements DowngradeHandler {
|
||||
public class OneToZeroDowngradeHandler extends BaseOneToZeroDowngradeHandler {
|
||||
|
||||
@Override
|
||||
public void downgrade(HoodieWriteConfig config, HoodieEngineContext context, String instantTime) {
|
||||
// fetch pending commit info
|
||||
HoodieFlinkTable table = HoodieFlinkTable.create(config, (HoodieFlinkEngineContext) context);
|
||||
HoodieTimeline inflightTimeline = table.getMetaClient().getCommitsTimeline().filterPendingExcludingCompaction();
|
||||
List<HoodieInstant> commits = inflightTimeline.getReverseOrderedInstants().collect(Collectors.toList());
|
||||
for (HoodieInstant commitInstant : commits) {
|
||||
// delete existing markers
|
||||
WriteMarkers writeMarkers = WriteMarkersFactory.get(config.getMarkersType(), table, commitInstant.getTimestamp());
|
||||
writeMarkers.quietDeleteMarkerDir(context, config.getMarkersDeleteParallelism());
|
||||
}
|
||||
HoodieTable getTable(HoodieWriteConfig config, HoodieEngineContext context) {
|
||||
return HoodieFlinkTable.create(config, (HoodieFlinkEngineContext) context);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,32 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi.table.upgrade;
|
||||
|
||||
import org.apache.hudi.client.common.HoodieFlinkEngineContext;
|
||||
import org.apache.hudi.common.engine.HoodieEngineContext;
|
||||
import org.apache.hudi.config.HoodieWriteConfig;
|
||||
import org.apache.hudi.table.HoodieFlinkTable;
|
||||
import org.apache.hudi.table.HoodieTable;
|
||||
|
||||
public class TwoToOneDowngradeHandler extends BaseTwoToOneDowngradeHandler {
|
||||
@Override
|
||||
HoodieTable getTable(HoodieWriteConfig config, HoodieEngineContext context) {
|
||||
return HoodieFlinkTable.create(config, (HoodieFlinkEngineContext) context);
|
||||
}
|
||||
}
|
||||
@@ -21,120 +21,31 @@ package org.apache.hudi.table.upgrade;
|
||||
import org.apache.hudi.client.common.HoodieFlinkEngineContext;
|
||||
import org.apache.hudi.common.HoodieRollbackStat;
|
||||
import org.apache.hudi.common.engine.HoodieEngineContext;
|
||||
import org.apache.hudi.common.fs.FSUtils;
|
||||
import org.apache.hudi.common.model.HoodieTableType;
|
||||
import org.apache.hudi.common.model.IOType;
|
||||
import org.apache.hudi.common.table.marker.MarkerType;
|
||||
import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
|
||||
import org.apache.hudi.common.table.HoodieTableMetaClient;
|
||||
import org.apache.hudi.common.table.timeline.HoodieInstant;
|
||||
import org.apache.hudi.common.table.timeline.HoodieTimeline;
|
||||
import org.apache.hudi.common.util.Option;
|
||||
import org.apache.hudi.config.HoodieWriteConfig;
|
||||
import org.apache.hudi.exception.HoodieRollbackException;
|
||||
import org.apache.hudi.table.HoodieFlinkTable;
|
||||
import org.apache.hudi.table.HoodieTable;
|
||||
import org.apache.hudi.table.action.rollback.ListingBasedRollbackHelper;
|
||||
import org.apache.hudi.table.action.rollback.ListingBasedRollbackRequest;
|
||||
import org.apache.hudi.table.action.rollback.RollbackUtils;
|
||||
import org.apache.hudi.table.marker.WriteMarkers;
|
||||
import org.apache.hudi.table.marker.WriteMarkersFactory;
|
||||
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
/**
|
||||
* Upgrade handle to assist in upgrading hoodie table from version 0 to 1.
|
||||
*/
|
||||
public class ZeroToOneUpgradeHandler implements UpgradeHandler {
|
||||
public class ZeroToOneUpgradeHandler extends BaseZeroToOneUpgradeHandler {
|
||||
|
||||
@Override
|
||||
public void upgrade(HoodieWriteConfig config, HoodieEngineContext context, String instantTime) {
|
||||
// fetch pending commit info
|
||||
HoodieFlinkTable table = HoodieFlinkTable.create(config, (HoodieFlinkEngineContext) context);
|
||||
HoodieTimeline inflightTimeline = table.getMetaClient().getCommitsTimeline().filterPendingExcludingCompaction();
|
||||
List<String> commits = inflightTimeline.getReverseOrderedInstants().map(HoodieInstant::getTimestamp)
|
||||
.collect(Collectors.toList());
|
||||
if (commits.size() > 0 && instantTime != null) {
|
||||
// ignore the latest inflight commit since a new commit would have been started and we need to fix any pending commits from previous launch
|
||||
commits.remove(instantTime);
|
||||
}
|
||||
for (String commit : commits) {
|
||||
// for every pending commit, delete old markers and re-create markers in new format
|
||||
recreateMarkers(commit, table, context, config.getMarkersType(), config.getMarkersDeleteParallelism());
|
||||
}
|
||||
HoodieTable getTable(HoodieWriteConfig config, HoodieEngineContext context) {
|
||||
return HoodieFlinkTable.create(config, (HoodieFlinkEngineContext) context);
|
||||
}
|
||||
|
||||
/**
|
||||
* Recreate markers in new format.
|
||||
* Step1: Delete existing markers
|
||||
* Step2: Collect all rollback file info.
|
||||
* Step3: recreate markers for all interested files.
|
||||
*
|
||||
* @param commitInstantTime instant of interest for which markers need to be recreated.
|
||||
* @param table instance of {@link HoodieFlinkTable} to use
|
||||
* @param context instance of {@link HoodieEngineContext} to use
|
||||
* @param markerType marker type to use
|
||||
* @throws HoodieRollbackException on any exception during upgrade.
|
||||
*/
|
||||
private static void recreateMarkers(final String commitInstantTime,
|
||||
HoodieFlinkTable table,
|
||||
HoodieEngineContext context,
|
||||
MarkerType markerType,
|
||||
int parallelism) throws HoodieRollbackException {
|
||||
try {
|
||||
// fetch hoodie instant
|
||||
Option<HoodieInstant> commitInstantOpt = Option.fromJavaOptional(table.getActiveTimeline().getCommitsTimeline().getInstants()
|
||||
.filter(instant -> HoodieActiveTimeline.EQUALS.test(instant.getTimestamp(), commitInstantTime))
|
||||
.findFirst());
|
||||
if (commitInstantOpt.isPresent()) {
|
||||
// delete existing markers
|
||||
WriteMarkers writeMarkers = WriteMarkersFactory.get(markerType, table, commitInstantTime);
|
||||
writeMarkers.quietDeleteMarkerDir(context, parallelism);
|
||||
|
||||
// generate rollback stats
|
||||
List<ListingBasedRollbackRequest> rollbackRequests;
|
||||
if (table.getMetaClient().getTableType() == HoodieTableType.COPY_ON_WRITE) {
|
||||
rollbackRequests = RollbackUtils.generateRollbackRequestsByListingCOW(context, table.getMetaClient().getBasePath(), table.getConfig());
|
||||
} else {
|
||||
rollbackRequests = RollbackUtils.generateRollbackRequestsUsingFileListingMOR(commitInstantOpt.get(), table, context);
|
||||
}
|
||||
List<HoodieRollbackStat> rollbackStats = new ListingBasedRollbackHelper(table.getMetaClient(), table.getConfig())
|
||||
.collectRollbackStats(context, commitInstantOpt.get(), rollbackRequests);
|
||||
|
||||
// recreate markers adhering to marker based rollback
|
||||
for (HoodieRollbackStat rollbackStat : rollbackStats) {
|
||||
for (String path : rollbackStat.getSuccessDeleteFiles()) {
|
||||
String dataFileName = path.substring(path.lastIndexOf("/") + 1);
|
||||
// not feasible to differentiate MERGE from CREATE. hence creating with MERGE IOType for all base files.
|
||||
writeMarkers.create(rollbackStat.getPartitionPath(), dataFileName, IOType.MERGE);
|
||||
}
|
||||
for (FileStatus fileStatus : rollbackStat.getCommandBlocksCount().keySet()) {
|
||||
writeMarkers.create(rollbackStat.getPartitionPath(), getFileNameForMarkerFromLogFile(fileStatus.getPath().toString(), table), IOType.APPEND);
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (Exception e) {
|
||||
throw new HoodieRollbackException("Exception thrown while upgrading Hoodie Table from version 0 to 1", e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Curates file name for marker from existing log file path.
|
||||
* log file format : partitionpath/.fileid_baseInstant.log.writetoken
|
||||
* marker file format : partitionpath/fileId_writetoken_baseinstant.basefileExtn.marker.APPEND
|
||||
*
|
||||
* @param logFilePath log file path for which marker file name needs to be generated.
|
||||
* @return the marker file name thus curated.
|
||||
*/
|
||||
private static String getFileNameForMarkerFromLogFile(String logFilePath, HoodieTable table) {
|
||||
Path logPath = new Path(table.getMetaClient().getBasePath(), logFilePath);
|
||||
String fileId = FSUtils.getFileIdFromLogPath(logPath);
|
||||
String baseInstant = FSUtils.getBaseCommitTimeFromLogPath(logPath);
|
||||
String writeToken = FSUtils.getWriteTokenFromLogPath(logPath);
|
||||
|
||||
return FSUtils.makeDataFileName(baseInstant, writeToken, fileId, table.getBaseFileFormat().getFileExtension());
|
||||
@Override
|
||||
List<HoodieRollbackStat> getListBasedRollBackStats(HoodieTableMetaClient metaClient, HoodieWriteConfig config, HoodieEngineContext context, Option<HoodieInstant> commitInstantOpt,
|
||||
List<ListingBasedRollbackRequest> rollbackRequests) {
|
||||
return new ListingBasedRollbackHelper(metaClient, config)
|
||||
.collectRollbackStats(context, commitInstantOpt.get(), rollbackRequests);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,33 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi.table.upgrade;
|
||||
|
||||
import org.apache.hudi.HoodieSparkUtils;
|
||||
import org.apache.hudi.config.HoodieWriteConfig;
|
||||
|
||||
/**
|
||||
* Upgrade handle to assist in upgrading hoodie table from version 1 to 2.
|
||||
*/
|
||||
public class OneToTwoUpgradeHandler extends BaseOneToTwoUpgradeHandler {
|
||||
|
||||
@Override
|
||||
String getPartitionColumns(HoodieWriteConfig config) {
|
||||
return HoodieSparkUtils.getPartitionColumns(config.getProps());
|
||||
}
|
||||
}
|
||||
@@ -19,31 +19,17 @@
|
||||
package org.apache.hudi.table.upgrade;
|
||||
|
||||
import org.apache.hudi.common.engine.HoodieEngineContext;
|
||||
import org.apache.hudi.common.table.timeline.HoodieInstant;
|
||||
import org.apache.hudi.common.table.timeline.HoodieTimeline;
|
||||
import org.apache.hudi.config.HoodieWriteConfig;
|
||||
import org.apache.hudi.table.HoodieSparkTable;
|
||||
import org.apache.hudi.table.marker.WriteMarkers;
|
||||
import org.apache.hudi.table.marker.WriteMarkersFactory;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
import org.apache.hudi.table.HoodieTable;
|
||||
|
||||
/**
|
||||
* Downgrade handle to assist in downgrading hoodie table from version 1 to 0.
|
||||
*/
|
||||
public class OneToZeroDowngradeHandler implements DowngradeHandler {
|
||||
public class OneToZeroDowngradeHandler extends BaseOneToZeroDowngradeHandler {
|
||||
|
||||
@Override
|
||||
public void downgrade(HoodieWriteConfig config, HoodieEngineContext context, String instantTime) {
|
||||
// fetch pending commit info
|
||||
HoodieSparkTable table = HoodieSparkTable.create(config, context);
|
||||
HoodieTimeline inflightTimeline = table.getMetaClient().getCommitsTimeline().filterPendingExcludingCompaction();
|
||||
List<HoodieInstant> commits = inflightTimeline.getReverseOrderedInstants().collect(Collectors.toList());
|
||||
for (HoodieInstant commitInstant : commits) {
|
||||
// delete existing markers
|
||||
WriteMarkers writeMarkers = WriteMarkersFactory.get(config.getMarkersType(), table, commitInstant.getTimestamp());
|
||||
writeMarkers.quietDeleteMarkerDir(context, config.getMarkersDeleteParallelism());
|
||||
}
|
||||
HoodieTable getTable(HoodieWriteConfig config, HoodieEngineContext context) {
|
||||
return HoodieSparkTable.create(config, context);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -18,6 +18,7 @@
|
||||
|
||||
package org.apache.hudi.table.upgrade;
|
||||
|
||||
import org.apache.hudi.common.config.ConfigProperty;
|
||||
import org.apache.hudi.common.engine.HoodieEngineContext;
|
||||
import org.apache.hudi.common.table.HoodieTableMetaClient;
|
||||
import org.apache.hudi.common.table.HoodieTableVersion;
|
||||
@@ -25,6 +26,7 @@ import org.apache.hudi.config.HoodieWriteConfig;
|
||||
import org.apache.hudi.exception.HoodieUpgradeDowngradeException;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
|
||||
public class SparkUpgradeDowngrade extends AbstractUpgradeDowngrade {
|
||||
|
||||
@@ -43,22 +45,25 @@ public class SparkUpgradeDowngrade extends AbstractUpgradeDowngrade {
|
||||
} catch (IOException e) {
|
||||
throw new HoodieUpgradeDowngradeException("Error during upgrade/downgrade to version:" + toVersion, e);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void upgrade(HoodieTableVersion fromVersion, HoodieTableVersion toVersion, String instantTime) {
|
||||
protected Map<ConfigProperty, String> upgrade(HoodieTableVersion fromVersion, HoodieTableVersion toVersion, String instantTime) {
|
||||
if (fromVersion == HoodieTableVersion.ZERO && toVersion == HoodieTableVersion.ONE) {
|
||||
new ZeroToOneUpgradeHandler().upgrade(config, context, instantTime);
|
||||
return new ZeroToOneUpgradeHandler().upgrade(config, context, instantTime);
|
||||
} else if (fromVersion == HoodieTableVersion.ONE && toVersion == HoodieTableVersion.TWO) {
|
||||
return new OneToTwoUpgradeHandler().upgrade(config, context, instantTime);
|
||||
} else {
|
||||
throw new HoodieUpgradeDowngradeException(fromVersion.versionCode(), toVersion.versionCode(), true);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void downgrade(HoodieTableVersion fromVersion, HoodieTableVersion toVersion, String instantTime) {
|
||||
protected Map<ConfigProperty, String> downgrade(HoodieTableVersion fromVersion, HoodieTableVersion toVersion, String instantTime) {
|
||||
if (fromVersion == HoodieTableVersion.ONE && toVersion == HoodieTableVersion.ZERO) {
|
||||
new OneToZeroDowngradeHandler().downgrade(config, context, instantTime);
|
||||
return new OneToZeroDowngradeHandler().downgrade(config, context, instantTime);
|
||||
} else if (fromVersion == HoodieTableVersion.TWO && toVersion == HoodieTableVersion.ONE) {
|
||||
return new TwoToOneDowngradeHandler().downgrade(config, context, instantTime);
|
||||
} else {
|
||||
throw new HoodieUpgradeDowngradeException(fromVersion.versionCode(), toVersion.versionCode(), false);
|
||||
}
|
||||
|
||||
@@ -0,0 +1,35 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi.table.upgrade;
|
||||
|
||||
import org.apache.hudi.common.engine.HoodieEngineContext;
|
||||
import org.apache.hudi.config.HoodieWriteConfig;
|
||||
import org.apache.hudi.table.HoodieSparkTable;
|
||||
import org.apache.hudi.table.HoodieTable;
|
||||
|
||||
/**
|
||||
* Downgrade handle to assist in downgrading hoodie table from version 2 to 1.
|
||||
*/
|
||||
public class TwoToOneDowngradeHandler extends BaseTwoToOneDowngradeHandler {
|
||||
|
||||
@Override
|
||||
HoodieTable getTable(HoodieWriteConfig config, HoodieEngineContext context) {
|
||||
return HoodieSparkTable.create(config, context);
|
||||
}
|
||||
}
|
||||
@@ -20,120 +20,31 @@ package org.apache.hudi.table.upgrade;
|
||||
|
||||
import org.apache.hudi.common.HoodieRollbackStat;
|
||||
import org.apache.hudi.common.engine.HoodieEngineContext;
|
||||
import org.apache.hudi.common.fs.FSUtils;
|
||||
import org.apache.hudi.common.model.HoodieTableType;
|
||||
import org.apache.hudi.common.model.IOType;
|
||||
import org.apache.hudi.common.table.marker.MarkerType;
|
||||
import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
|
||||
import org.apache.hudi.common.table.HoodieTableMetaClient;
|
||||
import org.apache.hudi.common.table.timeline.HoodieInstant;
|
||||
import org.apache.hudi.common.table.timeline.HoodieTimeline;
|
||||
import org.apache.hudi.common.util.Option;
|
||||
import org.apache.hudi.config.HoodieWriteConfig;
|
||||
import org.apache.hudi.exception.HoodieRollbackException;
|
||||
import org.apache.hudi.table.HoodieSparkTable;
|
||||
import org.apache.hudi.table.HoodieTable;
|
||||
import org.apache.hudi.table.action.rollback.ListingBasedRollbackHelper;
|
||||
import org.apache.hudi.table.action.rollback.ListingBasedRollbackRequest;
|
||||
import org.apache.hudi.table.action.rollback.RollbackUtils;
|
||||
import org.apache.hudi.table.marker.WriteMarkers;
|
||||
import org.apache.hudi.table.marker.WriteMarkersFactory;
|
||||
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
/**
|
||||
* Upgrade handle to assist in upgrading hoodie table from version 0 to 1.
|
||||
*/
|
||||
public class ZeroToOneUpgradeHandler implements UpgradeHandler {
|
||||
public class ZeroToOneUpgradeHandler extends BaseZeroToOneUpgradeHandler {
|
||||
|
||||
@Override
|
||||
public void upgrade(HoodieWriteConfig config, HoodieEngineContext context, String instantTime) {
|
||||
// fetch pending commit info
|
||||
HoodieSparkTable table = HoodieSparkTable.create(config, context);
|
||||
HoodieTimeline inflightTimeline = table.getMetaClient().getCommitsTimeline().filterPendingExcludingCompaction();
|
||||
List<String> commits = inflightTimeline.getReverseOrderedInstants().map(HoodieInstant::getTimestamp)
|
||||
.collect(Collectors.toList());
|
||||
if (commits.size() > 0 && instantTime != null) {
|
||||
// ignore the latest inflight commit since a new commit would have been started and we need to fix any pending commits from previous launch
|
||||
commits.remove(instantTime);
|
||||
}
|
||||
for (String commit : commits) {
|
||||
// for every pending commit, delete old markers and re-create markers in new format
|
||||
recreateMarkers(commit, table, context, config.getMarkersType(), config.getMarkersDeleteParallelism());
|
||||
}
|
||||
HoodieTable getTable(HoodieWriteConfig config, HoodieEngineContext context) {
|
||||
return HoodieSparkTable.create(config, context);
|
||||
}
|
||||
|
||||
/**
|
||||
* Recreate markers in new format.
|
||||
* Step1: Delete existing markers
|
||||
* Step2: Collect all rollback file info.
|
||||
* Step3: recreate markers for all interested files.
|
||||
*
|
||||
* @param commitInstantTime instant of interest for which markers need to be recreated.
|
||||
* @param table instance of {@link HoodieSparkTable} to use
|
||||
* @param context instance of {@link HoodieEngineContext} to use
|
||||
* @param markerType marker type to use
|
||||
* @throws HoodieRollbackException on any exception during upgrade.
|
||||
*/
|
||||
private static void recreateMarkers(final String commitInstantTime,
|
||||
HoodieSparkTable table,
|
||||
HoodieEngineContext context,
|
||||
MarkerType markerType,
|
||||
int parallelism) throws HoodieRollbackException {
|
||||
try {
|
||||
// fetch hoodie instant
|
||||
Option<HoodieInstant> commitInstantOpt = Option.fromJavaOptional(table.getActiveTimeline().getCommitsTimeline().getInstants()
|
||||
.filter(instant -> HoodieActiveTimeline.EQUALS.test(instant.getTimestamp(), commitInstantTime))
|
||||
.findFirst());
|
||||
if (commitInstantOpt.isPresent()) {
|
||||
// delete existing markers
|
||||
WriteMarkers writeMarkers = WriteMarkersFactory.get(markerType, table, commitInstantTime);
|
||||
writeMarkers.quietDeleteMarkerDir(context, parallelism);
|
||||
|
||||
// generate rollback stats
|
||||
List<ListingBasedRollbackRequest> rollbackRequests;
|
||||
if (table.getMetaClient().getTableType() == HoodieTableType.COPY_ON_WRITE) {
|
||||
rollbackRequests = RollbackUtils.generateRollbackRequestsByListingCOW(context, table.getMetaClient().getBasePath(), table.getConfig());
|
||||
} else {
|
||||
rollbackRequests = RollbackUtils.generateRollbackRequestsUsingFileListingMOR(commitInstantOpt.get(), table, context);
|
||||
}
|
||||
List<HoodieRollbackStat> rollbackStats = new ListingBasedRollbackHelper(table.getMetaClient(), table.getConfig())
|
||||
.collectRollbackStats(context, commitInstantOpt.get(), rollbackRequests);
|
||||
|
||||
// recreate markers adhering to marker based rollback
|
||||
for (HoodieRollbackStat rollbackStat : rollbackStats) {
|
||||
for (String path : rollbackStat.getSuccessDeleteFiles()) {
|
||||
String dataFileName = path.substring(path.lastIndexOf("/") + 1);
|
||||
// not feasible to differentiate MERGE from CREATE. hence creating with MERGE IOType for all base files.
|
||||
writeMarkers.create(rollbackStat.getPartitionPath(), dataFileName, IOType.MERGE);
|
||||
}
|
||||
for (FileStatus fileStatus : rollbackStat.getCommandBlocksCount().keySet()) {
|
||||
writeMarkers.create(rollbackStat.getPartitionPath(), getFileNameForMarkerFromLogFile(fileStatus.getPath().toString(), table), IOType.APPEND);
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (Exception e) {
|
||||
throw new HoodieRollbackException("Exception thrown while upgrading Hoodie Table from version 0 to 1", e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Curates file name for marker from existing log file path.
|
||||
* log file format : partitionpath/.fileid_baseInstant.log.writetoken
|
||||
* marker file format : partitionpath/fileId_writetoken_baseinstant.basefileExtn.marker.APPEND
|
||||
*
|
||||
* @param logFilePath log file path for which marker file name needs to be generated.
|
||||
* @return the marker file name thus curated.
|
||||
*/
|
||||
private static String getFileNameForMarkerFromLogFile(String logFilePath, HoodieTable table) {
|
||||
Path logPath = new Path(table.getMetaClient().getBasePath(), logFilePath);
|
||||
String fileId = FSUtils.getFileIdFromLogPath(logPath);
|
||||
String baseInstant = FSUtils.getBaseCommitTimeFromLogPath(logPath);
|
||||
String writeToken = FSUtils.getWriteTokenFromLogPath(logPath);
|
||||
|
||||
return FSUtils.makeDataFileName(baseInstant, writeToken, fileId, table.getBaseFileFormat().getFileExtension());
|
||||
@Override
|
||||
List<HoodieRollbackStat> getListBasedRollBackStats(HoodieTableMetaClient metaClient, HoodieWriteConfig config, HoodieEngineContext context, Option<HoodieInstant> commitInstantOpt,
|
||||
List<ListingBasedRollbackRequest> rollbackRequests) {
|
||||
return new ListingBasedRollbackHelper(metaClient, config)
|
||||
.collectRollbackStats(context, commitInstantOpt.get(), rollbackRequests);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -18,11 +18,17 @@
|
||||
|
||||
package org.apache.hudi
|
||||
|
||||
import java.util.Properties
|
||||
|
||||
import org.apache.avro.Schema
|
||||
import org.apache.avro.generic.GenericRecord
|
||||
import org.apache.hadoop.fs.{FileSystem, Path}
|
||||
import org.apache.hudi.client.utils.SparkRowSerDe
|
||||
import org.apache.hudi.common.config.TypedProperties
|
||||
import org.apache.hudi.common.model.HoodieRecord
|
||||
import org.apache.hudi.keygen.constant.KeyGeneratorOptions
|
||||
import org.apache.hudi.keygen.factory.HoodieSparkKeyGeneratorFactory
|
||||
import org.apache.hudi.keygen.{BaseKeyGenerator, CustomAvroKeyGenerator, CustomKeyGenerator, KeyGenerator}
|
||||
import org.apache.spark.SPARK_VERSION
|
||||
import org.apache.spark.rdd.RDD
|
||||
import org.apache.spark.sql.avro.SchemaConverters
|
||||
@@ -221,11 +227,41 @@ object HoodieSparkUtils extends SparkAdapterSupport {
|
||||
val leftExp = toAttribute(attribute, tableSchema)
|
||||
val rightExp = Literal.create(s"%$value%")
|
||||
sparkAdapter.createLike(leftExp, rightExp)
|
||||
case _=> null
|
||||
case _ => null
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* @param properties config properties
|
||||
* @return partition columns
|
||||
*/
|
||||
def getPartitionColumns(properties: Properties): String = {
|
||||
val props = new TypedProperties(properties)
|
||||
val keyGenerator = HoodieSparkKeyGeneratorFactory.createKeyGenerator(props)
|
||||
getPartitionColumns(keyGenerator, props)
|
||||
}
|
||||
|
||||
/**
|
||||
* @param keyGen key generator
|
||||
* @return partition columns
|
||||
*/
|
||||
def getPartitionColumns(keyGen: KeyGenerator, typedProperties: TypedProperties): String = {
|
||||
keyGen match {
|
||||
// For CustomKeyGenerator and CustomAvroKeyGenerator, the partition path filed format
|
||||
// is: "field_name: field_type", we extract the field_name from the partition path field.
|
||||
case c: BaseKeyGenerator
|
||||
if c.isInstanceOf[CustomKeyGenerator] || c.isInstanceOf[CustomAvroKeyGenerator] =>
|
||||
c.getPartitionPathFields.asScala.map(pathField =>
|
||||
pathField.split(CustomAvroKeyGenerator.SPLIT_REGEX)
|
||||
.headOption.getOrElse(s"Illegal partition path field format: '$pathField' for ${c.getClass.getSimpleName}"))
|
||||
.mkString(",")
|
||||
|
||||
case b: BaseKeyGenerator => b.getPartitionPathFields.asScala.mkString(",")
|
||||
case _ => typedProperties.getString(KeyGeneratorOptions.PARTITIONPATH_FIELD.key())
|
||||
}
|
||||
}
|
||||
|
||||
private def toAttribute(columnName: String, tableSchema: StructType): AttributeReference = {
|
||||
val field = tableSchema.find(p => p.name == columnName)
|
||||
assert(field.isDefined, s"Cannot find column: $columnName, Table Columns are: " +
|
||||
|
||||
@@ -18,10 +18,6 @@
|
||||
|
||||
package org.apache.hudi.table.upgrade;
|
||||
|
||||
import org.apache.hadoop.fs.FSDataInputStream;
|
||||
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||
import org.apache.hadoop.fs.FileUtil;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hudi.client.SparkRDDWriteClient;
|
||||
import org.apache.hudi.client.WriteStatus;
|
||||
import org.apache.hudi.common.config.HoodieConfig;
|
||||
@@ -31,19 +27,31 @@ import org.apache.hudi.common.model.HoodieLogFile;
|
||||
import org.apache.hudi.common.model.HoodieRecord;
|
||||
import org.apache.hudi.common.model.HoodieTableType;
|
||||
import org.apache.hudi.common.table.HoodieTableConfig;
|
||||
import org.apache.hudi.common.table.HoodieTableMetaClient;
|
||||
import org.apache.hudi.common.table.HoodieTableVersion;
|
||||
import org.apache.hudi.common.table.marker.MarkerType;
|
||||
import org.apache.hudi.common.table.timeline.HoodieInstant;
|
||||
import org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion;
|
||||
import org.apache.hudi.common.table.view.SyncableFileSystemView;
|
||||
import org.apache.hudi.common.testutils.HoodieTestDataGenerator;
|
||||
import org.apache.hudi.common.testutils.HoodieTestUtils;
|
||||
import org.apache.hudi.common.util.Option;
|
||||
import org.apache.hudi.common.util.collection.Pair;
|
||||
import org.apache.hudi.config.HoodieWriteConfig;
|
||||
import org.apache.hudi.keygen.constant.KeyGeneratorOptions;
|
||||
import org.apache.hudi.table.HoodieTable;
|
||||
import org.apache.hudi.table.marker.WriteMarkers;
|
||||
import org.apache.hudi.table.marker.WriteMarkersFactory;
|
||||
import org.apache.hudi.testutils.Assertions;
|
||||
import org.apache.hudi.testutils.HoodieClientTestBase;
|
||||
import org.apache.hudi.testutils.HoodieClientTestUtils;
|
||||
|
||||
import org.apache.hadoop.fs.FSDataInputStream;
|
||||
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.FileUtil;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.spark.api.java.JavaRDD;
|
||||
import org.apache.spark.sql.Dataset;
|
||||
import org.apache.spark.sql.Row;
|
||||
@@ -52,20 +60,26 @@ import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.params.ParameterizedTest;
|
||||
import org.junit.jupiter.params.provider.Arguments;
|
||||
import org.junit.jupiter.params.provider.EnumSource;
|
||||
import org.junit.jupiter.params.provider.MethodSource;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Properties;
|
||||
import java.util.Set;
|
||||
import java.util.function.Predicate;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import static org.apache.hudi.common.table.HoodieTableConfig.HOODIE_BASE_FILE_FORMAT_PROP;
|
||||
import static org.apache.hudi.common.table.HoodieTableConfig.HOODIE_TABLE_TYPE_PROP;
|
||||
import static org.apache.hudi.common.testutils.HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH;
|
||||
import static org.apache.hudi.common.testutils.HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH;
|
||||
import static org.apache.hudi.common.util.MarkerUtils.MARKERS_FILENAME_PREFIX;
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertFalse;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
@@ -76,11 +90,29 @@ import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
public class TestUpgradeDowngrade extends HoodieClientTestBase {
|
||||
|
||||
private static final String TEST_NAME_WITH_PARAMS = "[{index}] Test with deletePartialMarkerFiles={0} and TableType = {1}";
|
||||
private static final String TEST_NAME_WITH_DOWNGRADE_PARAMS = "[{index}] Test with deletePartialMarkerFiles={0} and TableType = {1} and "
|
||||
+ "From version = {2}";
|
||||
|
||||
public static Stream<Arguments> configParams() {
|
||||
Object[][] data = new Object[][] {
|
||||
{true, HoodieTableType.COPY_ON_WRITE}, {false, HoodieTableType.COPY_ON_WRITE},
|
||||
{true, HoodieTableType.MERGE_ON_READ}, {false, HoodieTableType.MERGE_ON_READ}
|
||||
{true, HoodieTableType.COPY_ON_WRITE},
|
||||
{false, HoodieTableType.COPY_ON_WRITE},
|
||||
{true, HoodieTableType.MERGE_ON_READ},
|
||||
{false, HoodieTableType.MERGE_ON_READ}
|
||||
};
|
||||
return Stream.of(data).map(Arguments::of);
|
||||
}
|
||||
|
||||
public static Stream<Arguments> downGradeConfigParams() {
|
||||
Object[][] data = new Object[][] {
|
||||
{true, HoodieTableType.COPY_ON_WRITE, HoodieTableVersion.TWO},
|
||||
{false, HoodieTableType.COPY_ON_WRITE, HoodieTableVersion.TWO},
|
||||
{true, HoodieTableType.MERGE_ON_READ, HoodieTableVersion.TWO},
|
||||
{false, HoodieTableType.MERGE_ON_READ, HoodieTableVersion.TWO},
|
||||
{true, HoodieTableType.COPY_ON_WRITE, HoodieTableVersion.ONE},
|
||||
{false, HoodieTableType.COPY_ON_WRITE, HoodieTableVersion.ONE},
|
||||
{true, HoodieTableType.MERGE_ON_READ, HoodieTableVersion.ONE},
|
||||
{false, HoodieTableType.MERGE_ON_READ, HoodieTableVersion.ONE}
|
||||
};
|
||||
return Stream.of(data).map(Arguments::of);
|
||||
}
|
||||
@@ -88,9 +120,9 @@ public class TestUpgradeDowngrade extends HoodieClientTestBase {
|
||||
@BeforeEach
|
||||
public void setUp() throws Exception {
|
||||
initSparkContexts();
|
||||
initDFS();
|
||||
initPath();
|
||||
initTestDataGenerator();
|
||||
initDFSMetaClient();
|
||||
initMetaClient();
|
||||
}
|
||||
|
||||
@AfterEach
|
||||
@@ -100,21 +132,21 @@ public class TestUpgradeDowngrade extends HoodieClientTestBase {
|
||||
|
||||
@Test
|
||||
public void testLeftOverUpdatedPropFileCleanup() throws IOException {
|
||||
testUpgradeInternal(true, true, HoodieTableType.MERGE_ON_READ);
|
||||
testUpgradeZeroToOneInternal(true, true, HoodieTableType.MERGE_ON_READ);
|
||||
}
|
||||
|
||||
@ParameterizedTest(name = TEST_NAME_WITH_PARAMS)
|
||||
@MethodSource("configParams")
|
||||
public void testUpgrade(boolean deletePartialMarkerFiles, HoodieTableType tableType) throws IOException {
|
||||
testUpgradeInternal(false, deletePartialMarkerFiles, tableType);
|
||||
public void testUpgradeZeroToOne(boolean deletePartialMarkerFiles, HoodieTableType tableType) throws IOException {
|
||||
testUpgradeZeroToOneInternal(false, deletePartialMarkerFiles, tableType);
|
||||
}
|
||||
|
||||
public void testUpgradeInternal(boolean induceResiduesFromPrevUpgrade, boolean deletePartialMarkerFiles, HoodieTableType tableType) throws IOException {
|
||||
public void testUpgradeZeroToOneInternal(boolean induceResiduesFromPrevUpgrade, boolean deletePartialMarkerFiles, HoodieTableType tableType) throws IOException {
|
||||
// init config, table and client.
|
||||
Map<String, String> params = new HashMap<>();
|
||||
if (tableType == HoodieTableType.MERGE_ON_READ) {
|
||||
params.put(HOODIE_TABLE_TYPE_PROP.key(), HoodieTableType.MERGE_ON_READ.name());
|
||||
metaClient = HoodieTestUtils.init(dfs.getConf(), dfsBasePath, HoodieTableType.MERGE_ON_READ);
|
||||
metaClient = HoodieTestUtils.init(hadoopConf, basePath, HoodieTableType.MERGE_ON_READ);
|
||||
}
|
||||
HoodieWriteConfig cfg = getConfigBuilder().withAutoCommit(false).withRollbackUsingMarkers(false).withProps(params).build();
|
||||
SparkRDDWriteClient client = getHoodieWriteClient(cfg);
|
||||
@@ -151,30 +183,116 @@ public class TestUpgradeDowngrade extends HoodieClientTestBase {
|
||||
assertMarkerFilesForUpgrade(table, commitInstant, firstPartitionCommit2FileSlices, secondPartitionCommit2FileSlices);
|
||||
|
||||
// verify hoodie.table.version got upgraded
|
||||
metaClient = HoodieTableMetaClient.builder().setConf(context.getHadoopConf().get()).setBasePath(cfg.getBasePath())
|
||||
.setLayoutVersion(Option.of(new TimelineLayoutVersion(cfg.getTimelineLayoutVersion()))).build();
|
||||
assertEquals(metaClient.getTableConfig().getTableVersion().versionCode(), HoodieTableVersion.ONE.versionCode());
|
||||
assertTableVersionFromPropertyFile(HoodieTableVersion.ONE);
|
||||
|
||||
// trigger 3rd commit with marker based rollback enabled.
|
||||
/* HUDI-2310
|
||||
List<HoodieRecord> thirdBatch = triggerCommit("003", tableType, true);
|
||||
|
||||
// Check the entire dataset has all records only from 1st commit and 3rd commit since 2nd is expected to be rolledback.
|
||||
assertRows(inputRecords.getKey(), thirdBatch);
|
||||
if (induceResiduesFromPrevUpgrade) {
|
||||
assertFalse(dfs.exists(new Path(metaClient.getMetaPath(), SparkUpgradeDowngrade.HOODIE_UPDATED_PROPERTY_FILE)));
|
||||
}
|
||||
}*/
|
||||
}
|
||||
|
||||
@ParameterizedTest(name = TEST_NAME_WITH_PARAMS)
|
||||
@MethodSource("configParams")
|
||||
public void testDowngrade(boolean deletePartialMarkerFiles, HoodieTableType tableType) throws IOException {
|
||||
@ParameterizedTest
|
||||
@EnumSource(value = HoodieTableType.class)
|
||||
public void testUpgradeOneToTwo(HoodieTableType tableType) throws IOException {
|
||||
// init config, table and client.
|
||||
Map<String, String> params = new HashMap<>();
|
||||
addNewTableParamsToProps(params);
|
||||
if (tableType == HoodieTableType.MERGE_ON_READ) {
|
||||
params.put(HOODIE_TABLE_TYPE_PROP.key(), HoodieTableType.MERGE_ON_READ.name());
|
||||
metaClient = HoodieTestUtils.init(hadoopConf, basePath, HoodieTableType.MERGE_ON_READ);
|
||||
}
|
||||
HoodieWriteConfig cfg = getConfigBuilder().withAutoCommit(false).withRollbackUsingMarkers(false).withProps(params).build();
|
||||
SparkRDDWriteClient client = getHoodieWriteClient(cfg);
|
||||
// Write inserts
|
||||
doInsert(client);
|
||||
|
||||
// downgrade table props
|
||||
downgradeTableConfigsFromTwoToOne(cfg);
|
||||
|
||||
// perform upgrade
|
||||
new SparkUpgradeDowngrade(metaClient, cfg, context).run(metaClient, HoodieTableVersion.TWO, cfg, context, null);
|
||||
|
||||
// verify hoodie.table.version got upgraded
|
||||
metaClient = HoodieTableMetaClient.builder().setConf(context.getHadoopConf().get()).setBasePath(cfg.getBasePath())
|
||||
.setLayoutVersion(Option.of(new TimelineLayoutVersion(cfg.getTimelineLayoutVersion()))).build();
|
||||
assertEquals(metaClient.getTableConfig().getTableVersion().versionCode(), HoodieTableVersion.TWO.versionCode());
|
||||
assertTableVersionFromPropertyFile(HoodieTableVersion.TWO);
|
||||
|
||||
// verify table props
|
||||
assertTableProps(cfg);
|
||||
}
|
||||
|
||||
private void addNewTableParamsToProps(Map<String, String> params) {
|
||||
params.put(KeyGeneratorOptions.RECORDKEY_FIELD.key(), "uuid");
|
||||
params.put(KeyGeneratorOptions.PARTITIONPATH_FIELD.key(), "partition_path");
|
||||
params.put(HoodieTableConfig.HOODIE_TABLE_NAME_PROP.key(), metaClient.getTableConfig().getTableName());
|
||||
params.put(HOODIE_BASE_FILE_FORMAT_PROP.key(), HOODIE_BASE_FILE_FORMAT_PROP.defaultValue().name());
|
||||
}
|
||||
|
||||
private void doInsert(SparkRDDWriteClient client) {
|
||||
// Write 1 (only inserts)
|
||||
String commit1 = "000";
|
||||
client.startCommitWithTime(commit1);
|
||||
List<HoodieRecord> records = dataGen.generateInserts(commit1, 100);
|
||||
JavaRDD<HoodieRecord> writeRecords = jsc.parallelize(records, 1);
|
||||
client.insert(writeRecords, commit1).collect();
|
||||
}
|
||||
|
||||
private void downgradeTableConfigsFromTwoToOne(HoodieWriteConfig cfg) throws IOException {
|
||||
Properties properties = new Properties(cfg.getProps());
|
||||
properties.remove(HoodieTableConfig.HOODIE_TABLE_RECORDKEY_FIELDS.key());
|
||||
properties.remove(HoodieTableConfig.HOODIE_TABLE_PARTITION_FIELDS_PROP.key());
|
||||
properties.remove(HoodieTableConfig.HOODIE_TABLE_NAME_PROP.key());
|
||||
properties.remove(HOODIE_BASE_FILE_FORMAT_PROP.key());
|
||||
properties.setProperty(HoodieTableConfig.HOODIE_TABLE_VERSION_PROP.key(), "1");
|
||||
|
||||
metaClient = HoodieTestUtils.init(hadoopConf, basePath, getTableType(), properties);
|
||||
// set hoodie.table.version to 1 in hoodie.properties file
|
||||
metaClient.getTableConfig().setTableVersion(HoodieTableVersion.ONE);
|
||||
}
|
||||
|
||||
private void assertTableProps(HoodieWriteConfig cfg) {
|
||||
HoodieTableConfig tableConfig = metaClient.getTableConfig();
|
||||
Properties originalProps = cfg.getProps();
|
||||
assertEquals(tableConfig.getPartitionFieldProp(), originalProps.getProperty(KeyGeneratorOptions.PARTITIONPATH_FIELD.key()));
|
||||
assertEquals(tableConfig.getRecordKeyFieldProp(), originalProps.getProperty(KeyGeneratorOptions.RECORDKEY_FIELD.key()));
|
||||
assertEquals(tableConfig.getTableName(), cfg.getTableName());
|
||||
assertEquals(tableConfig.getBaseFileFormat().name(), originalProps.getProperty(HOODIE_BASE_FILE_FORMAT_PROP.key()));
|
||||
}
|
||||
|
||||
@ParameterizedTest(name = TEST_NAME_WITH_DOWNGRADE_PARAMS)
|
||||
@MethodSource("downGradeConfigParams")
|
||||
public void testDowngrade(boolean deletePartialMarkerFiles, HoodieTableType tableType, HoodieTableVersion fromVersion) throws IOException {
|
||||
MarkerType markerType = fromVersion == HoodieTableVersion.TWO ? MarkerType.TIMELINE_SERVER_BASED : MarkerType.DIRECT;
|
||||
// init config, table and client.
|
||||
Map<String, String> params = new HashMap<>();
|
||||
if (fromVersion == HoodieTableVersion.TWO) {
|
||||
addNewTableParamsToProps(params);
|
||||
}
|
||||
if (tableType == HoodieTableType.MERGE_ON_READ) {
|
||||
params.put(HOODIE_TABLE_TYPE_PROP.key(), HoodieTableType.MERGE_ON_READ.name());
|
||||
metaClient = HoodieTestUtils.init(hadoopConf, basePath, HoodieTableType.MERGE_ON_READ);
|
||||
}
|
||||
HoodieWriteConfig cfg = getConfigBuilder().withAutoCommit(false).withRollbackUsingMarkers(true)
|
||||
.withMarkersType(markerType.name()).withProps(params).build();
|
||||
SparkRDDWriteClient client = getHoodieWriteClient(cfg);
|
||||
|
||||
if (fromVersion == HoodieTableVersion.TWO) {
|
||||
// set table configs
|
||||
HoodieTableConfig tableConfig = metaClient.getTableConfig();
|
||||
tableConfig.setValue(HoodieTableConfig.HOODIE_TABLE_NAME_PROP, cfg.getTableName());
|
||||
tableConfig.setValue(HoodieTableConfig.HOODIE_TABLE_PARTITION_FIELDS_PROP, cfg.getString(KeyGeneratorOptions.PARTITIONPATH_FIELD.key()));
|
||||
tableConfig.setValue(HoodieTableConfig.HOODIE_TABLE_RECORDKEY_FIELDS, cfg.getString(KeyGeneratorOptions.RECORDKEY_FIELD.key()));
|
||||
tableConfig.setValue(HOODIE_BASE_FILE_FORMAT_PROP, cfg.getString(HOODIE_BASE_FILE_FORMAT_PROP));
|
||||
}
|
||||
|
||||
// prepare data. Make 2 commits, in which 2nd is not committed.
|
||||
List<FileSlice> firstPartitionCommit2FileSlices = new ArrayList<>();
|
||||
@@ -185,7 +303,7 @@ public class TestUpgradeDowngrade extends HoodieClientTestBase {
|
||||
HoodieInstant commitInstant = table.getPendingCommitTimeline().lastInstant().get();
|
||||
|
||||
// delete one of the marker files in 2nd commit if need be.
|
||||
WriteMarkers writeMarkers = WriteMarkersFactory.get(getConfig().getMarkersType(), table, commitInstant.getTimestamp());
|
||||
WriteMarkers writeMarkers = WriteMarkersFactory.get(markerType, table, commitInstant.getTimestamp());
|
||||
List<String> markerPaths = new ArrayList<>(writeMarkers.allMarkerFilePaths());
|
||||
if (deletePartialMarkerFiles) {
|
||||
String toDeleteMarkerFile = markerPaths.get(0);
|
||||
@@ -193,30 +311,55 @@ public class TestUpgradeDowngrade extends HoodieClientTestBase {
|
||||
markerPaths.remove(toDeleteMarkerFile);
|
||||
}
|
||||
|
||||
// set hoodie.table.version to 1 in hoodie.properties file
|
||||
prepForDowngrade();
|
||||
// set hoodie.table.version to fromVersion in hoodie.properties file
|
||||
HoodieTableVersion toVersion = HoodieTableVersion.ZERO;
|
||||
if (fromVersion == HoodieTableVersion.TWO) {
|
||||
prepForDowngradeFromTwoToOne();
|
||||
toVersion = HoodieTableVersion.ONE;
|
||||
} else {
|
||||
prepForDowngradeFromOneToZero();
|
||||
}
|
||||
|
||||
// downgrade should be performed. all marker files should be deleted
|
||||
new SparkUpgradeDowngrade(metaClient, cfg, context).run(metaClient, HoodieTableVersion.ZERO, cfg, context, null);
|
||||
new SparkUpgradeDowngrade(metaClient, cfg, context).run(metaClient, toVersion, cfg, context, null);
|
||||
|
||||
// assert marker files
|
||||
assertMarkerFilesForDowngrade(table, commitInstant);
|
||||
assertMarkerFilesForDowngrade(table, commitInstant, toVersion == HoodieTableVersion.ONE);
|
||||
|
||||
// verify hoodie.table.version got downgraded
|
||||
assertEquals(metaClient.getTableConfig().getTableVersion().versionCode(), HoodieTableVersion.ZERO.versionCode());
|
||||
assertTableVersionFromPropertyFile(HoodieTableVersion.ZERO);
|
||||
metaClient = HoodieTableMetaClient.builder().setConf(context.getHadoopConf().get()).setBasePath(cfg.getBasePath())
|
||||
.setLayoutVersion(Option.of(new TimelineLayoutVersion(cfg.getTimelineLayoutVersion()))).build();
|
||||
assertEquals(metaClient.getTableConfig().getTableVersion().versionCode(), toVersion.versionCode());
|
||||
assertTableVersionFromPropertyFile(toVersion);
|
||||
|
||||
// trigger 3rd commit with marker based rollback disabled.
|
||||
/* HUDI-2310
|
||||
List<HoodieRecord> thirdBatch = triggerCommit("003", tableType, false);
|
||||
|
||||
// Check the entire dataset has all records only from 1st commit and 3rd commit since 2nd is expected to be rolledback.
|
||||
assertRows(inputRecords.getKey(), thirdBatch);
|
||||
*/
|
||||
}
|
||||
|
||||
private void assertMarkerFilesForDowngrade(HoodieTable table, HoodieInstant commitInstant) throws IOException {
|
||||
private void assertMarkerFilesForDowngrade(HoodieTable table, HoodieInstant commitInstant, boolean assertExists) throws IOException {
|
||||
// Verify recreated marker files are as expected
|
||||
WriteMarkers writeMarkers = WriteMarkersFactory.get(getConfig().getMarkersType(), table, commitInstant.getTimestamp());
|
||||
assertFalse(writeMarkers.doesMarkerDirExist());
|
||||
if (assertExists) {
|
||||
assertTrue(writeMarkers.doesMarkerDirExist());
|
||||
assertEquals(0, getTimelineServerBasedMarkerFileCount(table.getMetaClient().getMarkerFolderPath(commitInstant.getTimestamp()),
|
||||
table.getMetaClient().getFs()));
|
||||
} else {
|
||||
assertFalse(writeMarkers.doesMarkerDirExist());
|
||||
}
|
||||
}
|
||||
|
||||
private long getTimelineServerBasedMarkerFileCount(String markerDir, FileSystem fileSystem) throws IOException {
|
||||
FileStatus[] fileStatuses = fileSystem.listStatus(new Path(markerDir));
|
||||
Predicate<String> prefixFilter = pathStr -> pathStr.contains(MARKERS_FILENAME_PREFIX);
|
||||
return Arrays.stream(fileStatuses)
|
||||
.map(fileStatus -> fileStatus.getPath().toString())
|
||||
.filter(prefixFilter)
|
||||
.collect(Collectors.toList()).stream().count();
|
||||
}
|
||||
|
||||
private void assertMarkerFilesForUpgrade(HoodieTable table, HoodieInstant commitInstant, List<FileSlice> firstPartitionCommit2FileSlices,
|
||||
@@ -349,7 +492,7 @@ public class TestUpgradeDowngrade extends HoodieClientTestBase {
|
||||
//just generate two partitions
|
||||
dataGen = new HoodieTestDataGenerator(new String[] {DEFAULT_FIRST_PARTITION_PATH, DEFAULT_SECOND_PARTITION_PATH});
|
||||
//1. prepare data
|
||||
HoodieTestDataGenerator.writePartitionMetadata(dfs, new String[] {DEFAULT_FIRST_PARTITION_PATH, DEFAULT_SECOND_PARTITION_PATH}, dfsBasePath);
|
||||
HoodieTestDataGenerator.writePartitionMetadata(metaClient.getFs(), new String[] {DEFAULT_FIRST_PARTITION_PATH, DEFAULT_SECOND_PARTITION_PATH}, basePath);
|
||||
/**
|
||||
* Write 1 (only inserts)
|
||||
*/
|
||||
@@ -396,7 +539,7 @@ public class TestUpgradeDowngrade extends HoodieClientTestBase {
|
||||
return Pair.of(records, records2);
|
||||
}
|
||||
|
||||
private void prepForDowngrade() throws IOException {
|
||||
private void prepForDowngradeFromOneToZero() throws IOException {
|
||||
metaClient.getTableConfig().setTableVersion(HoodieTableVersion.ONE);
|
||||
Path propertyFile = new Path(metaClient.getMetaPath() + "/" + HoodieTableConfig.HOODIE_PROPERTIES_FILE);
|
||||
try (FSDataOutputStream os = metaClient.getFs().create(propertyFile)) {
|
||||
@@ -404,13 +547,21 @@ public class TestUpgradeDowngrade extends HoodieClientTestBase {
|
||||
}
|
||||
}
|
||||
|
||||
private void prepForDowngradeFromTwoToOne() throws IOException {
|
||||
metaClient.getTableConfig().setTableVersion(HoodieTableVersion.TWO);
|
||||
Path propertyFile = new Path(metaClient.getMetaPath() + "/" + HoodieTableConfig.HOODIE_PROPERTIES_FILE);
|
||||
try (FSDataOutputStream os = metaClient.getFs().create(propertyFile)) {
|
||||
metaClient.getTableConfig().getProps().store(os, "");
|
||||
}
|
||||
}
|
||||
|
||||
private void createResidualFile() throws IOException {
|
||||
Path propertyFile = new Path(metaClient.getMetaPath() + "/" + HoodieTableConfig.HOODIE_PROPERTIES_FILE);
|
||||
Path updatedPropertyFile = new Path(metaClient.getMetaPath() + "/" + SparkUpgradeDowngrade.HOODIE_UPDATED_PROPERTY_FILE);
|
||||
|
||||
// Step1: Copy hoodie.properties to hoodie.properties.orig
|
||||
FileUtil.copy(metaClient.getFs(), propertyFile, metaClient.getFs(), updatedPropertyFile,
|
||||
false, metaClient.getHadoopConf());
|
||||
false, hadoopConf);
|
||||
}
|
||||
|
||||
private void assertTableVersionFromPropertyFile(HoodieTableVersion expectedVersion) throws IOException {
|
||||
|
||||
@@ -30,7 +30,9 @@ public enum HoodieTableVersion {
|
||||
// < 0.6.0 versions
|
||||
ZERO(0),
|
||||
// 0.6.0 onwards
|
||||
ONE(1);
|
||||
ONE(1),
|
||||
// 0.9.0 onwards
|
||||
TWO(2);
|
||||
|
||||
private final int versionCode;
|
||||
|
||||
@@ -43,10 +45,10 @@ public enum HoodieTableVersion {
|
||||
}
|
||||
|
||||
public static HoodieTableVersion current() {
|
||||
return ONE;
|
||||
return TWO;
|
||||
}
|
||||
|
||||
static HoodieTableVersion versionFromCode(int versionCode) {
|
||||
public static HoodieTableVersion versionFromCode(int versionCode) {
|
||||
return Arrays.stream(HoodieTableVersion.values())
|
||||
.filter(v -> v.versionCode == versionCode).findAny()
|
||||
.orElseThrow(() -> new HoodieException("Unknown versionCode:" + versionCode));
|
||||
|
||||
@@ -179,12 +179,13 @@ public class MarkerUtils {
|
||||
try {
|
||||
if (fileSystem.exists(dirPath)) {
|
||||
FileStatus[] fileStatuses = fileSystem.listStatus(dirPath);
|
||||
Predicate<String> prefixFilter = pathStr -> pathStr.contains(MARKERS_FILENAME_PREFIX);
|
||||
Predicate<String> markerTypeFilter =
|
||||
pathStr -> !stripMarkerFolderPrefix(pathStr, markerDir).equals(MARKER_TYPE_FILENAME);
|
||||
Predicate<FileStatus> prefixFilter = fileStatus ->
|
||||
fileStatus.getPath().getName().startsWith(MARKERS_FILENAME_PREFIX);
|
||||
Predicate<FileStatus> markerTypeFilter = fileStatus ->
|
||||
!fileStatus.getPath().getName().equals(MARKER_TYPE_FILENAME);
|
||||
List<String> markerDirSubPaths = Arrays.stream(fileStatuses)
|
||||
.map(fileStatus -> fileStatus.getPath().toString())
|
||||
.filter(prefixFilter.and(markerTypeFilter))
|
||||
.map(fileStatus -> fileStatus.getPath().toString())
|
||||
.collect(Collectors.toList());
|
||||
|
||||
if (markerDirSubPaths.size() > 0) {
|
||||
@@ -216,4 +217,4 @@ public class MarkerUtils {
|
||||
throw new HoodieIOException(ioe.getMessage(), ioe);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -18,6 +18,9 @@
|
||||
package org.apache.hudi
|
||||
|
||||
|
||||
import java.util
|
||||
import java.util.Properties
|
||||
|
||||
import org.apache.avro.Schema
|
||||
import org.apache.avro.generic.GenericRecord
|
||||
import org.apache.hadoop.conf.Configuration
|
||||
@@ -29,9 +32,8 @@ import org.apache.hudi.client.{HoodieWriteResult, SparkRDDWriteClient}
|
||||
import org.apache.hudi.common.config.{HoodieConfig, HoodieMetadataConfig, TypedProperties}
|
||||
import org.apache.hudi.common.fs.FSUtils
|
||||
import org.apache.hudi.common.model.{HoodieRecordPayload, HoodieTableType, WriteOperationType}
|
||||
import org.apache.hudi.common.table.TableSchemaResolver
|
||||
import org.apache.hudi.common.table.timeline.HoodieActiveTimeline
|
||||
import org.apache.hudi.common.table.{HoodieTableConfig, HoodieTableMetaClient}
|
||||
import org.apache.hudi.common.table.{HoodieTableConfig, HoodieTableMetaClient, TableSchemaResolver}
|
||||
import org.apache.hudi.common.util.{CommitUtils, ReflectionUtils}
|
||||
import org.apache.hudi.config.HoodieBootstrapConfig.{BOOTSTRAP_BASE_PATH, BOOTSTRAP_INDEX_CLASS}
|
||||
import org.apache.hudi.config.{HoodieInternalConfig, HoodieWriteConfig}
|
||||
@@ -51,8 +53,6 @@ import org.apache.spark.sql.types.StructType
|
||||
import org.apache.spark.sql.{DataFrame, Dataset, Row, SQLContext, SaveMode, SparkSession}
|
||||
import org.apache.spark.{SPARK_VERSION, SparkContext}
|
||||
|
||||
import java.util
|
||||
import java.util.Properties
|
||||
import scala.collection.JavaConversions._
|
||||
import scala.collection.mutable.ListBuffer
|
||||
|
||||
@@ -118,7 +118,7 @@ object HoodieSparkSqlWriter {
|
||||
} else {
|
||||
// Handle various save modes
|
||||
handleSaveModes(sqlContext.sparkSession, mode, basePath, tableConfig, tblName, operation, fs)
|
||||
val partitionColumns = HoodieWriterUtils.getPartitionColumns(keyGenerator)
|
||||
val partitionColumns = HoodieSparkUtils.getPartitionColumns(keyGenerator, toProperties(parameters))
|
||||
// Create the table if not present
|
||||
if (!tableExists) {
|
||||
val baseFileFormat = hoodieConfig.getStringOrDefault(HoodieTableConfig.HOODIE_BASE_FILE_FORMAT_PROP)
|
||||
|
||||
@@ -17,13 +17,13 @@
|
||||
|
||||
package org.apache.hudi
|
||||
|
||||
import java.util.Properties
|
||||
|
||||
import org.apache.hudi.DataSourceWriteOptions._
|
||||
import org.apache.hudi.common.config.HoodieMetadataConfig.{METADATA_ENABLE_PROP, METADATA_VALIDATE_PROP}
|
||||
import org.apache.hudi.common.config.{HoodieConfig, TypedProperties}
|
||||
import org.apache.hudi.keygen.factory.HoodieSparkKeyGeneratorFactory
|
||||
import org.apache.hudi.keygen.{BaseKeyGenerator, CustomAvroKeyGenerator, CustomKeyGenerator, KeyGenerator}
|
||||
|
||||
import java.util.Properties
|
||||
import scala.collection.JavaConversions.mapAsJavaMap
|
||||
import scala.collection.JavaConverters.{mapAsScalaMapConverter, _}
|
||||
|
||||
@@ -96,23 +96,7 @@ object HoodieWriterUtils {
|
||||
val props = new TypedProperties()
|
||||
props.putAll(parameters.asJava)
|
||||
val keyGen = HoodieSparkKeyGeneratorFactory.createKeyGenerator(props)
|
||||
getPartitionColumns(keyGen)
|
||||
}
|
||||
|
||||
def getPartitionColumns(keyGen: KeyGenerator): String = {
|
||||
keyGen match {
|
||||
// For CustomKeyGenerator and CustomAvroKeyGenerator, the partition path filed format
|
||||
// is: "field_name: field_type", we extract the field_name from the partition path field.
|
||||
case c: BaseKeyGenerator
|
||||
if c.isInstanceOf[CustomKeyGenerator] || c.isInstanceOf[CustomAvroKeyGenerator] =>
|
||||
c.getPartitionPathFields.asScala.map(pathField =>
|
||||
pathField.split(CustomAvroKeyGenerator.SPLIT_REGEX)
|
||||
.headOption.getOrElse(s"Illegal partition path field format: '$pathField' for ${c.getClass.getSimpleName}"))
|
||||
.mkString(",")
|
||||
|
||||
case b: BaseKeyGenerator => b.getPartitionPathFields.asScala.mkString(",")
|
||||
case _=> null
|
||||
}
|
||||
HoodieSparkUtils.getPartitionColumns(keyGen, props)
|
||||
}
|
||||
|
||||
def convertMapToHoodieConfig(parameters: Map[String, String]): HoodieConfig = {
|
||||
|
||||
@@ -273,7 +273,6 @@ public class MarkerDirState implements Serializable {
|
||||
private void syncMarkersFromFileSystem() {
|
||||
Map<String, Set<String>> fileMarkersSetMap = MarkerUtils.readTimelineServerBasedMarkersFromFileSystem(
|
||||
markerDirPath, fileSystem, hoodieEngineContext, parallelism);
|
||||
|
||||
for (String markersFilePathStr : fileMarkersSetMap.keySet()) {
|
||||
Set<String> fileMarkers = fileMarkersSetMap.get(markersFilePathStr);
|
||||
if (!fileMarkers.isEmpty()) {
|
||||
|
||||
@@ -21,7 +21,6 @@ package org.apache.hudi.utilities.deltastreamer;
|
||||
import org.apache.hudi.DataSourceUtils;
|
||||
import org.apache.hudi.DataSourceWriteOptions;
|
||||
import org.apache.hudi.HoodieSparkUtils;
|
||||
import org.apache.hudi.HoodieWriterUtils;
|
||||
import org.apache.hudi.avro.HoodieAvroUtils;
|
||||
import org.apache.hudi.client.SparkRDDWriteClient;
|
||||
import org.apache.hudi.client.WriteStatus;
|
||||
@@ -50,7 +49,6 @@ import org.apache.hudi.config.HoodieClusteringConfig;
|
||||
import org.apache.hudi.config.HoodieCompactionConfig;
|
||||
import org.apache.hudi.config.HoodiePayloadConfig;
|
||||
import org.apache.hudi.config.HoodieWriteConfig;
|
||||
import org.apache.hudi.utilities.exception.HoodieDeltaStreamerException;
|
||||
import org.apache.hudi.exception.HoodieException;
|
||||
import org.apache.hudi.hive.HiveSyncConfig;
|
||||
import org.apache.hudi.hive.HiveSyncTool;
|
||||
@@ -62,6 +60,7 @@ import org.apache.hudi.utilities.UtilHelpers;
|
||||
import org.apache.hudi.utilities.callback.kafka.HoodieWriteCommitKafkaCallback;
|
||||
import org.apache.hudi.utilities.callback.kafka.HoodieWriteCommitKafkaCallbackConfig;
|
||||
import org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer.Config;
|
||||
import org.apache.hudi.utilities.exception.HoodieDeltaStreamerException;
|
||||
import org.apache.hudi.utilities.schema.DelegatingSchemaProvider;
|
||||
import org.apache.hudi.utilities.schema.SchemaProvider;
|
||||
import org.apache.hudi.utilities.schema.SchemaSet;
|
||||
@@ -249,7 +248,7 @@ public class DeltaSync implements Serializable {
|
||||
}
|
||||
} else {
|
||||
this.commitTimelineOpt = Option.empty();
|
||||
String partitionColumns = HoodieWriterUtils.getPartitionColumns(keyGenerator);
|
||||
String partitionColumns = HoodieSparkUtils.getPartitionColumns(keyGenerator, props);
|
||||
HoodieTableMetaClient.withPropertyBuilder()
|
||||
.setTableType(cfg.tableType)
|
||||
.setTableName(cfg.targetTableName)
|
||||
@@ -353,7 +352,7 @@ public class DeltaSync implements Serializable {
|
||||
}
|
||||
}
|
||||
} else {
|
||||
String partitionColumns = HoodieWriterUtils.getPartitionColumns(keyGenerator);
|
||||
String partitionColumns = HoodieSparkUtils.getPartitionColumns(keyGenerator, props);
|
||||
HoodieTableMetaClient.withPropertyBuilder()
|
||||
.setTableType(cfg.tableType)
|
||||
.setTableName(cfg.targetTableName)
|
||||
|
||||
Reference in New Issue
Block a user