1
0

[HUDI-2795] Add mechanism to safely update,delete and recover table properties (#4038)

* [HUDI-2795] Add mechanism to safely update,delete and recover table properties

  - Fail safe mechanism, that lets queries succeed off a backup file
  - Readers who are not upgraded to this version of code will just fail until recovery is done.
  - Added unit tests that exercise all these scenarios.
  - Adding CLI for recovery, updation to table command.
  - [Pending] Add some hash-based verification to guard against rare partial writes on HDFS

* Fixing upgrade/downgrade infrastructure to use new updation method
This commit is contained in:
vinoth chandar
2021-11-20 08:07:40 -08:00
committed by GitHub
parent f4b974ac7b
commit ae0c67d9fc
7 changed files with 337 additions and 73 deletions

View File

@@ -29,6 +29,7 @@ import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.model.HoodieTableType;
import org.apache.hudi.common.model.OverwriteWithLatestAvroPayload;
import org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion;
import org.apache.hudi.common.util.FileIOUtils;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.ValidationUtils;
import org.apache.hudi.exception.HoodieIOException;
@@ -47,6 +48,8 @@ import java.util.Arrays;
import java.util.Date;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.function.BiConsumer;
import java.util.stream.Collectors;
/**
@@ -69,6 +72,7 @@ public class HoodieTableConfig extends HoodieConfig {
private static final Logger LOG = LogManager.getLogger(HoodieTableConfig.class);
public static final String HOODIE_PROPERTIES_FILE = "hoodie.properties";
public static final String HOODIE_PROPERTIES_FILE_BACKUP = "hoodie.properties.backup";
public static final ConfigProperty<String> NAME = ConfigProperty
.key("hoodie.table.name")
@@ -173,12 +177,11 @@ public class HoodieTableConfig extends HoodieConfig {
Path propertyPath = new Path(metaPath, HOODIE_PROPERTIES_FILE);
LOG.info("Loading table properties from " + propertyPath);
try {
try (FSDataInputStream inputStream = fs.open(propertyPath)) {
props.load(inputStream);
}
fetchConfigs(fs, metaPath);
if (contains(PAYLOAD_CLASS_NAME) && payloadClassName != null
&& !getString(PAYLOAD_CLASS_NAME).equals(payloadClassName)) {
setValue(PAYLOAD_CLASS_NAME, payloadClassName);
// FIXME(vc): wonder if this can be removed. Need to look into history.
try (FSDataOutputStream outputStream = fs.create(propertyPath)) {
props.store(outputStream, "Properties saved on " + new Date(System.currentTimeMillis()));
}
@@ -192,16 +195,103 @@ public class HoodieTableConfig extends HoodieConfig {
/**
* For serializing and de-serializing.
*
*/
public HoodieTableConfig() {
super();
}
/**
 * Loads the table configs into {@code props} from the properties file under the given meta path.
 * If reading fails and the properties file does not exist, the backup properties file is loaded instead.
 *
 * @param fs       file system used to open the config files
 * @param metaPath folder containing the properties file and, possibly, its backup
 * @throws IOException if the properties file exists but could not be read, or the backup could not be read
 */
private void fetchConfigs(FileSystem fs, String metaPath) throws IOException {
  Path primaryPath = new Path(metaPath, HOODIE_PROPERTIES_FILE);
  try (FSDataInputStream primaryStream = fs.open(primaryPath)) {
    props.load(primaryStream);
  } catch (IOException primaryFailure) {
    // The file is there but unreadable: nothing to fall back on, surface the original error.
    if (fs.exists(primaryPath)) {
      throw primaryFailure;
    }
    LOG.warn("Run `table recover-configs` if config update/delete failed midway. Falling back to backed up configs.");
    // Read the backup instead, so that queries keep working even if an update failed midway.
    Path backupPath = new Path(metaPath, HOODIE_PROPERTIES_FILE_BACKUP);
    try (FSDataInputStream backupStream = fs.open(backupPath)) {
      props.load(backupStream);
    }
  }
}
/**
 * Runs config recovery for the properties file and its backup located under the given metadata folder.
 *
 * @param fs             file system holding the metadata folder
 * @param metadataFolder folder containing the properties file and/or its backup
 * @throws IOException if the recovery step fails
 */
public static void recover(FileSystem fs, Path metadataFolder) throws IOException {
  recoverIfNeeded(
      fs,
      new Path(metadataFolder, HOODIE_PROPERTIES_FILE),
      new Path(metadataFolder, HOODIE_PROPERTIES_FILE_BACKUP));
}
/**
 * Restores the properties file from its backup when the properties file is missing, then removes the backup.
 *
 * @param fs            file system holding both files
 * @param cfgPath       path of the properties file
 * @param backupCfgPath path of the backup properties file
 * @throws IOException if the backup cannot be opened/copied or the delete fails
 */
static void recoverIfNeeded(FileSystem fs, Path cfgPath, Path backupCfgPath) throws IOException {
  boolean cfgMissing = !fs.exists(cfgPath);
  if (cfgMissing) {
    // Rebuild the properties file from the backup; overwrite=false since the file is not expected to exist.
    try (FSDataInputStream backupIn = fs.open(backupCfgPath);
         FSDataOutputStream cfgOut = fs.create(cfgPath, false)) {
      FileIOUtils.copy(backupIn, cfgOut);
    }
  }
  // Whether or not a copy happened, the backup is no longer needed.
  fs.delete(backupCfgPath, false);
}
/**
 * Copies every entry of {@code updated} into {@code current}, overwriting existing keys.
 * Keys and values are stored using their {@code toString()} form.
 */
private static void upsertProperties(Properties current, Properties updated) {
  for (Map.Entry<Object, Object> entry : updated.entrySet()) {
    String key = entry.getKey().toString();
    String value = entry.getValue().toString();
    current.setProperty(key, value);
  }
}
/**
 * Removes from {@code current} every key present in {@code deleted}; the values in {@code deleted} are ignored.
 * Keys that are absent from {@code current} are silently skipped.
 */
private static void deleteProperties(Properties current, Properties deleted) {
  for (Object key : deleted.keySet()) {
    current.remove(key.toString());
  }
}
/**
 * Applies a modification to the table properties file using a backup-based protocol, so the configs stay
 * readable (from the backup) while the properties file is being rewritten.
 *
 * <p>The new properties file is only created after the current properties have been loaded from the backup
 * and the modification has been applied in memory. This way a failure while reading or modifying does not
 * leave behind an empty properties file, which a later {@code recoverIfNeeded} call would treat as valid
 * (it only checks for existence) before deleting the backup.
 *
 * @param fs             file system holding the metadata folder
 * @param metadataFolder folder containing the properties file
 * @param modifyProps    properties describing the modification; passed as second argument to {@code modifyFn}
 * @param modifyFn       function applied as {@code modifyFn.accept(currentProps, modifyProps)}
 * @throws HoodieIOException wrapping any {@link IOException} raised along the way
 */
private static void modify(FileSystem fs, Path metadataFolder, Properties modifyProps, BiConsumer<Properties, Properties> modifyFn) {
  Path cfgPath = new Path(metadataFolder, HOODIE_PROPERTIES_FILE);
  Path backupCfgPath = new Path(metadataFolder, HOODIE_PROPERTIES_FILE_BACKUP);
  try {
    // 0. do any recovery from prior attempts.
    recoverIfNeeded(fs, cfgPath, backupCfgPath);

    // 1. backup the existing properties.
    try (FSDataInputStream in = fs.open(cfgPath);
         FSDataOutputStream out = fs.create(backupCfgPath, false)) {
      FileIOUtils.copy(in, out);
    }

    // 2. delete the properties file, reads will go to the backup, until we are done.
    fs.delete(cfgPath, false);

    // 3. read current props and apply the modification in memory first ...
    Properties props = new Properties();
    try (FSDataInputStream in = fs.open(backupCfgPath)) {
      props.load(in);
    }
    modifyFn.accept(props, modifyProps);

    // ... and only then create the new properties file and save the result.
    try (FSDataOutputStream out = fs.create(cfgPath, true)) {
      props.store(out, "Updated at " + System.currentTimeMillis());
    }

    // 4. verify and remove backup.
    // FIXME(vc): generate a hash for verification.
    fs.delete(backupCfgPath, false);
  } catch (IOException e) {
    throw new HoodieIOException("Error updating table configs.", e);
  }
}
/**
 * Upserts the given properties into the table config: each entry of {@code updatedProps} is set on the
 * stored config, overwriting any existing value. The write goes through the backup-based {@code modify}
 * routine, so the table config stays readable and recoverable while it is being rewritten.
 *
 * @param fs             file system holding the metadata folder
 * @param metadataFolder folder containing the properties file
 * @param updatedProps   properties to add or overwrite
 */
public static void update(FileSystem fs, Path metadataFolder, Properties updatedProps) {
  BiConsumer<Properties, Properties> upsertFn = HoodieTableConfig::upsertProperties;
  modify(fs, metadataFolder, updatedProps, upsertFn);
}
/**
 * Deletes the given keys from the table config, going through the same backup-based {@code modify} routine
 * used for updates.
 *
 * @param fs             file system holding the metadata folder
 * @param metadataFolder folder containing the properties file
 * @param deletedProps   keys to remove from the table config
 */
public static void delete(FileSystem fs, Path metadataFolder, Set<String> deletedProps) {
  // modify() works on Properties, so carry the keys in one; the empty values are placeholders only.
  Properties keysToDelete = new Properties();
  for (String key : deletedProps) {
    keysToDelete.setProperty(key, "");
  }
  modify(fs, metadataFolder, keysToDelete, HoodieTableConfig::deleteProperties);
}
/**
* Initialize the hoodie meta directory and any necessary files inside the meta (including the hoodie.properties).
*/
public static void createHoodieProperties(FileSystem fs, Path metadataFolder, Properties properties)
public static void create(FileSystem fs, Path metadataFolder, Properties properties)
throws IOException {
if (!fs.exists(metadataFolder)) {
fs.mkdirs(metadataFolder);

View File

@@ -377,7 +377,7 @@ public class HoodieTableMetaClient implements Serializable {
}
initializeBootstrapDirsIfNotExists(hadoopConf, basePath, fs);
HoodieTableConfig.createHoodieProperties(fs, metaPathDir, props);
HoodieTableConfig.create(fs, metaPathDir, props);
// We should not use fs.getConf as this might be different from the original configuration
// used to create the fs in unit tests
HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(basePath).build();

View File

@@ -94,7 +94,7 @@ public class TestBootstrapIndex extends HoodieCommonTestHarness {
props.put(HoodieTableConfig.BOOTSTRAP_INDEX_ENABLE.key(), "false");
Properties properties = new Properties();
properties.putAll(props);
HoodieTableConfig.createHoodieProperties(metaClient.getFs(), new Path(metaClient.getMetaPath()), properties);
HoodieTableConfig.create(metaClient.getFs(), new Path(metaClient.getMetaPath()), properties);
metaClient = HoodieTableMetaClient.builder().setConf(metaClient.getHadoopConf()).setBasePath(basePath).build();
BootstrapIndex bootstrapIndex = BootstrapIndex.getBootstrapIndex(metaClient);

View File

@@ -0,0 +1,137 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hudi.common.table;
import org.apache.hudi.common.testutils.HoodieCommonTestHarness;
import org.apache.hudi.common.util.CollectionUtils;
import org.apache.hudi.exception.HoodieIOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.ValueSource;
import java.io.IOException;
import java.util.Properties;
import java.util.Set;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertNull;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
/**
 * Tests for creating, updating, deleting and recovering the table properties file through
 * {@link HoodieTableConfig}, including reads that fall back to the backup properties file.
 */
public class TestHoodieTableConfig extends HoodieCommonTestHarness {

  private FileSystem fs;
  private Path metaPath;
  private Path cfgPath;
  private Path backupCfgPath;

  /**
   * Creates a fresh properties file containing only the table name under the meta folder of the test base path.
   */
  @BeforeEach
  public void setUp() throws Exception {
    initPath();
    fs = new Path(basePath).getFileSystem(new Configuration());
    metaPath = new Path(basePath, HoodieTableMetaClient.METAFOLDER_NAME);
    Properties props = new Properties();
    props.setProperty(HoodieTableConfig.NAME.key(), "test-table");
    HoodieTableConfig.create(fs, metaPath, props);
    cfgPath = new Path(metaPath, HoodieTableConfig.HOODIE_PROPERTIES_FILE);
    backupCfgPath = new Path(metaPath, HoodieTableConfig.HOODIE_PROPERTIES_FILE_BACKUP);
  }

  /** The properties file exists after creation and loads with the expected number of entries. */
  @Test
  public void testCreate() throws IOException {
    assertTrue(fs.exists(cfgPath));
    HoodieTableConfig config = new HoodieTableConfig(fs, metaPath.toString(), null);
    assertEquals(4, config.getProps().size());
  }

  /** An update overwrites an existing key, adds a new one and leaves no backup file behind. */
  @Test
  public void testUpdate() throws IOException {
    Properties updatedProps = new Properties();
    updatedProps.setProperty(HoodieTableConfig.NAME.key(), "test-table2");
    updatedProps.setProperty(HoodieTableConfig.PRECOMBINE_FIELD.key(), "new_field");
    HoodieTableConfig.update(fs, metaPath, updatedProps);

    assertTrue(fs.exists(cfgPath));
    assertFalse(fs.exists(backupCfgPath));
    HoodieTableConfig config = new HoodieTableConfig(fs, metaPath.toString(), null);
    assertEquals(5, config.getProps().size());
    assertEquals("test-table2", config.getTableName());
    assertEquals("new_field", config.getPreCombineField());
  }

  /** A delete removes an existing key, tolerates an unknown key and leaves no backup file behind. */
  @Test
  public void testDelete() throws IOException {
    Set<String> deletedProps = CollectionUtils.createSet(HoodieTableConfig.ARCHIVELOG_FOLDER.key(), "hoodie.invalid.config");
    HoodieTableConfig.delete(fs, metaPath, deletedProps);

    assertTrue(fs.exists(cfgPath));
    assertFalse(fs.exists(backupCfgPath));
    HoodieTableConfig config = new HoodieTableConfig(fs, metaPath.toString(), null);
    assertEquals(3, config.getProps().size());
    assertNull(config.getProps().getProperty("hoodie.invalid.config"));
    // containsKey, not contains: Hashtable#contains (inherited by Properties) searches the VALUES,
    // so asserting on it would pass even if the key were still present.
    assertFalse(config.getProps().containsKey(HoodieTableConfig.ARCHIVELOG_FOLDER.key()));
  }

  /** With neither the properties file nor a backup present, loading the config fails. */
  @Test
  public void testReadsWhenPropsFileDoesNotExist() throws IOException {
    fs.delete(cfgPath, false);
    assertThrows(HoodieIOException.class, () -> {
      new HoodieTableConfig(fs, metaPath.toString(), null);
    });
  }

  /** With only the backup file present (as after an update that failed midway), reads still succeed. */
  @Test
  public void testReadsWithUpdateFailures() throws IOException {
    HoodieTableConfig config = new HoodieTableConfig(fs, metaPath.toString(), null);
    fs.delete(cfgPath, false);
    try (FSDataOutputStream out = fs.create(backupCfgPath)) {
      config.getProps().store(out, "");
    }

    assertFalse(fs.exists(cfgPath));
    assertTrue(fs.exists(backupCfgPath));
    config = new HoodieTableConfig(fs, metaPath.toString(), null);
    assertEquals(4, config.getProps().size());
  }

  /**
   * Recovery leaves a readable properties file and no backup, both when the properties file
   * still exists next to the backup and when only the backup is left.
   */
  @ParameterizedTest
  @ValueSource(booleans = {true, false})
  public void testUpdateRecovery(boolean shouldPropsFileExist) throws IOException {
    HoodieTableConfig config = new HoodieTableConfig(fs, metaPath.toString(), null);
    if (!shouldPropsFileExist) {
      fs.delete(cfgPath, false);
    }
    try (FSDataOutputStream out = fs.create(backupCfgPath)) {
      config.getProps().store(out, "");
    }

    HoodieTableConfig.recoverIfNeeded(fs, cfgPath, backupCfgPath);
    assertTrue(fs.exists(cfgPath));
    assertFalse(fs.exists(backupCfgPath));
    config = new HoodieTableConfig(fs, metaPath.toString(), null);
    assertEquals(4, config.getProps().size());
  }
}