1
0

[HUDI-2809] Introduce a checksum mechanism for validating hoodie.properties (#4712)

Fix dependency conflict

Fix repairs command

Implement putIfAbsent for DDB lock provider

Add upgrade step and validate while fetching configs

Validate checksum for latest table version only while fetching config

Move generateChecksum to BinaryUtil

Rebase and resolve conflict

Fix table version check
This commit is contained in:
Sagar Sumit
2022-02-18 10:17:06 +05:30
committed by GitHub
parent 2844a77b43
commit ed106f671e
20 changed files with 320 additions and 40 deletions

View File

@@ -45,7 +45,6 @@ import org.springframework.shell.core.annotation.CliOption;
import org.springframework.stereotype.Component;
import scala.collection.JavaConverters;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.List;
@@ -153,10 +152,12 @@ public class RepairsCommand implements CommandMarker {
HoodieTableMetaClient client = HoodieCLI.getTableMetaClient();
Properties newProps = new Properties();
newProps.load(new FileInputStream(new File(overwriteFilePath)));
newProps.load(new FileInputStream(overwriteFilePath));
Map<String, String> oldProps = client.getTableConfig().propsMap();
Path metaPathDir = new Path(client.getBasePath(), METAFOLDER_NAME);
HoodieTableConfig.create(client.getFs(), metaPathDir, newProps);
// reload new props as checksum would have been added
newProps = HoodieTableMetaClient.reload(HoodieCLI.getTableMetaClient()).getTableConfig().getProps();
TreeSet<String> allPropKeys = new TreeSet<>();
allPropKeys.addAll(newProps.keySet().stream().map(Object::toString).collect(Collectors.toSet()));

View File

@@ -39,7 +39,6 @@ import org.junit.jupiter.api.Tag;
import org.junit.jupiter.api.Test;
import org.springframework.shell.core.CommandResult;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.net.URL;
@@ -51,6 +50,14 @@ import java.util.Map;
import java.util.Properties;
import java.util.stream.Collectors;
import static org.apache.hudi.common.table.HoodieTableConfig.ARCHIVELOG_FOLDER;
import static org.apache.hudi.common.table.HoodieTableConfig.NAME;
import static org.apache.hudi.common.table.HoodieTableConfig.TABLE_CHECKSUM;
import static org.apache.hudi.common.table.HoodieTableConfig.TIMELINE_LAYOUT_VERSION;
import static org.apache.hudi.common.table.HoodieTableConfig.TYPE;
import static org.apache.hudi.common.table.HoodieTableConfig.VERSION;
import static org.apache.hudi.common.table.HoodieTableConfig.generateChecksum;
import static org.apache.hudi.common.table.HoodieTableConfig.validateChecksum;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertTrue;
@@ -104,7 +111,7 @@ public class TestRepairsCommand extends CLIFunctionalTestHarness {
// expected all 'No'.
String[][] rows = FSUtils.getAllPartitionFoldersThreeLevelsDown(fs, tablePath)
.stream()
.map(partition -> new String[]{partition, "No", "None"})
.map(partition -> new String[] {partition, "No", "None"})
.toArray(String[][]::new);
String expected = HoodiePrintHelper.print(new String[] {HoodieTableHeaderFields.HEADER_PARTITION_PATH,
HoodieTableHeaderFields.HEADER_METADATA_PRESENT, HoodieTableHeaderFields.HEADER_ACTION}, rows);
@@ -135,7 +142,7 @@ public class TestRepairsCommand extends CLIFunctionalTestHarness {
List<String> paths = FSUtils.getAllPartitionFoldersThreeLevelsDown(fs, tablePath);
// after dry run, the action will be 'Repaired'
String[][] rows = paths.stream()
.map(partition -> new String[]{partition, "No", "Repaired"})
.map(partition -> new String[] {partition, "No", "Repaired"})
.toArray(String[][]::new);
String expected = HoodiePrintHelper.print(new String[] {HoodieTableHeaderFields.HEADER_PARTITION_PATH,
HoodieTableHeaderFields.HEADER_METADATA_PRESENT, HoodieTableHeaderFields.HEADER_ACTION}, rows);
@@ -147,7 +154,7 @@ public class TestRepairsCommand extends CLIFunctionalTestHarness {
// after real run, Metadata is present now.
rows = paths.stream()
.map(partition -> new String[]{partition, "Yes", "None"})
.map(partition -> new String[] {partition, "Yes", "None"})
.toArray(String[][]::new);
expected = HoodiePrintHelper.print(new String[] {HoodieTableHeaderFields.HEADER_PARTITION_PATH,
HoodieTableHeaderFields.HEADER_METADATA_PRESENT, HoodieTableHeaderFields.HEADER_ACTION}, rows);
@@ -170,19 +177,24 @@ public class TestRepairsCommand extends CLIFunctionalTestHarness {
Map<String, String> oldProps = HoodieCLI.getTableMetaClient().getTableConfig().propsMap();
// after overwrite, the stored value in .hoodie is equals to which read from properties.
Map<String, String> result = HoodieTableMetaClient.reload(HoodieCLI.getTableMetaClient()).getTableConfig().propsMap();
HoodieTableConfig tableConfig = HoodieTableMetaClient.reload(HoodieCLI.getTableMetaClient()).getTableConfig();
Map<String, String> result = tableConfig.propsMap();
// validate table checksum
assertTrue(result.containsKey(TABLE_CHECKSUM.key()));
assertTrue(validateChecksum(tableConfig.getProps()));
Properties expectProps = new Properties();
expectProps.load(new FileInputStream(new File(newProps.getPath())));
expectProps.load(new FileInputStream(newProps.getPath()));
Map<String, String> expected = expectProps.entrySet().stream()
.collect(Collectors.toMap(e -> String.valueOf(e.getKey()), e -> String.valueOf(e.getValue())));
expected.putIfAbsent(TABLE_CHECKSUM.key(), String.valueOf(generateChecksum(tableConfig.getProps())));
assertEquals(expected, result);
// check result
List<String> allPropsStr = Arrays.asList("hoodie.table.name", "hoodie.table.type", "hoodie.table.version",
"hoodie.archivelog.folder", "hoodie.timeline.layout.version");
String[][] rows = allPropsStr.stream().sorted().map(key -> new String[]{key,
oldProps.getOrDefault(key, "null"), result.getOrDefault(key, "null")})
List<String> allPropsStr = Arrays.asList(NAME.key(), TYPE.key(), VERSION.key(),
ARCHIVELOG_FOLDER.key(), TIMELINE_LAYOUT_VERSION.key(), TABLE_CHECKSUM.key());
String[][] rows = allPropsStr.stream().sorted().map(key -> new String[] {key,
oldProps.getOrDefault(key, "null"), result.getOrDefault(key, "null")})
.toArray(String[][]::new);
String expect = HoodiePrintHelper.print(new String[] {HoodieTableHeaderFields.HEADER_HOODIE_PROPERTY,
HoodieTableHeaderFields.HEADER_OLD_VALUE, HoodieTableHeaderFields.HEADER_NEW_VALUE}, rows);

View File

@@ -163,7 +163,6 @@
<dependency>
<groupId>org.awaitility</groupId>
<artifactId>awaitility</artifactId>
<version>3.1.2</version>
<scope>test</scope>
</dependency>

View File

@@ -0,0 +1,38 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.hudi.table.upgrade;
import org.apache.hudi.common.config.ConfigProperty;
import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.config.HoodieWriteConfig;
import java.util.Collections;
import java.util.Map;
/**
 * DowngradeHandler to assist in downgrading {@link org.apache.hudi.table.HoodieTable} from version 4 to 3.
 */
public class FourToThreeDowngradeHandler implements DowngradeHandler {

  /**
   * No table properties need to be rewritten when moving back to version 3.
   * The {@code hoodie.table.checksum} entry introduced in version 4 is left in
   * place — NOTE(review): presumably harmless to v3 readers since unknown
   * properties are ignored; confirm against HoodieTableConfig parsing.
   */
  @Override
  public Map<ConfigProperty, String> downgrade(HoodieWriteConfig config, HoodieEngineContext context, String instantTime, SupportsUpgradeDowngrade upgradeDowngradeHelper) {
    return Collections.emptyMap();
  }
}

View File

@@ -0,0 +1,41 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.hudi.table.upgrade;
import org.apache.hudi.common.config.ConfigProperty;
import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.common.table.HoodieTableConfig;
import org.apache.hudi.config.HoodieWriteConfig;
import java.util.Hashtable;
import java.util.Map;
/**
 * UpgradeHandler to assist in upgrading {@link org.apache.hudi.table.HoodieTable} from version 3 to 4.
 */
public class ThreeToFourUpgradeHandler implements UpgradeHandler {

  /**
   * Version 4 introduces a checksum entry in hoodie.properties; compute it from
   * the current write config so the upgrade step persists it.
   *
   * @return the single property to add: {@code hoodie.table.checksum} -> checksum value
   */
  @Override
  public Map<ConfigProperty, String> upgrade(HoodieWriteConfig config, HoodieEngineContext context, String instantTime, SupportsUpgradeDowngrade upgradeDowngradeHelper) {
    Map<ConfigProperty, String> tablePropsToAdd = new Hashtable<>();
    tablePropsToAdd.put(HoodieTableConfig.TABLE_CHECKSUM, String.valueOf(HoodieTableConfig.generateChecksum(config.getProps())));
    return tablePropsToAdd;
  }
}

View File

@@ -143,6 +143,8 @@ public class UpgradeDowngrade {
return new OneToTwoUpgradeHandler().upgrade(config, context, instantTime, upgradeDowngradeHelper);
} else if (fromVersion == HoodieTableVersion.TWO && toVersion == HoodieTableVersion.THREE) {
return new TwoToThreeUpgradeHandler().upgrade(config, context, instantTime, upgradeDowngradeHelper);
} else if (fromVersion == HoodieTableVersion.THREE && toVersion == HoodieTableVersion.FOUR) {
return new ThreeToFourUpgradeHandler().upgrade(config, context, instantTime, upgradeDowngradeHelper);
} else {
throw new HoodieUpgradeDowngradeException(fromVersion.versionCode(), toVersion.versionCode(), true);
}
@@ -155,6 +157,8 @@ public class UpgradeDowngrade {
return new TwoToOneDowngradeHandler().downgrade(config, context, instantTime, upgradeDowngradeHelper);
} else if (fromVersion == HoodieTableVersion.THREE && toVersion == HoodieTableVersion.TWO) {
return new ThreeToTwoDowngradeHandler().downgrade(config, context, instantTime, upgradeDowngradeHelper);
} else if (fromVersion == HoodieTableVersion.FOUR && toVersion == HoodieTableVersion.THREE) {
return new FourToThreeDowngradeHandler().downgrade(config, context, instantTime, upgradeDowngradeHelper);
} else {
throw new HoodieUpgradeDowngradeException(fromVersion.versionCode(), toVersion.versionCode(), false);
}

View File

@@ -169,9 +169,9 @@
</dependency>
<!-- Other Utils -->
<dependency>
<groupId>org.awaitility</groupId>
<artifactId>awaitility</artifactId>
<scope>test</scope>
<groupId>org.awaitility</groupId>
<artifactId>awaitility</artifactId>
<scope>test</scope>
</dependency>
</dependencies>

View File

@@ -1548,7 +1548,7 @@ public class TestHoodieBackedMetadata extends TestHoodieMetadataBase {
assertTrue(currentStatus.getModificationTime() > prevStatus.getModificationTime());
initMetaClient();
assertEquals(metaClient.getTableConfig().getTableVersion().versionCode(), HoodieTableVersion.THREE.versionCode());
assertEquals(metaClient.getTableConfig().getTableVersion().versionCode(), HoodieTableVersion.FOUR.versionCode());
assertTrue(fs.exists(new Path(metadataTableBasePath)), "Metadata table should exist");
FileStatus newStatus = fs.getFileStatus(new Path(metadataTableBasePath));
assertTrue(oldStatus.getModificationTime() < newStatus.getModificationTime());
@@ -1630,7 +1630,7 @@ public class TestHoodieBackedMetadata extends TestHoodieMetadataBase {
}
initMetaClient();
assertEquals(metaClient.getTableConfig().getTableVersion().versionCode(), HoodieTableVersion.THREE.versionCode());
assertEquals(metaClient.getTableConfig().getTableVersion().versionCode(), HoodieTableVersion.FOUR.versionCode());
assertTrue(fs.exists(new Path(metadataTableBasePath)), "Metadata table should exist");
FileStatus newStatus = fs.getFileStatus(new Path(metadataTableBasePath));
assertTrue(oldStatus.getModificationTime() < newStatus.getModificationTime());

View File

@@ -232,6 +232,43 @@ public class TestUpgradeDowngrade extends HoodieClientTestBase {
assertTableProps(cfg);
}
/**
 * End-to-end check of the v3 &lt;-&gt; v4 upgrade/downgrade path: the table checksum
 * written at the current version survives a downgrade to THREE, and after the
 * checksum property is removed, a re-upgrade regenerates the identical value
 * (the checksum depends only on the database/table name, which do not change).
 */
@Test
public void testUpgradeDowngradeBetweenThreeAndCurrentVersion() throws IOException {
  // init config, table and client.
  Map<String, String> params = new HashMap<>();
  addNewTableParamsToProps(params);
  HoodieWriteConfig cfg = getConfigBuilder().withAutoCommit(false).withRollbackUsingMarkers(false).withProps(params).build();
  // write inserts
  SparkRDDWriteClient client = getHoodieWriteClient(cfg);
  doInsert(client);
  // current version should have TABLE_CHECKSUM key
  assertEquals(HoodieTableVersion.current(), metaClient.getTableConfig().getTableVersion());
  assertTableVersionFromPropertyFile(HoodieTableVersion.current());
  assertTrue(metaClient.getTableConfig().getProps().containsKey(HoodieTableConfig.TABLE_CHECKSUM.key()));
  String checksum = metaClient.getTableConfig().getProps().getString(HoodieTableConfig.TABLE_CHECKSUM.key());
  // downgrade to version 3 and check TABLE_CHECKSUM is still present
  new UpgradeDowngrade(metaClient, cfg, context, SparkUpgradeDowngradeHelper.getInstance()).run(HoodieTableVersion.THREE, null);
  assertEquals(HoodieTableVersion.THREE.versionCode(), metaClient.getTableConfig().getTableVersion().versionCode());
  assertTableVersionFromPropertyFile(HoodieTableVersion.THREE);
  assertTrue(metaClient.getTableConfig().getProps().containsKey(HoodieTableConfig.TABLE_CHECKSUM.key()));
  assertEquals(checksum, metaClient.getTableConfig().getProps().getString(HoodieTableConfig.TABLE_CHECKSUM.key()));
  // remove TABLE_CHECKSUM and upgrade to current version
  metaClient.getTableConfig().getProps().remove(HoodieTableConfig.TABLE_CHECKSUM.key());
  new UpgradeDowngrade(metaClient, cfg, context, SparkUpgradeDowngradeHelper.getInstance()).run(HoodieTableVersion.current(), null);
  // verify upgrade and TABLE_CHECKSUM
  // reload the meta client so the freshly written hoodie.properties is observed
  metaClient = HoodieTableMetaClient.builder().setConf(context.getHadoopConf().get()).setBasePath(cfg.getBasePath())
      .setLayoutVersion(Option.of(new TimelineLayoutVersion(cfg.getTimelineLayoutVersion()))).build();
  assertEquals(HoodieTableVersion.current().versionCode(), metaClient.getTableConfig().getTableVersion().versionCode());
  assertTableVersionFromPropertyFile(HoodieTableVersion.current());
  assertTrue(metaClient.getTableConfig().getProps().containsKey(HoodieTableConfig.TABLE_CHECKSUM.key()));
  assertEquals(checksum, metaClient.getTableConfig().getProps().getString(HoodieTableConfig.TABLE_CHECKSUM.key()));
}
private void addNewTableParamsToProps(Map<String, String> params) {
params.put(KeyGeneratorOptions.RECORDKEY_FIELD_NAME.key(), "uuid");
params.put(KeyGeneratorOptions.PARTITIONPATH_FIELD_NAME.key(), "partition_path");

View File

@@ -20,9 +20,14 @@ package org.apache.hudi.common.config;
import java.io.Serializable;
import java.util.Arrays;
import java.util.Collections;
import java.util.Enumeration;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Objects;
import java.util.Properties;
import java.util.Set;
import java.util.stream.Collectors;
/**
@@ -30,6 +35,8 @@ import java.util.stream.Collectors;
*/
public class TypedProperties extends Properties implements Serializable {
private final HashSet<Object> keys = new LinkedHashSet<>();
public TypedProperties() {
super(null);
}
@@ -42,6 +49,45 @@ public class TypedProperties extends Properties implements Serializable {
}
}
/**
 * Enumerates property names in insertion order (tracked by {@code keys})
 * instead of Hashtable's unspecified order. Note this also skips the
 * defaults-merging of {@link java.util.Properties#propertyNames()}.
 */
@Override
public Enumeration<?> propertyNames() {
  // Use the wildcard return type of the parent signature rather than a raw
  // Enumeration, which triggers raw-type warnings at call sites.
  return Collections.enumeration(keys);
}
/**
 * Enumerates keys in insertion order, backed by the ordered {@code keys} set
 * rather than Hashtable's internal bucket order.
 */
@Override
public synchronized Enumeration<Object> keys() {
  return Collections.enumeration(keys);
}
/**
 * Returns the property names as strings, preserving insertion order.
 * Keys are assumed to be strings; a non-String key fails with a
 * ClassCastException, matching the cast in the original loop.
 */
@Override
public Set<String> stringPropertyNames() {
  return this.keys.stream()
      .map(key -> (String) key)
      .collect(Collectors.toCollection(LinkedHashSet::new));
}
/**
 * Stores the mapping and records the key in the ordered key set. Re-putting an
 * existing key moves it to the end of the iteration order (remove-then-add on
 * the LinkedHashSet), mirroring a fresh insert.
 */
@Override
public synchronized Object put(Object key, Object value) {
  Object previous = super.put(key, value);
  keys.remove(key);
  keys.add(key);
  return previous;
}
/**
 * Stores the mapping only if the key is absent, keeping the ordered key set in
 * sync with the underlying table.
 *
 * @return the existing value if the key was already present, else null
 */
@Override
public synchronized Object putIfAbsent(Object key, Object value) {
  // Delegate first and use the result to decide whether a mapping was created:
  // Hashtable values are non-null, so a null return means the key was actually
  // inserted. The previous containsKey(String.valueOf(key)) check could
  // disagree with the table for non-String keys and leave the ordered key set
  // out of sync with the stored entries.
  Object previous = super.putIfAbsent(key, value);
  if (previous == null) {
    keys.add(key);
  }
  return previous;
}
/**
 * Removes the mapping and its entry in the ordered key set.
 *
 * @return the value previously associated with the key, or null
 */
@Override
public synchronized Object remove(Object key) {
  // synchronized for consistency with put()/putIfAbsent()/keys(): without it,
  // the keys-set update races against concurrent enumeration, and a reader
  // could observe a key present in the table but absent from the ordered set.
  keys.remove(key);
  return super.remove(key);
}
private void checkKey(String property) {
if (!containsKey(property)) {
throw new IllegalArgumentException("Property " + property + " not found");

View File

@@ -31,6 +31,7 @@ import org.apache.hudi.common.config.ConfigClassProperty;
import org.apache.hudi.common.config.ConfigGroups;
import org.apache.hudi.common.config.ConfigProperty;
import org.apache.hudi.common.config.HoodieConfig;
import org.apache.hudi.common.config.TypedProperties;
import org.apache.hudi.common.model.HoodieFileFormat;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.model.HoodieTableType;
@@ -38,6 +39,7 @@ import org.apache.hudi.common.model.HoodieTimelineTimeZone;
import org.apache.hudi.common.model.OverwriteWithLatestAvroPayload;
import org.apache.hudi.common.table.timeline.HoodieInstantTimeGenerator;
import org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion;
import org.apache.hudi.common.util.BinaryUtil;
import org.apache.hudi.common.util.FileIOUtils;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.ValidationUtils;
@@ -58,6 +60,8 @@ import java.util.Set;
import java.util.function.BiConsumer;
import java.util.stream.Collectors;
import static java.nio.charset.StandardCharsets.UTF_8;
/**
* Configurations on the Hoodie Table like type of ingestion, storage formats, hive table name etc Configurations are loaded from hoodie.properties, these properties are usually set during
* initializing a path as hoodie base path and never changes during the lifetime of a hoodie table.
@@ -197,6 +201,14 @@ public class HoodieTableConfig extends HoodieConfig {
public static final String NO_OP_BOOTSTRAP_INDEX_CLASS = NoOpBootstrapIndex.class.getName();
public static final ConfigProperty<String> TABLE_CHECKSUM = ConfigProperty
.key("hoodie.table.checksum")
.noDefaultValue()
.sinceVersion("0.11.0")
.withDocumentation("Table checksum is used to guard against partial writes in HDFS. It is added as the last entry in hoodie.properties and then used to validate while reading table config.");
private static final String TABLE_CHECKSUM_FORMAT = "%s.%s"; // <database_name>.<table_name>
public HoodieTableConfig(FileSystem fs, String metaPath, String payloadClassName) {
super();
Path propertyPath = new Path(metaPath, HOODIE_PROPERTIES_FILE);
@@ -208,6 +220,9 @@ public class HoodieTableConfig extends HoodieConfig {
setValue(PAYLOAD_CLASS_NAME, payloadClassName);
// FIXME(vc): wonder if this can be removed. Need to look into history.
try (FSDataOutputStream outputStream = fs.create(propertyPath)) {
if (!isValidChecksum()) {
setValue(TABLE_CHECKSUM, String.valueOf(generateChecksum(props)));
}
props.store(outputStream, "Properties saved on " + new Date(System.currentTimeMillis()));
}
}
@@ -218,6 +233,10 @@ public class HoodieTableConfig extends HoodieConfig {
"hoodie.properties file seems invalid. Please check for left over `.updated` files if any, manually copy it to hoodie.properties and retry");
}
// A stored checksum is valid only if the property exists and matches the value
// recomputed from the table identity in the current props.
private boolean isValidChecksum() {
  return contains(TABLE_CHECKSUM) && validateChecksum(props);
}
/**
* For serializing and de-serializing.
*/
@@ -227,13 +246,20 @@ public class HoodieTableConfig extends HoodieConfig {
private void fetchConfigs(FileSystem fs, String metaPath) throws IOException {
Path cfgPath = new Path(metaPath, HOODIE_PROPERTIES_FILE);
Path backupCfgPath = new Path(metaPath, HOODIE_PROPERTIES_FILE_BACKUP);
try (FSDataInputStream is = fs.open(cfgPath)) {
props.load(is);
// validate checksum for latest table version
if (getTableVersion().versionCode() >= HoodieTableVersion.FOUR.versionCode() && !isValidChecksum()) {
LOG.warn("Checksum validation failed. Falling back to backed up configs.");
try (FSDataInputStream fsDataInputStream = fs.open(backupCfgPath)) {
props.load(fsDataInputStream);
}
}
} catch (IOException ioe) {
if (!fs.exists(cfgPath)) {
LOG.warn("Run `table recover-configs` if config update/delete failed midway. Falling back to backed up configs.");
// try the backup. this way no query ever fails if update fails midway.
Path backupCfgPath = new Path(metaPath, HOODIE_PROPERTIES_FILE_BACKUP);
try (FSDataInputStream is = fs.open(backupCfgPath)) {
props.load(is);
}
@@ -284,15 +310,31 @@ public class HoodieTableConfig extends HoodieConfig {
/// 2. delete the properties file, reads will go to the backup, until we are done.
fs.delete(cfgPath, false);
// 3. read current props, upsert and save back.
String checksum;
try (FSDataInputStream in = fs.open(backupCfgPath);
FSDataOutputStream out = fs.create(cfgPath, true)) {
Properties props = new Properties();
Properties props = new TypedProperties();
props.load(in);
modifyFn.accept(props, modifyProps);
if (props.containsKey(TABLE_CHECKSUM.key()) && validateChecksum(props)) {
checksum = props.getProperty(TABLE_CHECKSUM.key());
} else {
checksum = String.valueOf(generateChecksum(props));
props.setProperty(TABLE_CHECKSUM.key(), checksum);
}
props.store(out, "Updated at " + System.currentTimeMillis());
}
// 4. verify and remove backup.
// FIXME(vc): generate a hash for verification.
try (FSDataInputStream in = fs.open(cfgPath)) {
Properties props = new TypedProperties();
props.load(in);
if (!props.containsKey(TABLE_CHECKSUM.key()) || !props.getProperty(TABLE_CHECKSUM.key()).equals(checksum)) {
// delete the properties file and throw exception indicating update failure
// subsequent writes will recover and update, reads will go to the backup until then
fs.delete(cfgPath, false);
throw new HoodieIOException("Checksum property missing or does not match.");
}
}
fs.delete(backupCfgPath, false);
} catch (IOException e) {
throw new HoodieIOException("Error updating table configs.", e);
@@ -343,10 +385,28 @@ public class HoodieTableConfig extends HoodieConfig {
if (hoodieConfig.contains(TIMELINE_TIMEZONE)) {
HoodieInstantTimeGenerator.setCommitTimeZone(HoodieTimelineTimeZone.valueOf(hoodieConfig.getString(TIMELINE_TIMEZONE)));
}
if (hoodieConfig.contains(TABLE_CHECKSUM)) {
hoodieConfig.setValue(TABLE_CHECKSUM, hoodieConfig.getString(TABLE_CHECKSUM));
} else {
hoodieConfig.setValue(TABLE_CHECKSUM, String.valueOf(generateChecksum(hoodieConfig.getProps())));
}
hoodieConfig.getProps().store(outputStream, "Properties saved on " + new Date(System.currentTimeMillis()));
}
}
/**
 * Generates the table checksum from the table identity.
 *
 * <p>The checksum covers only {@code <database>.<table>} (per
 * TABLE_CHECKSUM_FORMAT), not the full property set, so unrelated config edits
 * do not invalidate it.
 *
 * @param props table properties; must contain the table name
 * @return CRC checksum of the {@code <database>.<table>} string (UTF-8 bytes)
 * @throws IllegalArgumentException if the table name property is missing
 */
public static long generateChecksum(Properties props) {
  if (!props.containsKey(NAME.key())) {
    throw new IllegalArgumentException(NAME.key() + " property needs to be specified");
  }
  String table = props.getProperty(NAME.key());
  // database defaults to empty, yielding ".<table>" for tables without one
  String database = props.getProperty(DATABASE_NAME.key(), "");
  return BinaryUtil.generateChecksum(String.format(TABLE_CHECKSUM_FORMAT, database, table).getBytes(UTF_8));
}
/**
 * Validates that the stored {@code hoodie.table.checksum} property matches the
 * checksum recomputed from the table identity in {@code props}.
 *
 * @param props table properties, expected to contain the table name and the
 *              stored checksum entry
 * @return true if a checksum entry is present and matches; false when the
 *         entry is absent
 */
public static boolean validateChecksum(Properties props) {
  String storedChecksum = props.getProperty(TABLE_CHECKSUM.key());
  // Guard against a missing entry: Long.parseLong(null) would throw a
  // NumberFormatException instead of reporting an invalid checksum.
  return storedChecksum != null && Long.parseLong(storedChecksum) == generateChecksum(props);
}
/**
* Read the table type from the table properties and if not found, return the default.
*/
@@ -505,6 +565,13 @@ public class HoodieTableConfig extends HoodieConfig {
return getString(URL_ENCODE_PARTITIONING);
}
/**
 * Read the table checksum.
 *
 * @return the stored {@code hoodie.table.checksum} value; behavior when the
 *         property is absent follows {@code getLong} — presumably null, TODO
 *         confirm against HoodieConfig
 */
private Long getTableChecksum() {
  return getLong(TABLE_CHECKSUM);
}
public Map<String, String> propsMap() {
return props.entrySet().stream()
.collect(Collectors.toMap(e -> String.valueOf(e.getKey()), e -> String.valueOf(e.getValue())));

View File

@@ -34,7 +34,9 @@ public enum HoodieTableVersion {
// 0.9.0 onwards
TWO(2),
// 0.10.0 onwards
THREE(3);
THREE(3),
// 0.11.0 onwards
FOUR(4);
private final int versionCode;
@@ -47,7 +49,7 @@ public enum HoodieTableVersion {
}
public static HoodieTableVersion current() {
return THREE;
return FOUR;
}
public static HoodieTableVersion versionFromCode(int versionCode) {

View File

@@ -19,6 +19,7 @@
package org.apache.hudi.common.util;
import java.nio.charset.Charset;
import java.util.zip.CRC32;
public class BinaryUtil {
@@ -187,5 +188,14 @@ public class BinaryUtil {
}
return temp;
}
/**
 * Generate a checksum for a given set of bytes.
 *
 * @param data bytes to checksum
 * @return the CRC-32 value of {@code data}
 */
public static long generateChecksum(byte[] data) {
  CRC32 checksum = new CRC32();
  checksum.update(data, 0, data.length);
  return checksum.getValue();
}
}

View File

@@ -33,9 +33,9 @@ import org.apache.avro.generic.GenericRecord;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.util.zip.CRC32;
import static org.apache.hudi.avro.HoodieAvroUtils.getNullableValAsString;
import static org.apache.hudi.common.util.BinaryUtil.generateChecksum;
/**
* A utility class supports spillable map.
@@ -95,15 +95,6 @@ public class SpillableMapUtils {
return outputStream.getSize();
}
/**
* Generate a checksum for a given set of bytes.
*/
public static long generateChecksum(byte[] data) {
CRC32 crc = new CRC32();
crc.update(data);
return crc.getValue();
}
/**
* Compute a bytes representation of the payload by serializing the contents This is used to estimate the size of the
* payload (either in memory or when written to disk).

View File

@@ -56,6 +56,8 @@ import java.util.zip.Deflater;
import java.util.zip.DeflaterOutputStream;
import java.util.zip.InflaterInputStream;
import static org.apache.hudi.common.util.BinaryUtil.generateChecksum;
/**
* This class provides a disk spillable only map implementation. All of the data is currenly written to one file,
* without any rollover support. It uses the following : 1) An in-memory map that tracks the key-> latest ValueMetadata.
@@ -223,7 +225,7 @@ public final class BitCaskDiskMap<T extends Serializable, R extends Serializable
new BitCaskDiskMap.ValueMetadata(this.filePath, valueSize, filePosition.get(), timestamp));
byte[] serializedKey = SerializationUtils.serialize(key);
filePosition
.set(SpillableMapUtils.spillToDisk(writeOnlyFileHandle, new FileEntry(SpillableMapUtils.generateChecksum(val),
.set(SpillableMapUtils.spillToDisk(writeOnlyFileHandle, new FileEntry(generateChecksum(val),
serializedKey.length, valueSize, serializedKey, val, timestamp)));
if (flush) {
flushToDisk();

View File

@@ -19,8 +19,10 @@
package org.apache.hudi.common.properties;
import org.apache.hudi.common.config.TypedProperties;
import org.junit.jupiter.api.Test;
import java.io.IOException;
import java.util.Properties;
import static org.junit.jupiter.api.Assertions.assertEquals;
@@ -81,4 +83,25 @@ public class TestTypedProperties {
assertEquals(true, typedProperties.getBoolean("key1", false));
assertEquals(false, typedProperties.getBoolean("key2", false));
}
/**
 * Verifies that TypedProperties preserves insertion order of keys — both when
 * keys are added through put() and when an existing TypedProperties is passed
 * to the copy constructor — unlike the unspecified ordering of a plain
 * java.util.Properties.
 */
@Test
public void testPropertiesOrder() throws IOException {
  // Insert key0..key9 in order through the Properties interface.
  Properties properties = new TypedProperties();
  properties.put("key0", "true");
  properties.put("key1", "false");
  properties.put("key2", "true");
  properties.put("key3", "false");
  properties.put("key4", "true");
  properties.put("key5", "true");
  properties.put("key6", "false");
  properties.put("key7", "true");
  properties.put("key8", "false");
  properties.put("key9", "true");
  // Copy-construct and check stringPropertyNames() yields key0..key9 in order.
  TypedProperties typedProperties = new TypedProperties(properties);
  String[] props = typedProperties.stringPropertyNames().toArray(new String[0]);
  for (int i = 0; i < props.length; i++) {
    assertEquals(String.format("key%d", i), props[i]);
  }
}
}

View File

@@ -64,7 +64,7 @@ public class TestHoodieTableConfig extends HoodieCommonTestHarness {
public void testCreate() throws IOException {
assertTrue(fs.exists(new Path(metaPath, HoodieTableConfig.HOODIE_PROPERTIES_FILE)));
HoodieTableConfig config = new HoodieTableConfig(fs, metaPath.toString(), null);
assertEquals(4, config.getProps().size());
assertEquals(5, config.getProps().size());
}
@Test
@@ -77,7 +77,7 @@ public class TestHoodieTableConfig extends HoodieCommonTestHarness {
assertTrue(fs.exists(cfgPath));
assertFalse(fs.exists(backupCfgPath));
HoodieTableConfig config = new HoodieTableConfig(fs, metaPath.toString(), null);
assertEquals(5, config.getProps().size());
assertEquals(6, config.getProps().size());
assertEquals("test-table2", config.getTableName());
assertEquals("new_field", config.getPreCombineField());
}
@@ -90,7 +90,7 @@ public class TestHoodieTableConfig extends HoodieCommonTestHarness {
assertTrue(fs.exists(cfgPath));
assertFalse(fs.exists(backupCfgPath));
HoodieTableConfig config = new HoodieTableConfig(fs, metaPath.toString(), null);
assertEquals(3, config.getProps().size());
assertEquals(4, config.getProps().size());
assertNull(config.getProps().getProperty("hoodie.invalid.config"));
assertFalse(config.getProps().contains(HoodieTableConfig.ARCHIVELOG_FOLDER.key()));
}
@@ -114,7 +114,7 @@ public class TestHoodieTableConfig extends HoodieCommonTestHarness {
assertFalse(fs.exists(cfgPath));
assertTrue(fs.exists(backupCfgPath));
config = new HoodieTableConfig(fs, metaPath.toString(), null);
assertEquals(4, config.getProps().size());
assertEquals(5, config.getProps().size());
}
@ParameterizedTest
@@ -132,6 +132,6 @@ public class TestHoodieTableConfig extends HoodieCommonTestHarness {
assertTrue(fs.exists(cfgPath));
assertFalse(fs.exists(backupCfgPath));
config = new HoodieTableConfig(fs, metaPath.toString(), null);
assertEquals(4, config.getProps().size());
assertEquals(5, config.getProps().size());
}
}

View File

@@ -54,6 +54,8 @@ public class TestHoodieTableMetaClient extends HoodieCommonTestHarness {
assertEquals(basePath, metaClient.getBasePath(), "Basepath should be the one assigned");
assertEquals(basePath + "/.hoodie", metaClient.getMetaPath(),
"Metapath should be ${basepath}/.hoodie");
assertTrue(metaClient.getTableConfig().getProps().containsKey(HoodieTableConfig.TABLE_CHECKSUM.key()));
assertTrue(HoodieTableConfig.validateChecksum(metaClient.getTableConfig().getProps()));
}
@Test

View File

@@ -363,7 +363,6 @@
<dependency>
<groupId>org.awaitility</groupId>
<artifactId>awaitility</artifactId>
<version>3.1.2</version>
<scope>test</scope>
</dependency>

View File

@@ -1119,6 +1119,12 @@
<artifactId>awaitility</artifactId>
<version>${awaitility.version}</version>
<scope>test</scope>
<exclusions>
<exclusion>
<groupId>org.objenesis</groupId>
<artifactId>objenesis</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>