Spark Stage retry handling
This commit is contained in:
committed by
vinoth chandar
parent
3fd2fd6e9d
commit
145034c5fa
@@ -29,6 +29,7 @@ import com.uber.hoodie.common.model.HoodieFileGroupId;
|
|||||||
import com.uber.hoodie.common.model.HoodieLogFile;
|
import com.uber.hoodie.common.model.HoodieLogFile;
|
||||||
import com.uber.hoodie.common.table.HoodieTableMetaClient;
|
import com.uber.hoodie.common.table.HoodieTableMetaClient;
|
||||||
import com.uber.hoodie.common.table.HoodieTimeline;
|
import com.uber.hoodie.common.table.HoodieTimeline;
|
||||||
|
import com.uber.hoodie.common.table.log.HoodieLogFormat;
|
||||||
import com.uber.hoodie.common.table.timeline.HoodieInstant;
|
import com.uber.hoodie.common.table.timeline.HoodieInstant;
|
||||||
import com.uber.hoodie.common.table.timeline.HoodieInstant.State;
|
import com.uber.hoodie.common.table.timeline.HoodieInstant.State;
|
||||||
import com.uber.hoodie.common.table.view.HoodieTableFileSystemView;
|
import com.uber.hoodie.common.table.view.HoodieTableFileSystemView;
|
||||||
@@ -245,7 +246,7 @@ public class CompactionAdminClient extends AbstractHoodieClient {
|
|||||||
"Expect new log version to be sane");
|
"Expect new log version to be sane");
|
||||||
HoodieLogFile newLogFile = new HoodieLogFile(new Path(lf.getPath().getParent(),
|
HoodieLogFile newLogFile = new HoodieLogFile(new Path(lf.getPath().getParent(),
|
||||||
FSUtils.makeLogFileName(lf.getFileId(), "." + FSUtils.getFileExtensionFromLog(lf.getPath()),
|
FSUtils.makeLogFileName(lf.getFileId(), "." + FSUtils.getFileExtensionFromLog(lf.getPath()),
|
||||||
compactionInstant, lf.getLogVersion() - maxVersion)));
|
compactionInstant, lf.getLogVersion() - maxVersion, HoodieLogFormat.UNKNOWN_WRITE_TOKEN)));
|
||||||
return Pair.of(lf, newLogFile);
|
return Pair.of(lf, newLogFile);
|
||||||
}).collect(Collectors.toList());
|
}).collect(Collectors.toList());
|
||||||
}
|
}
|
||||||
@@ -436,7 +437,7 @@ public class CompactionAdminClient extends AbstractHoodieClient {
|
|||||||
.filter(fs -> fs.getFileId().equals(operation.getFileId())).findFirst().get();
|
.filter(fs -> fs.getFileId().equals(operation.getFileId())).findFirst().get();
|
||||||
List<HoodieLogFile> logFilesToRepair =
|
List<HoodieLogFile> logFilesToRepair =
|
||||||
merged.getLogFiles().filter(lf -> lf.getBaseCommitTime().equals(compactionInstant))
|
merged.getLogFiles().filter(lf -> lf.getBaseCommitTime().equals(compactionInstant))
|
||||||
.sorted(HoodieLogFile.getBaseInstantAndLogVersionComparator().reversed())
|
.sorted(HoodieLogFile.getLogFileComparator())
|
||||||
.collect(Collectors.toList());
|
.collect(Collectors.toList());
|
||||||
FileSlice fileSliceForCompaction =
|
FileSlice fileSliceForCompaction =
|
||||||
fileSystemView.getLatestFileSlicesBeforeOrOn(operation.getPartitionPath(), operation.getBaseInstantTime())
|
fileSystemView.getLatestFileSlicesBeforeOrOn(operation.getPartitionPath(), operation.getBaseInstantTime())
|
||||||
@@ -451,7 +452,7 @@ public class CompactionAdminClient extends AbstractHoodieClient {
|
|||||||
for (HoodieLogFile toRepair : logFilesToRepair) {
|
for (HoodieLogFile toRepair : logFilesToRepair) {
|
||||||
int version = maxUsedVersion + 1;
|
int version = maxUsedVersion + 1;
|
||||||
HoodieLogFile newLf = new HoodieLogFile(new Path(parentPath, FSUtils.makeLogFileName(operation.getFileId(),
|
HoodieLogFile newLf = new HoodieLogFile(new Path(parentPath, FSUtils.makeLogFileName(operation.getFileId(),
|
||||||
logExtn, operation.getBaseInstantTime(), version)));
|
logExtn, operation.getBaseInstantTime(), version, HoodieLogFormat.UNKNOWN_WRITE_TOKEN)));
|
||||||
result.add(Pair.of(toRepair, newLf));
|
result.add(Pair.of(toRepair, newLf));
|
||||||
maxUsedVersion = version;
|
maxUsedVersion = version;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -72,6 +72,7 @@ import java.util.List;
|
|||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Optional;
|
import java.util.Optional;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
import java.util.stream.IntStream;
|
||||||
import org.apache.log4j.LogManager;
|
import org.apache.log4j.LogManager;
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
import org.apache.spark.Partitioner;
|
import org.apache.spark.Partitioner;
|
||||||
@@ -333,9 +334,10 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
|
|||||||
String commitTime, HoodieTable<T> table,
|
String commitTime, HoodieTable<T> table,
|
||||||
Option<UserDefinedBulkInsertPartitioner> bulkInsertPartitioner) {
|
Option<UserDefinedBulkInsertPartitioner> bulkInsertPartitioner) {
|
||||||
final JavaRDD<HoodieRecord<T>> repartitionedRecords;
|
final JavaRDD<HoodieRecord<T>> repartitionedRecords;
|
||||||
|
final int parallelism = config.getBulkInsertShuffleParallelism();
|
||||||
if (bulkInsertPartitioner.isDefined()) {
|
if (bulkInsertPartitioner.isDefined()) {
|
||||||
repartitionedRecords = bulkInsertPartitioner.get()
|
repartitionedRecords = bulkInsertPartitioner.get()
|
||||||
.repartitionRecords(dedupedRecords, config.getBulkInsertShuffleParallelism());
|
.repartitionRecords(dedupedRecords, parallelism);
|
||||||
} else {
|
} else {
|
||||||
// Now, sort the records and line them up nicely for loading.
|
// Now, sort the records and line them up nicely for loading.
|
||||||
repartitionedRecords = dedupedRecords.sortBy(record -> {
|
repartitionedRecords = dedupedRecords.sortBy(record -> {
|
||||||
@@ -343,10 +345,16 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
|
|||||||
// the records split evenly across RDD partitions, such that small partitions fit
|
// the records split evenly across RDD partitions, such that small partitions fit
|
||||||
// into 1 RDD partition, while big ones spread evenly across multiple RDD partitions
|
// into 1 RDD partition, while big ones spread evenly across multiple RDD partitions
|
||||||
return String.format("%s+%s", record.getPartitionPath(), record.getRecordKey());
|
return String.format("%s+%s", record.getPartitionPath(), record.getRecordKey());
|
||||||
}, true, config.getBulkInsertShuffleParallelism());
|
}, true, parallelism);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//generate new file ID prefixes for each output partition
|
||||||
|
final List<String> fileIDPrefixes = IntStream.range(0, parallelism)
|
||||||
|
.mapToObj(i -> FSUtils.createNewFileIdPfx())
|
||||||
|
.collect(Collectors.toList());
|
||||||
|
|
||||||
JavaRDD<WriteStatus> writeStatusRDD = repartitionedRecords
|
JavaRDD<WriteStatus> writeStatusRDD = repartitionedRecords
|
||||||
.mapPartitionsWithIndex(new BulkInsertMapFunction<T>(commitTime, config, table), true)
|
.mapPartitionsWithIndex(new BulkInsertMapFunction<T>(commitTime, config, table, fileIDPrefixes), true)
|
||||||
.flatMap(writeStatuses -> writeStatuses.iterator());
|
.flatMap(writeStatuses -> writeStatuses.iterator());
|
||||||
|
|
||||||
return updateIndexAndCommitIfNeeded(writeStatusRDD, table, commitTime);
|
return updateIndexAndCommitIfNeeded(writeStatusRDD, table, commitTime);
|
||||||
@@ -498,20 +506,7 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
|
|||||||
updateMetadataAndRollingStats(actionType, metadata, stats);
|
updateMetadataAndRollingStats(actionType, metadata, stats);
|
||||||
|
|
||||||
// Finalize write
|
// Finalize write
|
||||||
final Timer.Context finalizeCtx = metrics.getFinalizeCtx();
|
finalizeWrite(table, commitTime, stats);
|
||||||
try {
|
|
||||||
table.finalizeWrite(jsc, stats);
|
|
||||||
if (finalizeCtx != null) {
|
|
||||||
Optional<Long> durationInMs = Optional.of(metrics.getDurationInMs(finalizeCtx.stop()));
|
|
||||||
durationInMs.ifPresent(duration -> {
|
|
||||||
logger.info("Finalize write elapsed time (milliseconds): " + duration);
|
|
||||||
metrics.updateFinalizeWriteMetrics(duration, stats.size());
|
|
||||||
});
|
|
||||||
}
|
|
||||||
} catch (HoodieIOException ioe) {
|
|
||||||
throw new HoodieCommitException(
|
|
||||||
"Failed to complete commit " + commitTime + " due to finalize errors.", ioe);
|
|
||||||
}
|
|
||||||
|
|
||||||
// add in extra metadata
|
// add in extra metadata
|
||||||
if (extraMetadata.isPresent()) {
|
if (extraMetadata.isPresent()) {
|
||||||
@@ -1270,7 +1265,7 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
|
|||||||
String compactionCommitTime, boolean autoCommit, Optional<Map<String, String>> extraMetadata) {
|
String compactionCommitTime, boolean autoCommit, Optional<Map<String, String>> extraMetadata) {
|
||||||
if (autoCommit) {
|
if (autoCommit) {
|
||||||
HoodieCommitMetadata metadata =
|
HoodieCommitMetadata metadata =
|
||||||
doCompactionCommit(compactedStatuses, table.getMetaClient(), compactionCommitTime, extraMetadata);
|
doCompactionCommit(table, compactedStatuses, compactionCommitTime, extraMetadata);
|
||||||
if (compactionTimer != null) {
|
if (compactionTimer != null) {
|
||||||
long durationInMs = metrics.getDurationInMs(compactionTimer.stop());
|
long durationInMs = metrics.getDurationInMs(compactionTimer.stop());
|
||||||
try {
|
try {
|
||||||
@@ -1288,6 +1283,23 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void finalizeWrite(HoodieTable<T> table, String instantTime, List<HoodieWriteStat> stats) {
|
||||||
|
try {
|
||||||
|
final Timer.Context finalizeCtx = metrics.getFinalizeCtx();
|
||||||
|
table.finalizeWrite(jsc, instantTime, stats);
|
||||||
|
if (finalizeCtx != null) {
|
||||||
|
Optional<Long> durationInMs = Optional.of(metrics.getDurationInMs(finalizeCtx.stop()));
|
||||||
|
durationInMs.ifPresent(duration -> {
|
||||||
|
logger.info("Finalize write elapsed time (milliseconds): " + duration);
|
||||||
|
metrics.updateFinalizeWriteMetrics(duration, stats.size());
|
||||||
|
});
|
||||||
|
}
|
||||||
|
} catch (HoodieIOException ioe) {
|
||||||
|
throw new HoodieCommitException(
|
||||||
|
"Failed to complete commit " + instantTime + " due to finalize errors.", ioe);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Rollback failed compactions. Inflight rollbacks for compactions revert the .inflight file to the .requested file
|
* Rollback failed compactions. Inflight rollbacks for compactions revert the .inflight file to the .requested file
|
||||||
*
|
*
|
||||||
@@ -1301,8 +1313,9 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
|
|||||||
table.getActiveTimeline().revertCompactionInflightToRequested(inflightInstant);
|
table.getActiveTimeline().revertCompactionInflightToRequested(inflightInstant);
|
||||||
}
|
}
|
||||||
|
|
||||||
private HoodieCommitMetadata doCompactionCommit(JavaRDD<WriteStatus> writeStatuses,
|
private HoodieCommitMetadata doCompactionCommit(HoodieTable<T> table, JavaRDD<WriteStatus> writeStatuses,
|
||||||
HoodieTableMetaClient metaClient, String compactionCommitTime, Optional<Map<String, String>> extraMetadata) {
|
String compactionCommitTime, Optional<Map<String, String>> extraMetadata) {
|
||||||
|
HoodieTableMetaClient metaClient = table.getMetaClient();
|
||||||
List<HoodieWriteStat> updateStatusMap = writeStatuses.map(WriteStatus::getStat)
|
List<HoodieWriteStat> updateStatusMap = writeStatuses.map(WriteStatus::getStat)
|
||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
@@ -1311,6 +1324,10 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
|
|||||||
metadata.addWriteStat(stat.getPartitionPath(), stat);
|
metadata.addWriteStat(stat.getPartitionPath(), stat);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Finalize write
|
||||||
|
List<HoodieWriteStat> stats = writeStatuses.map(WriteStatus::getStat).collect();
|
||||||
|
finalizeWrite(table, compactionCommitTime, stats);
|
||||||
|
|
||||||
// Copy extraMetadata
|
// Copy extraMetadata
|
||||||
extraMetadata.ifPresent(m -> {
|
extraMetadata.ifPresent(m -> {
|
||||||
m.entrySet().stream().forEach(e -> {
|
m.entrySet().stream().forEach(e -> {
|
||||||
|
|||||||
@@ -62,19 +62,26 @@ public class HoodieWriteConfig extends DefaultHoodieConfig {
|
|||||||
private static final String DEFAULT_ASSUME_DATE_PARTITIONING = "false";
|
private static final String DEFAULT_ASSUME_DATE_PARTITIONING = "false";
|
||||||
private static final String HOODIE_WRITE_STATUS_CLASS_PROP = "hoodie.writestatus.class";
|
private static final String HOODIE_WRITE_STATUS_CLASS_PROP = "hoodie.writestatus.class";
|
||||||
private static final String DEFAULT_HOODIE_WRITE_STATUS_CLASS = WriteStatus.class.getName();
|
private static final String DEFAULT_HOODIE_WRITE_STATUS_CLASS = WriteStatus.class.getName();
|
||||||
private static final String HOODIE_COPYONWRITE_USE_TEMP_FOLDER_CREATE =
|
|
||||||
"hoodie.copyonwrite.use" + ".temp.folder.for.create";
|
|
||||||
private static final String DEFAULT_HOODIE_COPYONWRITE_USE_TEMP_FOLDER_CREATE = "false";
|
|
||||||
private static final String HOODIE_COPYONWRITE_USE_TEMP_FOLDER_MERGE =
|
|
||||||
"hoodie.copyonwrite.use" + ".temp.folder.for.merge";
|
|
||||||
private static final String DEFAULT_HOODIE_COPYONWRITE_USE_TEMP_FOLDER_MERGE = "false";
|
|
||||||
private static final String FINALIZE_WRITE_PARALLELISM = "hoodie.finalize.write.parallelism";
|
private static final String FINALIZE_WRITE_PARALLELISM = "hoodie.finalize.write.parallelism";
|
||||||
private static final String DEFAULT_FINALIZE_WRITE_PARALLELISM = DEFAULT_PARALLELISM;
|
private static final String DEFAULT_FINALIZE_WRITE_PARALLELISM = DEFAULT_PARALLELISM;
|
||||||
private static final String CONSISTENCY_CHECK_ENABLED = "hoodie.consistency.check.enabled";
|
private static final String CONSISTENCY_CHECK_ENABLED_PROP = "hoodie.consistency.check.enabled";
|
||||||
private static final String DEFAULT_CONSISTENCY_CHECK_ENABLED = "false";
|
private static final String DEFAULT_CONSISTENCY_CHECK_ENABLED = "false";
|
||||||
private static final String EMBEDDED_TIMELINE_SERVER_ENABLED = "hoodie.embed.timeline.server";
|
private static final String EMBEDDED_TIMELINE_SERVER_ENABLED = "hoodie.embed.timeline.server";
|
||||||
private static final String DEFAULT_EMBEDDED_TIMELINE_SERVER_ENABLED = "false";
|
private static final String DEFAULT_EMBEDDED_TIMELINE_SERVER_ENABLED = "false";
|
||||||
|
|
||||||
|
// time between successive attempts to ensure written data's metadata is consistent on storage
|
||||||
|
private static final String INITIAL_CONSISTENCY_CHECK_INTERVAL_MS_PROP =
|
||||||
|
"hoodie.consistency.check.initial_interval_ms";
|
||||||
|
private static long DEFAULT_INITIAL_CONSISTENCY_CHECK_INTERVAL_MS = 2000L;
|
||||||
|
|
||||||
|
// max interval time
|
||||||
|
private static final String MAX_CONSISTENCY_CHECK_INTERVAL_MS_PROP = "hoodie.consistency.check.max_interval_ms";
|
||||||
|
private static long DEFAULT_MAX_CONSISTENCY_CHECK_INTERVAL_MS = 300000L;
|
||||||
|
|
||||||
|
// maximum number of checks, for consistency of written data. Will wait upto 256 Secs
|
||||||
|
private static final String MAX_CONSISTENCY_CHECKS_PROP = "hoodie.consistency.check.max_checks";
|
||||||
|
private static int DEFAULT_MAX_CONSISTENCY_CHECKS = 7;
|
||||||
|
|
||||||
// Hoodie Write Client transparently rewrites File System View config when embedded mode is enabled
|
// Hoodie Write Client transparently rewrites File System View config when embedded mode is enabled
|
||||||
// We keep track of original config and rewritten config
|
// We keep track of original config and rewritten config
|
||||||
private final FileSystemViewStorageConfig clientSpecifiedViewStorageConfig;
|
private final FileSystemViewStorageConfig clientSpecifiedViewStorageConfig;
|
||||||
@@ -148,31 +155,30 @@ public class HoodieWriteConfig extends DefaultHoodieConfig {
|
|||||||
return props.getProperty(HOODIE_WRITE_STATUS_CLASS_PROP);
|
return props.getProperty(HOODIE_WRITE_STATUS_CLASS_PROP);
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean shouldUseTempFolderForCopyOnWriteForCreate() {
|
|
||||||
return Boolean.parseBoolean(props.getProperty(HOODIE_COPYONWRITE_USE_TEMP_FOLDER_CREATE));
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean shouldUseTempFolderForCopyOnWriteForMerge() {
|
|
||||||
return Boolean.parseBoolean(props.getProperty(HOODIE_COPYONWRITE_USE_TEMP_FOLDER_MERGE));
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean shouldUseTempFolderForCopyOnWrite() {
|
|
||||||
return shouldUseTempFolderForCopyOnWriteForCreate()
|
|
||||||
|| shouldUseTempFolderForCopyOnWriteForMerge();
|
|
||||||
}
|
|
||||||
|
|
||||||
public int getFinalizeWriteParallelism() {
|
public int getFinalizeWriteParallelism() {
|
||||||
return Integer.parseInt(props.getProperty(FINALIZE_WRITE_PARALLELISM));
|
return Integer.parseInt(props.getProperty(FINALIZE_WRITE_PARALLELISM));
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean isConsistencyCheckEnabled() {
|
public boolean isConsistencyCheckEnabled() {
|
||||||
return Boolean.parseBoolean(props.getProperty(CONSISTENCY_CHECK_ENABLED));
|
return Boolean.parseBoolean(props.getProperty(CONSISTENCY_CHECK_ENABLED_PROP));
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean isEmbeddedTimelineServerEnabled() {
|
public boolean isEmbeddedTimelineServerEnabled() {
|
||||||
return Boolean.parseBoolean(props.getProperty(EMBEDDED_TIMELINE_SERVER_ENABLED));
|
return Boolean.parseBoolean(props.getProperty(EMBEDDED_TIMELINE_SERVER_ENABLED));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public int getMaxConsistencyChecks() {
|
||||||
|
return Integer.parseInt(props.getProperty(MAX_CONSISTENCY_CHECKS_PROP));
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getInitialConsistencyCheckIntervalMs() {
|
||||||
|
return Integer.parseInt(props.getProperty(INITIAL_CONSISTENCY_CHECK_INTERVAL_MS_PROP));
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getMaxConsistencyCheckIntervalMs() {
|
||||||
|
return Integer.parseInt(props.getProperty(MAX_CONSISTENCY_CHECK_INTERVAL_MS_PROP));
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* compaction properties
|
* compaction properties
|
||||||
**/
|
**/
|
||||||
@@ -588,20 +594,6 @@ public class HoodieWriteConfig extends DefaultHoodieConfig {
|
|||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
public Builder withUseTempFolderCopyOnWriteForCreate(
|
|
||||||
boolean shouldUseTempFolderCopyOnWriteForCreate) {
|
|
||||||
props.setProperty(HOODIE_COPYONWRITE_USE_TEMP_FOLDER_CREATE,
|
|
||||||
String.valueOf(shouldUseTempFolderCopyOnWriteForCreate));
|
|
||||||
return this;
|
|
||||||
}
|
|
||||||
|
|
||||||
public Builder withUseTempFolderCopyOnWriteForMerge(
|
|
||||||
boolean shouldUseTempFolderCopyOnWriteForMerge) {
|
|
||||||
props.setProperty(HOODIE_COPYONWRITE_USE_TEMP_FOLDER_MERGE,
|
|
||||||
String.valueOf(shouldUseTempFolderCopyOnWriteForMerge));
|
|
||||||
return this;
|
|
||||||
}
|
|
||||||
|
|
||||||
public Builder withFileSystemViewConfig(FileSystemViewStorageConfig viewStorageConfig) {
|
public Builder withFileSystemViewConfig(FileSystemViewStorageConfig viewStorageConfig) {
|
||||||
props.putAll(viewStorageConfig.getProps());
|
props.putAll(viewStorageConfig.getProps());
|
||||||
isViewConfigSet = true;
|
isViewConfigSet = true;
|
||||||
@@ -614,7 +606,7 @@ public class HoodieWriteConfig extends DefaultHoodieConfig {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public Builder withConsistencyCheckEnabled(boolean enabled) {
|
public Builder withConsistencyCheckEnabled(boolean enabled) {
|
||||||
props.setProperty(CONSISTENCY_CHECK_ENABLED, String.valueOf(enabled));
|
props.setProperty(CONSISTENCY_CHECK_ENABLED_PROP, String.valueOf(enabled));
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -623,6 +615,21 @@ public class HoodieWriteConfig extends DefaultHoodieConfig {
|
|||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public Builder withInitialConsistencyCheckIntervalMs(int initialIntevalMs) {
|
||||||
|
props.setProperty(INITIAL_CONSISTENCY_CHECK_INTERVAL_MS_PROP, String.valueOf(initialIntevalMs));
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Builder withMaxConsistencyCheckIntervalMs(int maxIntervalMs) {
|
||||||
|
props.setProperty(MAX_CONSISTENCY_CHECK_INTERVAL_MS_PROP, String.valueOf(maxIntervalMs));
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Builder withMaxConsistencyChecks(int maxConsistencyChecks) {
|
||||||
|
props.setProperty(MAX_CONSISTENCY_CHECKS_PROP, String.valueOf(maxConsistencyChecks));
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
public HoodieWriteConfig build() {
|
public HoodieWriteConfig build() {
|
||||||
// Check for mandatory properties
|
// Check for mandatory properties
|
||||||
setDefaultOnCondition(props, !props.containsKey(INSERT_PARALLELISM), INSERT_PARALLELISM,
|
setDefaultOnCondition(props, !props.containsKey(INSERT_PARALLELISM), INSERT_PARALLELISM,
|
||||||
@@ -643,18 +650,18 @@ public class HoodieWriteConfig extends DefaultHoodieConfig {
|
|||||||
HOODIE_ASSUME_DATE_PARTITIONING_PROP, DEFAULT_ASSUME_DATE_PARTITIONING);
|
HOODIE_ASSUME_DATE_PARTITIONING_PROP, DEFAULT_ASSUME_DATE_PARTITIONING);
|
||||||
setDefaultOnCondition(props, !props.containsKey(HOODIE_WRITE_STATUS_CLASS_PROP),
|
setDefaultOnCondition(props, !props.containsKey(HOODIE_WRITE_STATUS_CLASS_PROP),
|
||||||
HOODIE_WRITE_STATUS_CLASS_PROP, DEFAULT_HOODIE_WRITE_STATUS_CLASS);
|
HOODIE_WRITE_STATUS_CLASS_PROP, DEFAULT_HOODIE_WRITE_STATUS_CLASS);
|
||||||
setDefaultOnCondition(props, !props.containsKey(HOODIE_COPYONWRITE_USE_TEMP_FOLDER_CREATE),
|
|
||||||
HOODIE_COPYONWRITE_USE_TEMP_FOLDER_CREATE,
|
|
||||||
DEFAULT_HOODIE_COPYONWRITE_USE_TEMP_FOLDER_CREATE);
|
|
||||||
setDefaultOnCondition(props, !props.containsKey(HOODIE_COPYONWRITE_USE_TEMP_FOLDER_MERGE),
|
|
||||||
HOODIE_COPYONWRITE_USE_TEMP_FOLDER_MERGE,
|
|
||||||
DEFAULT_HOODIE_COPYONWRITE_USE_TEMP_FOLDER_MERGE);
|
|
||||||
setDefaultOnCondition(props, !props.containsKey(FINALIZE_WRITE_PARALLELISM),
|
setDefaultOnCondition(props, !props.containsKey(FINALIZE_WRITE_PARALLELISM),
|
||||||
FINALIZE_WRITE_PARALLELISM, DEFAULT_FINALIZE_WRITE_PARALLELISM);
|
FINALIZE_WRITE_PARALLELISM, DEFAULT_FINALIZE_WRITE_PARALLELISM);
|
||||||
setDefaultOnCondition(props, !props.containsKey(CONSISTENCY_CHECK_ENABLED),
|
setDefaultOnCondition(props, !props.containsKey(CONSISTENCY_CHECK_ENABLED_PROP),
|
||||||
CONSISTENCY_CHECK_ENABLED, DEFAULT_CONSISTENCY_CHECK_ENABLED);
|
CONSISTENCY_CHECK_ENABLED_PROP, DEFAULT_CONSISTENCY_CHECK_ENABLED);
|
||||||
setDefaultOnCondition(props, !props.containsKey(EMBEDDED_TIMELINE_SERVER_ENABLED),
|
setDefaultOnCondition(props, !props.containsKey(EMBEDDED_TIMELINE_SERVER_ENABLED),
|
||||||
EMBEDDED_TIMELINE_SERVER_ENABLED, DEFAULT_EMBEDDED_TIMELINE_SERVER_ENABLED);
|
EMBEDDED_TIMELINE_SERVER_ENABLED, DEFAULT_EMBEDDED_TIMELINE_SERVER_ENABLED);
|
||||||
|
setDefaultOnCondition(props, !props.containsKey(INITIAL_CONSISTENCY_CHECK_INTERVAL_MS_PROP),
|
||||||
|
INITIAL_CONSISTENCY_CHECK_INTERVAL_MS_PROP, String.valueOf(DEFAULT_INITIAL_CONSISTENCY_CHECK_INTERVAL_MS));
|
||||||
|
setDefaultOnCondition(props, !props.containsKey(MAX_CONSISTENCY_CHECK_INTERVAL_MS_PROP),
|
||||||
|
MAX_CONSISTENCY_CHECK_INTERVAL_MS_PROP, String.valueOf(DEFAULT_MAX_CONSISTENCY_CHECK_INTERVAL_MS));
|
||||||
|
setDefaultOnCondition(props, !props.containsKey(MAX_CONSISTENCY_CHECKS_PROP),
|
||||||
|
MAX_CONSISTENCY_CHECKS_PROP, String.valueOf(DEFAULT_MAX_CONSISTENCY_CHECKS));
|
||||||
|
|
||||||
// Make sure the props is propagated
|
// Make sure the props is propagated
|
||||||
setDefaultOnCondition(props, !isIndexConfigSet,
|
setDefaultOnCondition(props, !isIndexConfigSet,
|
||||||
|
|||||||
@@ -35,17 +35,19 @@ public class BulkInsertMapFunction<T extends HoodieRecordPayload> implements
|
|||||||
private String commitTime;
|
private String commitTime;
|
||||||
private HoodieWriteConfig config;
|
private HoodieWriteConfig config;
|
||||||
private HoodieTable<T> hoodieTable;
|
private HoodieTable<T> hoodieTable;
|
||||||
|
private List<String> fileIDPrefixes;
|
||||||
|
|
||||||
public BulkInsertMapFunction(String commitTime, HoodieWriteConfig config,
|
public BulkInsertMapFunction(String commitTime, HoodieWriteConfig config,
|
||||||
HoodieTable<T> hoodieTable) {
|
HoodieTable<T> hoodieTable, List<String> fileIDPrefixes) {
|
||||||
this.commitTime = commitTime;
|
this.commitTime = commitTime;
|
||||||
this.config = config;
|
this.config = config;
|
||||||
this.hoodieTable = hoodieTable;
|
this.hoodieTable = hoodieTable;
|
||||||
|
this.fileIDPrefixes = fileIDPrefixes;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Iterator<List<WriteStatus>> call(Integer partition,
|
public Iterator<List<WriteStatus>> call(Integer partition, Iterator<HoodieRecord<T>> sortedRecordItr) {
|
||||||
Iterator<HoodieRecord<T>> sortedRecordItr) throws Exception {
|
return new CopyOnWriteLazyInsertIterable<>(sortedRecordItr, config, commitTime, hoodieTable,
|
||||||
return new CopyOnWriteLazyInsertIterable<>(sortedRecordItr, config, commitTime, hoodieTable);
|
fileIDPrefixes.get(partition));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -27,16 +27,12 @@ import com.uber.hoodie.io.HoodieCreateHandle;
|
|||||||
import com.uber.hoodie.io.HoodieIOHandle;
|
import com.uber.hoodie.io.HoodieIOHandle;
|
||||||
import com.uber.hoodie.table.HoodieTable;
|
import com.uber.hoodie.table.HoodieTable;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.HashSet;
|
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Optional;
|
import java.util.Optional;
|
||||||
import java.util.Set;
|
|
||||||
import java.util.UUID;
|
|
||||||
import java.util.function.Function;
|
import java.util.function.Function;
|
||||||
import org.apache.avro.Schema;
|
import org.apache.avro.Schema;
|
||||||
import org.apache.avro.generic.IndexedRecord;
|
import org.apache.avro.generic.IndexedRecord;
|
||||||
import org.apache.spark.TaskContext;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Lazy Iterable, that writes a stream of HoodieRecords sorted by the partitionPath, into new
|
* Lazy Iterable, that writes a stream of HoodieRecords sorted by the partitionPath, into new
|
||||||
@@ -48,15 +44,17 @@ public class CopyOnWriteLazyInsertIterable<T extends HoodieRecordPayload> extend
|
|||||||
protected final HoodieWriteConfig hoodieConfig;
|
protected final HoodieWriteConfig hoodieConfig;
|
||||||
protected final String commitTime;
|
protected final String commitTime;
|
||||||
protected final HoodieTable<T> hoodieTable;
|
protected final HoodieTable<T> hoodieTable;
|
||||||
protected Set<String> partitionsCleaned;
|
protected final String idPrefix;
|
||||||
|
protected int numFilesWritten;
|
||||||
|
|
||||||
public CopyOnWriteLazyInsertIterable(Iterator<HoodieRecord<T>> sortedRecordItr, HoodieWriteConfig config,
|
public CopyOnWriteLazyInsertIterable(Iterator<HoodieRecord<T>> sortedRecordItr, HoodieWriteConfig config,
|
||||||
String commitTime, HoodieTable<T> hoodieTable) {
|
String commitTime, HoodieTable<T> hoodieTable, String idPrefix) {
|
||||||
super(sortedRecordItr);
|
super(sortedRecordItr);
|
||||||
this.partitionsCleaned = new HashSet<>();
|
|
||||||
this.hoodieConfig = config;
|
this.hoodieConfig = config;
|
||||||
this.commitTime = commitTime;
|
this.commitTime = commitTime;
|
||||||
this.hoodieTable = hoodieTable;
|
this.hoodieTable = hoodieTable;
|
||||||
|
this.idPrefix = idPrefix;
|
||||||
|
this.numFilesWritten = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Used for caching HoodieRecord along with insertValue. We need this to offload computation work to buffering thread.
|
// Used for caching HoodieRecord along with insertValue. We need this to offload computation work to buffering thread.
|
||||||
@@ -113,7 +111,10 @@ public class CopyOnWriteLazyInsertIterable<T extends HoodieRecordPayload> extend
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected void end() {
|
protected void end() {
|
||||||
|
}
|
||||||
|
|
||||||
|
protected String getNextFileId(String idPfx) {
|
||||||
|
return String.format("%s-%d", idPfx, numFilesWritten++);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected CopyOnWriteInsertHandler getInsertHandler() {
|
protected CopyOnWriteInsertHandler getInsertHandler() {
|
||||||
@@ -133,20 +134,11 @@ public class CopyOnWriteLazyInsertIterable<T extends HoodieRecordPayload> extend
|
|||||||
@Override
|
@Override
|
||||||
protected void consumeOneRecord(HoodieInsertValueGenResult<HoodieRecord> payload) {
|
protected void consumeOneRecord(HoodieInsertValueGenResult<HoodieRecord> payload) {
|
||||||
final HoodieRecord insertPayload = payload.record;
|
final HoodieRecord insertPayload = payload.record;
|
||||||
// clean up any partial failures
|
|
||||||
if (!partitionsCleaned.contains(insertPayload.getPartitionPath())) {
|
|
||||||
// This insert task could fail multiple times, but Spark will faithfully retry with
|
|
||||||
// the same data again. Thus, before we open any files under a given partition, we
|
|
||||||
// first delete any files in the same partitionPath written by same Spark partition
|
|
||||||
HoodieIOHandle.cleanupTmpFilesFromCurrentCommit(hoodieConfig, commitTime, insertPayload.getPartitionPath(),
|
|
||||||
TaskContext.getPartitionId(), hoodieTable);
|
|
||||||
partitionsCleaned.add(insertPayload.getPartitionPath());
|
|
||||||
}
|
|
||||||
|
|
||||||
// lazily initialize the handle, for the first time
|
// lazily initialize the handle, for the first time
|
||||||
if (handle == null) {
|
if (handle == null) {
|
||||||
handle = new HoodieCreateHandle(hoodieConfig, commitTime, hoodieTable, insertPayload.getPartitionPath(), UUID
|
handle = new HoodieCreateHandle(hoodieConfig, commitTime, hoodieTable, insertPayload.getPartitionPath(),
|
||||||
.randomUUID().toString());
|
getNextFileId(idPrefix));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (handle.canWrite(payload.record)) {
|
if (handle.canWrite(payload.record)) {
|
||||||
@@ -156,8 +148,8 @@ public class CopyOnWriteLazyInsertIterable<T extends HoodieRecordPayload> extend
|
|||||||
// handle is full.
|
// handle is full.
|
||||||
statuses.add(handle.close());
|
statuses.add(handle.close());
|
||||||
// Need to handle the rejected payload & open new handle
|
// Need to handle the rejected payload & open new handle
|
||||||
handle = new HoodieCreateHandle(hoodieConfig, commitTime, hoodieTable, insertPayload.getPartitionPath(), UUID
|
handle = new HoodieCreateHandle(hoodieConfig, commitTime, hoodieTable, insertPayload.getPartitionPath(),
|
||||||
.randomUUID().toString());
|
getNextFileId(idPrefix));
|
||||||
handle.write(insertPayload, payload.insertValue, payload.exception); // we should be able to write 1 payload.
|
handle.write(insertPayload, payload.insertValue, payload.exception); // we should be able to write 1 payload.
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -34,8 +34,8 @@ public class MergeOnReadLazyInsertIterable<T extends HoodieRecordPayload> extend
|
|||||||
CopyOnWriteLazyInsertIterable<T> {
|
CopyOnWriteLazyInsertIterable<T> {
|
||||||
|
|
||||||
public MergeOnReadLazyInsertIterable(Iterator<HoodieRecord<T>> sortedRecordItr, HoodieWriteConfig config,
|
public MergeOnReadLazyInsertIterable(Iterator<HoodieRecord<T>> sortedRecordItr, HoodieWriteConfig config,
|
||||||
String commitTime, HoodieTable<T> hoodieTable) {
|
String commitTime, HoodieTable<T> hoodieTable, String idPfx) {
|
||||||
super(sortedRecordItr, config, commitTime, hoodieTable);
|
super(sortedRecordItr, config, commitTime, hoodieTable, idPfx);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@@ -51,7 +51,7 @@ public class MergeOnReadLazyInsertIterable<T extends HoodieRecordPayload> extend
|
|||||||
List<WriteStatus> statuses = new ArrayList<>();
|
List<WriteStatus> statuses = new ArrayList<>();
|
||||||
// lazily initialize the handle, for the first time
|
// lazily initialize the handle, for the first time
|
||||||
if (handle == null) {
|
if (handle == null) {
|
||||||
handle = new HoodieAppendHandle(hoodieConfig, commitTime, hoodieTable);
|
handle = new HoodieAppendHandle(hoodieConfig, commitTime, hoodieTable, getNextFileId(idPrefix));
|
||||||
}
|
}
|
||||||
if (handle.canWrite(insertPayload)) {
|
if (handle.canWrite(insertPayload)) {
|
||||||
// write the payload, if the handle has capacity
|
// write the payload, if the handle has capacity
|
||||||
@@ -61,7 +61,7 @@ public class MergeOnReadLazyInsertIterable<T extends HoodieRecordPayload> extend
|
|||||||
handle.close();
|
handle.close();
|
||||||
statuses.add(handle.getWriteStatus());
|
statuses.add(handle.getWriteStatus());
|
||||||
// Need to handle the rejected payload & open new handle
|
// Need to handle the rejected payload & open new handle
|
||||||
handle = new HoodieAppendHandle(hoodieConfig, commitTime, hoodieTable);
|
handle = new HoodieAppendHandle(hoodieConfig, commitTime, hoodieTable, getNextFileId(idPrefix));
|
||||||
handle.write(insertPayload, payload.insertValue, payload.exception); // we should be able to write 1 payload.
|
handle.write(insertPayload, payload.insertValue, payload.exception); // we should be able to write 1 payload.
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,112 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2018 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
* you may not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
package com.uber.hoodie.io;
|
|
||||||
|
|
||||||
import com.google.common.annotations.VisibleForTesting;
|
|
||||||
import com.uber.hoodie.common.SerializableConfiguration;
|
|
||||||
import com.uber.hoodie.common.util.FSUtils;
|
|
||||||
import java.io.Serializable;
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Set;
|
|
||||||
import java.util.stream.Collectors;
|
|
||||||
import java.util.stream.StreamSupport;
|
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
|
||||||
import org.apache.hadoop.fs.Path;
|
|
||||||
import org.apache.log4j.LogManager;
|
|
||||||
import org.apache.log4j.Logger;
|
|
||||||
import org.apache.spark.api.java.JavaSparkContext;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Checks if all the written paths have their metadata consistent on storage and thus be listable to
|
|
||||||
* queries. This is important for cloud, stores like AWS S3 which are eventually consistent with
|
|
||||||
* their metadata. Without such checks, we may proceed to commit the written data, without the
|
|
||||||
* written data being made available to queries. In cases like incremental pull this can lead to
|
|
||||||
* downstream readers failing to ever see some data.
|
|
||||||
*/
|
|
||||||
public class ConsistencyCheck implements Serializable {
|
|
||||||
|
|
||||||
private static final transient Logger log = LogManager.getLogger(ConsistencyCheck.class);
|
|
||||||
|
|
||||||
private String basePath;
|
|
||||||
|
|
||||||
private List<String> relPaths;
|
|
||||||
|
|
||||||
private transient JavaSparkContext jsc;
|
|
||||||
|
|
||||||
private SerializableConfiguration hadoopConf;
|
|
||||||
|
|
||||||
private int parallelism;
|
|
||||||
|
|
||||||
public ConsistencyCheck(String basePath, List<String> relPaths, JavaSparkContext jsc,
|
|
||||||
int parallelism) {
|
|
||||||
this.basePath = basePath;
|
|
||||||
this.relPaths = relPaths;
|
|
||||||
this.jsc = jsc;
|
|
||||||
this.hadoopConf = new SerializableConfiguration(jsc.hadoopConfiguration());
|
|
||||||
this.parallelism = parallelism;
|
|
||||||
}
|
|
||||||
|
|
||||||
@VisibleForTesting
|
|
||||||
void sleepSafe(long waitMs) {
|
|
||||||
try {
|
|
||||||
Thread.sleep(waitMs);
|
|
||||||
} catch (InterruptedException e) {
|
|
||||||
// ignore & continue next attempt
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Repeatedly lists the filesystem on the paths, with exponential backoff and marks paths found as
|
|
||||||
* passing the check.
|
|
||||||
*
|
|
||||||
* @return list of (relative) paths failing the check
|
|
||||||
*/
|
|
||||||
public List<String> check(int maxAttempts, long initalDelayMs) {
|
|
||||||
long waitMs = initalDelayMs;
|
|
||||||
int attempt = 0;
|
|
||||||
|
|
||||||
List<String> remainingPaths = new ArrayList<>(relPaths);
|
|
||||||
while (attempt++ < maxAttempts) {
|
|
||||||
remainingPaths = jsc.parallelize(remainingPaths, parallelism)
|
|
||||||
.groupBy(p -> new Path(basePath, p).getParent()) // list by partition
|
|
||||||
.map(pair -> {
|
|
||||||
FileSystem fs = FSUtils.getFs(basePath, hadoopConf.get());
|
|
||||||
// list the partition path and obtain all file paths present
|
|
||||||
Set<String> fileNames = Arrays.stream(fs.listStatus(pair._1()))
|
|
||||||
.map(s -> s.getPath().getName())
|
|
||||||
.collect(Collectors.toSet());
|
|
||||||
|
|
||||||
// only return paths that can't be found
|
|
||||||
return StreamSupport.stream(pair._2().spliterator(), false)
|
|
||||||
.filter(p -> !fileNames.contains(new Path(basePath, p).getName()))
|
|
||||||
.collect(Collectors.toList());
|
|
||||||
})
|
|
||||||
.flatMap(List::iterator).collect();
|
|
||||||
if (remainingPaths.size() == 0) {
|
|
||||||
break; // we are done.
|
|
||||||
}
|
|
||||||
|
|
||||||
log.info("Consistency check, waiting for " + waitMs + " ms , after attempt :" + attempt);
|
|
||||||
sleepSafe(waitMs);
|
|
||||||
waitMs = waitMs * 2; // double check interval every attempt
|
|
||||||
}
|
|
||||||
|
|
||||||
return remainingPaths;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -32,6 +32,7 @@ import com.uber.hoodie.common.table.log.HoodieLogFormat.Writer;
|
|||||||
import com.uber.hoodie.common.table.log.block.HoodieAvroDataBlock;
|
import com.uber.hoodie.common.table.log.block.HoodieAvroDataBlock;
|
||||||
import com.uber.hoodie.common.table.log.block.HoodieDeleteBlock;
|
import com.uber.hoodie.common.table.log.block.HoodieDeleteBlock;
|
||||||
import com.uber.hoodie.common.table.log.block.HoodieLogBlock;
|
import com.uber.hoodie.common.table.log.block.HoodieLogBlock;
|
||||||
|
import com.uber.hoodie.common.util.FSUtils;
|
||||||
import com.uber.hoodie.common.util.HoodieAvroUtils;
|
import com.uber.hoodie.common.util.HoodieAvroUtils;
|
||||||
import com.uber.hoodie.common.util.Option;
|
import com.uber.hoodie.common.util.Option;
|
||||||
import com.uber.hoodie.config.HoodieWriteConfig;
|
import com.uber.hoodie.config.HoodieWriteConfig;
|
||||||
@@ -40,12 +41,10 @@ import com.uber.hoodie.exception.HoodieUpsertException;
|
|||||||
import com.uber.hoodie.table.HoodieTable;
|
import com.uber.hoodie.table.HoodieTable;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Comparator;
|
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Optional;
|
import java.util.Optional;
|
||||||
import java.util.UUID;
|
|
||||||
import java.util.concurrent.atomic.AtomicLong;
|
import java.util.concurrent.atomic.AtomicLong;
|
||||||
import org.apache.avro.generic.GenericRecord;
|
import org.apache.avro.generic.GenericRecord;
|
||||||
import org.apache.avro.generic.IndexedRecord;
|
import org.apache.avro.generic.IndexedRecord;
|
||||||
@@ -96,14 +95,14 @@ public class HoodieAppendHandle<T extends HoodieRecordPayload> extends HoodieIOH
|
|||||||
|
|
||||||
public HoodieAppendHandle(HoodieWriteConfig config, String commitTime, HoodieTable<T> hoodieTable,
|
public HoodieAppendHandle(HoodieWriteConfig config, String commitTime, HoodieTable<T> hoodieTable,
|
||||||
String fileId, Iterator<HoodieRecord<T>> recordItr) {
|
String fileId, Iterator<HoodieRecord<T>> recordItr) {
|
||||||
super(config, commitTime, hoodieTable);
|
super(config, commitTime, fileId, hoodieTable);
|
||||||
writeStatus.setStat(new HoodieDeltaWriteStat());
|
writeStatus.setStat(new HoodieDeltaWriteStat());
|
||||||
this.fileId = fileId;
|
this.fileId = fileId;
|
||||||
this.recordItr = recordItr;
|
this.recordItr = recordItr;
|
||||||
}
|
}
|
||||||
|
|
||||||
public HoodieAppendHandle(HoodieWriteConfig config, String commitTime, HoodieTable<T> hoodieTable) {
|
public HoodieAppendHandle(HoodieWriteConfig config, String commitTime, HoodieTable<T> hoodieTable, String fileId) {
|
||||||
this(config, commitTime, hoodieTable, UUID.randomUUID().toString(), null);
|
this(config, commitTime, hoodieTable, fileId, null);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void init(HoodieRecord record) {
|
private void init(HoodieRecord record) {
|
||||||
@@ -270,12 +269,16 @@ public class HoodieAppendHandle<T extends HoodieRecordPayload> extends HoodieIOH
|
|||||||
|
|
||||||
private Writer createLogWriter(Option<FileSlice> fileSlice, String baseCommitTime)
|
private Writer createLogWriter(Option<FileSlice> fileSlice, String baseCommitTime)
|
||||||
throws IOException, InterruptedException {
|
throws IOException, InterruptedException {
|
||||||
|
Optional<HoodieLogFile> latestLogFile = fileSlice.get().getLatestLogFile();
|
||||||
|
|
||||||
return HoodieLogFormat.newWriterBuilder()
|
return HoodieLogFormat.newWriterBuilder()
|
||||||
.onParentPath(new Path(hoodieTable.getMetaClient().getBasePath(), partitionPath))
|
.onParentPath(new Path(hoodieTable.getMetaClient().getBasePath(), partitionPath))
|
||||||
.withFileId(fileId).overBaseCommit(baseCommitTime).withLogVersion(
|
.withFileId(fileId).overBaseCommit(baseCommitTime).withLogVersion(
|
||||||
fileSlice.get().getLogFiles().map(logFile -> logFile.getLogVersion())
|
latestLogFile.map(HoodieLogFile::getLogVersion).orElse(HoodieLogFile.LOGFILE_BASE_VERSION))
|
||||||
.max(Comparator.naturalOrder()).orElse(HoodieLogFile.LOGFILE_BASE_VERSION))
|
|
||||||
.withSizeThreshold(config.getLogFileMaxSize()).withFs(fs)
|
.withSizeThreshold(config.getLogFileMaxSize()).withFs(fs)
|
||||||
|
.withLogWriteToken(
|
||||||
|
latestLogFile.map(x -> FSUtils.getWriteTokenFromLogPath(x.getPath())).orElse(writeToken))
|
||||||
|
.withRolloverLogWriteToken(writeToken)
|
||||||
.withFileExtension(HoodieLogFile.DELTA_EXTENSION).build();
|
.withFileExtension(HoodieLogFile.DELTA_EXTENSION).build();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -45,7 +45,6 @@ public class HoodieCreateHandle<T extends HoodieRecordPayload> extends HoodieIOH
|
|||||||
|
|
||||||
private final HoodieStorageWriter<IndexedRecord> storageWriter;
|
private final HoodieStorageWriter<IndexedRecord> storageWriter;
|
||||||
private final Path path;
|
private final Path path;
|
||||||
private Path tempPath = null;
|
|
||||||
private long recordsWritten = 0;
|
private long recordsWritten = 0;
|
||||||
private long insertRecordsWritten = 0;
|
private long insertRecordsWritten = 0;
|
||||||
private long recordsDeleted = 0;
|
private long recordsDeleted = 0;
|
||||||
@@ -54,26 +53,22 @@ public class HoodieCreateHandle<T extends HoodieRecordPayload> extends HoodieIOH
|
|||||||
|
|
||||||
public HoodieCreateHandle(HoodieWriteConfig config, String commitTime, HoodieTable<T> hoodieTable,
|
public HoodieCreateHandle(HoodieWriteConfig config, String commitTime, HoodieTable<T> hoodieTable,
|
||||||
String partitionPath, String fileId) {
|
String partitionPath, String fileId) {
|
||||||
super(config, commitTime, hoodieTable);
|
super(config, commitTime, fileId, hoodieTable);
|
||||||
writeStatus.setFileId(fileId);
|
writeStatus.setFileId(fileId);
|
||||||
writeStatus.setPartitionPath(partitionPath);
|
writeStatus.setPartitionPath(partitionPath);
|
||||||
|
|
||||||
final int sparkPartitionId = TaskContext.getPartitionId();
|
this.path = makeNewPath(partitionPath);
|
||||||
this.path = makeNewPath(partitionPath, sparkPartitionId, writeStatus.getFileId());
|
|
||||||
if (config.shouldUseTempFolderForCopyOnWriteForCreate()) {
|
|
||||||
this.tempPath = makeTempPath(partitionPath, sparkPartitionId, writeStatus.getFileId(),
|
|
||||||
TaskContext.get().stageId(), TaskContext.get().taskAttemptId());
|
|
||||||
}
|
|
||||||
|
|
||||||
try {
|
try {
|
||||||
HoodiePartitionMetadata partitionMetadata = new HoodiePartitionMetadata(fs, commitTime,
|
HoodiePartitionMetadata partitionMetadata = new HoodiePartitionMetadata(fs, commitTime,
|
||||||
new Path(config.getBasePath()), FSUtils.getPartitionPath(config.getBasePath(), partitionPath));
|
new Path(config.getBasePath()), FSUtils.getPartitionPath(config.getBasePath(), partitionPath));
|
||||||
partitionMetadata.trySave(TaskContext.getPartitionId());
|
partitionMetadata.trySave(TaskContext.getPartitionId());
|
||||||
|
createMarkerFile(partitionPath);
|
||||||
this.storageWriter = HoodieStorageWriterFactory
|
this.storageWriter = HoodieStorageWriterFactory
|
||||||
.getStorageWriter(commitTime, getStorageWriterPath(), hoodieTable, config, writerSchema);
|
.getStorageWriter(commitTime, path, hoodieTable, config, writerSchema);
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
throw new HoodieInsertException(
|
throw new HoodieInsertException(
|
||||||
"Failed to initialize HoodieStorageWriter for path " + getStorageWriterPath(), e);
|
"Failed to initialize HoodieStorageWriter for path " + path, e);
|
||||||
}
|
}
|
||||||
logger.info("New InsertHandle for partition :" + partitionPath + " with fileId " + fileId);
|
logger.info("New InsertHandle for partition :" + partitionPath + " with fileId " + fileId);
|
||||||
}
|
}
|
||||||
@@ -138,7 +133,7 @@ public class HoodieCreateHandle<T extends HoodieRecordPayload> extends HoodieIOH
|
|||||||
}
|
}
|
||||||
} catch (IOException io) {
|
} catch (IOException io) {
|
||||||
throw new HoodieInsertException(
|
throw new HoodieInsertException(
|
||||||
"Failed to insert records for path " + getStorageWriterPath(), io);
|
"Failed to insert records for path " + path, io);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -165,8 +160,8 @@ public class HoodieCreateHandle<T extends HoodieRecordPayload> extends HoodieIOH
|
|||||||
stat.setNumInserts(insertRecordsWritten);
|
stat.setNumInserts(insertRecordsWritten);
|
||||||
stat.setPrevCommit(HoodieWriteStat.NULL_COMMIT);
|
stat.setPrevCommit(HoodieWriteStat.NULL_COMMIT);
|
||||||
stat.setFileId(writeStatus.getFileId());
|
stat.setFileId(writeStatus.getFileId());
|
||||||
stat.setPaths(new Path(config.getBasePath()), path, tempPath);
|
stat.setPath(new Path(config.getBasePath()), path);
|
||||||
long fileSizeInBytes = FSUtils.getFileSize(fs, getStorageWriterPath());
|
long fileSizeInBytes = FSUtils.getFileSize(fs, path);
|
||||||
stat.setTotalWriteBytes(fileSizeInBytes);
|
stat.setTotalWriteBytes(fileSizeInBytes);
|
||||||
stat.setFileSizeInBytes(fileSizeInBytes);
|
stat.setFileSizeInBytes(fileSizeInBytes);
|
||||||
stat.setTotalWriteErrors(writeStatus.getTotalErrorRecords());
|
stat.setTotalWriteErrors(writeStatus.getTotalErrorRecords());
|
||||||
@@ -180,9 +175,4 @@ public class HoodieCreateHandle<T extends HoodieRecordPayload> extends HoodieIOH
|
|||||||
throw new HoodieInsertException("Failed to close the Insert Handle for path " + path, e);
|
throw new HoodieInsertException("Failed to close the Insert Handle for path " + path, e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private Path getStorageWriterPath() {
|
|
||||||
// Use tempPath for storage writer if possible
|
|
||||||
return (this.tempPath == null) ? this.path : this.tempPath;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -17,14 +17,17 @@
|
|||||||
package com.uber.hoodie.io;
|
package com.uber.hoodie.io;
|
||||||
|
|
||||||
import com.uber.hoodie.WriteStatus;
|
import com.uber.hoodie.WriteStatus;
|
||||||
|
import com.uber.hoodie.common.io.storage.HoodieWrapperFileSystem;
|
||||||
import com.uber.hoodie.common.model.HoodieRecord;
|
import com.uber.hoodie.common.model.HoodieRecord;
|
||||||
import com.uber.hoodie.common.model.HoodieRecordPayload;
|
import com.uber.hoodie.common.model.HoodieRecordPayload;
|
||||||
import com.uber.hoodie.common.table.HoodieTableMetaClient;
|
|
||||||
import com.uber.hoodie.common.util.FSUtils;
|
import com.uber.hoodie.common.util.FSUtils;
|
||||||
|
import com.uber.hoodie.common.util.FailSafeConsistencyGuard;
|
||||||
import com.uber.hoodie.common.util.HoodieAvroUtils;
|
import com.uber.hoodie.common.util.HoodieAvroUtils;
|
||||||
import com.uber.hoodie.common.util.HoodieTimer;
|
import com.uber.hoodie.common.util.HoodieTimer;
|
||||||
|
import com.uber.hoodie.common.util.NoOpConsistencyGuard;
|
||||||
import com.uber.hoodie.common.util.ReflectionUtils;
|
import com.uber.hoodie.common.util.ReflectionUtils;
|
||||||
import com.uber.hoodie.config.HoodieWriteConfig;
|
import com.uber.hoodie.config.HoodieWriteConfig;
|
||||||
|
import com.uber.hoodie.exception.HoodieException;
|
||||||
import com.uber.hoodie.exception.HoodieIOException;
|
import com.uber.hoodie.exception.HoodieIOException;
|
||||||
import com.uber.hoodie.table.HoodieTable;
|
import com.uber.hoodie.table.HoodieTable;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
@@ -32,16 +35,19 @@ import java.util.Optional;
|
|||||||
import org.apache.avro.Schema;
|
import org.apache.avro.Schema;
|
||||||
import org.apache.avro.generic.GenericRecord;
|
import org.apache.avro.generic.GenericRecord;
|
||||||
import org.apache.avro.generic.IndexedRecord;
|
import org.apache.avro.generic.IndexedRecord;
|
||||||
import org.apache.hadoop.fs.FileStatus;
|
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
import org.apache.hadoop.fs.Path;
|
import org.apache.hadoop.fs.Path;
|
||||||
import org.apache.log4j.LogManager;
|
import org.apache.log4j.LogManager;
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
|
import org.apache.spark.TaskContext;
|
||||||
|
|
||||||
|
|
||||||
public abstract class HoodieIOHandle<T extends HoodieRecordPayload> {
|
public abstract class HoodieIOHandle<T extends HoodieRecordPayload> {
|
||||||
|
|
||||||
private static Logger logger = LogManager.getLogger(HoodieIOHandle.class);
|
private static Logger logger = LogManager.getLogger(HoodieIOHandle.class);
|
||||||
protected final String commitTime;
|
protected final String commitTime;
|
||||||
|
protected final String fileId;
|
||||||
|
protected final String writeToken;
|
||||||
protected final HoodieWriteConfig config;
|
protected final HoodieWriteConfig config;
|
||||||
protected final FileSystem fs;
|
protected final FileSystem fs;
|
||||||
protected final HoodieTable<T> hoodieTable;
|
protected final HoodieTable<T> hoodieTable;
|
||||||
@@ -50,10 +56,13 @@ public abstract class HoodieIOHandle<T extends HoodieRecordPayload> {
|
|||||||
protected HoodieTimer timer;
|
protected HoodieTimer timer;
|
||||||
protected final WriteStatus writeStatus;
|
protected final WriteStatus writeStatus;
|
||||||
|
|
||||||
public HoodieIOHandle(HoodieWriteConfig config, String commitTime, HoodieTable<T> hoodieTable) {
|
public HoodieIOHandle(HoodieWriteConfig config, String commitTime, String fileId,
|
||||||
|
HoodieTable<T> hoodieTable) {
|
||||||
this.commitTime = commitTime;
|
this.commitTime = commitTime;
|
||||||
|
this.fileId = fileId;
|
||||||
|
this.writeToken = makeSparkWriteToken();
|
||||||
this.config = config;
|
this.config = config;
|
||||||
this.fs = hoodieTable.getMetaClient().getFs();
|
this.fs = getFileSystem(hoodieTable, config);
|
||||||
this.hoodieTable = hoodieTable;
|
this.hoodieTable = hoodieTable;
|
||||||
this.originalSchema = new Schema.Parser().parse(config.getSchema());
|
this.originalSchema = new Schema.Parser().parse(config.getSchema());
|
||||||
this.writerSchema = createHoodieWriteSchema(originalSchema);
|
this.writerSchema = createHoodieWriteSchema(originalSchema);
|
||||||
@@ -63,33 +72,26 @@ public abstract class HoodieIOHandle<T extends HoodieRecordPayload> {
|
|||||||
config.getWriteStatusFailureFraction());
|
config.getWriteStatusFailureFraction());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static FileSystem getFileSystem(HoodieTable hoodieTable, HoodieWriteConfig config) {
|
||||||
|
return new HoodieWrapperFileSystem(hoodieTable.getMetaClient().getFs(), config.isConsistencyCheckEnabled()
|
||||||
|
? new FailSafeConsistencyGuard(hoodieTable.getMetaClient().getFs(),
|
||||||
|
config.getMaxConsistencyChecks(), config.getInitialConsistencyCheckIntervalMs(),
|
||||||
|
config.getMaxConsistencyCheckIntervalMs()) : new NoOpConsistencyGuard());
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Deletes any new tmp files written during the current commit, into the partition
|
* Generate a write token based on the currently running spark task and its place in the spark dag.
|
||||||
*/
|
*/
|
||||||
public static void cleanupTmpFilesFromCurrentCommit(HoodieWriteConfig config, String commitTime,
|
private static String makeSparkWriteToken() {
|
||||||
String partitionPath, int taskPartitionId, HoodieTable hoodieTable) {
|
return FSUtils.makeWriteToken(TaskContext.getPartitionId(), TaskContext.get().stageId(),
|
||||||
FileSystem fs = hoodieTable.getMetaClient().getFs();
|
TaskContext.get().taskAttemptId());
|
||||||
try {
|
|
||||||
FileStatus[] prevFailedFiles = fs.globStatus(new Path(String
|
|
||||||
.format("%s/%s/%s", config.getBasePath(), partitionPath,
|
|
||||||
FSUtils.maskWithoutFileId(commitTime, taskPartitionId))));
|
|
||||||
if (prevFailedFiles != null) {
|
|
||||||
logger.info(
|
|
||||||
"Deleting " + prevFailedFiles.length + " files generated by previous failed attempts.");
|
|
||||||
for (FileStatus status : prevFailedFiles) {
|
|
||||||
fs.delete(status.getPath(), false);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} catch (IOException e) {
|
|
||||||
throw new HoodieIOException("Failed to cleanup Temp files from commit " + commitTime, e);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public static Schema createHoodieWriteSchema(Schema originalSchema) {
|
public static Schema createHoodieWriteSchema(Schema originalSchema) {
|
||||||
return HoodieAvroUtils.addMetadataFields(originalSchema);
|
return HoodieAvroUtils.addMetadataFields(originalSchema);
|
||||||
}
|
}
|
||||||
|
|
||||||
public Path makeNewPath(String partitionPath, int taskPartitionId, String fileName) {
|
public Path makeNewPath(String partitionPath) {
|
||||||
Path path = FSUtils.getPartitionPath(config.getBasePath(), partitionPath);
|
Path path = FSUtils.getPartitionPath(config.getBasePath(), partitionPath);
|
||||||
try {
|
try {
|
||||||
fs.mkdirs(path); // create a new partition as needed.
|
fs.mkdirs(path); // create a new partition as needed.
|
||||||
@@ -97,16 +99,37 @@ public abstract class HoodieIOHandle<T extends HoodieRecordPayload> {
|
|||||||
throw new HoodieIOException("Failed to make dir " + path, e);
|
throw new HoodieIOException("Failed to make dir " + path, e);
|
||||||
}
|
}
|
||||||
|
|
||||||
return new Path(path.toString(),
|
return new Path(path.toString(), FSUtils.makeDataFileName(commitTime, writeToken, fileId));
|
||||||
FSUtils.makeDataFileName(commitTime, taskPartitionId, fileName));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public Path makeTempPath(String partitionPath, int taskPartitionId, String fileName, int stageId,
|
/**
|
||||||
long taskAttemptId) {
|
* Creates an empty marker file corresponding to storage writer path
|
||||||
Path path = new Path(config.getBasePath(), HoodieTableMetaClient.TEMPFOLDER_NAME);
|
* @param partitionPath Partition path
|
||||||
return new Path(path.toString(),
|
*/
|
||||||
FSUtils.makeTempDataFileName(partitionPath, commitTime, taskPartitionId, fileName, stageId,
|
protected void createMarkerFile(String partitionPath) {
|
||||||
taskAttemptId));
|
Path markerPath = makeNewMarkerPath(partitionPath);
|
||||||
|
try {
|
||||||
|
logger.info("Creating Marker Path=" + markerPath);
|
||||||
|
fs.create(markerPath, false).close();
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new HoodieException("Failed to create marker file " + markerPath, e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* THe marker path will be <base-path>/.hoodie/.temp/<instant_ts>/2019/04/25/filename
|
||||||
|
* @param partitionPath
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
private Path makeNewMarkerPath(String partitionPath) {
|
||||||
|
Path markerRootPath = new Path(hoodieTable.getMetaClient().getMarkerFolderPath(commitTime));
|
||||||
|
Path path = FSUtils.getPartitionPath(markerRootPath, partitionPath);
|
||||||
|
try {
|
||||||
|
fs.mkdirs(path); // create a new partition as needed.
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new HoodieIOException("Failed to make dir " + path, e);
|
||||||
|
}
|
||||||
|
return new Path(path.toString(), FSUtils.makeMarkerFile(commitTime, writeToken, fileId));
|
||||||
}
|
}
|
||||||
|
|
||||||
public Schema getWriterSchema() {
|
public Schema getWriterSchema() {
|
||||||
|
|||||||
@@ -57,7 +57,6 @@ public class HoodieMergeHandle<T extends HoodieRecordPayload> extends HoodieIOHa
|
|||||||
private HoodieStorageWriter<IndexedRecord> storageWriter;
|
private HoodieStorageWriter<IndexedRecord> storageWriter;
|
||||||
private Path newFilePath;
|
private Path newFilePath;
|
||||||
private Path oldFilePath;
|
private Path oldFilePath;
|
||||||
private Path tempPath = null;
|
|
||||||
private long recordsWritten = 0;
|
private long recordsWritten = 0;
|
||||||
private long recordsDeleted = 0;
|
private long recordsDeleted = 0;
|
||||||
private long updatedRecordsWritten = 0;
|
private long updatedRecordsWritten = 0;
|
||||||
@@ -66,7 +65,7 @@ public class HoodieMergeHandle<T extends HoodieRecordPayload> extends HoodieIOHa
|
|||||||
|
|
||||||
public HoodieMergeHandle(HoodieWriteConfig config, String commitTime, HoodieTable<T> hoodieTable,
|
public HoodieMergeHandle(HoodieWriteConfig config, String commitTime, HoodieTable<T> hoodieTable,
|
||||||
Iterator<HoodieRecord<T>> recordItr, String fileId) {
|
Iterator<HoodieRecord<T>> recordItr, String fileId) {
|
||||||
super(config, commitTime, hoodieTable);
|
super(config, commitTime, fileId, hoodieTable);
|
||||||
String partitionPath = init(fileId, recordItr);
|
String partitionPath = init(fileId, recordItr);
|
||||||
init(fileId, partitionPath,
|
init(fileId, partitionPath,
|
||||||
hoodieTable.getROFileSystemView().getLatestDataFile(partitionPath, fileId).get());
|
hoodieTable.getROFileSystemView().getLatestDataFile(partitionPath, fileId).get());
|
||||||
@@ -77,7 +76,7 @@ public class HoodieMergeHandle<T extends HoodieRecordPayload> extends HoodieIOHa
|
|||||||
*/
|
*/
|
||||||
public HoodieMergeHandle(HoodieWriteConfig config, String commitTime, HoodieTable<T> hoodieTable,
|
public HoodieMergeHandle(HoodieWriteConfig config, String commitTime, HoodieTable<T> hoodieTable,
|
||||||
Map<String, HoodieRecord<T>> keyToNewRecords, String fileId, HoodieDataFile dataFileToBeMerged) {
|
Map<String, HoodieRecord<T>> keyToNewRecords, String fileId, HoodieDataFile dataFileToBeMerged) {
|
||||||
super(config, commitTime, hoodieTable);
|
super(config, commitTime, fileId, hoodieTable);
|
||||||
this.keyToNewRecords = keyToNewRecords;
|
this.keyToNewRecords = keyToNewRecords;
|
||||||
this.useWriterSchema = true;
|
this.useWriterSchema = true;
|
||||||
init(fileId, keyToNewRecords.get(keyToNewRecords.keySet().stream().findFirst().get())
|
init(fileId, keyToNewRecords.get(keyToNewRecords.keySet().stream().findFirst().get())
|
||||||
@@ -101,30 +100,25 @@ public class HoodieMergeHandle<T extends HoodieRecordPayload> extends HoodieIOHa
|
|||||||
oldFilePath = new Path(
|
oldFilePath = new Path(
|
||||||
config.getBasePath() + "/" + partitionPath + "/" + latestValidFilePath);
|
config.getBasePath() + "/" + partitionPath + "/" + latestValidFilePath);
|
||||||
String relativePath = new Path((partitionPath.isEmpty() ? "" : partitionPath + "/") + FSUtils
|
String relativePath = new Path((partitionPath.isEmpty() ? "" : partitionPath + "/") + FSUtils
|
||||||
.makeDataFileName(commitTime, TaskContext.getPartitionId(), fileId)).toString();
|
.makeDataFileName(commitTime, writeToken, fileId)).toString();
|
||||||
newFilePath = new Path(config.getBasePath(), relativePath);
|
newFilePath = new Path(config.getBasePath(), relativePath);
|
||||||
if (config.shouldUseTempFolderForCopyOnWriteForMerge()) {
|
|
||||||
this.tempPath = makeTempPath(partitionPath, TaskContext.getPartitionId(), fileId,
|
|
||||||
TaskContext.get().stageId(), TaskContext.get().taskAttemptId());
|
|
||||||
}
|
|
||||||
|
|
||||||
// handle cases of partial failures, for update task
|
|
||||||
if (fs.exists(newFilePath)) {
|
|
||||||
fs.delete(newFilePath, false);
|
|
||||||
}
|
|
||||||
|
|
||||||
logger.info(String
|
logger.info(String
|
||||||
.format("Merging new data into oldPath %s, as newPath %s", oldFilePath.toString(),
|
.format("Merging new data into oldPath %s, as newPath %s", oldFilePath.toString(),
|
||||||
getStorageWriterPath().toString()));
|
newFilePath.toString()));
|
||||||
// file name is same for all records, in this bunch
|
// file name is same for all records, in this bunch
|
||||||
writeStatus.setFileId(fileId);
|
writeStatus.setFileId(fileId);
|
||||||
writeStatus.setPartitionPath(partitionPath);
|
writeStatus.setPartitionPath(partitionPath);
|
||||||
writeStatus.getStat().setPartitionPath(partitionPath);
|
writeStatus.getStat().setPartitionPath(partitionPath);
|
||||||
writeStatus.getStat().setFileId(fileId);
|
writeStatus.getStat().setFileId(fileId);
|
||||||
writeStatus.getStat().setPaths(new Path(config.getBasePath()), newFilePath, tempPath);
|
writeStatus.getStat().setPath(new Path(config.getBasePath()), newFilePath);
|
||||||
|
|
||||||
|
// Create Marker file
|
||||||
|
createMarkerFile(partitionPath);
|
||||||
|
|
||||||
// Create the writer for writing the new version file
|
// Create the writer for writing the new version file
|
||||||
storageWriter = HoodieStorageWriterFactory
|
storageWriter = HoodieStorageWriterFactory
|
||||||
.getStorageWriter(commitTime, getStorageWriterPath(), hoodieTable, config, writerSchema);
|
.getStorageWriter(commitTime, newFilePath, hoodieTable, config, writerSchema);
|
||||||
} catch (IOException io) {
|
} catch (IOException io) {
|
||||||
logger.error("Error in update task at commit " + commitTime, io);
|
logger.error("Error in update task at commit " + commitTime, io);
|
||||||
writeStatus.setGlobalError(io);
|
writeStatus.setGlobalError(io);
|
||||||
@@ -231,17 +225,17 @@ public class HoodieMergeHandle<T extends HoodieRecordPayload> extends HoodieIOHa
|
|||||||
if (copyOldRecord) {
|
if (copyOldRecord) {
|
||||||
// this should work as it is, since this is an existing record
|
// this should work as it is, since this is an existing record
|
||||||
String errMsg = "Failed to merge old record into new file for key " + key + " from old file "
|
String errMsg = "Failed to merge old record into new file for key " + key + " from old file "
|
||||||
+ getOldFilePath() + " to new file " + getStorageWriterPath();
|
+ getOldFilePath() + " to new file " + newFilePath;
|
||||||
try {
|
try {
|
||||||
storageWriter.writeAvro(key, oldRecord);
|
storageWriter.writeAvro(key, oldRecord);
|
||||||
} catch (ClassCastException e) {
|
} catch (ClassCastException e) {
|
||||||
logger.error("Schema mismatch when rewriting old record " + oldRecord + " from file "
|
logger.error("Schema mismatch when rewriting old record " + oldRecord + " from file "
|
||||||
+ getOldFilePath() + " to file " + getStorageWriterPath() + " with writerSchema " + writerSchema
|
+ getOldFilePath() + " to file " + newFilePath + " with writerSchema " + writerSchema
|
||||||
.toString(true));
|
.toString(true));
|
||||||
throw new HoodieUpsertException(errMsg, e);
|
throw new HoodieUpsertException(errMsg, e);
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
logger.error("Failed to merge old record into new file for key " + key + " from old file "
|
logger.error("Failed to merge old record into new file for key " + key + " from old file "
|
||||||
+ getOldFilePath() + " to new file " + getStorageWriterPath(), e);
|
+ getOldFilePath() + " to new file " + newFilePath, e);
|
||||||
throw new HoodieUpsertException(errMsg, e);
|
throw new HoodieUpsertException(errMsg, e);
|
||||||
}
|
}
|
||||||
recordsWritten++;
|
recordsWritten++;
|
||||||
@@ -270,7 +264,7 @@ public class HoodieMergeHandle<T extends HoodieRecordPayload> extends HoodieIOHa
|
|||||||
storageWriter.close();
|
storageWriter.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
long fileSizeInBytes = FSUtils.getFileSize(fs, getStorageWriterPath());
|
long fileSizeInBytes = FSUtils.getFileSize(fs, newFilePath);
|
||||||
writeStatus.getStat().setTotalWriteBytes(fileSizeInBytes);
|
writeStatus.getStat().setTotalWriteBytes(fileSizeInBytes);
|
||||||
writeStatus.getStat().setFileSizeInBytes(fileSizeInBytes);
|
writeStatus.getStat().setFileSizeInBytes(fileSizeInBytes);
|
||||||
writeStatus.getStat().setNumWrites(recordsWritten);
|
writeStatus.getStat().setNumWrites(recordsWritten);
|
||||||
@@ -291,11 +285,6 @@ public class HoodieMergeHandle<T extends HoodieRecordPayload> extends HoodieIOHa
|
|||||||
return oldFilePath;
|
return oldFilePath;
|
||||||
}
|
}
|
||||||
|
|
||||||
private Path getStorageWriterPath() {
|
|
||||||
// Use tempPath for storage writer if possible
|
|
||||||
return (this.tempPath == null) ? this.newFilePath : this.tempPath;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public WriteStatus getWriteStatus() {
|
public WriteStatus getWriteStatus() {
|
||||||
return writeStatus;
|
return writeStatus;
|
||||||
|
|||||||
@@ -201,7 +201,7 @@ public class HoodieRealtimeTableCompactor implements HoodieCompactor {
|
|||||||
.map(
|
.map(
|
||||||
s -> {
|
s -> {
|
||||||
List<HoodieLogFile> logFiles = s.getLogFiles().sorted(HoodieLogFile
|
List<HoodieLogFile> logFiles = s.getLogFiles().sorted(HoodieLogFile
|
||||||
.getBaseInstantAndLogVersionComparator().reversed()).collect(Collectors.toList());
|
.getLogFileComparator()).collect(Collectors.toList());
|
||||||
totalLogFiles.add((long) logFiles.size());
|
totalLogFiles.add((long) logFiles.size());
|
||||||
totalFileSlices.add(1L);
|
totalFileSlices.add(1L);
|
||||||
// Avro generated classes are not inheriting Serializable. Using CompactionOperation POJO
|
// Avro generated classes are not inheriting Serializable. Using CompactionOperation POJO
|
||||||
|
|||||||
@@ -17,6 +17,7 @@
|
|||||||
package com.uber.hoodie.io.storage;
|
package com.uber.hoodie.io.storage;
|
||||||
|
|
||||||
import com.uber.hoodie.avro.HoodieAvroWriteSupport;
|
import com.uber.hoodie.avro.HoodieAvroWriteSupport;
|
||||||
|
import com.uber.hoodie.common.io.storage.HoodieWrapperFileSystem;
|
||||||
import com.uber.hoodie.common.model.HoodieRecord;
|
import com.uber.hoodie.common.model.HoodieRecord;
|
||||||
import com.uber.hoodie.common.model.HoodieRecordPayload;
|
import com.uber.hoodie.common.model.HoodieRecordPayload;
|
||||||
import com.uber.hoodie.common.util.FSUtils;
|
import com.uber.hoodie.common.util.FSUtils;
|
||||||
|
|||||||
@@ -28,8 +28,6 @@ import com.uber.hoodie.common.model.HoodieRecord;
|
|||||||
import com.uber.hoodie.common.model.HoodieRecordLocation;
|
import com.uber.hoodie.common.model.HoodieRecordLocation;
|
||||||
import com.uber.hoodie.common.model.HoodieRecordPayload;
|
import com.uber.hoodie.common.model.HoodieRecordPayload;
|
||||||
import com.uber.hoodie.common.model.HoodieRollingStatMetadata;
|
import com.uber.hoodie.common.model.HoodieRollingStatMetadata;
|
||||||
import com.uber.hoodie.common.model.HoodieWriteStat;
|
|
||||||
import com.uber.hoodie.common.table.HoodieTableMetaClient;
|
|
||||||
import com.uber.hoodie.common.table.HoodieTimeline;
|
import com.uber.hoodie.common.table.HoodieTimeline;
|
||||||
import com.uber.hoodie.common.table.timeline.HoodieActiveTimeline;
|
import com.uber.hoodie.common.table.timeline.HoodieActiveTimeline;
|
||||||
import com.uber.hoodie.common.table.timeline.HoodieInstant;
|
import com.uber.hoodie.common.table.timeline.HoodieInstant;
|
||||||
@@ -52,7 +50,6 @@ import java.io.IOException;
|
|||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.nio.charset.StandardCharsets;
|
import java.nio.charset.StandardCharsets;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
@@ -234,14 +231,14 @@ public class HoodieCopyOnWriteTable<T extends HoodieRecordPayload> extends Hoodi
|
|||||||
return new HoodieMergeHandle<>(config, commitTime, this, keyToNewRecords, fileId, dataFileToBeMerged);
|
return new HoodieMergeHandle<>(config, commitTime, this, keyToNewRecords, fileId, dataFileToBeMerged);
|
||||||
}
|
}
|
||||||
|
|
||||||
public Iterator<List<WriteStatus>> handleInsert(String commitTime,
|
public Iterator<List<WriteStatus>> handleInsert(String commitTime, String idPfx,
|
||||||
Iterator<HoodieRecord<T>> recordItr) throws Exception {
|
Iterator<HoodieRecord<T>> recordItr) throws Exception {
|
||||||
// This is needed since sometimes some buckets are never picked in getPartition() and end up with 0 records
|
// This is needed since sometimes some buckets are never picked in getPartition() and end up with 0 records
|
||||||
if (!recordItr.hasNext()) {
|
if (!recordItr.hasNext()) {
|
||||||
logger.info("Empty partition");
|
logger.info("Empty partition");
|
||||||
return Collections.singletonList((List<WriteStatus>) Collections.EMPTY_LIST).iterator();
|
return Collections.singletonList((List<WriteStatus>) Collections.EMPTY_LIST).iterator();
|
||||||
}
|
}
|
||||||
return new CopyOnWriteLazyInsertIterable<>(recordItr, config, commitTime, this);
|
return new CopyOnWriteLazyInsertIterable<>(recordItr, config, commitTime, this, idPfx);
|
||||||
}
|
}
|
||||||
|
|
||||||
public Iterator<List<WriteStatus>> handleInsert(String commitTime, String partitionPath, String fileId,
|
public Iterator<List<WriteStatus>> handleInsert(String commitTime, String partitionPath, String fileId,
|
||||||
@@ -261,9 +258,9 @@ public class HoodieCopyOnWriteTable<T extends HoodieRecordPayload> extends Hoodi
|
|||||||
BucketType btype = binfo.bucketType;
|
BucketType btype = binfo.bucketType;
|
||||||
try {
|
try {
|
||||||
if (btype.equals(BucketType.INSERT)) {
|
if (btype.equals(BucketType.INSERT)) {
|
||||||
return handleInsert(commitTime, recordItr);
|
return handleInsert(commitTime, binfo.fileIdPrefix, recordItr);
|
||||||
} else if (btype.equals(BucketType.UPDATE)) {
|
} else if (btype.equals(BucketType.UPDATE)) {
|
||||||
return handleUpdate(commitTime, binfo.fileLoc, recordItr);
|
return handleUpdate(commitTime, binfo.fileIdPrefix, recordItr);
|
||||||
} else {
|
} else {
|
||||||
throw new HoodieUpsertException(
|
throw new HoodieUpsertException(
|
||||||
"Unknown bucketType " + btype + " for partition :" + partition);
|
"Unknown bucketType " + btype + " for partition :" + partition);
|
||||||
@@ -376,9 +373,6 @@ public class HoodieCopyOnWriteTable<T extends HoodieRecordPayload> extends Hoodi
|
|||||||
.withDeletedFileResults(filesToDeletedStatus).build();
|
.withDeletedFileResults(filesToDeletedStatus).build();
|
||||||
}).collect();
|
}).collect();
|
||||||
|
|
||||||
// clean temporary data files
|
|
||||||
cleanTemporaryDataFiles(jsc);
|
|
||||||
|
|
||||||
// Delete Inflight instant if enabled
|
// Delete Inflight instant if enabled
|
||||||
deleteInflightInstant(deleteInstants, activeTimeline,
|
deleteInflightInstant(deleteInstants, activeTimeline,
|
||||||
new HoodieInstant(true, actionType, commit));
|
new HoodieInstant(true, actionType, commit));
|
||||||
@@ -391,99 +385,28 @@ public class HoodieCopyOnWriteTable<T extends HoodieRecordPayload> extends Hoodi
|
|||||||
* @param activeTimeline Hoodie active timeline
|
* @param activeTimeline Hoodie active timeline
|
||||||
* @param instantToBeDeleted Instant to be deleted
|
* @param instantToBeDeleted Instant to be deleted
|
||||||
*/
|
*/
|
||||||
protected static void deleteInflightInstant(boolean deleteInstant, HoodieActiveTimeline activeTimeline,
|
protected void deleteInflightInstant(boolean deleteInstant, HoodieActiveTimeline activeTimeline,
|
||||||
HoodieInstant instantToBeDeleted) {
|
HoodieInstant instantToBeDeleted) {
|
||||||
// Remove the rolled back inflight commits
|
// Remove the rolled back inflight commits
|
||||||
if (deleteInstant) {
|
if (deleteInstant) {
|
||||||
activeTimeline.deleteInflight(instantToBeDeleted);
|
try {
|
||||||
logger.info("Deleted inflight commit " + instantToBeDeleted);
|
//TODO: Cleanup Hoodie 1.0 rollback to simply call super.cleanFailedWrites with consistency check disabled
|
||||||
|
// and empty WriteStat list.
|
||||||
|
Path markerDir = new Path(metaClient.getMarkerFolderPath(instantToBeDeleted.getTimestamp()));
|
||||||
|
logger.info("Removing marker directory=" + markerDir);
|
||||||
|
if (metaClient.getFs().exists(markerDir)) {
|
||||||
|
metaClient.getFs().delete(markerDir, true);
|
||||||
|
}
|
||||||
|
activeTimeline.deleteInflight(instantToBeDeleted);
|
||||||
|
logger.info("Deleted inflight commit " + instantToBeDeleted);
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new HoodieIOException(e.getMessage(), e);
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
logger.warn("Rollback finished without deleting inflight instant file. Instant=" + instantToBeDeleted);
|
logger.warn("Rollback finished without deleting inflight instant file. Instant=" + instantToBeDeleted);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Finalize the written data files
|
|
||||||
*
|
|
||||||
* @param stats List of HoodieWriteStats
|
|
||||||
* @return number of files finalized
|
|
||||||
*/
|
|
||||||
@Override
|
|
||||||
@SuppressWarnings("unchecked")
|
|
||||||
public void finalizeWrite(JavaSparkContext jsc, List<HoodieWriteStat> stats)
|
|
||||||
throws HoodieIOException {
|
|
||||||
|
|
||||||
super.finalizeWrite(jsc, stats);
|
|
||||||
|
|
||||||
if (config.shouldUseTempFolderForCopyOnWrite()) {
|
|
||||||
// This is to rename each data file from temporary path to its final location
|
|
||||||
jsc.parallelize(stats, config.getFinalizeWriteParallelism())
|
|
||||||
.foreach(writeStat -> {
|
|
||||||
final FileSystem fs = getMetaClient().getFs();
|
|
||||||
final Path finalPath = new Path(config.getBasePath(), writeStat.getPath());
|
|
||||||
|
|
||||||
if (writeStat.getTempPath() != null) {
|
|
||||||
final Path tempPath = new Path(config.getBasePath(), writeStat.getTempPath());
|
|
||||||
boolean success;
|
|
||||||
try {
|
|
||||||
logger.info("Renaming temporary file: " + tempPath + " to " + finalPath);
|
|
||||||
success = fs.rename(tempPath, finalPath);
|
|
||||||
} catch (IOException e) {
|
|
||||||
throw new HoodieIOException(
|
|
||||||
"Failed to rename file: " + tempPath + " to " + finalPath);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!success) {
|
|
||||||
throw new HoodieIOException(
|
|
||||||
"Failed to rename file: " + tempPath + " to " + finalPath);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
// clean temporary data files
|
|
||||||
cleanTemporaryDataFiles(jsc);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Clean temporary data files that are produced from previous failed commit or retried spark
|
|
||||||
* stages.
|
|
||||||
*/
|
|
||||||
private void cleanTemporaryDataFiles(JavaSparkContext jsc) {
|
|
||||||
if (!config.shouldUseTempFolderForCopyOnWrite()) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
final FileSystem fs = getMetaClient().getFs();
|
|
||||||
final Path temporaryFolder = new Path(config.getBasePath(),
|
|
||||||
HoodieTableMetaClient.TEMPFOLDER_NAME);
|
|
||||||
try {
|
|
||||||
if (!fs.exists(temporaryFolder)) {
|
|
||||||
logger.info("Temporary folder does not exist: " + temporaryFolder);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
List<FileStatus> fileStatusesList = Arrays.asList(fs.listStatus(temporaryFolder));
|
|
||||||
List<Tuple2<String, Boolean>> results = jsc
|
|
||||||
.parallelize(fileStatusesList, config.getFinalizeWriteParallelism()).map(fileStatus -> {
|
|
||||||
FileSystem fs1 = getMetaClient().getFs();
|
|
||||||
boolean success = fs1.delete(fileStatus.getPath(), false);
|
|
||||||
logger
|
|
||||||
.info("Deleting file in temporary folder" + fileStatus.getPath() + "\t" + success);
|
|
||||||
return new Tuple2<>(fileStatus.getPath().toString(), success);
|
|
||||||
}).collect();
|
|
||||||
|
|
||||||
for (Tuple2<String, Boolean> result : results) {
|
|
||||||
if (!result._2()) {
|
|
||||||
logger.info("Failed to delete file: " + result._1());
|
|
||||||
throw new HoodieIOException("Failed to delete file in temporary folder: " + result._1());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} catch (IOException e) {
|
|
||||||
throw new HoodieIOException(
|
|
||||||
"Failed to clean data files in temporary folder: " + temporaryFolder);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private List<HoodieCleanStat> cleanPartitionPaths(List<String> partitionsToClean,
|
private List<HoodieCleanStat> cleanPartitionPaths(List<String> partitionsToClean,
|
||||||
JavaSparkContext jsc) {
|
JavaSparkContext jsc) {
|
||||||
int cleanerParallelism = Math.min(partitionsToClean.size(), config.getCleanerParallelism());
|
int cleanerParallelism = Math.min(partitionsToClean.size(), config.getCleanerParallelism());
|
||||||
@@ -624,13 +547,13 @@ public class HoodieCopyOnWriteTable<T extends HoodieRecordPayload> extends Hoodi
|
|||||||
class BucketInfo implements Serializable {
|
class BucketInfo implements Serializable {
|
||||||
|
|
||||||
BucketType bucketType;
|
BucketType bucketType;
|
||||||
String fileLoc;
|
String fileIdPrefix;
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
final StringBuilder sb = new StringBuilder("BucketInfo {");
|
final StringBuilder sb = new StringBuilder("BucketInfo {");
|
||||||
sb.append("bucketType=").append(bucketType).append(", ");
|
sb.append("bucketType=").append(bucketType).append(", ");
|
||||||
sb.append("fileLoc=").append(fileLoc);
|
sb.append("fileIdPrefix=").append(fileIdPrefix);
|
||||||
sb.append('}');
|
sb.append('}');
|
||||||
return sb.toString();
|
return sb.toString();
|
||||||
}
|
}
|
||||||
@@ -697,12 +620,12 @@ public class HoodieCopyOnWriteTable<T extends HoodieRecordPayload> extends Hoodi
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private int addUpdateBucket(String fileLoc) {
|
private int addUpdateBucket(String fileIdHint) {
|
||||||
int bucket = totalBuckets;
|
int bucket = totalBuckets;
|
||||||
updateLocationToBucket.put(fileLoc, bucket);
|
updateLocationToBucket.put(fileIdHint, bucket);
|
||||||
BucketInfo bucketInfo = new BucketInfo();
|
BucketInfo bucketInfo = new BucketInfo();
|
||||||
bucketInfo.bucketType = BucketType.UPDATE;
|
bucketInfo.bucketType = BucketType.UPDATE;
|
||||||
bucketInfo.fileLoc = fileLoc;
|
bucketInfo.fileIdPrefix = fileIdHint;
|
||||||
bucketInfoMap.put(totalBuckets, bucketInfo);
|
bucketInfoMap.put(totalBuckets, bucketInfo);
|
||||||
totalBuckets++;
|
totalBuckets++;
|
||||||
return bucket;
|
return bucket;
|
||||||
@@ -764,6 +687,7 @@ public class HoodieCopyOnWriteTable<T extends HoodieRecordPayload> extends Hoodi
|
|||||||
recordsPerBucket.add(totalUnassignedInserts / insertBuckets);
|
recordsPerBucket.add(totalUnassignedInserts / insertBuckets);
|
||||||
BucketInfo bucketInfo = new BucketInfo();
|
BucketInfo bucketInfo = new BucketInfo();
|
||||||
bucketInfo.bucketType = BucketType.INSERT;
|
bucketInfo.bucketType = BucketType.INSERT;
|
||||||
|
bucketInfo.fileIdPrefix = FSUtils.createNewFileIdPfx();
|
||||||
bucketInfoMap.put(totalBuckets, bucketInfo);
|
bucketInfoMap.put(totalBuckets, bucketInfo);
|
||||||
totalBuckets++;
|
totalBuckets++;
|
||||||
}
|
}
|
||||||
@@ -784,7 +708,6 @@ public class HoodieCopyOnWriteTable<T extends HoodieRecordPayload> extends Hoodi
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns a list of small files in the given partition path
|
* Returns a list of small files in the given partition path
|
||||||
*/
|
*/
|
||||||
|
|||||||
@@ -121,13 +121,13 @@ public class HoodieMergeOnReadTable<T extends HoodieRecordPayload> extends
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Iterator<List<WriteStatus>> handleInsert(String commitTime,
|
public Iterator<List<WriteStatus>> handleInsert(String commitTime, String idPfx,
|
||||||
Iterator<HoodieRecord<T>> recordItr) throws Exception {
|
Iterator<HoodieRecord<T>> recordItr) throws Exception {
|
||||||
// If canIndexLogFiles, write inserts to log files else write inserts to parquet files
|
// If canIndexLogFiles, write inserts to log files else write inserts to parquet files
|
||||||
if (index.canIndexLogFiles()) {
|
if (index.canIndexLogFiles()) {
|
||||||
return new MergeOnReadLazyInsertIterable<>(recordItr, config, commitTime, this);
|
return new MergeOnReadLazyInsertIterable<>(recordItr, config, commitTime, this, idPfx);
|
||||||
} else {
|
} else {
|
||||||
return super.handleInsert(commitTime, recordItr);
|
return super.handleInsert(commitTime, idPfx, recordItr);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -325,10 +325,10 @@ public class HoodieMergeOnReadTable<T extends HoodieRecordPayload> extends
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void finalizeWrite(JavaSparkContext jsc, List<HoodieWriteStat> stats)
|
public void finalizeWrite(JavaSparkContext jsc, String instantTs, List<HoodieWriteStat> stats)
|
||||||
throws HoodieIOException {
|
throws HoodieIOException {
|
||||||
// delegate to base class for MOR tables
|
// delegate to base class for MOR tables
|
||||||
super.finalizeWrite(jsc, stats);
|
super.finalizeWrite(jsc, instantTs, stats);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@@ -362,6 +362,7 @@ public class HoodieMergeOnReadTable<T extends HoodieRecordPayload> extends
|
|||||||
super(profile);
|
super(profile);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
protected List<SmallFile> getSmallFiles(String partitionPath) {
|
protected List<SmallFile> getSmallFiles(String partitionPath) {
|
||||||
|
|
||||||
// smallFiles only for partitionPath
|
// smallFiles only for partitionPath
|
||||||
|
|||||||
@@ -34,19 +34,30 @@ import com.uber.hoodie.common.table.timeline.HoodieInstant;
|
|||||||
import com.uber.hoodie.common.table.view.FileSystemViewManager;
|
import com.uber.hoodie.common.table.view.FileSystemViewManager;
|
||||||
import com.uber.hoodie.common.table.view.HoodieTableFileSystemView;
|
import com.uber.hoodie.common.table.view.HoodieTableFileSystemView;
|
||||||
import com.uber.hoodie.common.util.AvroUtils;
|
import com.uber.hoodie.common.util.AvroUtils;
|
||||||
|
import com.uber.hoodie.common.util.ConsistencyGuard;
|
||||||
|
import com.uber.hoodie.common.util.ConsistencyGuard.FileVisibility;
|
||||||
|
import com.uber.hoodie.common.util.FSUtils;
|
||||||
|
import com.uber.hoodie.common.util.FailSafeConsistencyGuard;
|
||||||
|
import com.uber.hoodie.common.util.collection.Pair;
|
||||||
import com.uber.hoodie.config.HoodieWriteConfig;
|
import com.uber.hoodie.config.HoodieWriteConfig;
|
||||||
import com.uber.hoodie.exception.HoodieException;
|
import com.uber.hoodie.exception.HoodieException;
|
||||||
import com.uber.hoodie.exception.HoodieIOException;
|
import com.uber.hoodie.exception.HoodieIOException;
|
||||||
import com.uber.hoodie.exception.HoodieSavepointException;
|
import com.uber.hoodie.exception.HoodieSavepointException;
|
||||||
import com.uber.hoodie.index.HoodieIndex;
|
import com.uber.hoodie.index.HoodieIndex;
|
||||||
import com.uber.hoodie.io.ConsistencyCheck;
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
|
import java.util.ArrayList;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.concurrent.TimeoutException;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
import java.util.stream.Stream;
|
import java.util.stream.Stream;
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
|
import org.apache.hadoop.fs.Path;
|
||||||
|
import org.apache.log4j.LogManager;
|
||||||
|
import org.apache.log4j.Logger;
|
||||||
import org.apache.spark.Partitioner;
|
import org.apache.spark.Partitioner;
|
||||||
import org.apache.spark.api.java.JavaRDD;
|
import org.apache.spark.api.java.JavaRDD;
|
||||||
import org.apache.spark.api.java.JavaSparkContext;
|
import org.apache.spark.api.java.JavaSparkContext;
|
||||||
@@ -56,10 +67,7 @@ import org.apache.spark.api.java.JavaSparkContext;
|
|||||||
*/
|
*/
|
||||||
public abstract class HoodieTable<T extends HoodieRecordPayload> implements Serializable {
|
public abstract class HoodieTable<T extends HoodieRecordPayload> implements Serializable {
|
||||||
|
|
||||||
// time between successive attempts to ensure written data's metadata is consistent on storage
|
private static Logger logger = LogManager.getLogger(HoodieTable.class);
|
||||||
private static long INITIAL_CONSISTENCY_CHECK_INTERVAL_MS = 2000L;
|
|
||||||
// maximum number of checks, for consistency of written data. Will wait upto 256 Secs
|
|
||||||
private static int MAX_CONSISTENCY_CHECKS = 7;
|
|
||||||
|
|
||||||
protected final HoodieWriteConfig config;
|
protected final HoodieWriteConfig config;
|
||||||
protected final HoodieTableMetaClient metaClient;
|
protected final HoodieTableMetaClient metaClient;
|
||||||
@@ -279,20 +287,126 @@ public abstract class HoodieTable<T extends HoodieRecordPayload> implements Seri
|
|||||||
* @param stats List of HoodieWriteStats
|
* @param stats List of HoodieWriteStats
|
||||||
* @throws HoodieIOException if some paths can't be finalized on storage
|
* @throws HoodieIOException if some paths can't be finalized on storage
|
||||||
*/
|
*/
|
||||||
public void finalizeWrite(JavaSparkContext jsc, List<HoodieWriteStat> stats)
|
public void finalizeWrite(JavaSparkContext jsc, String instantTs, List<HoodieWriteStat> stats)
|
||||||
throws HoodieIOException {
|
throws HoodieIOException {
|
||||||
if (config.isConsistencyCheckEnabled()) {
|
cleanFailedWrites(jsc, instantTs, stats, config.isConsistencyCheckEnabled());
|
||||||
List<String> pathsToCheck = stats.stream()
|
}
|
||||||
.map(stat -> stat.getTempPath() != null
|
|
||||||
? stat.getTempPath() : stat.getPath())
|
|
||||||
.collect(Collectors.toList());
|
|
||||||
|
|
||||||
List<String> failingPaths = new ConsistencyCheck(config.getBasePath(), pathsToCheck, jsc,
|
/**
|
||||||
config.getFinalizeWriteParallelism())
|
* Reconciles WriteStats and marker files to detect and safely delete duplicate data files created because of Spark
|
||||||
.check(MAX_CONSISTENCY_CHECKS, INITIAL_CONSISTENCY_CHECK_INTERVAL_MS);
|
* retries.
|
||||||
if (failingPaths.size() > 0) {
|
*
|
||||||
throw new HoodieIOException("Could not verify consistency of paths : " + failingPaths);
|
* @param jsc Spark Context
|
||||||
|
* @param instantTs Instant Timestamp
|
||||||
|
* @param stats Hoodie Write Stat
|
||||||
|
* @param consistencyCheckEnabled Consistency Check Enabled
|
||||||
|
* @throws HoodieIOException
|
||||||
|
*/
|
||||||
|
protected void cleanFailedWrites(JavaSparkContext jsc, String instantTs, List<HoodieWriteStat> stats,
|
||||||
|
boolean consistencyCheckEnabled) throws HoodieIOException {
|
||||||
|
try {
|
||||||
|
// Reconcile marker and data files with WriteStats so that partially written data-files due to failed
|
||||||
|
// (but succeeded on retry) tasks are removed.
|
||||||
|
String basePath = getMetaClient().getBasePath();
|
||||||
|
FileSystem fs = getMetaClient().getFs();
|
||||||
|
Path markerDir = new Path(metaClient.getMarkerFolderPath(instantTs));
|
||||||
|
|
||||||
|
if (!fs.exists(markerDir)) {
|
||||||
|
// Happens when all writes are appends
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
List<String> invalidDataPaths = FSUtils.getAllDataFilesForMarkers(fs, basePath, instantTs, markerDir.toString());
|
||||||
|
List<String> validDataPaths = stats.stream().map(w -> String.format("%s/%s", basePath, w.getPath()))
|
||||||
|
.filter(p -> p.endsWith(".parquet")).collect(Collectors.toList());
|
||||||
|
// Contains list of partially created files. These need to be cleaned up.
|
||||||
|
invalidDataPaths.removeAll(validDataPaths);
|
||||||
|
logger.warn("InValid data paths=" + invalidDataPaths);
|
||||||
|
|
||||||
|
Map<String, List<Pair<String, String>>> groupByPartition = invalidDataPaths.stream()
|
||||||
|
.map(dp -> Pair.of(new Path(dp).getParent().toString(), dp))
|
||||||
|
.collect(Collectors.groupingBy(Pair::getKey));
|
||||||
|
|
||||||
|
if (!groupByPartition.isEmpty()) {
|
||||||
|
// Ensure all files in the delete list are actually present. This is mandatory for an eventually consistent FS.
|
||||||
|
// Otherwise, we may miss deleting such files. If files are not found even after retries, fail the commit
|
||||||
|
if (consistencyCheckEnabled) {
|
||||||
|
// This will ensure all files to be deleted are present.
|
||||||
|
waitForAllFiles(jsc, groupByPartition, FileVisibility.APPEAR);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Now delete partially written files
|
||||||
|
jsc.parallelize(new ArrayList<>(groupByPartition.values()), config.getFinalizeWriteParallelism())
|
||||||
|
.map(partitionWithFileList -> {
|
||||||
|
final FileSystem fileSystem = metaClient.getFs();
|
||||||
|
logger.info("Deleting invalid data files=" + partitionWithFileList);
|
||||||
|
if (partitionWithFileList.isEmpty()) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
// Delete
|
||||||
|
partitionWithFileList.stream().map(Pair::getValue).forEach(file -> {
|
||||||
|
try {
|
||||||
|
fileSystem.delete(new Path(file), false);
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new HoodieIOException(e.getMessage(), e);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}).collect();
|
||||||
|
|
||||||
|
// Now ensure the deleted files disappear
|
||||||
|
if (consistencyCheckEnabled) {
|
||||||
|
// This will ensure all files to be deleted are absent.
|
||||||
|
waitForAllFiles(jsc, groupByPartition, FileVisibility.DISAPPEAR);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Now delete the marker directory
|
||||||
|
if (fs.exists(markerDir)) {
|
||||||
|
// For append only case, we do not write to marker dir. Hence, the above check
|
||||||
|
logger.info("Removing marker directory=" + markerDir);
|
||||||
|
fs.delete(markerDir, true);
|
||||||
|
}
|
||||||
|
} catch (IOException ioe) {
|
||||||
|
throw new HoodieIOException(ioe.getMessage(), ioe);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Ensures all files passed either appear or disappear
|
||||||
|
* @param jsc JavaSparkContext
|
||||||
|
* @param groupByPartition Files grouped by partition
|
||||||
|
* @param visibility Appear/Disappear
|
||||||
|
*/
|
||||||
|
private void waitForAllFiles(JavaSparkContext jsc, Map<String, List<Pair<String, String>>> groupByPartition,
|
||||||
|
FileVisibility visibility) {
|
||||||
|
// This will ensure all files either appear or disappear, depending on the requested visibility.
|
||||||
|
boolean checkPassed =
|
||||||
|
jsc.parallelize(new ArrayList<>(groupByPartition.entrySet()), config.getFinalizeWriteParallelism())
|
||||||
|
.map(partitionWithFileList -> waitForCondition(partitionWithFileList.getKey(),
|
||||||
|
partitionWithFileList.getValue().stream(), visibility))
|
||||||
|
.collect().stream().allMatch(x -> x);
|
||||||
|
if (!checkPassed) {
|
||||||
|
throw new HoodieIOException("Consistency check failed to ensure all files " + visibility);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean waitForCondition(String partitionPath, Stream<Pair<String, String>> partitionFilePaths,
|
||||||
|
FileVisibility visibility) {
|
||||||
|
final FileSystem fileSystem = metaClient.getFs();
|
||||||
|
List<String> fileList = partitionFilePaths.map(Pair::getValue).collect(Collectors.toList());
|
||||||
|
try {
|
||||||
|
getFailSafeConsistencyGuard(fileSystem).waitTill(partitionPath, fileList, visibility);
|
||||||
|
} catch (IOException | TimeoutException ioe) {
|
||||||
|
logger.error("Got exception while waiting for files to show up", ioe);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
private ConsistencyGuard getFailSafeConsistencyGuard(FileSystem fileSystem) {
|
||||||
|
return new FailSafeConsistencyGuard(fileSystem, config.getMaxConsistencyChecks(),
|
||||||
|
config.getInitialConsistencyCheckIntervalMs(),
|
||||||
|
config.getMaxConsistencyCheckIntervalMs());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -66,6 +66,7 @@ import java.util.function.Predicate;
|
|||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
import java.util.stream.Stream;
|
import java.util.stream.Stream;
|
||||||
import org.apache.hadoop.fs.Path;
|
import org.apache.hadoop.fs.Path;
|
||||||
|
import org.apache.hadoop.fs.RemoteIterator;
|
||||||
import org.apache.log4j.LogManager;
|
import org.apache.log4j.LogManager;
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
import org.apache.spark.api.java.JavaRDD;
|
import org.apache.spark.api.java.JavaRDD;
|
||||||
@@ -620,18 +621,17 @@ public class TestCleaner extends TestHoodieClientBase {
|
|||||||
* Test Cleaning functionality of table.rollback() API.
|
* Test Cleaning functionality of table.rollback() API.
|
||||||
*/
|
*/
|
||||||
@Test
|
@Test
|
||||||
public void testCleanTemporaryDataFilesOnRollback() throws IOException {
|
public void testCleanMarkerDataFilesOnRollback() throws IOException {
|
||||||
HoodieTestUtils.createCommitFiles(basePath, "000");
|
HoodieTestUtils.createCommitFiles(basePath, "000");
|
||||||
List<String> tempFiles = createTempFiles("000", 10);
|
List<String> markerFiles = createMarkerFiles("000", 10);
|
||||||
assertEquals("Some temp files are created.", 10, tempFiles.size());
|
assertEquals("Some marker files are created.", 10, markerFiles.size());
|
||||||
assertEquals("Some temp files are created.", tempFiles.size(), getTotalTempFiles());
|
assertEquals("Some marker files are created.", markerFiles.size(), getTotalTempFiles());
|
||||||
|
|
||||||
|
HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath).build();
|
||||||
|
HoodieTable table = HoodieTable.getHoodieTable(
|
||||||
|
new HoodieTableMetaClient(jsc.hadoopConfiguration(), config.getBasePath(), true), config,
|
||||||
|
jsc);
|
||||||
|
|
||||||
HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath)
|
|
||||||
.withUseTempFolderCopyOnWriteForCreate(true)
|
|
||||||
.withUseTempFolderCopyOnWriteForMerge(false).build();
|
|
||||||
HoodieTable table = HoodieTable.getHoodieTable(new HoodieTableMetaClient(jsc.hadoopConfiguration(), config
|
|
||||||
.getBasePath(), true),
|
|
||||||
config, jsc);
|
|
||||||
table.rollback(jsc, "000", true);
|
table.rollback(jsc, "000", true);
|
||||||
assertEquals("All temp files are deleted.", 0, getTotalTempFiles());
|
assertEquals("All temp files are deleted.", 0, getTotalTempFiles());
|
||||||
}
|
}
|
||||||
@@ -901,10 +901,10 @@ public class TestCleaner extends TestHoodieClientBase {
|
|||||||
* @return generated files
|
* @return generated files
|
||||||
* @throws IOException in case of error
|
* @throws IOException in case of error
|
||||||
*/
|
*/
|
||||||
private List<String> createTempFiles(String commitTime, int numFiles) throws IOException {
|
private List<String> createMarkerFiles(String commitTime, int numFiles) throws IOException {
|
||||||
List<String> files = new ArrayList<>();
|
List<String> files = new ArrayList<>();
|
||||||
for (int i = 0; i < numFiles; i++) {
|
for (int i = 0; i < numFiles; i++) {
|
||||||
files.add(HoodieTestUtils.createNewDataFile(basePath, HoodieTableMetaClient.TEMPFOLDER_NAME, commitTime));
|
files.add(HoodieTestUtils.createNewMarkerFile(basePath, "2019/03/29", commitTime));
|
||||||
}
|
}
|
||||||
return files;
|
return files;
|
||||||
}
|
}
|
||||||
@@ -915,7 +915,13 @@ public class TestCleaner extends TestHoodieClientBase {
|
|||||||
* @throws IOException in case of error
|
* @throws IOException in case of error
|
||||||
*/
|
*/
|
||||||
private int getTotalTempFiles() throws IOException {
|
private int getTotalTempFiles() throws IOException {
|
||||||
return fs.listStatus(new Path(basePath, HoodieTableMetaClient.TEMPFOLDER_NAME)).length;
|
RemoteIterator itr = fs.listFiles(new Path(basePath, HoodieTableMetaClient.TEMPFOLDER_NAME), true);
|
||||||
|
int count = 0;
|
||||||
|
while (itr.hasNext()) {
|
||||||
|
count++;
|
||||||
|
itr.next();
|
||||||
|
}
|
||||||
|
return count;
|
||||||
}
|
}
|
||||||
|
|
||||||
private Stream<Pair<String, String>> convertPathToFileIdWithCommitTime(
|
private Stream<Pair<String, String>> convertPathToFileIdWithCommitTime(
|
||||||
|
|||||||
@@ -0,0 +1,108 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2019 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package com.uber.hoodie;
|
||||||
|
|
||||||
|
import com.uber.hoodie.common.HoodieClientTestUtils;
|
||||||
|
import com.uber.hoodie.common.util.ConsistencyGuard;
|
||||||
|
import com.uber.hoodie.common.util.FSUtils;
|
||||||
|
import com.uber.hoodie.common.util.FailSafeConsistencyGuard;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.concurrent.TimeoutException;
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
|
import org.apache.hadoop.fs.LocalFileSystem;
|
||||||
|
import org.apache.hadoop.fs.Path;
|
||||||
|
import org.junit.Before;
|
||||||
|
import org.junit.Test;
|
||||||
|
import org.junit.rules.TemporaryFolder;
|
||||||
|
|
||||||
|
public class TestConsistencyGuard {
|
||||||
|
private String basePath;
|
||||||
|
protected transient FileSystem fs;
|
||||||
|
|
||||||
|
@Before
|
||||||
|
public void setup() throws IOException {
|
||||||
|
TemporaryFolder testFolder = new TemporaryFolder();
|
||||||
|
testFolder.create();
|
||||||
|
basePath = testFolder.getRoot().getAbsolutePath();
|
||||||
|
fs = FSUtils.getFs(basePath, new Configuration());
|
||||||
|
if (fs instanceof LocalFileSystem) {
|
||||||
|
LocalFileSystem lfs = (LocalFileSystem) fs;
|
||||||
|
// With LocalFileSystem, with checksum disabled, fs.open() returns an inputStream which is FSInputStream
|
||||||
|
// This causes ClassCastExceptions in LogRecordScanner (and potentially other places) calling fs.open
|
||||||
|
// So, for the tests, we enforce checksum verification to circumvent the problem
|
||||||
|
lfs.setVerifyChecksum(true);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testCheckPassingAppearAndDisAppear() throws Exception {
|
||||||
|
HoodieClientTestUtils.fakeDataFile(basePath, "partition/path", "000", "f1");
|
||||||
|
HoodieClientTestUtils.fakeDataFile(basePath, "partition/path", "000", "f2");
|
||||||
|
HoodieClientTestUtils.fakeDataFile(basePath, "partition/path", "000", "f3");
|
||||||
|
|
||||||
|
ConsistencyGuard passing = new FailSafeConsistencyGuard(fs, 1, 1000, 1000);
|
||||||
|
passing.waitTillFileAppears(new Path(basePath + "/partition/path/f1_1-0-1_000.parquet"));
|
||||||
|
passing.waitTillFileAppears(new Path(basePath + "/partition/path/f2_1-0-1_000.parquet"));
|
||||||
|
passing.waitTillAllFilesAppear(basePath + "/partition/path",
|
||||||
|
Arrays.asList(basePath + "/partition/path/f1_1-0-1_000.parquet",
|
||||||
|
basePath + "/partition/path/f2_1-0-1_000.parquet"));
|
||||||
|
|
||||||
|
fs.delete(new Path(basePath + "/partition/path/f1_1-0-1_000.parquet"), false);
|
||||||
|
fs.delete(new Path(basePath + "/partition/path/f2_1-0-1_000.parquet"), false);
|
||||||
|
passing.waitTillFileDisappears(new Path(basePath + "/partition/path/f1_1-0-1_000.parquet"));
|
||||||
|
passing.waitTillFileDisappears(new Path(basePath + "/partition/path/f2_1-0-1_000.parquet"));
|
||||||
|
passing.waitTillAllFilesDisappear(basePath + "/partition/path",
|
||||||
|
Arrays.asList(basePath + "/partition/path/f1_1-0-1_000.parquet",
|
||||||
|
basePath + "/partition/path/f2_1-0-1_000.parquet"));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(expected = TimeoutException.class)
|
||||||
|
public void testCheckFailingAppear() throws Exception {
|
||||||
|
HoodieClientTestUtils.fakeDataFile(basePath, "partition/path", "000", "f1");
|
||||||
|
ConsistencyGuard passing = new FailSafeConsistencyGuard(fs, 3, 10, 10);
|
||||||
|
passing.waitTillAllFilesAppear(basePath + "/partition/path",
|
||||||
|
Arrays.asList(basePath + "/partition/path/f1_1-0-2_000.parquet",
|
||||||
|
basePath + "/partition/path/f2_1-0-2_000.parquet"));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Test(expected = TimeoutException.class)
|
||||||
|
public void testCheckFailingAppears() throws Exception {
|
||||||
|
HoodieClientTestUtils.fakeDataFile(basePath, "partition/path", "000", "f1");
|
||||||
|
ConsistencyGuard passing = new FailSafeConsistencyGuard(fs, 3, 10, 10);
|
||||||
|
passing.waitTillFileAppears(new Path(basePath + "/partition/path/f1_1-0-2_000.parquet"));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(expected = TimeoutException.class)
|
||||||
|
public void testCheckFailingDisappear() throws Exception {
|
||||||
|
HoodieClientTestUtils.fakeDataFile(basePath, "partition/path", "000", "f1");
|
||||||
|
ConsistencyGuard passing = new FailSafeConsistencyGuard(fs, 3, 10, 10);
|
||||||
|
passing.waitTillAllFilesDisappear(basePath + "/partition/path",
|
||||||
|
Arrays.asList(basePath + "/partition/path/f1_1-0-1_000.parquet",
|
||||||
|
basePath + "/partition/path/f2_1-0-2_000.parquet"));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(expected = TimeoutException.class)
|
||||||
|
public void testCheckFailingDisappears() throws Exception {
|
||||||
|
HoodieClientTestUtils.fakeDataFile(basePath, "partition/path", "000", "f1");
|
||||||
|
HoodieClientTestUtils.fakeDataFile(basePath, "partition/path", "000", "f1");
|
||||||
|
ConsistencyGuard passing = new FailSafeConsistencyGuard(fs, 3, 10, 10);
|
||||||
|
passing.waitTillFileDisappears(new Path(basePath + "/partition/path/f1_1-0-1_000.parquet"));
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -16,6 +16,7 @@
|
|||||||
|
|
||||||
package com.uber.hoodie;
|
package com.uber.hoodie;
|
||||||
|
|
||||||
|
import static com.uber.hoodie.common.table.HoodieTableMetaClient.MARKER_EXTN;
|
||||||
import static org.junit.Assert.assertEquals;
|
import static org.junit.Assert.assertEquals;
|
||||||
import static org.junit.Assert.assertFalse;
|
import static org.junit.Assert.assertFalse;
|
||||||
import static org.junit.Assert.assertTrue;
|
import static org.junit.Assert.assertTrue;
|
||||||
@@ -38,6 +39,7 @@ import com.uber.hoodie.common.table.TableFileSystemView;
|
|||||||
import com.uber.hoodie.common.table.timeline.HoodieInstant;
|
import com.uber.hoodie.common.table.timeline.HoodieInstant;
|
||||||
import com.uber.hoodie.common.util.FSUtils;
|
import com.uber.hoodie.common.util.FSUtils;
|
||||||
import com.uber.hoodie.common.util.ParquetUtils;
|
import com.uber.hoodie.common.util.ParquetUtils;
|
||||||
|
import com.uber.hoodie.common.util.collection.Pair;
|
||||||
import com.uber.hoodie.config.HoodieCompactionConfig;
|
import com.uber.hoodie.config.HoodieCompactionConfig;
|
||||||
import com.uber.hoodie.config.HoodieStorageConfig;
|
import com.uber.hoodie.config.HoodieStorageConfig;
|
||||||
import com.uber.hoodie.config.HoodieWriteConfig;
|
import com.uber.hoodie.config.HoodieWriteConfig;
|
||||||
@@ -244,19 +246,6 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
|
|||||||
HoodieWriteClient::upsert, false);
|
HoodieWriteClient::upsert, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Test Upsert API using temporary folders.
|
|
||||||
*/
|
|
||||||
@Test
|
|
||||||
public void testUpsertsWithFinalizeWrite() throws Exception {
|
|
||||||
HoodieWriteConfig hoodieWriteConfig = getConfigBuilder()
|
|
||||||
.withUseTempFolderCopyOnWriteForCreate(true)
|
|
||||||
.withUseTempFolderCopyOnWriteForMerge(true)
|
|
||||||
.build();
|
|
||||||
testUpsertsInternal(hoodieWriteConfig,
|
|
||||||
HoodieWriteClient::upsert, false);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Test UpsertPrepped API
|
* Test UpsertPrepped API
|
||||||
*/
|
*/
|
||||||
@@ -266,19 +255,6 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
|
|||||||
HoodieWriteClient::upsertPreppedRecords, true);
|
HoodieWriteClient::upsertPreppedRecords, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Test UpsertPrepped API using temporary folders.
|
|
||||||
*/
|
|
||||||
@Test
|
|
||||||
public void testUpsertsPreppedWithFinalizeWrite() throws Exception {
|
|
||||||
HoodieWriteConfig hoodieWriteConfig = getConfigBuilder()
|
|
||||||
.withUseTempFolderCopyOnWriteForCreate(true)
|
|
||||||
.withUseTempFolderCopyOnWriteForMerge(true)
|
|
||||||
.build();
|
|
||||||
testUpsertsInternal(hoodieWriteConfig,
|
|
||||||
HoodieWriteClient::upsertPreppedRecords, true);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Test one of HoodieWriteClient upsert(Prepped) APIs
|
* Test one of HoodieWriteClient upsert(Prepped) APIs
|
||||||
*
|
*
|
||||||
@@ -385,7 +361,7 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
|
|||||||
assertEquals("Just 1 file needs to be added.", 1, statuses.size());
|
assertEquals("Just 1 file needs to be added.", 1, statuses.size());
|
||||||
String file1 = statuses.get(0).getFileId();
|
String file1 = statuses.get(0).getFileId();
|
||||||
assertEquals("file should contain 100 records", ParquetUtils.readRowKeysFromParquet(jsc.hadoopConfiguration(),
|
assertEquals("file should contain 100 records", ParquetUtils.readRowKeysFromParquet(jsc.hadoopConfiguration(),
|
||||||
new Path(basePath, testPartitionPath + "/" + FSUtils.makeDataFileName(commitTime1, 0, file1))).size(), 100);
|
new Path(basePath, statuses.get(0).getStat().getPath())).size(), 100);
|
||||||
|
|
||||||
// Update + Inserts such that they just expand file1
|
// Update + Inserts such that they just expand file1
|
||||||
String commitTime2 = "002";
|
String commitTime2 = "002";
|
||||||
@@ -403,7 +379,7 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
|
|||||||
assertEquals("Just 1 file needs to be updated.", 1, statuses.size());
|
assertEquals("Just 1 file needs to be updated.", 1, statuses.size());
|
||||||
assertEquals("Existing file should be expanded", file1, statuses.get(0).getFileId());
|
assertEquals("Existing file should be expanded", file1, statuses.get(0).getFileId());
|
||||||
assertEquals("Existing file should be expanded", commitTime1, statuses.get(0).getStat().getPrevCommit());
|
assertEquals("Existing file should be expanded", commitTime1, statuses.get(0).getStat().getPrevCommit());
|
||||||
Path newFile = new Path(basePath, testPartitionPath + "/" + FSUtils.makeDataFileName(commitTime2, 0, file1));
|
Path newFile = new Path(basePath, statuses.get(0).getStat().getPath());
|
||||||
assertEquals("file should contain 140 records",
|
assertEquals("file should contain 140 records",
|
||||||
ParquetUtils.readRowKeysFromParquet(jsc.hadoopConfiguration(), newFile).size(), 140);
|
ParquetUtils.readRowKeysFromParquet(jsc.hadoopConfiguration(), newFile).size(), 140);
|
||||||
|
|
||||||
@@ -499,7 +475,7 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
|
|||||||
assertEquals("Just 1 file needs to be added.", 1, statuses.size());
|
assertEquals("Just 1 file needs to be added.", 1, statuses.size());
|
||||||
String file1 = statuses.get(0).getFileId();
|
String file1 = statuses.get(0).getFileId();
|
||||||
assertEquals("file should contain 100 records", ParquetUtils.readRowKeysFromParquet(jsc.hadoopConfiguration(),
|
assertEquals("file should contain 100 records", ParquetUtils.readRowKeysFromParquet(jsc.hadoopConfiguration(),
|
||||||
new Path(basePath, testPartitionPath + "/" + FSUtils.makeDataFileName(commitTime1, 0, file1))).size(), 100);
|
new Path(basePath, statuses.get(0).getStat().getPath())).size(), 100);
|
||||||
|
|
||||||
// Second, set of Inserts should just expand file1
|
// Second, set of Inserts should just expand file1
|
||||||
String commitTime2 = "002";
|
String commitTime2 = "002";
|
||||||
@@ -513,7 +489,7 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
|
|||||||
assertEquals("Just 1 file needs to be updated.", 1, statuses.size());
|
assertEquals("Just 1 file needs to be updated.", 1, statuses.size());
|
||||||
assertEquals("Existing file should be expanded", file1, statuses.get(0).getFileId());
|
assertEquals("Existing file should be expanded", file1, statuses.get(0).getFileId());
|
||||||
assertEquals("Existing file should be expanded", commitTime1, statuses.get(0).getStat().getPrevCommit());
|
assertEquals("Existing file should be expanded", commitTime1, statuses.get(0).getStat().getPrevCommit());
|
||||||
Path newFile = new Path(basePath, testPartitionPath + "/" + FSUtils.makeDataFileName(commitTime2, 0, file1));
|
Path newFile = new Path(basePath, statuses.get(0).getStat().getPath());
|
||||||
assertEquals("file should contain 140 records",
|
assertEquals("file should contain 140 records",
|
||||||
ParquetUtils.readRowKeysFromParquet(jsc.hadoopConfiguration(), newFile).size(), 140);
|
ParquetUtils.readRowKeysFromParquet(jsc.hadoopConfiguration(), newFile).size(), 140);
|
||||||
|
|
||||||
@@ -678,22 +654,59 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
|
|||||||
*/
|
*/
|
||||||
@Test
|
@Test
|
||||||
public void testConsistencyCheckDuringFinalize() throws Exception {
|
public void testConsistencyCheckDuringFinalize() throws Exception {
|
||||||
HoodieWriteConfig cfg = getConfigBuilder().withAutoCommit(false).build();
|
|
||||||
HoodieWriteClient client = getHoodieWriteClient(cfg);
|
|
||||||
HoodieTableMetaClient metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(),
|
HoodieTableMetaClient metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(),
|
||||||
basePath);
|
basePath);
|
||||||
|
|
||||||
String commitTime = "000";
|
String commitTime = "000";
|
||||||
client.startCommitWithTime(commitTime);
|
HoodieWriteConfig cfg = getConfigBuilder().withAutoCommit(false).build();
|
||||||
JavaRDD<HoodieRecord> writeRecords = jsc
|
HoodieWriteClient client = getHoodieWriteClient(cfg);
|
||||||
.parallelize(dataGen.generateInserts(commitTime, 200), 1);
|
Pair<Path, JavaRDD<WriteStatus>> result = testConsistencyCheck(metaClient, commitTime);
|
||||||
JavaRDD<WriteStatus> result = client.bulkInsert(writeRecords, commitTime);
|
|
||||||
|
|
||||||
// move one of the files & commit should fail
|
// Delete orphan marker and commit should succeed
|
||||||
WriteStatus status = result.take(1).get(0);
|
metaClient.getFs().delete(result.getKey(), false);
|
||||||
Path origPath = new Path(basePath + "/" + status.getStat().getPath());
|
assertTrue("Commit should succeed", client.commit(commitTime, result.getRight()));
|
||||||
Path hidePath = new Path(basePath + "/" + status.getStat().getPath() + "_hide");
|
assertTrue("After explicit commit, commit file should be created",
|
||||||
metaClient.getFs().rename(origPath, hidePath);
|
HoodieTestUtils.doesCommitExist(basePath, commitTime));
|
||||||
|
// Marker directory must be removed
|
||||||
|
assertFalse(metaClient.getFs().exists(new Path(metaClient.getMarkerFolderPath(commitTime))));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testRollbackAfterConsistencyCheckFailure() throws Exception {
|
||||||
|
String commitTime = "000";
|
||||||
|
HoodieTableMetaClient metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), basePath);
|
||||||
|
HoodieWriteConfig cfg = getConfigBuilder().withAutoCommit(false).build();
|
||||||
|
HoodieWriteClient client = getHoodieWriteClient(cfg);
|
||||||
|
testConsistencyCheck(metaClient, commitTime);
|
||||||
|
|
||||||
|
// Rollback of this commit should succeed
|
||||||
|
client.rollback(commitTime);
|
||||||
|
assertFalse("After explicit rollback, commit file should not be present",
|
||||||
|
HoodieTestUtils.doesCommitExist(basePath, commitTime));
|
||||||
|
// Marker directory must be removed after rollback
|
||||||
|
assertFalse(metaClient.getFs().exists(new Path(metaClient.getMarkerFolderPath(commitTime))));
|
||||||
|
}
|
||||||
|
|
||||||
|
private Pair<Path, JavaRDD<WriteStatus>> testConsistencyCheck(HoodieTableMetaClient metaClient, String commitTime)
|
||||||
|
throws Exception {
|
||||||
|
HoodieWriteConfig cfg = getConfigBuilder().withAutoCommit(false).withMaxConsistencyCheckIntervalMs(1)
|
||||||
|
.withInitialConsistencyCheckIntervalMs(1).build();
|
||||||
|
HoodieWriteClient client = getHoodieWriteClient(cfg);
|
||||||
|
|
||||||
|
client.startCommitWithTime(commitTime);
|
||||||
|
JavaRDD<HoodieRecord> writeRecords = jsc.parallelize(dataGen.generateInserts(commitTime, 200), 1);
|
||||||
|
JavaRDD<WriteStatus> result = client.bulkInsert(writeRecords, commitTime);
|
||||||
|
result.collect();
|
||||||
|
|
||||||
|
// Create a dummy marker file to simulate the case that a marker file was created without data file.
|
||||||
|
// This should fail the commit
|
||||||
|
String partitionPath = Arrays.stream(fs.globStatus(new Path(String.format("%s/*/*/*/*",
|
||||||
|
metaClient.getMarkerFolderPath(commitTime))),
|
||||||
|
path -> path.toString().endsWith(MARKER_EXTN))).limit(1)
|
||||||
|
.map(status -> status.getPath().getParent().toString()).collect(Collectors.toList()).get(0);
|
||||||
|
Path markerFilePath = new Path(String.format("%s/%s", partitionPath,
|
||||||
|
FSUtils.makeMarkerFile(commitTime, "1-0-1", UUID.randomUUID().toString())));
|
||||||
|
metaClient.getFs().create(markerFilePath);
|
||||||
|
logger.info("Created a dummy marker path=" + markerFilePath);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
client.commit(commitTime, result);
|
client.commit(commitTime, result);
|
||||||
@@ -701,12 +714,7 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
|
|||||||
} catch (HoodieCommitException cme) {
|
} catch (HoodieCommitException cme) {
|
||||||
assertTrue(cme.getCause() instanceof HoodieIOException);
|
assertTrue(cme.getCause() instanceof HoodieIOException);
|
||||||
}
|
}
|
||||||
|
return Pair.of(markerFilePath, result);
|
||||||
// Re-introduce & commit should succeed
|
|
||||||
metaClient.getFs().rename(hidePath, origPath);
|
|
||||||
assertTrue("Commit should succeed", client.commit(commitTime, result));
|
|
||||||
assertTrue("After explicit commit, commit file should be created",
|
|
||||||
HoodieTestUtils.doesCommitExist(basePath, commitTime));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|||||||
@@ -27,6 +27,7 @@ import java.util.List;
|
|||||||
import java.util.Optional;
|
import java.util.Optional;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
import org.apache.spark.api.java.JavaRDD;
|
import org.apache.spark.api.java.JavaRDD;
|
||||||
|
import org.junit.Assert;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
import scala.Option;
|
import scala.Option;
|
||||||
|
|
||||||
@@ -107,7 +108,7 @@ public class TestHoodieReadClient extends TestHoodieClientBase {
|
|||||||
filteredRDD = readClient.filterExists(recordsRDD);
|
filteredRDD = readClient.filterExists(recordsRDD);
|
||||||
List<HoodieRecord> result = filteredRDD.collect();
|
List<HoodieRecord> result = filteredRDD.collect();
|
||||||
// Check results
|
// Check results
|
||||||
assertTrue(result.size() == 25);
|
Assert.assertEquals(25, result.size());
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|||||||
@@ -105,7 +105,7 @@ public class HoodieClientTestUtils {
|
|||||||
throws Exception {
|
throws Exception {
|
||||||
String parentPath = String.format("%s/%s", basePath, partitionPath);
|
String parentPath = String.format("%s/%s", basePath, partitionPath);
|
||||||
new File(parentPath).mkdirs();
|
new File(parentPath).mkdirs();
|
||||||
String path = String.format("%s/%s", parentPath, FSUtils.makeDataFileName(commitTime, 0, fileId));
|
String path = String.format("%s/%s", parentPath, FSUtils.makeDataFileName(commitTime, "1-0-1", fileId));
|
||||||
new File(path).createNewFile();
|
new File(path).createNewFile();
|
||||||
new RandomAccessFile(path, "rw").setLength(length);
|
new RandomAccessFile(path, "rw").setLength(length);
|
||||||
}
|
}
|
||||||
@@ -236,7 +236,7 @@ public class HoodieClientTestUtils {
|
|||||||
Thread.sleep(1000);
|
Thread.sleep(1000);
|
||||||
String commitTime = HoodieTestUtils.makeNewCommitTime();
|
String commitTime = HoodieTestUtils.makeNewCommitTime();
|
||||||
String fileId = UUID.randomUUID().toString();
|
String fileId = UUID.randomUUID().toString();
|
||||||
String filename = FSUtils.makeDataFileName(commitTime, 1, fileId);
|
String filename = FSUtils.makeDataFileName(commitTime, "1-0-1", fileId);
|
||||||
HoodieTestUtils.createCommitFiles(basePath, commitTime);
|
HoodieTestUtils.createCommitFiles(basePath, commitTime);
|
||||||
return HoodieClientTestUtils
|
return HoodieClientTestUtils
|
||||||
.writeParquetFile(basePath, partitionPath, filename, records, schema, filter, createCommitTime);
|
.writeParquetFile(basePath, partitionPath, filename, records, schema, filter, createCommitTime);
|
||||||
|
|||||||
@@ -25,24 +25,32 @@ import com.uber.hoodie.common.model.HoodieKey;
|
|||||||
import com.uber.hoodie.common.model.HoodieRecord;
|
import com.uber.hoodie.common.model.HoodieRecord;
|
||||||
import com.uber.hoodie.common.model.HoodieRecordLocation;
|
import com.uber.hoodie.common.model.HoodieRecordLocation;
|
||||||
import com.uber.hoodie.common.model.HoodieTestUtils;
|
import com.uber.hoodie.common.model.HoodieTestUtils;
|
||||||
import com.uber.hoodie.common.table.HoodieTableMetaClient;
|
|
||||||
import com.uber.hoodie.common.table.HoodieTimeline;
|
import com.uber.hoodie.common.table.HoodieTimeline;
|
||||||
import com.uber.hoodie.common.util.FSUtils;
|
import com.uber.hoodie.common.util.FSUtils;
|
||||||
|
import com.uber.hoodie.common.util.ParquetUtils;
|
||||||
import com.uber.hoodie.config.HoodieWriteConfig;
|
import com.uber.hoodie.config.HoodieWriteConfig;
|
||||||
|
import com.uber.hoodie.io.HoodieCreateHandle;
|
||||||
|
import com.uber.hoodie.io.HoodieMergeHandle;
|
||||||
import com.uber.hoodie.table.HoodieCopyOnWriteTable;
|
import com.uber.hoodie.table.HoodieCopyOnWriteTable;
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
|
import java.io.Serializable;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Iterator;
|
import java.util.Arrays;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import org.apache.avro.Schema;
|
||||||
|
import org.apache.avro.generic.GenericRecord;
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.fs.Path;
|
import org.apache.hadoop.fs.Path;
|
||||||
|
import org.apache.parquet.avro.AvroReadSupport;
|
||||||
import org.apache.spark.api.java.JavaSparkContext;
|
import org.apache.spark.api.java.JavaSparkContext;
|
||||||
import org.junit.After;
|
import org.junit.After;
|
||||||
|
import org.junit.Assert;
|
||||||
import org.junit.Before;
|
import org.junit.Before;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
import org.junit.rules.TemporaryFolder;
|
import org.junit.rules.TemporaryFolder;
|
||||||
|
|
||||||
public class TestUpdateMapFunction {
|
public class TestUpdateMapFunction implements Serializable {
|
||||||
|
|
||||||
private String basePath = null;
|
private String basePath = null;
|
||||||
private transient JavaSparkContext jsc = null;
|
private transient JavaSparkContext jsc = null;
|
||||||
@@ -71,51 +79,73 @@ public class TestUpdateMapFunction {
|
|||||||
@Test
|
@Test
|
||||||
public void testSchemaEvolutionOnUpdate() throws Exception {
|
public void testSchemaEvolutionOnUpdate() throws Exception {
|
||||||
// Create a bunch of records with a old version of schema
|
// Create a bunch of records with a old version of schema
|
||||||
HoodieWriteConfig config = makeHoodieClientConfig("/exampleSchema.txt");
|
final HoodieWriteConfig config = makeHoodieClientConfig("/exampleSchema.txt");
|
||||||
HoodieTableMetaClient metaClient = new HoodieTableMetaClient(HoodieTestUtils.getDefaultHadoopConf(), basePath);
|
System.out.println("JSC =" + jsc);
|
||||||
HoodieCopyOnWriteTable table = new HoodieCopyOnWriteTable(config, jsc);
|
final HoodieCopyOnWriteTable table = new HoodieCopyOnWriteTable(config, jsc);
|
||||||
|
|
||||||
String recordStr1 = "{\"_row_key\":\"8eb5b87a-1feh-4edd-87b4-6ec96dc405a0\","
|
final List<WriteStatus> statuses = jsc.parallelize(Arrays.asList(1)).map(x -> {
|
||||||
+ "\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}";
|
String recordStr1 = "{\"_row_key\":\"8eb5b87a-1feh-4edd-87b4-6ec96dc405a0\","
|
||||||
String recordStr2 = "{\"_row_key\":\"8eb5b87b-1feu-4edd-87b4-6ec96dc405a0\","
|
+ "\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}";
|
||||||
+ "\"time\":\"2016-01-31T03:20:41.415Z\",\"number\":100}";
|
String recordStr2 = "{\"_row_key\":\"8eb5b87b-1feu-4edd-87b4-6ec96dc405a0\","
|
||||||
String recordStr3 = "{\"_row_key\":\"8eb5b87c-1fej-4edd-87b4-6ec96dc405a0\","
|
+ "\"time\":\"2016-01-31T03:20:41.415Z\",\"number\":100}";
|
||||||
+ "\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":15}";
|
String recordStr3 = "{\"_row_key\":\"8eb5b87c-1fej-4edd-87b4-6ec96dc405a0\","
|
||||||
List<HoodieRecord> records = new ArrayList<>();
|
+ "\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":15}";
|
||||||
TestRawTripPayload rowChange1 = new TestRawTripPayload(recordStr1);
|
List<HoodieRecord> insertRecords = new ArrayList<>();
|
||||||
records.add(new HoodieRecord(new HoodieKey(rowChange1.getRowKey(), rowChange1.getPartitionPath()), rowChange1));
|
TestRawTripPayload rowChange1 = new TestRawTripPayload(recordStr1);
|
||||||
TestRawTripPayload rowChange2 = new TestRawTripPayload(recordStr2);
|
insertRecords
|
||||||
records.add(new HoodieRecord(new HoodieKey(rowChange2.getRowKey(), rowChange2.getPartitionPath()), rowChange2));
|
.add(new HoodieRecord(new HoodieKey(rowChange1.getRowKey(), rowChange1.getPartitionPath()), rowChange1));
|
||||||
TestRawTripPayload rowChange3 = new TestRawTripPayload(recordStr3);
|
TestRawTripPayload rowChange2 = new TestRawTripPayload(recordStr2);
|
||||||
records.add(new HoodieRecord(new HoodieKey(rowChange3.getRowKey(), rowChange3.getPartitionPath()), rowChange3));
|
insertRecords
|
||||||
Iterator<List<WriteStatus>> insertResult = table.handleInsert("100", records.iterator());
|
.add(new HoodieRecord(new HoodieKey(rowChange2.getRowKey(), rowChange2.getPartitionPath()), rowChange2));
|
||||||
Path commitFile = new Path(config.getBasePath() + "/.hoodie/" + HoodieTimeline.makeCommitFileName("100"));
|
TestRawTripPayload rowChange3 = new TestRawTripPayload(recordStr3);
|
||||||
|
insertRecords
|
||||||
|
.add(new HoodieRecord(new HoodieKey(rowChange3.getRowKey(), rowChange3.getPartitionPath()), rowChange3));
|
||||||
|
|
||||||
|
HoodieCreateHandle createHandle = new HoodieCreateHandle(config, "100", table, rowChange1.getPartitionPath(),
|
||||||
|
"f1-0", insertRecords.iterator());
|
||||||
|
createHandle.write();
|
||||||
|
WriteStatus insertResult = createHandle.close();
|
||||||
|
return insertResult;
|
||||||
|
}).collect();
|
||||||
|
|
||||||
|
final Path commitFile = new Path(config.getBasePath() + "/.hoodie/" + HoodieTimeline.makeCommitFileName("100"));
|
||||||
FSUtils.getFs(basePath, HoodieTestUtils.getDefaultHadoopConf()).create(commitFile);
|
FSUtils.getFs(basePath, HoodieTestUtils.getDefaultHadoopConf()).create(commitFile);
|
||||||
|
|
||||||
// Now try an update with an evolved schema
|
// Now try an update with an evolved schema
|
||||||
// Evolved schema does not have guarantee on preserving the original field ordering
|
// Evolved schema does not have guarantee on preserving the original field ordering
|
||||||
config = makeHoodieClientConfig("/exampleEvolvedSchema.txt");
|
final HoodieWriteConfig config2 = makeHoodieClientConfig("/exampleEvolvedSchema.txt");
|
||||||
metaClient = new HoodieTableMetaClient(HoodieTestUtils.getDefaultHadoopConf(), basePath);
|
final Schema schema = Schema.parse(config2.getSchema());
|
||||||
String fileId = insertResult.next().get(0).getFileId();
|
final WriteStatus insertResult = statuses.get(0);
|
||||||
System.out.println(fileId);
|
String fileId = insertResult.getFileId();
|
||||||
|
|
||||||
table = new HoodieCopyOnWriteTable(config, jsc);
|
final HoodieCopyOnWriteTable table2 = new HoodieCopyOnWriteTable(config2, jsc);
|
||||||
// New content with values for the newly added field
|
Assert.assertEquals(1, jsc.parallelize(Arrays.asList(1)).map(x -> {
|
||||||
recordStr1 = "{\"_row_key\":\"8eb5b87a-1feh-4edd-87b4-6ec96dc405a0\","
|
// New content with values for the newly added field
|
||||||
+ "\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12,\"added_field\":1}";
|
String recordStr1 = "{\"_row_key\":\"8eb5b87a-1feh-4edd-87b4-6ec96dc405a0\","
|
||||||
records = new ArrayList<>();
|
+ "\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12,\"added_field\":1}";
|
||||||
rowChange1 = new TestRawTripPayload(recordStr1);
|
List<HoodieRecord> updateRecords = new ArrayList<>();
|
||||||
HoodieRecord record1 = new HoodieRecord(new HoodieKey(rowChange1.getRowKey(), rowChange1.getPartitionPath()),
|
TestRawTripPayload rowChange1 = new TestRawTripPayload(recordStr1);
|
||||||
rowChange1);
|
HoodieRecord record1 = new HoodieRecord(new HoodieKey(rowChange1.getRowKey(), rowChange1.getPartitionPath()),
|
||||||
record1.setCurrentLocation(new HoodieRecordLocation("100", fileId));
|
rowChange1);
|
||||||
records.add(record1);
|
record1.setCurrentLocation(new HoodieRecordLocation("100", fileId));
|
||||||
|
updateRecords.add(record1);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
table.handleUpdate("101", fileId, records.iterator());
|
HoodieMergeHandle mergeHandle = new HoodieMergeHandle(config2, "101", table2, updateRecords.iterator(), fileId);
|
||||||
} catch (ClassCastException e) {
|
Configuration conf = new Configuration();
|
||||||
fail("UpdateFunction could not read records written with exampleSchema.txt using the "
|
AvroReadSupport.setAvroReadSchema(conf, mergeHandle.getWriterSchema());
|
||||||
+ "exampleEvolvedSchema.txt");
|
List<GenericRecord> oldRecords = ParquetUtils.readAvroRecords(conf,
|
||||||
}
|
new Path(config2.getBasePath() + "/" + insertResult.getStat().getPath()));
|
||||||
|
for (GenericRecord rec : oldRecords) {
|
||||||
|
mergeHandle.write(rec);
|
||||||
|
}
|
||||||
|
mergeHandle.close();
|
||||||
|
} catch (ClassCastException e) {
|
||||||
|
fail("UpdateFunction could not read records written with exampleSchema.txt using the "
|
||||||
|
+ "exampleEvolvedSchema.txt");
|
||||||
|
}
|
||||||
|
return 1;
|
||||||
|
}).collect().size());
|
||||||
}
|
}
|
||||||
|
|
||||||
private HoodieWriteConfig makeHoodieClientConfig(String schema) throws Exception {
|
private HoodieWriteConfig makeHoodieClientConfig(String schema) throws Exception {
|
||||||
@@ -123,5 +153,4 @@ public class TestUpdateMapFunction {
|
|||||||
String schemaStr = IOUtils.toString(getClass().getResourceAsStream(schema), "UTF-8");
|
String schemaStr = IOUtils.toString(getClass().getResourceAsStream(schema), "UTF-8");
|
||||||
return HoodieWriteConfig.newBuilder().withPath(basePath).withSchema(schemaStr).build();
|
return HoodieWriteConfig.newBuilder().withPath(basePath).withSchema(schemaStr).build();
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
@@ -1,91 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2018 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
* you may not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
package com.uber.hoodie.io;
|
|
||||||
|
|
||||||
import static org.junit.Assert.assertEquals;
|
|
||||||
import static org.junit.Assert.assertTrue;
|
|
||||||
import static org.mockito.Mockito.anyInt;
|
|
||||||
import static org.mockito.Mockito.anyList;
|
|
||||||
import static org.mockito.Mockito.spy;
|
|
||||||
import static org.mockito.Mockito.times;
|
|
||||||
import static org.mockito.Mockito.verify;
|
|
||||||
|
|
||||||
import com.uber.hoodie.common.HoodieClientTestUtils;
|
|
||||||
import java.io.File;
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.util.Arrays;
|
|
||||||
import org.apache.spark.api.java.JavaSparkContext;
|
|
||||||
import org.junit.After;
|
|
||||||
import org.junit.Before;
|
|
||||||
import org.junit.Test;
|
|
||||||
import org.junit.rules.TemporaryFolder;
|
|
||||||
|
|
||||||
public class TestConsistencyCheck {
|
|
||||||
|
|
||||||
private String basePath;
|
|
||||||
private JavaSparkContext jsc;
|
|
||||||
|
|
||||||
@Before
|
|
||||||
public void setup() throws IOException {
|
|
||||||
jsc = new JavaSparkContext(HoodieClientTestUtils.getSparkConfForTest("ConsistencyCheckTest"));
|
|
||||||
TemporaryFolder testFolder = new TemporaryFolder();
|
|
||||||
testFolder.create();
|
|
||||||
basePath = testFolder.getRoot().getAbsolutePath();
|
|
||||||
}
|
|
||||||
|
|
||||||
@After
|
|
||||||
public void teardown() {
|
|
||||||
if (jsc != null) {
|
|
||||||
jsc.stop();
|
|
||||||
}
|
|
||||||
File testFolderPath = new File(basePath);
|
|
||||||
if (testFolderPath.exists()) {
|
|
||||||
testFolderPath.delete();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testExponentialBackoff() throws Exception {
|
|
||||||
HoodieClientTestUtils.fakeDataFile(basePath, "partition/path", "000", "f1");
|
|
||||||
JavaSparkContext jscSpy = spy(jsc);
|
|
||||||
|
|
||||||
ConsistencyCheck failing = new ConsistencyCheck(basePath,
|
|
||||||
Arrays.asList("partition/path/f1_0_000.parquet", "partition/path/f2_0_000.parquet"),
|
|
||||||
jscSpy, 2);
|
|
||||||
long startMs = System.currentTimeMillis();
|
|
||||||
assertEquals(1, failing.check(5, 10).size());
|
|
||||||
assertTrue((System.currentTimeMillis() - startMs) > (10 + 20 + 40 + 80));
|
|
||||||
verify(jscSpy, times(5)).parallelize(anyList(), anyInt());
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testCheckPassingAndFailing() throws Exception {
|
|
||||||
HoodieClientTestUtils.fakeDataFile(basePath, "partition/path", "000", "f1");
|
|
||||||
HoodieClientTestUtils.fakeDataFile(basePath, "partition/path", "000", "f2");
|
|
||||||
HoodieClientTestUtils.fakeDataFile(basePath, "partition/path", "000", "f3");
|
|
||||||
|
|
||||||
ConsistencyCheck passing = new ConsistencyCheck(basePath,
|
|
||||||
Arrays.asList("partition/path/f1_0_000.parquet", "partition/path/f2_0_000.parquet"),
|
|
||||||
jsc, 2);
|
|
||||||
assertEquals(0, passing.check(1, 1000).size());
|
|
||||||
|
|
||||||
ConsistencyCheck failing = new ConsistencyCheck(basePath,
|
|
||||||
Arrays.asList("partition/path/f1_0_000.parquet", "partition/path/f4_0_000.parquet"),
|
|
||||||
jsc, 2);
|
|
||||||
assertEquals(1, failing.check(1, 1000).size());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -74,13 +74,14 @@ public class TestHoodieCommitArchiveLog {
|
|||||||
|
|
||||||
@AfterClass
|
@AfterClass
|
||||||
public static void cleanUp() throws Exception {
|
public static void cleanUp() throws Exception {
|
||||||
|
// Need to closeAll to clear FileSystem.Cache, required because DFS and LocalFS used in the
|
||||||
|
// same JVM
|
||||||
|
FileSystem.closeAll();
|
||||||
|
|
||||||
if (hdfsTestService != null) {
|
if (hdfsTestService != null) {
|
||||||
hdfsTestService.stop();
|
hdfsTestService.stop();
|
||||||
dfsCluster.shutdown();
|
dfsCluster.shutdown();
|
||||||
}
|
}
|
||||||
// Need to closeAll to clear FileSystem.Cache, required because DFS and LocalFS used in the
|
|
||||||
// same JVM
|
|
||||||
FileSystem.closeAll();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@BeforeClass
|
@BeforeClass
|
||||||
@@ -245,7 +246,7 @@ public class TestHoodieCommitArchiveLog {
|
|||||||
|
|
||||||
//read the file
|
//read the file
|
||||||
HoodieLogFormat.Reader reader = HoodieLogFormat.newReader(dfs,
|
HoodieLogFormat.Reader reader = HoodieLogFormat.newReader(dfs,
|
||||||
new HoodieLogFile(new Path(basePath + "/.hoodie/.commits_.archive.1")),
|
new HoodieLogFile(new Path(basePath + "/.hoodie/.commits_.archive.1_1-0-1")),
|
||||||
HoodieArchivedMetaEntry.getClassSchema());
|
HoodieArchivedMetaEntry.getClassSchema());
|
||||||
|
|
||||||
int archivedRecordsCount = 0;
|
int archivedRecordsCount = 0;
|
||||||
|
|||||||
@@ -17,7 +17,6 @@
|
|||||||
package com.uber.hoodie.table;
|
package com.uber.hoodie.table;
|
||||||
|
|
||||||
import static org.junit.Assert.assertEquals;
|
import static org.junit.Assert.assertEquals;
|
||||||
import static org.junit.Assert.assertFalse;
|
|
||||||
import static org.junit.Assert.assertTrue;
|
import static org.junit.Assert.assertTrue;
|
||||||
import static org.mockito.Mockito.mock;
|
import static org.mockito.Mockito.mock;
|
||||||
import static org.mockito.Mockito.when;
|
import static org.mockito.Mockito.when;
|
||||||
@@ -36,6 +35,7 @@ import com.uber.hoodie.common.table.HoodieTableMetaClient;
|
|||||||
import com.uber.hoodie.common.table.HoodieTimeline;
|
import com.uber.hoodie.common.table.HoodieTimeline;
|
||||||
import com.uber.hoodie.common.util.FSUtils;
|
import com.uber.hoodie.common.util.FSUtils;
|
||||||
import com.uber.hoodie.common.util.ParquetUtils;
|
import com.uber.hoodie.common.util.ParquetUtils;
|
||||||
|
import com.uber.hoodie.common.util.collection.Pair;
|
||||||
import com.uber.hoodie.config.HoodieCompactionConfig;
|
import com.uber.hoodie.config.HoodieCompactionConfig;
|
||||||
import com.uber.hoodie.config.HoodieStorageConfig;
|
import com.uber.hoodie.config.HoodieStorageConfig;
|
||||||
import com.uber.hoodie.config.HoodieWriteConfig;
|
import com.uber.hoodie.config.HoodieWriteConfig;
|
||||||
@@ -44,18 +44,18 @@ import com.uber.hoodie.table.HoodieCopyOnWriteTable.UpsertPartitioner;
|
|||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.Iterator;
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.UUID;
|
import java.util.UUID;
|
||||||
import org.apache.avro.generic.GenericRecord;
|
import org.apache.avro.generic.GenericRecord;
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
|
||||||
import org.apache.hadoop.fs.Path;
|
import org.apache.hadoop.fs.Path;
|
||||||
import org.apache.parquet.avro.AvroReadSupport;
|
import org.apache.parquet.avro.AvroReadSupport;
|
||||||
import org.apache.parquet.hadoop.ParquetReader;
|
import org.apache.parquet.hadoop.ParquetReader;
|
||||||
|
import org.apache.spark.TaskContext;
|
||||||
import org.apache.spark.api.java.JavaSparkContext;
|
import org.apache.spark.api.java.JavaSparkContext;
|
||||||
import org.junit.After;
|
import org.junit.After;
|
||||||
|
import org.junit.Assert;
|
||||||
import org.junit.Before;
|
import org.junit.Before;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
import org.junit.rules.TemporaryFolder;
|
import org.junit.rules.TemporaryFolder;
|
||||||
@@ -85,20 +85,24 @@ public class TestCopyOnWriteTable {
|
|||||||
public void testMakeNewPath() throws Exception {
|
public void testMakeNewPath() throws Exception {
|
||||||
String fileName = UUID.randomUUID().toString();
|
String fileName = UUID.randomUUID().toString();
|
||||||
String partitionPath = "2016/05/04";
|
String partitionPath = "2016/05/04";
|
||||||
int unitNumber = (int) (Math.random() * 10);
|
|
||||||
HoodieRecord record = mock(HoodieRecord.class);
|
|
||||||
when(record.getPartitionPath()).thenReturn(partitionPath);
|
|
||||||
|
|
||||||
String commitTime = HoodieTestUtils.makeNewCommitTime();
|
String commitTime = HoodieTestUtils.makeNewCommitTime();
|
||||||
HoodieWriteConfig config = makeHoodieClientConfig();
|
HoodieWriteConfig config = makeHoodieClientConfig();
|
||||||
HoodieTableMetaClient metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), basePath);
|
HoodieTableMetaClient metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), basePath);
|
||||||
HoodieTable table = HoodieTable.getHoodieTable(metaClient, config, jsc);
|
HoodieTable table = HoodieTable.getHoodieTable(metaClient, config, jsc);
|
||||||
|
|
||||||
HoodieCreateHandle io = new HoodieCreateHandle(config, commitTime, table, partitionPath,
|
Pair<Path, String> newPathWithWriteToken = jsc.parallelize(Arrays.asList(1)).map(x -> {
|
||||||
UUID.randomUUID().toString());
|
HoodieRecord record = mock(HoodieRecord.class);
|
||||||
Path newPath = io.makeNewPath(record.getPartitionPath(), unitNumber, fileName);
|
when(record.getPartitionPath()).thenReturn(partitionPath);
|
||||||
assertTrue(newPath.toString().equals(
|
String writeToken = FSUtils.makeWriteToken(TaskContext.getPartitionId(), TaskContext.get().stageId(),
|
||||||
this.basePath + "/" + partitionPath + "/" + FSUtils.makeDataFileName(commitTime, unitNumber, fileName)));
|
TaskContext.get().taskAttemptId());
|
||||||
|
HoodieCreateHandle io = new HoodieCreateHandle(config, commitTime, table, partitionPath, fileName);
|
||||||
|
return Pair.of(io.makeNewPath(record.getPartitionPath()), writeToken);
|
||||||
|
}).collect().get(0);
|
||||||
|
|
||||||
|
Assert.assertEquals(newPathWithWriteToken.getKey().toString(),
|
||||||
|
this.basePath + "/" + partitionPath + "/"
|
||||||
|
+ FSUtils.makeDataFileName(commitTime, newPathWithWriteToken.getRight(), fileName));
|
||||||
}
|
}
|
||||||
|
|
||||||
private HoodieWriteConfig makeHoodieClientConfig() throws Exception {
|
private HoodieWriteConfig makeHoodieClientConfig() throws Exception {
|
||||||
@@ -141,7 +145,11 @@ public class TestCopyOnWriteTable {
|
|||||||
records.add(new HoodieRecord(new HoodieKey(rowChange3.getRowKey(), rowChange3.getPartitionPath()), rowChange3));
|
records.add(new HoodieRecord(new HoodieKey(rowChange3.getRowKey(), rowChange3.getPartitionPath()), rowChange3));
|
||||||
|
|
||||||
// Insert new records
|
// Insert new records
|
||||||
HoodieClientTestUtils.collectStatuses(table.handleInsert(firstCommitTime, records.iterator()));
|
final HoodieCopyOnWriteTable cowTable = table;
|
||||||
|
jsc.parallelize(Arrays.asList(1)).map(x -> {
|
||||||
|
return cowTable.handleInsert(firstCommitTime, FSUtils.createNewFileIdPfx(), records.iterator());
|
||||||
|
}).map(x -> HoodieClientTestUtils.collectStatuses(x)).collect();
|
||||||
|
|
||||||
// We should have a parquet file generated (TODO: better control # files after we revise
|
// We should have a parquet file generated (TODO: better control # files after we revise
|
||||||
// AvroParquetIO)
|
// AvroParquetIO)
|
||||||
File parquetFile = null;
|
File parquetFile = null;
|
||||||
@@ -190,10 +198,12 @@ public class TestCopyOnWriteTable {
|
|||||||
Thread.sleep(1000);
|
Thread.sleep(1000);
|
||||||
String newCommitTime = HoodieTestUtils.makeNewCommitTime();
|
String newCommitTime = HoodieTestUtils.makeNewCommitTime();
|
||||||
metadata = new HoodieTableMetaClient(jsc.hadoopConfiguration(), basePath);
|
metadata = new HoodieTableMetaClient(jsc.hadoopConfiguration(), basePath);
|
||||||
table = new HoodieCopyOnWriteTable(config, jsc);
|
final HoodieCopyOnWriteTable newTable = new HoodieCopyOnWriteTable(config, jsc);
|
||||||
Iterator<List<WriteStatus>> iter = table
|
List<WriteStatus> statuses =
|
||||||
.handleUpdate(newCommitTime, updatedRecord1.getCurrentLocation().getFileId(),
|
jsc.parallelize(Arrays.asList(1)).map(x -> {
|
||||||
updatedRecords.iterator());
|
return newTable.handleUpdate(newCommitTime, updatedRecord1.getCurrentLocation().getFileId(),
|
||||||
|
updatedRecords.iterator());
|
||||||
|
}).flatMap(x -> HoodieClientTestUtils.collectStatuses(x).iterator()).collect();
|
||||||
|
|
||||||
// Check the updated file
|
// Check the updated file
|
||||||
File updatedParquetFile = null;
|
File updatedParquetFile = null;
|
||||||
@@ -231,7 +241,6 @@ public class TestCopyOnWriteTable {
|
|||||||
}
|
}
|
||||||
updatedReader.close();
|
updatedReader.close();
|
||||||
// Also check the numRecordsWritten
|
// Also check the numRecordsWritten
|
||||||
List<WriteStatus> statuses = HoodieClientTestUtils.collectStatuses(iter);
|
|
||||||
WriteStatus writeStatus = statuses.get(0);
|
WriteStatus writeStatus = statuses.get(0);
|
||||||
assertTrue("Should be only one file generated", statuses.size() == 1);
|
assertTrue("Should be only one file generated", statuses.size() == 1);
|
||||||
assertEquals(4, writeStatus.getStat().getNumWrites());//3 rewritten records + 1 new record
|
assertEquals(4, writeStatus.getStat().getNumWrites());//3 rewritten records + 1 new record
|
||||||
@@ -277,8 +286,10 @@ public class TestCopyOnWriteTable {
|
|||||||
records.add(new HoodieRecord(new HoodieKey(rowChange3.getRowKey(), rowChange3.getPartitionPath()), rowChange3));
|
records.add(new HoodieRecord(new HoodieKey(rowChange3.getRowKey(), rowChange3.getPartitionPath()), rowChange3));
|
||||||
|
|
||||||
// Insert new records
|
// Insert new records
|
||||||
List<WriteStatus> writeStatuses = HoodieClientTestUtils
|
List<WriteStatus> writeStatuses = jsc.parallelize(Arrays.asList(1)).map(x -> {
|
||||||
.collectStatuses(table.handleInsert(firstCommitTime, records.iterator()));
|
return table.handleInsert(firstCommitTime, FSUtils.createNewFileIdPfx(), records.iterator());
|
||||||
|
}).flatMap(x -> HoodieClientTestUtils.collectStatuses(x).iterator()).collect();
|
||||||
|
|
||||||
Map<String, String> allWriteStatusMergedMetadataMap = MetadataMergeWriteStatus
|
Map<String, String> allWriteStatusMergedMetadataMap = MetadataMergeWriteStatus
|
||||||
.mergeMetadataForWriteStatuses(writeStatuses);
|
.mergeMetadataForWriteStatuses(writeStatuses);
|
||||||
assertTrue(allWriteStatusMergedMetadataMap.containsKey("InputRecordCount_1506582000"));
|
assertTrue(allWriteStatusMergedMetadataMap.containsKey("InputRecordCount_1506582000"));
|
||||||
@@ -287,41 +298,6 @@ public class TestCopyOnWriteTable {
|
|||||||
assertEquals("6", allWriteStatusMergedMetadataMap.get("InputRecordCount_1506582000"));
|
assertEquals("6", allWriteStatusMergedMetadataMap.get("InputRecordCount_1506582000"));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testInsertWithPartialFailures() throws Exception {
|
|
||||||
HoodieWriteConfig config = makeHoodieClientConfig();
|
|
||||||
String commitTime = HoodieTestUtils.makeNewCommitTime();
|
|
||||||
FileSystem fs = FSUtils.getFs(basePath, jsc.hadoopConfiguration());
|
|
||||||
HoodieTableMetaClient metadata = new HoodieTableMetaClient(jsc.hadoopConfiguration(), basePath);
|
|
||||||
HoodieCopyOnWriteTable table = new HoodieCopyOnWriteTable(config, jsc);
|
|
||||||
|
|
||||||
// Write a few records, and get atleast one file
|
|
||||||
// 10 records for partition 1, 1 record for partition 2.
|
|
||||||
List<HoodieRecord> records = newHoodieRecords(10, "2016-01-31T03:16:41.415Z");
|
|
||||||
records.addAll(newHoodieRecords(1, "2016-02-01T03:16:41.415Z"));
|
|
||||||
|
|
||||||
// Simulate crash after first file
|
|
||||||
List<WriteStatus> statuses = HoodieClientTestUtils
|
|
||||||
.collectStatuses(table.handleInsert(commitTime, records.iterator()));
|
|
||||||
WriteStatus status = statuses.get(0);
|
|
||||||
Path partialFile = new Path(String.format("%s/%s/%s", basePath, status.getPartitionPath(),
|
|
||||||
FSUtils.makeDataFileName(commitTime, 0, status.getFileId())));
|
|
||||||
assertTrue(fs.exists(partialFile));
|
|
||||||
|
|
||||||
// When we retry
|
|
||||||
records = newHoodieRecords(10, "2016-01-31T03:16:41.415Z");
|
|
||||||
records.addAll(newHoodieRecords(1, "2016-02-01T03:16:41.415Z"));
|
|
||||||
|
|
||||||
statuses = HoodieClientTestUtils.collectStatuses(table.handleInsert(commitTime, records.iterator()));
|
|
||||||
status = statuses.get(0);
|
|
||||||
|
|
||||||
Path retriedFIle = new Path(String.format("%s/%s/%s", basePath, status.getPartitionPath(),
|
|
||||||
FSUtils.makeDataFileName(commitTime, 0, status.getFileId())));
|
|
||||||
assertTrue(fs.exists(retriedFIle));
|
|
||||||
assertFalse(fs.exists(partialFile));
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testInsertRecords() throws Exception {
|
public void testInsertRecords() throws Exception {
|
||||||
HoodieWriteConfig config = makeHoodieClientConfig();
|
HoodieWriteConfig config = makeHoodieClientConfig();
|
||||||
@@ -335,8 +311,10 @@ public class TestCopyOnWriteTable {
|
|||||||
records.addAll(newHoodieRecords(1, "2016-02-01T03:16:41.415Z"));
|
records.addAll(newHoodieRecords(1, "2016-02-01T03:16:41.415Z"));
|
||||||
|
|
||||||
// Insert new records
|
// Insert new records
|
||||||
List<WriteStatus> returnedStatuses = HoodieClientTestUtils
|
final List<HoodieRecord> recs2 = records;
|
||||||
.collectStatuses(table.handleInsert(commitTime, records.iterator()));
|
List<WriteStatus> returnedStatuses = jsc.parallelize(Arrays.asList(1)).map(x -> {
|
||||||
|
return table.handleInsert(commitTime, FSUtils.createNewFileIdPfx(), recs2.iterator());
|
||||||
|
}).flatMap(x -> HoodieClientTestUtils.collectStatuses(x).iterator()).collect();
|
||||||
|
|
||||||
// TODO: check the actual files and make sure 11 records, total were written.
|
// TODO: check the actual files and make sure 11 records, total were written.
|
||||||
assertEquals(2, returnedStatuses.size());
|
assertEquals(2, returnedStatuses.size());
|
||||||
@@ -354,7 +332,11 @@ public class TestCopyOnWriteTable {
|
|||||||
records.addAll(newHoodieRecords(1, "2016-02-02T03:16:41.415Z"));
|
records.addAll(newHoodieRecords(1, "2016-02-02T03:16:41.415Z"));
|
||||||
|
|
||||||
// Insert new records
|
// Insert new records
|
||||||
returnedStatuses = HoodieClientTestUtils.collectStatuses(table.handleInsert(commitTime, records.iterator()));
|
final List<HoodieRecord> recs3 = records;
|
||||||
|
|
||||||
|
returnedStatuses = jsc.parallelize(Arrays.asList(1)).map(x -> {
|
||||||
|
return table.handleInsert(commitTime, FSUtils.createNewFileIdPfx(), recs3.iterator());
|
||||||
|
}).flatMap(x -> HoodieClientTestUtils.collectStatuses(x).iterator()).collect();
|
||||||
|
|
||||||
assertEquals(3, returnedStatuses.size());
|
assertEquals(3, returnedStatuses.size());
|
||||||
assertEquals("2016/01/31", returnedStatuses.get(0).getPartitionPath());
|
assertEquals("2016/01/31", returnedStatuses.get(0).getPartitionPath());
|
||||||
@@ -388,7 +370,9 @@ public class TestCopyOnWriteTable {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Insert new records
|
// Insert new records
|
||||||
HoodieClientTestUtils.collectStatuses(table.handleInsert(commitTime, records.iterator()));
|
jsc.parallelize(Arrays.asList(1))
|
||||||
|
.map(i -> table.handleInsert(commitTime, FSUtils.createNewFileIdPfx(), records.iterator()))
|
||||||
|
.map(x -> HoodieClientTestUtils.collectStatuses(x)).collect();
|
||||||
|
|
||||||
// Check the updated file
|
// Check the updated file
|
||||||
int counts = 0;
|
int counts = 0;
|
||||||
@@ -487,19 +471,26 @@ public class TestCopyOnWriteTable {
|
|||||||
HoodieWriteConfig config = makeHoodieClientConfigBuilder().withStorageConfig(
|
HoodieWriteConfig config = makeHoodieClientConfigBuilder().withStorageConfig(
|
||||||
HoodieStorageConfig.newBuilder().limitFileSize(1000 * 1024).build()).build();
|
HoodieStorageConfig.newBuilder().limitFileSize(1000 * 1024).build()).build();
|
||||||
HoodieTableMetaClient metadata = new HoodieTableMetaClient(jsc.hadoopConfiguration(), basePath);
|
HoodieTableMetaClient metadata = new HoodieTableMetaClient(jsc.hadoopConfiguration(), basePath);
|
||||||
HoodieCopyOnWriteTable table = new HoodieCopyOnWriteTable(config, jsc);
|
final HoodieCopyOnWriteTable table = new HoodieCopyOnWriteTable(config, jsc);
|
||||||
String commitTime = "000";
|
String commitTime = "000";
|
||||||
HoodieTestDataGenerator dataGenerator = new HoodieTestDataGenerator();
|
HoodieTestDataGenerator dataGenerator = new HoodieTestDataGenerator();
|
||||||
// Perform inserts of 100 records to test CreateHandle and BufferedExecutor
|
// Perform inserts of 100 records to test CreateHandle and BufferedExecutor
|
||||||
List<HoodieRecord> inserts = dataGenerator.generateInsertsWithHoodieAvroPayload(commitTime, 100);
|
final List<HoodieRecord> inserts = dataGenerator.generateInsertsWithHoodieAvroPayload(commitTime, 100);
|
||||||
Iterator<List<WriteStatus>> ws = table.handleInsert(commitTime, inserts.iterator());
|
final List<List<WriteStatus>> ws = jsc.parallelize(Arrays.asList(1)).map(x -> {
|
||||||
WriteStatus writeStatus = ws.next().get(0);
|
return table.handleInsert(commitTime, UUID.randomUUID().toString(), inserts.iterator());
|
||||||
|
}).map(x -> (List<WriteStatus>)HoodieClientTestUtils.collectStatuses(x)).collect();
|
||||||
|
|
||||||
|
WriteStatus writeStatus = ws.get(0).get(0);
|
||||||
String fileId = writeStatus.getFileId();
|
String fileId = writeStatus.getFileId();
|
||||||
metadata.getFs().create(new Path(basePath + "/.hoodie/000.commit")).close();
|
metadata.getFs().create(new Path(basePath + "/.hoodie/000.commit")).close();
|
||||||
table = new HoodieCopyOnWriteTable(config, jsc);
|
final HoodieCopyOnWriteTable table2 = new HoodieCopyOnWriteTable(config, jsc);
|
||||||
// Perform update of 100 records to test MergeHandle and BufferedExecutor
|
|
||||||
table.handleUpdate("001", fileId,
|
final List<HoodieRecord> updates =
|
||||||
dataGenerator.generateUpdatesWithHoodieAvroPayload(commitTime, writeStatus.getWrittenRecords()).iterator());
|
dataGenerator.generateUpdatesWithHoodieAvroPayload(commitTime, writeStatus.getWrittenRecords());
|
||||||
|
|
||||||
|
jsc.parallelize(Arrays.asList(1)).map(x -> {
|
||||||
|
return table2.handleUpdate("001", fileId, updates.iterator());
|
||||||
|
}).map(x -> (List<WriteStatus>)HoodieClientTestUtils.collectStatuses(x)).collect();
|
||||||
}
|
}
|
||||||
|
|
||||||
@After
|
@After
|
||||||
|
|||||||
@@ -14,10 +14,12 @@
|
|||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
package com.uber.hoodie.io.storage;
|
package com.uber.hoodie.common.io.storage;
|
||||||
|
|
||||||
import com.uber.hoodie.common.storage.StorageSchemes;
|
import com.uber.hoodie.common.storage.StorageSchemes;
|
||||||
|
import com.uber.hoodie.common.util.ConsistencyGuard;
|
||||||
import com.uber.hoodie.common.util.FSUtils;
|
import com.uber.hoodie.common.util.FSUtils;
|
||||||
|
import com.uber.hoodie.common.util.NoOpConsistencyGuard;
|
||||||
import com.uber.hoodie.exception.HoodieIOException;
|
import com.uber.hoodie.exception.HoodieIOException;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.net.URI;
|
import java.net.URI;
|
||||||
@@ -27,6 +29,7 @@ import java.util.List;
|
|||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.concurrent.ConcurrentHashMap;
|
import java.util.concurrent.ConcurrentHashMap;
|
||||||
import java.util.concurrent.ConcurrentMap;
|
import java.util.concurrent.ConcurrentMap;
|
||||||
|
import java.util.concurrent.TimeoutException;
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.fs.BlockLocation;
|
import org.apache.hadoop.fs.BlockLocation;
|
||||||
import org.apache.hadoop.fs.ContentSummary;
|
import org.apache.hadoop.fs.ContentSummary;
|
||||||
@@ -64,6 +67,16 @@ public class HoodieWrapperFileSystem extends FileSystem {
|
|||||||
ConcurrentHashMap<>();
|
ConcurrentHashMap<>();
|
||||||
private FileSystem fileSystem;
|
private FileSystem fileSystem;
|
||||||
private URI uri;
|
private URI uri;
|
||||||
|
private ConsistencyGuard consistencyGuard = new NoOpConsistencyGuard();
|
||||||
|
|
||||||
|
public HoodieWrapperFileSystem() {
|
||||||
|
}
|
||||||
|
|
||||||
|
public HoodieWrapperFileSystem(FileSystem fileSystem, ConsistencyGuard consistencyGuard) {
|
||||||
|
this.fileSystem = fileSystem;
|
||||||
|
this.uri = fileSystem.getUri();
|
||||||
|
this.consistencyGuard = consistencyGuard;
|
||||||
|
}
|
||||||
|
|
||||||
public static Path convertToHoodiePath(Path file, Configuration conf) {
|
public static Path convertToHoodiePath(Path file, Configuration conf) {
|
||||||
try {
|
try {
|
||||||
@@ -139,8 +152,8 @@ public class HoodieWrapperFileSystem extends FileSystem {
|
|||||||
return fsDataOutputStream;
|
return fsDataOutputStream;
|
||||||
}
|
}
|
||||||
|
|
||||||
SizeAwareFSDataOutputStream os = new SizeAwareFSDataOutputStream(
|
SizeAwareFSDataOutputStream os = new SizeAwareFSDataOutputStream(path,
|
||||||
fsDataOutputStream, () -> openStreams.remove(path.getName()));
|
fsDataOutputStream, consistencyGuard, () -> openStreams.remove(path.getName()));
|
||||||
openStreams.put(path.getName(), os);
|
openStreams.put(path.getName(), os);
|
||||||
return os;
|
return os;
|
||||||
}
|
}
|
||||||
@@ -157,66 +170,66 @@ public class HoodieWrapperFileSystem extends FileSystem {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public FSDataOutputStream create(Path f, Progressable progress) throws IOException {
|
public FSDataOutputStream create(Path f, Progressable progress) throws IOException {
|
||||||
return fileSystem.create(convertToDefaultPath(f), progress);
|
return wrapOutputStream(f, fileSystem.create(convertToDefaultPath(f), progress));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public FSDataOutputStream create(Path f, short replication) throws IOException {
|
public FSDataOutputStream create(Path f, short replication) throws IOException {
|
||||||
return fileSystem.create(convertToDefaultPath(f), replication);
|
return wrapOutputStream(f, fileSystem.create(convertToDefaultPath(f), replication));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public FSDataOutputStream create(Path f, short replication, Progressable progress)
|
public FSDataOutputStream create(Path f, short replication, Progressable progress)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
return fileSystem.create(convertToDefaultPath(f), replication, progress);
|
return wrapOutputStream(f, fileSystem.create(convertToDefaultPath(f), replication, progress));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public FSDataOutputStream create(Path f, boolean overwrite, int bufferSize) throws IOException {
|
public FSDataOutputStream create(Path f, boolean overwrite, int bufferSize) throws IOException {
|
||||||
return fileSystem.create(convertToDefaultPath(f), overwrite, bufferSize);
|
return wrapOutputStream(f, fileSystem.create(convertToDefaultPath(f), overwrite, bufferSize));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public FSDataOutputStream create(Path f, boolean overwrite, int bufferSize, Progressable progress)
|
public FSDataOutputStream create(Path f, boolean overwrite, int bufferSize, Progressable progress)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
return fileSystem.create(convertToDefaultPath(f), overwrite, bufferSize, progress);
|
return wrapOutputStream(f, fileSystem.create(convertToDefaultPath(f), overwrite, bufferSize, progress));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public FSDataOutputStream create(Path f, boolean overwrite, int bufferSize, short replication,
|
public FSDataOutputStream create(Path f, boolean overwrite, int bufferSize, short replication,
|
||||||
long blockSize, Progressable progress) throws IOException {
|
long blockSize, Progressable progress) throws IOException {
|
||||||
return fileSystem
|
return wrapOutputStream(f, fileSystem
|
||||||
.create(convertToDefaultPath(f), overwrite, bufferSize, replication, blockSize, progress);
|
.create(convertToDefaultPath(f), overwrite, bufferSize, replication, blockSize, progress));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public FSDataOutputStream create(Path f, FsPermission permission, EnumSet<CreateFlag> flags,
|
public FSDataOutputStream create(Path f, FsPermission permission, EnumSet<CreateFlag> flags,
|
||||||
int bufferSize, short replication, long blockSize, Progressable progress) throws IOException {
|
int bufferSize, short replication, long blockSize, Progressable progress) throws IOException {
|
||||||
return fileSystem
|
return wrapOutputStream(f, fileSystem
|
||||||
.create(convertToDefaultPath(f), permission, flags, bufferSize, replication, blockSize,
|
.create(convertToDefaultPath(f), permission, flags, bufferSize, replication, blockSize,
|
||||||
progress);
|
progress));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public FSDataOutputStream create(Path f, FsPermission permission, EnumSet<CreateFlag> flags,
|
public FSDataOutputStream create(Path f, FsPermission permission, EnumSet<CreateFlag> flags,
|
||||||
int bufferSize, short replication, long blockSize, Progressable progress,
|
int bufferSize, short replication, long blockSize, Progressable progress,
|
||||||
Options.ChecksumOpt checksumOpt) throws IOException {
|
Options.ChecksumOpt checksumOpt) throws IOException {
|
||||||
return fileSystem
|
return wrapOutputStream(f, fileSystem
|
||||||
.create(convertToDefaultPath(f), permission, flags, bufferSize, replication, blockSize,
|
.create(convertToDefaultPath(f), permission, flags, bufferSize, replication, blockSize,
|
||||||
progress, checksumOpt);
|
progress, checksumOpt));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public FSDataOutputStream create(Path f, boolean overwrite, int bufferSize, short replication,
|
public FSDataOutputStream create(Path f, boolean overwrite, int bufferSize, short replication,
|
||||||
long blockSize) throws IOException {
|
long blockSize) throws IOException {
|
||||||
return fileSystem
|
return wrapOutputStream(f, fileSystem
|
||||||
.create(convertToDefaultPath(f), overwrite, bufferSize, replication, blockSize);
|
.create(convertToDefaultPath(f), overwrite, bufferSize, replication, blockSize));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public FSDataOutputStream append(Path f, int bufferSize, Progressable progress)
|
public FSDataOutputStream append(Path f, int bufferSize, Progressable progress)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
return fileSystem.append(convertToDefaultPath(f), bufferSize, progress);
|
return wrapOutputStream(f, fileSystem.append(convertToDefaultPath(f), bufferSize, progress));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@@ -226,7 +239,16 @@ public class HoodieWrapperFileSystem extends FileSystem {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean delete(Path f, boolean recursive) throws IOException {
|
public boolean delete(Path f, boolean recursive) throws IOException {
|
||||||
return fileSystem.delete(convertToDefaultPath(f), recursive);
|
boolean success = fileSystem.delete(convertToDefaultPath(f), recursive);
|
||||||
|
|
||||||
|
if (success) {
|
||||||
|
try {
|
||||||
|
consistencyGuard.waitTillFileDisappears(f);
|
||||||
|
} catch (TimeoutException e) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return success;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@@ -251,6 +273,11 @@ public class HoodieWrapperFileSystem extends FileSystem {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public FileStatus getFileStatus(Path f) throws IOException {
|
public FileStatus getFileStatus(Path f) throws IOException {
|
||||||
|
try {
|
||||||
|
consistencyGuard.waitTillFileAppears(convertToDefaultPath(f));
|
||||||
|
} catch (TimeoutException e) {
|
||||||
|
// pass
|
||||||
|
}
|
||||||
return fileSystem.getFileStatus(convertToDefaultPath(f));
|
return fileSystem.getFileStatus(convertToDefaultPath(f));
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -353,12 +380,12 @@ public class HoodieWrapperFileSystem extends FileSystem {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public FSDataOutputStream append(Path f) throws IOException {
|
public FSDataOutputStream append(Path f) throws IOException {
|
||||||
return fileSystem.append(convertToDefaultPath(f));
|
return wrapOutputStream(f, fileSystem.append(convertToDefaultPath(f)));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public FSDataOutputStream append(Path f, int bufferSize) throws IOException {
|
public FSDataOutputStream append(Path f, int bufferSize) throws IOException {
|
||||||
return fileSystem.append(convertToDefaultPath(f), bufferSize);
|
return wrapOutputStream(f, fileSystem.append(convertToDefaultPath(f), bufferSize));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@@ -14,11 +14,15 @@
|
|||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
package com.uber.hoodie.io.storage;
|
package com.uber.hoodie.common.io.storage;
|
||||||
|
|
||||||
|
import com.uber.hoodie.common.util.ConsistencyGuard;
|
||||||
|
import com.uber.hoodie.exception.HoodieException;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.util.concurrent.TimeoutException;
|
||||||
import java.util.concurrent.atomic.AtomicLong;
|
import java.util.concurrent.atomic.AtomicLong;
|
||||||
import org.apache.hadoop.fs.FSDataOutputStream;
|
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||||
|
import org.apache.hadoop.fs.Path;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Wrapper over <code>FSDataOutputStream</code> to keep track of the size of the written bytes. This
|
* Wrapper over <code>FSDataOutputStream</code> to keep track of the size of the written bytes. This
|
||||||
@@ -30,11 +34,17 @@ public class SizeAwareFSDataOutputStream extends FSDataOutputStream {
|
|||||||
private final Runnable closeCallback;
|
private final Runnable closeCallback;
|
||||||
// Keep track of the bytes written
|
// Keep track of the bytes written
|
||||||
private final AtomicLong bytesWritten = new AtomicLong(0L);
|
private final AtomicLong bytesWritten = new AtomicLong(0L);
|
||||||
|
// Path
|
||||||
|
private final Path path;
|
||||||
|
// Consistency guard
|
||||||
|
private final ConsistencyGuard consistencyGuard;
|
||||||
|
|
||||||
public SizeAwareFSDataOutputStream(FSDataOutputStream out, Runnable closeCallback)
|
public SizeAwareFSDataOutputStream(Path path, FSDataOutputStream out,
|
||||||
throws IOException {
|
ConsistencyGuard consistencyGuard, Runnable closeCallback) throws IOException {
|
||||||
super(out);
|
super(out);
|
||||||
|
this.path = path;
|
||||||
this.closeCallback = closeCallback;
|
this.closeCallback = closeCallback;
|
||||||
|
this.consistencyGuard = consistencyGuard;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@@ -52,6 +62,11 @@ public class SizeAwareFSDataOutputStream extends FSDataOutputStream {
|
|||||||
@Override
|
@Override
|
||||||
public void close() throws IOException {
|
public void close() throws IOException {
|
||||||
super.close();
|
super.close();
|
||||||
|
try {
|
||||||
|
consistencyGuard.waitTillFileAppears(path);
|
||||||
|
} catch (TimeoutException e) {
|
||||||
|
throw new HoodieException(e);
|
||||||
|
}
|
||||||
closeCallback.run();
|
closeCallback.run();
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -59,7 +59,7 @@ public class FileSlice implements Serializable {
|
|||||||
this.fileGroupId = fileGroupId;
|
this.fileGroupId = fileGroupId;
|
||||||
this.baseInstantTime = baseInstantTime;
|
this.baseInstantTime = baseInstantTime;
|
||||||
this.dataFile = null;
|
this.dataFile = null;
|
||||||
this.logFiles = new TreeSet<>(HoodieLogFile.getBaseInstantAndLogVersionComparator());
|
this.logFiles = new TreeSet<>(HoodieLogFile.getReverseLogFileComparator());
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setDataFile(HoodieDataFile dataFile) {
|
public void setDataFile(HoodieDataFile dataFile) {
|
||||||
@@ -94,6 +94,10 @@ public class FileSlice implements Serializable {
|
|||||||
return Optional.ofNullable(dataFile);
|
return Optional.ofNullable(dataFile);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public Optional<HoodieLogFile> getLatestLogFile() {
|
||||||
|
return logFiles.stream().findFirst();
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns true if there is no data file and no log files. Happens as part of pending compaction
|
* Returns true if there is no data file and no log files. Happens as part of pending compaction
|
||||||
* @return
|
* @return
|
||||||
|
|||||||
@@ -72,6 +72,10 @@ public class HoodieLogFile implements Serializable {
|
|||||||
return FSUtils.getFileVersionFromLog(getPath());
|
return FSUtils.getFileVersionFromLog(getPath());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public String getLogWriteToken() {
|
||||||
|
return FSUtils.getWriteTokenFromLogPath(getPath());
|
||||||
|
}
|
||||||
|
|
||||||
public String getFileExtension() {
|
public String getFileExtension() {
|
||||||
return FSUtils.getFileExtensionFromLog(getPath());
|
return FSUtils.getFileExtensionFromLog(getPath());
|
||||||
}
|
}
|
||||||
@@ -96,7 +100,11 @@ public class HoodieLogFile implements Serializable {
|
|||||||
return fileStatus;
|
return fileStatus;
|
||||||
}
|
}
|
||||||
|
|
||||||
public HoodieLogFile rollOver(FileSystem fs) throws IOException {
|
public void setFileStatus(FileStatus fileStatus) {
|
||||||
|
this.fileStatus = fileStatus;
|
||||||
|
}
|
||||||
|
|
||||||
|
public HoodieLogFile rollOver(FileSystem fs, String logWriteToken) throws IOException {
|
||||||
String fileId = getFileId();
|
String fileId = getFileId();
|
||||||
String baseCommitTime = getBaseCommitTime();
|
String baseCommitTime = getBaseCommitTime();
|
||||||
Path path = getPath();
|
Path path = getPath();
|
||||||
@@ -105,28 +113,50 @@ public class HoodieLogFile implements Serializable {
|
|||||||
.computeNextLogVersion(fs, path.getParent(), fileId,
|
.computeNextLogVersion(fs, path.getParent(), fileId,
|
||||||
extension, baseCommitTime);
|
extension, baseCommitTime);
|
||||||
return new HoodieLogFile(new Path(path.getParent(),
|
return new HoodieLogFile(new Path(path.getParent(),
|
||||||
FSUtils.makeLogFileName(fileId, extension, baseCommitTime, newVersion)));
|
FSUtils.makeLogFileName(fileId, extension, baseCommitTime, newVersion, logWriteToken)));
|
||||||
}
|
}
|
||||||
|
|
||||||
public static Comparator<HoodieLogFile> getBaseInstantAndLogVersionComparator() {
|
public static Comparator<HoodieLogFile> getLogFileComparator() {
|
||||||
return new BaseInstantAndLogVersionComparator();
|
return new LogFileComparator();
|
||||||
|
}
|
||||||
|
|
||||||
|
public static Comparator<HoodieLogFile> getReverseLogFileComparator() {
|
||||||
|
return new LogFileComparator().reversed();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Comparator to order log-files
|
* Comparator to order log-files
|
||||||
*/
|
*/
|
||||||
private static class BaseInstantAndLogVersionComparator implements Comparator<HoodieLogFile>, Serializable {
|
public static class LogFileComparator implements Comparator<HoodieLogFile>, Serializable {
|
||||||
|
|
||||||
|
private transient Comparator<HoodieLogFile> writeTokenComparator;
|
||||||
|
|
||||||
|
private Comparator<HoodieLogFile> getWriteTokenComparator() {
|
||||||
|
if (null == writeTokenComparator) {
|
||||||
|
// writeTokenComparator is not serializable. Hence, lazy loading
|
||||||
|
writeTokenComparator = Comparator.nullsFirst(Comparator.comparing(HoodieLogFile::getLogWriteToken));
|
||||||
|
}
|
||||||
|
return writeTokenComparator;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int compare(HoodieLogFile o1, HoodieLogFile o2) {
|
public int compare(HoodieLogFile o1, HoodieLogFile o2) {
|
||||||
String baseInstantTime1 = o1.getBaseCommitTime();
|
String baseInstantTime1 = o1.getBaseCommitTime();
|
||||||
String baseInstantTime2 = o2.getBaseCommitTime();
|
String baseInstantTime2 = o2.getBaseCommitTime();
|
||||||
|
|
||||||
if (baseInstantTime1.equals(baseInstantTime2)) {
|
if (baseInstantTime1.equals(baseInstantTime2)) {
|
||||||
// reverse the order by log-version when base-commit is same
|
|
||||||
return new Integer(o2.getLogVersion()).compareTo(o1.getLogVersion());
|
if (o1.getLogVersion() == o2.getLogVersion()) {
|
||||||
|
// Compare by write token when base-commit and log-version is same
|
||||||
|
return getWriteTokenComparator().compare(o1, o2);
|
||||||
|
}
|
||||||
|
|
||||||
|
// compare by log-version when base-commit is same
|
||||||
|
return Integer.compare(o1.getLogVersion(), o2.getLogVersion());
|
||||||
}
|
}
|
||||||
// reverse the order by base-commits
|
|
||||||
return baseInstantTime2.compareTo(baseInstantTime1);
|
// compare by base-commits
|
||||||
|
return baseInstantTime1.compareTo(baseInstantTime2);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -310,11 +310,8 @@ public class HoodieWriteStat implements Serializable {
|
|||||||
/**
|
/**
|
||||||
* Set path and tempPath relative to the given basePath.
|
* Set path and tempPath relative to the given basePath.
|
||||||
*/
|
*/
|
||||||
public void setPaths(Path basePath, Path path, Path tempPath) {
|
public void setPath(Path basePath, Path path) {
|
||||||
this.path = path.toString().replace(basePath + "/", "");
|
this.path = path.toString().replace(basePath + "/", "");
|
||||||
if (tempPath != null) {
|
|
||||||
this.tempPath = tempPath.toString().replace(basePath + "/", "");
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|||||||
@@ -61,6 +61,7 @@ public class HoodieTableMetaClient implements Serializable {
|
|||||||
public static String METAFOLDER_NAME = ".hoodie";
|
public static String METAFOLDER_NAME = ".hoodie";
|
||||||
public static String TEMPFOLDER_NAME = METAFOLDER_NAME + File.separator + ".temp";
|
public static String TEMPFOLDER_NAME = METAFOLDER_NAME + File.separator + ".temp";
|
||||||
public static String AUXILIARYFOLDER_NAME = METAFOLDER_NAME + File.separator + ".aux";
|
public static String AUXILIARYFOLDER_NAME = METAFOLDER_NAME + File.separator + ".aux";
|
||||||
|
public static final String MARKER_EXTN = ".marker";
|
||||||
|
|
||||||
private String basePath;
|
private String basePath;
|
||||||
private transient FileSystem fs;
|
private transient FileSystem fs;
|
||||||
@@ -142,6 +143,22 @@ public class HoodieTableMetaClient implements Serializable {
|
|||||||
return metaPath;
|
return metaPath;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return Temp Folder path
|
||||||
|
*/
|
||||||
|
public String getTempFolderPath() {
|
||||||
|
return basePath + File.separator + TEMPFOLDER_NAME;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns Marker folder path
|
||||||
|
* @param instantTs Instant Timestamp
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
public String getMarkerFolderPath(String instantTs) {
|
||||||
|
return String.format("%s%s%s", getTempFolderPath(), File.separator, instantTs);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @return Auxiliary Meta path
|
* @return Auxiliary Meta path
|
||||||
*/
|
*/
|
||||||
|
|||||||
@@ -19,9 +19,11 @@ package com.uber.hoodie.common.table.log;
|
|||||||
import com.uber.hoodie.common.model.HoodieLogFile;
|
import com.uber.hoodie.common.model.HoodieLogFile;
|
||||||
import com.uber.hoodie.common.table.log.block.HoodieLogBlock;
|
import com.uber.hoodie.common.table.log.block.HoodieLogBlock;
|
||||||
import com.uber.hoodie.common.util.FSUtils;
|
import com.uber.hoodie.common.util.FSUtils;
|
||||||
|
import com.uber.hoodie.common.util.collection.Pair;
|
||||||
import java.io.Closeable;
|
import java.io.Closeable;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
|
import java.util.Optional;
|
||||||
import org.apache.avro.Schema;
|
import org.apache.avro.Schema;
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
import org.apache.hadoop.fs.Path;
|
import org.apache.hadoop.fs.Path;
|
||||||
@@ -48,6 +50,8 @@ public interface HoodieLogFormat {
|
|||||||
*/
|
*/
|
||||||
int currentVersion = 1;
|
int currentVersion = 1;
|
||||||
|
|
||||||
|
String UNKNOWN_WRITE_TOKEN = "1-0-1";
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Writer interface to allow appending block to this file format
|
* Writer interface to allow appending block to this file format
|
||||||
*/
|
*/
|
||||||
@@ -106,6 +110,10 @@ public interface HoodieLogFormat {
|
|||||||
private Integer logVersion;
|
private Integer logVersion;
|
||||||
// Location of the directory containing the log
|
// Location of the directory containing the log
|
||||||
private Path parentPath;
|
private Path parentPath;
|
||||||
|
// Log File Write Token
|
||||||
|
private String logWriteToken;
|
||||||
|
// Rollover Log file write token
|
||||||
|
private String rolloverLogWriteToken;
|
||||||
|
|
||||||
public WriterBuilder withBufferSize(int bufferSize) {
|
public WriterBuilder withBufferSize(int bufferSize) {
|
||||||
this.bufferSize = bufferSize;
|
this.bufferSize = bufferSize;
|
||||||
@@ -117,6 +125,16 @@ public interface HoodieLogFormat {
|
|||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public WriterBuilder withLogWriteToken(String writeToken) {
|
||||||
|
this.logWriteToken = writeToken;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public WriterBuilder withRolloverLogWriteToken(String rolloverLogWriteToken) {
|
||||||
|
this.rolloverLogWriteToken = rolloverLogWriteToken;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
public WriterBuilder withFs(FileSystem fs) {
|
public WriterBuilder withFs(FileSystem fs) {
|
||||||
this.fs = fs;
|
this.fs = fs;
|
||||||
return this;
|
return this;
|
||||||
@@ -169,17 +187,37 @@ public interface HoodieLogFormat {
|
|||||||
if (parentPath == null) {
|
if (parentPath == null) {
|
||||||
throw new IllegalArgumentException("Log file parent location is not specified");
|
throw new IllegalArgumentException("Log file parent location is not specified");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (rolloverLogWriteToken == null) {
|
||||||
|
rolloverLogWriteToken = UNKNOWN_WRITE_TOKEN;
|
||||||
|
}
|
||||||
|
|
||||||
if (logVersion == null) {
|
if (logVersion == null) {
|
||||||
log.info("Computing the next log version for " + logFileId + " in " + parentPath);
|
log.info("Computing the next log version for " + logFileId + " in " + parentPath);
|
||||||
logVersion =
|
Optional<Pair<Integer, String>> versionAndWriteToken =
|
||||||
FSUtils.getCurrentLogVersion(fs, parentPath, logFileId, fileExtension, commitTime);
|
FSUtils.getLatestLogVersion(fs, parentPath, logFileId, fileExtension, commitTime);
|
||||||
|
if (versionAndWriteToken.isPresent()) {
|
||||||
|
logVersion = versionAndWriteToken.get().getKey();
|
||||||
|
logWriteToken = versionAndWriteToken.get().getValue();
|
||||||
|
} else {
|
||||||
|
logVersion = HoodieLogFile.LOGFILE_BASE_VERSION;
|
||||||
|
// this is the case where there is no existing log-file.
|
||||||
|
// Use rollover write token as write token to create new log file with tokens
|
||||||
|
logWriteToken = rolloverLogWriteToken;
|
||||||
|
}
|
||||||
log.info(
|
log.info(
|
||||||
"Computed the next log version for " + logFileId + " in " + parentPath + " as "
|
"Computed the next log version for " + logFileId + " in " + parentPath + " as "
|
||||||
+ logVersion);
|
+ logVersion + " with write-token " + logWriteToken);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (logWriteToken == null) {
|
||||||
|
// This is the case where we have existing log-file with old format. rollover to avoid any conflicts
|
||||||
|
logVersion += 1;
|
||||||
|
logWriteToken = rolloverLogWriteToken;
|
||||||
}
|
}
|
||||||
|
|
||||||
Path logPath = new Path(parentPath,
|
Path logPath = new Path(parentPath,
|
||||||
FSUtils.makeLogFileName(logFileId, fileExtension, commitTime, logVersion));
|
FSUtils.makeLogFileName(logFileId, fileExtension, commitTime, logVersion, logWriteToken));
|
||||||
log.info("HoodieLogFile on path " + logPath);
|
log.info("HoodieLogFile on path " + logPath);
|
||||||
HoodieLogFile logFile = new HoodieLogFile(logPath);
|
HoodieLogFile logFile = new HoodieLogFile(logPath);
|
||||||
|
|
||||||
@@ -192,9 +230,9 @@ public interface HoodieLogFormat {
|
|||||||
if (sizeThreshold == null) {
|
if (sizeThreshold == null) {
|
||||||
sizeThreshold = DEFAULT_SIZE_THRESHOLD;
|
sizeThreshold = DEFAULT_SIZE_THRESHOLD;
|
||||||
}
|
}
|
||||||
return new HoodieLogFormatWriter(fs, logFile, bufferSize, replication, sizeThreshold);
|
return new HoodieLogFormatWriter(fs, logFile, bufferSize, replication, sizeThreshold, logWriteToken,
|
||||||
|
rolloverLogWriteToken);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static WriterBuilder newWriterBuilder() {
|
static WriterBuilder newWriterBuilder() {
|
||||||
|
|||||||
@@ -48,6 +48,8 @@ public class HoodieLogFormatWriter implements HoodieLogFormat.Writer {
|
|||||||
private final long sizeThreshold;
|
private final long sizeThreshold;
|
||||||
private final Integer bufferSize;
|
private final Integer bufferSize;
|
||||||
private final Short replication;
|
private final Short replication;
|
||||||
|
private final String logWriteToken;
|
||||||
|
private final String rolloverLogWriteToken;
|
||||||
private FSDataOutputStream output;
|
private FSDataOutputStream output;
|
||||||
private static final String APPEND_UNAVAILABLE_EXCEPTION_MESSAGE = "not sufficiently replicated yet";
|
private static final String APPEND_UNAVAILABLE_EXCEPTION_MESSAGE = "not sufficiently replicated yet";
|
||||||
|
|
||||||
@@ -59,14 +61,15 @@ public class HoodieLogFormatWriter implements HoodieLogFormat.Writer {
|
|||||||
* @param sizeThreshold
|
* @param sizeThreshold
|
||||||
*/
|
*/
|
||||||
HoodieLogFormatWriter(FileSystem fs, HoodieLogFile logFile, Integer bufferSize,
|
HoodieLogFormatWriter(FileSystem fs, HoodieLogFile logFile, Integer bufferSize,
|
||||||
Short replication, Long sizeThreshold)
|
Short replication, Long sizeThreshold, String logWriteToken, String rolloverLogWriteToken)
|
||||||
throws IOException, InterruptedException {
|
throws IOException, InterruptedException {
|
||||||
this.fs = fs;
|
this.fs = fs;
|
||||||
this.logFile = logFile;
|
this.logFile = logFile;
|
||||||
this.sizeThreshold = sizeThreshold;
|
this.sizeThreshold = sizeThreshold;
|
||||||
this.bufferSize = bufferSize;
|
this.bufferSize = bufferSize;
|
||||||
this.replication = replication;
|
this.replication = replication;
|
||||||
|
this.logWriteToken = logWriteToken;
|
||||||
|
this.rolloverLogWriteToken = rolloverLogWriteToken;
|
||||||
Path path = logFile.getPath();
|
Path path = logFile.getPath();
|
||||||
if (fs.exists(path)) {
|
if (fs.exists(path)) {
|
||||||
boolean isAppendSupported = StorageSchemes.isAppendSupported(fs.getScheme());
|
boolean isAppendSupported = StorageSchemes.isAppendSupported(fs.getScheme());
|
||||||
@@ -87,7 +90,7 @@ public class HoodieLogFormatWriter implements HoodieLogFormat.Writer {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (!isAppendSupported) {
|
if (!isAppendSupported) {
|
||||||
this.logFile = logFile.rollOver(fs);
|
this.logFile = logFile.rollOver(fs, rolloverLogWriteToken);
|
||||||
log.info("Append not supported.. Rolling over to " + logFile);
|
log.info("Append not supported.. Rolling over to " + logFile);
|
||||||
createNewFile();
|
createNewFile();
|
||||||
}
|
}
|
||||||
@@ -180,10 +183,11 @@ public class HoodieLogFormatWriter implements HoodieLogFormat.Writer {
|
|||||||
// file).
|
// file).
|
||||||
log.info("CurrentSize " + getCurrentSize() + " has reached threshold " + sizeThreshold
|
log.info("CurrentSize " + getCurrentSize() + " has reached threshold " + sizeThreshold
|
||||||
+ ". Rolling over to the next version");
|
+ ". Rolling over to the next version");
|
||||||
HoodieLogFile newLogFile = logFile.rollOver(fs);
|
HoodieLogFile newLogFile = logFile.rollOver(fs, rolloverLogWriteToken);
|
||||||
// close this writer and return the new writer
|
// close this writer and return the new writer
|
||||||
close();
|
close();
|
||||||
return new HoodieLogFormatWriter(fs, newLogFile, bufferSize, replication, sizeThreshold);
|
return new HoodieLogFormatWriter(fs, newLogFile, bufferSize, replication, sizeThreshold, logWriteToken,
|
||||||
|
rolloverLogWriteToken);
|
||||||
}
|
}
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
@@ -231,10 +235,15 @@ public class HoodieLogFormatWriter implements HoodieLogFormat.Writer {
|
|||||||
// last block. Find more information here : https://issues.apache.org/jira/browse/HDFS-6325
|
// last block. Find more information here : https://issues.apache.org/jira/browse/HDFS-6325
|
||||||
log.warn("Failed to open an append stream to the log file. Opening a new log file..", e);
|
log.warn("Failed to open an append stream to the log file. Opening a new log file..", e);
|
||||||
// Rollover the current log file (since cannot get a stream handle) and create new one
|
// Rollover the current log file (since cannot get a stream handle) and create new one
|
||||||
this.logFile = logFile.rollOver(fs);
|
this.logFile = logFile.rollOver(fs, rolloverLogWriteToken);
|
||||||
createNewFile();
|
createNewFile();
|
||||||
} else if ((e.getClassName().contentEquals(AlreadyBeingCreatedException.class.getName()) || e.getClassName()
|
} else if (e.getClassName().contentEquals(AlreadyBeingCreatedException.class.getName())) {
|
||||||
.contentEquals(RecoveryInProgressException.class.getName())) && (fs instanceof DistributedFileSystem)) {
|
log.warn("Another task executor writing to the same log file(" + logFile + ". Rolling over");
|
||||||
|
// Rollover the current log file (since cannot get a stream handle) and create new one
|
||||||
|
this.logFile = logFile.rollOver(fs, rolloverLogWriteToken);
|
||||||
|
createNewFile();
|
||||||
|
} else if (e.getClassName().contentEquals(RecoveryInProgressException.class.getName())
|
||||||
|
&& (fs instanceof DistributedFileSystem)) {
|
||||||
// this happens when either another task executor writing to this file died or
|
// this happens when either another task executor writing to this file died or
|
||||||
// data node is going down. Note that we can only try to recover lease for a DistributedFileSystem.
|
// data node is going down. Note that we can only try to recover lease for a DistributedFileSystem.
|
||||||
// ViewFileSystem unfortunately does not support this operation
|
// ViewFileSystem unfortunately does not support this operation
|
||||||
|
|||||||
@@ -0,0 +1,89 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2019 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
package com.uber.hoodie.common.util;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.concurrent.TimeoutException;
|
||||||
|
import org.apache.hadoop.fs.Path;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Ensures file create/delete operation is visible
|
||||||
|
*/
|
||||||
|
public interface ConsistencyGuard {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* File Visibility
|
||||||
|
*/
|
||||||
|
enum FileVisibility {
|
||||||
|
APPEAR,
|
||||||
|
DISAPPEAR,
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Wait for file to be listable based on configurable timeout
|
||||||
|
* @param filePath
|
||||||
|
* @throws IOException when having trouble listing the path
|
||||||
|
* @throws TimeoutException when retries exhausted
|
||||||
|
*/
|
||||||
|
void waitTillFileAppears(Path filePath) throws IOException, TimeoutException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Wait for file to disappear from the listing, based on configurable timeout
|
||||||
|
* @param filePath
|
||||||
|
* @throws IOException when having trouble listing the path
|
||||||
|
* @throws TimeoutException when retries exhausted
|
||||||
|
*/
|
||||||
|
void waitTillFileDisappears(Path filePath) throws IOException, TimeoutException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Wait till all passed files belonging to a directory show up in the listing
|
||||||
|
*/
|
||||||
|
void waitTillAllFilesAppear(String dirPath, List<String> files) throws IOException, TimeoutException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Wait till all passed files belonging to a directory disappear from the listing
|
||||||
|
*/
|
||||||
|
void waitTillAllFilesDisappear(String dirPath, List<String> files) throws IOException, TimeoutException;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Wait Till target visibility is reached
|
||||||
|
* @param dirPath Directory Path
|
||||||
|
* @param files Files
|
||||||
|
* @param targetVisibility Target Visibility
|
||||||
|
* @throws IOException
|
||||||
|
* @throws TimeoutException
|
||||||
|
*/
|
||||||
|
default void waitTill(String dirPath, List<String> files, FileVisibility targetVisibility)
|
||||||
|
throws IOException, TimeoutException {
|
||||||
|
switch (targetVisibility) {
|
||||||
|
case APPEAR: {
|
||||||
|
waitTillAllFilesAppear(dirPath, files);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case DISAPPEAR: {
|
||||||
|
waitTillAllFilesDisappear(dirPath, files);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
throw new IllegalStateException("Unknown File Visibility");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -16,21 +16,28 @@
|
|||||||
|
|
||||||
package com.uber.hoodie.common.util;
|
package com.uber.hoodie.common.util;
|
||||||
|
|
||||||
|
import static com.uber.hoodie.common.table.HoodieTableMetaClient.MARKER_EXTN;
|
||||||
|
|
||||||
import com.google.common.base.Preconditions;
|
import com.google.common.base.Preconditions;
|
||||||
|
import com.uber.hoodie.common.model.HoodieFileFormat;
|
||||||
import com.uber.hoodie.common.model.HoodieLogFile;
|
import com.uber.hoodie.common.model.HoodieLogFile;
|
||||||
import com.uber.hoodie.common.model.HoodiePartitionMetadata;
|
import com.uber.hoodie.common.model.HoodiePartitionMetadata;
|
||||||
import com.uber.hoodie.common.table.HoodieTableMetaClient;
|
import com.uber.hoodie.common.table.HoodieTableMetaClient;
|
||||||
import com.uber.hoodie.common.table.timeline.HoodieInstant;
|
import com.uber.hoodie.common.table.timeline.HoodieInstant;
|
||||||
|
import com.uber.hoodie.common.util.collection.Pair;
|
||||||
|
import com.uber.hoodie.exception.HoodieException;
|
||||||
import com.uber.hoodie.exception.HoodieIOException;
|
import com.uber.hoodie.exception.HoodieIOException;
|
||||||
import com.uber.hoodie.exception.InvalidHoodiePathException;
|
import com.uber.hoodie.exception.InvalidHoodiePathException;
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.Comparator;
|
import java.util.LinkedList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map.Entry;
|
import java.util.Map.Entry;
|
||||||
import java.util.Optional;
|
import java.util.Optional;
|
||||||
|
import java.util.UUID;
|
||||||
|
import java.util.function.Function;
|
||||||
import java.util.regex.Matcher;
|
import java.util.regex.Matcher;
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
import java.util.stream.Stream;
|
import java.util.stream.Stream;
|
||||||
@@ -52,13 +59,15 @@ public class FSUtils {
|
|||||||
|
|
||||||
private static final Logger LOG = LogManager.getLogger(FSUtils.class);
|
private static final Logger LOG = LogManager.getLogger(FSUtils.class);
|
||||||
// Log files are of this pattern - .b5068208-e1a4-11e6-bf01-fe55135034f3_20170101134598.log.1
|
// Log files are of this pattern - .b5068208-e1a4-11e6-bf01-fe55135034f3_20170101134598.log.1
|
||||||
private static final Pattern LOG_FILE_PATTERN = Pattern.compile("\\.(.*)_(.*)\\.(.*)\\.([0-9]*)");
|
private static final Pattern LOG_FILE_PATTERN =
|
||||||
|
Pattern.compile("\\.(.*)_(.*)\\.(.*)\\.([0-9]*)(_(([0-9]*)-([0-9]*)-([0-9]*)))?");
|
||||||
private static final String LOG_FILE_PREFIX = ".";
|
private static final String LOG_FILE_PREFIX = ".";
|
||||||
private static final int MAX_ATTEMPTS_RECOVER_LEASE = 10;
|
private static final int MAX_ATTEMPTS_RECOVER_LEASE = 10;
|
||||||
private static final long MIN_CLEAN_TO_KEEP = 10;
|
private static final long MIN_CLEAN_TO_KEEP = 10;
|
||||||
private static final long MIN_ROLLBACK_TO_KEEP = 10;
|
private static final long MIN_ROLLBACK_TO_KEEP = 10;
|
||||||
private static final String HOODIE_ENV_PROPS_PREFIX = "HOODIE_ENV_";
|
private static final String HOODIE_ENV_PROPS_PREFIX = "HOODIE_ENV_";
|
||||||
|
|
||||||
|
|
||||||
public static Configuration prepareHadoopConf(Configuration conf) {
|
public static Configuration prepareHadoopConf(Configuration conf) {
|
||||||
conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
|
conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
|
||||||
conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName());
|
conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName());
|
||||||
@@ -76,7 +85,6 @@ public class FSUtils {
|
|||||||
return conf;
|
return conf;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public static FileSystem getFs(String path, Configuration conf) {
|
public static FileSystem getFs(String path, Configuration conf) {
|
||||||
FileSystem fs;
|
FileSystem fs;
|
||||||
conf = prepareHadoopConf(conf);
|
conf = prepareHadoopConf(conf);
|
||||||
@@ -92,26 +100,36 @@ public class FSUtils {
|
|||||||
return fs;
|
return fs;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String makeDataFileName(String commitTime, int taskPartitionId, String fileId) {
|
/**
|
||||||
return String.format("%s_%d_%s.parquet", fileId, taskPartitionId, commitTime);
|
* A write token uniquely identifies an attempt at one of the IOHandle operations (Merge/Create/Append)
|
||||||
|
*/
|
||||||
|
public static String makeWriteToken(int taskPartitionId, int stageId, long taskAttemptId) {
|
||||||
|
return String.format("%d-%d-%d", taskPartitionId, stageId, taskAttemptId);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String makeTempDataFileName(String partitionPath, String commitTime,
|
|
||||||
int taskPartitionId, String fileId, int stageId, long taskAttemptId) {
|
public static String makeDataFileName(String commitTime, String writeToken, String fileId) {
|
||||||
return String.format("%s_%s_%d_%s_%d_%d.parquet", partitionPath.replace("/", "-"), fileId,
|
return String.format("%s_%s_%s.parquet", fileId, writeToken, commitTime);
|
||||||
taskPartitionId, commitTime, stageId, taskAttemptId);
|
}
|
||||||
|
|
||||||
|
public static String makeMarkerFile(String commitTime, String writeToken, String fileId) {
|
||||||
|
return String.format("%s_%s_%s%s", fileId, writeToken, commitTime, MARKER_EXTN);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static String translateMarkerToDataPath(String basePath, String markerPath, String instantTs) {
|
||||||
|
Preconditions.checkArgument(markerPath.endsWith(MARKER_EXTN));
|
||||||
|
String markerRootPath = Path.getPathWithoutSchemeAndAuthority(new Path(
|
||||||
|
String.format("%s/%s/%s", basePath, HoodieTableMetaClient.TEMPFOLDER_NAME, instantTs))).toString();
|
||||||
|
int begin = markerPath.indexOf(markerRootPath);
|
||||||
|
Preconditions.checkArgument(begin >= 0, "Not in marker dir. Marker Path=" + markerPath
|
||||||
|
+ ", Expected Marker Root=" + markerRootPath);
|
||||||
|
String rPath = markerPath.substring(begin + markerRootPath.length() + 1);
|
||||||
|
return String.format("%s/%s%s", basePath, rPath.replace(MARKER_EXTN, ""),
|
||||||
|
HoodieFileFormat.PARQUET.getFileExtension());
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String maskWithoutFileId(String commitTime, int taskPartitionId) {
|
public static String maskWithoutFileId(String commitTime, int taskPartitionId) {
|
||||||
return String.format("*_%s_%s.parquet", taskPartitionId, commitTime);
|
return String.format("*_%s_%s%s", taskPartitionId, commitTime, HoodieFileFormat.PARQUET.getFileExtension());
|
||||||
}
|
|
||||||
|
|
||||||
public static String maskWithoutTaskPartitionId(String commitTime, String fileId) {
|
|
||||||
return String.format("%s_*_%s.parquet", fileId, commitTime);
|
|
||||||
}
|
|
||||||
|
|
||||||
public static String maskWithOnlyCommitTime(String commitTime) {
|
|
||||||
return String.format("*_*_%s.parquet", commitTime);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String getCommitFromCommitFile(String commitFileName) {
|
public static String getCommitFromCommitFile(String commitFileName) {
|
||||||
@@ -175,18 +193,43 @@ public class FSUtils {
|
|||||||
*/
|
*/
|
||||||
public static List<String> getAllFoldersWithPartitionMetaFile(FileSystem fs, String basePathStr)
|
public static List<String> getAllFoldersWithPartitionMetaFile(FileSystem fs, String basePathStr)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
List<String> partitions = new ArrayList<>();
|
final Path basePath = new Path(basePathStr);
|
||||||
Path basePath = new Path(basePathStr);
|
final List<String> partitions = new ArrayList<>();
|
||||||
RemoteIterator<LocatedFileStatus> allFiles = fs.listFiles(new Path(basePathStr), true);
|
processFiles(fs, basePathStr, (locatedFileStatus) -> {
|
||||||
while (allFiles.hasNext()) {
|
Path filePath = locatedFileStatus.getPath();
|
||||||
Path filePath = allFiles.next().getPath();
|
|
||||||
if (filePath.getName().equals(HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE)) {
|
if (filePath.getName().equals(HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE)) {
|
||||||
partitions.add(getRelativePartitionPath(basePath, filePath.getParent()));
|
partitions.add(getRelativePartitionPath(basePath, filePath.getParent()));
|
||||||
}
|
}
|
||||||
}
|
return true;
|
||||||
|
});
|
||||||
return partitions;
|
return partitions;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static final List<String> getAllDataFilesForMarkers(FileSystem fs, String basePath, String instantTs,
|
||||||
|
String markerDir) throws IOException {
|
||||||
|
List<String> dataFiles = new LinkedList<>();
|
||||||
|
FSUtils.processFiles(fs, markerDir, (status) -> {
|
||||||
|
String pathStr = status.getPath().toString();
|
||||||
|
if (pathStr.endsWith(MARKER_EXTN)) {
|
||||||
|
dataFiles.add(FSUtils.translateMarkerToDataPath(basePath, pathStr, instantTs));
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
});
|
||||||
|
return dataFiles;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static final void processFiles(FileSystem fs, String basePathStr,
|
||||||
|
Function<LocatedFileStatus, Boolean> consumer) throws IOException {
|
||||||
|
RemoteIterator<LocatedFileStatus> allFiles = fs.listFiles(new Path(basePathStr), true);
|
||||||
|
while (allFiles.hasNext()) {
|
||||||
|
LocatedFileStatus status = allFiles.next();
|
||||||
|
boolean success = consumer.apply(status);
|
||||||
|
if (!success) {
|
||||||
|
throw new HoodieException("Failed to process file-status=" + status);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public static List<String> getAllPartitionPaths(FileSystem fs, String basePathStr,
|
public static List<String> getAllPartitionPaths(FileSystem fs, String basePathStr,
|
||||||
boolean assumeDatePartitioning)
|
boolean assumeDatePartitioning)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
@@ -208,6 +251,12 @@ public class FSUtils {
|
|||||||
return name.replace(getFileExtension(name), "");
|
return name.replace(getFileExtension(name), "");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns a new unique prefix for creating a file group.
|
||||||
|
*/
|
||||||
|
public static String createNewFileIdPfx() {
|
||||||
|
return UUID.randomUUID().toString();
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get the file extension from the log file
|
* Get the file extension from the log file
|
||||||
@@ -254,6 +303,53 @@ public class FSUtils {
|
|||||||
return matcher.group(2);
|
return matcher.group(2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get TaskId used in log-path
|
||||||
|
*/
|
||||||
|
public static Integer getTaskPartitionIdFromLogPath(Path path) {
|
||||||
|
Matcher matcher = LOG_FILE_PATTERN.matcher(path.getName());
|
||||||
|
if (!matcher.find()) {
|
||||||
|
throw new InvalidHoodiePathException(path, "LogFile");
|
||||||
|
}
|
||||||
|
String val = matcher.group(7);
|
||||||
|
return val == null ? null : Integer.parseInt(val);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get Write-Token used in log-path
|
||||||
|
*/
|
||||||
|
public static String getWriteTokenFromLogPath(Path path) {
|
||||||
|
Matcher matcher = LOG_FILE_PATTERN.matcher(path.getName());
|
||||||
|
if (!matcher.find()) {
|
||||||
|
throw new InvalidHoodiePathException(path, "LogFile");
|
||||||
|
}
|
||||||
|
return matcher.group(6);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get StageId used in log-path
|
||||||
|
*/
|
||||||
|
public static Integer getStageIdFromLogPath(Path path) {
|
||||||
|
Matcher matcher = LOG_FILE_PATTERN.matcher(path.getName());
|
||||||
|
if (!matcher.find()) {
|
||||||
|
throw new InvalidHoodiePathException(path, "LogFile");
|
||||||
|
}
|
||||||
|
String val = matcher.group(8);
|
||||||
|
return val == null ? null : Integer.parseInt(val);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get Task Attempt Id used in log-path
|
||||||
|
*/
|
||||||
|
public static Integer getTaskAttemptIdFromLogPath(Path path) {
|
||||||
|
Matcher matcher = LOG_FILE_PATTERN.matcher(path.getName());
|
||||||
|
if (!matcher.find()) {
|
||||||
|
throw new InvalidHoodiePathException(path, "LogFile");
|
||||||
|
}
|
||||||
|
String val = matcher.group(9);
|
||||||
|
return val == null ? null : Integer.parseInt(val);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get the last part of the file name in the log file and convert to int.
|
* Get the last part of the file name in the log file and convert to int.
|
||||||
*/
|
*/
|
||||||
@@ -266,14 +362,10 @@ public class FSUtils {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public static String makeLogFileName(String fileId, String logFileExtension,
|
public static String makeLogFileName(String fileId, String logFileExtension,
|
||||||
String baseCommitTime, int version) {
|
String baseCommitTime, int version, String writeToken) {
|
||||||
return LOG_FILE_PREFIX + String
|
String suffix = (writeToken == null) ? String.format("%s_%s%s.%d",fileId, baseCommitTime, logFileExtension, version)
|
||||||
.format("%s_%s%s.%d", fileId, baseCommitTime, logFileExtension, version);
|
: String.format("%s_%s%s.%d_%s", fileId, baseCommitTime, logFileExtension, version, writeToken);
|
||||||
}
|
return LOG_FILE_PREFIX + suffix;
|
||||||
|
|
||||||
public static String maskWithoutLogVersion(String commitTime, String fileId,
|
|
||||||
String logFileExtension) {
|
|
||||||
return LOG_FILE_PREFIX + String.format("%s_%s%s*", fileId, commitTime, logFileExtension);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public static boolean isLogFile(Path logPath) {
|
public static boolean isLogFile(Path logPath) {
|
||||||
@@ -288,9 +380,7 @@ public class FSUtils {
|
|||||||
* Get the latest log file written from the list of log files passed in
|
* Get the latest log file written from the list of log files passed in
|
||||||
*/
|
*/
|
||||||
public static Optional<HoodieLogFile> getLatestLogFile(Stream<HoodieLogFile> logFiles) {
|
public static Optional<HoodieLogFile> getLatestLogFile(Stream<HoodieLogFile> logFiles) {
|
||||||
return logFiles.sorted(Comparator
|
return logFiles.sorted(HoodieLogFile.getReverseLogFileComparator()).findFirst();
|
||||||
.comparing(s -> s.getLogVersion(),
|
|
||||||
Comparator.reverseOrder())).findFirst();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -308,36 +398,28 @@ public class FSUtils {
|
|||||||
/**
|
/**
|
||||||
* Get the latest log version for the fileId in the partition path
|
* Get the latest log version for the fileId in the partition path
|
||||||
*/
|
*/
|
||||||
public static Optional<Integer> getLatestLogVersion(FileSystem fs, Path partitionPath,
|
public static Optional<Pair<Integer, String>> getLatestLogVersion(FileSystem fs, Path partitionPath,
|
||||||
final String fileId, final String logFileExtension, final String baseCommitTime)
|
final String fileId, final String logFileExtension, final String baseCommitTime)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
Optional<HoodieLogFile> latestLogFile =
|
Optional<HoodieLogFile> latestLogFile =
|
||||||
getLatestLogFile(
|
getLatestLogFile(
|
||||||
getAllLogFiles(fs, partitionPath, fileId, logFileExtension, baseCommitTime));
|
getAllLogFiles(fs, partitionPath, fileId, logFileExtension, baseCommitTime));
|
||||||
if (latestLogFile.isPresent()) {
|
if (latestLogFile.isPresent()) {
|
||||||
return Optional.of(latestLogFile.get().getLogVersion());
|
return Optional.of(Pair.of(latestLogFile.get().getLogVersion(),
|
||||||
|
getWriteTokenFromLogPath(latestLogFile.get().getPath())));
|
||||||
}
|
}
|
||||||
return Optional.empty();
|
return Optional.empty();
|
||||||
}
|
}
|
||||||
|
|
||||||
public static int getCurrentLogVersion(FileSystem fs, Path partitionPath,
|
|
||||||
final String fileId, final String logFileExtension, final String baseCommitTime)
|
|
||||||
throws IOException {
|
|
||||||
Optional<Integer> currentVersion =
|
|
||||||
getLatestLogVersion(fs, partitionPath, fileId, logFileExtension, baseCommitTime);
|
|
||||||
// handle potential overflow
|
|
||||||
return (currentVersion.isPresent()) ? currentVersion.get() : HoodieLogFile.LOGFILE_BASE_VERSION;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* computes the next log version for the specified fileId in the partition path
|
* computes the next log version for the specified fileId in the partition path
|
||||||
*/
|
*/
|
||||||
public static int computeNextLogVersion(FileSystem fs, Path partitionPath, final String fileId,
|
public static int computeNextLogVersion(FileSystem fs, Path partitionPath, final String fileId,
|
||||||
final String logFileExtension, final String baseCommitTime) throws IOException {
|
final String logFileExtension, final String baseCommitTime) throws IOException {
|
||||||
Optional<Integer> currentVersion =
|
Optional<Pair<Integer, String>> currentVersionWithWriteToken =
|
||||||
getLatestLogVersion(fs, partitionPath, fileId, logFileExtension, baseCommitTime);
|
getLatestLogVersion(fs, partitionPath, fileId, logFileExtension, baseCommitTime);
|
||||||
// handle potential overflow
|
// handle potential overflow
|
||||||
return (currentVersion.isPresent()) ? currentVersion.get() + 1
|
return (currentVersionWithWriteToken.isPresent()) ? currentVersionWithWriteToken.get().getKey() + 1
|
||||||
: HoodieLogFile.LOGFILE_BASE_VERSION;
|
: HoodieLogFile.LOGFILE_BASE_VERSION;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -349,10 +431,6 @@ public class FSUtils {
|
|||||||
return fs.getDefaultReplication(path);
|
return fs.getDefaultReplication(path);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static Long getDefaultBlockSize(FileSystem fs, Path path) {
|
|
||||||
return fs.getDefaultBlockSize(path);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* When a file was opened and the task died without closing the stream, another task executor
|
* When a file was opened and the task died without closing the stream, another task executor
|
||||||
* cannot open because the existing lease will be active. We will try to recover the lease, from
|
* cannot open because the existing lease will be active. We will try to recover the lease, from
|
||||||
@@ -431,8 +509,12 @@ public class FSUtils {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public static Path getPartitionPath(String basePath, String partitionPath) {
|
public static Path getPartitionPath(String basePath, String partitionPath) {
|
||||||
|
return getPartitionPath(new Path(basePath), partitionPath);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static Path getPartitionPath(Path basePath, String partitionPath) {
|
||||||
// FOr non-partitioned table, return only base-path
|
// FOr non-partitioned table, return only base-path
|
||||||
return ((partitionPath == null) || (partitionPath.isEmpty())) ? new Path(basePath) :
|
return ((partitionPath == null) || (partitionPath.isEmpty())) ? basePath :
|
||||||
new Path(basePath, partitionPath);
|
new Path(basePath, partitionPath);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -0,0 +1,200 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2019 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
package com.uber.hoodie.common.util;
|
||||||
|
|
||||||
|
import java.io.FileNotFoundException;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.concurrent.TimeoutException;
|
||||||
|
import java.util.function.Function;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
import org.apache.hadoop.fs.FileStatus;
|
||||||
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
|
import org.apache.hadoop.fs.Path;
|
||||||
|
import org.apache.log4j.LogManager;
|
||||||
|
import org.apache.log4j.Logger;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A consistency checker that fails if it is unable to meet the required condition within a specified timeout
|
||||||
|
*/
|
||||||
|
public class FailSafeConsistencyGuard implements ConsistencyGuard {
|
||||||
|
|
||||||
|
private static final transient Logger log = LogManager.getLogger(FailSafeConsistencyGuard.class);
|
||||||
|
|
||||||
|
private final FileSystem fs;
|
||||||
|
private final int maxAttempts;
|
||||||
|
private final long initialDelayMs;
|
||||||
|
private final long maxDelayMs;
|
||||||
|
|
||||||
|
public FailSafeConsistencyGuard(FileSystem fs, int maxAttempts, long initalDelayMs, long maxDelayMs) {
|
||||||
|
this.fs = fs;
|
||||||
|
this.maxAttempts = maxAttempts;
|
||||||
|
this.initialDelayMs = initalDelayMs;
|
||||||
|
this.maxDelayMs = maxDelayMs;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void waitTillFileAppears(Path filePath) throws TimeoutException {
|
||||||
|
waitForFileVisibility(filePath, FileVisibility.APPEAR);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void waitTillFileDisappears(Path filePath)
|
||||||
|
throws TimeoutException {
|
||||||
|
waitForFileVisibility(filePath, FileVisibility.DISAPPEAR);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void waitTillAllFilesAppear(String dirPath, List<String> files) throws TimeoutException {
|
||||||
|
waitForFilesVisibility(dirPath, files, FileVisibility.APPEAR);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void waitTillAllFilesDisappear(String dirPath, List<String> files) throws TimeoutException {
|
||||||
|
waitForFilesVisibility(dirPath, files, FileVisibility.DISAPPEAR);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Helper function to wait for all files belonging to single directory to appear
|
||||||
|
* @param dirPath Dir Path
|
||||||
|
* @param files Files to appear/disappear
|
||||||
|
* @param event Appear/Disappear
|
||||||
|
* @throws TimeoutException
|
||||||
|
*/
|
||||||
|
public void waitForFilesVisibility(String dirPath, List<String> files, FileVisibility event)
|
||||||
|
throws TimeoutException {
|
||||||
|
Path dir = new Path(dirPath);
|
||||||
|
List<String> filesWithoutSchemeAndAuthority =
|
||||||
|
files.stream().map(f -> Path.getPathWithoutSchemeAndAuthority(new Path(f))).map(p -> p.toString())
|
||||||
|
.collect(Collectors.toList());
|
||||||
|
|
||||||
|
retryTillSuccess((retryNum) -> {
|
||||||
|
try {
|
||||||
|
log.info("Trying " + retryNum);
|
||||||
|
FileStatus[] entries = fs.listStatus(dir);
|
||||||
|
List<String> gotFiles = Arrays.stream(entries).map(e -> Path.getPathWithoutSchemeAndAuthority(e.getPath()))
|
||||||
|
.map(p -> p.toString()).collect(Collectors.toList());
|
||||||
|
List<String> candidateFiles = new ArrayList<>(filesWithoutSchemeAndAuthority);
|
||||||
|
boolean altered = candidateFiles.removeAll(gotFiles);
|
||||||
|
|
||||||
|
switch (event) {
|
||||||
|
case DISAPPEAR:
|
||||||
|
log.info("Following files are visible" + candidateFiles);
|
||||||
|
// If no candidate files gets removed, it means all of them have disappeared
|
||||||
|
return !altered;
|
||||||
|
case APPEAR:
|
||||||
|
default:
|
||||||
|
// if all files appear, the list is empty
|
||||||
|
return candidateFiles.isEmpty();
|
||||||
|
}
|
||||||
|
} catch (IOException ioe) {
|
||||||
|
log.warn("Got IOException waiting for file event. Have tried " + retryNum + " time(s)", ioe);
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}, "Timed out waiting for filles to become visible");
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Helper to check of file visibility
|
||||||
|
* @param filePath File Path
|
||||||
|
* @param visibility Visibility
|
||||||
|
* @return
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
private boolean checkFileVisibility(Path filePath, FileVisibility visibility) throws IOException {
|
||||||
|
try {
|
||||||
|
FileStatus[] status = fs.listStatus(filePath);
|
||||||
|
switch (visibility) {
|
||||||
|
case APPEAR:
|
||||||
|
return status.length != 0;
|
||||||
|
case DISAPPEAR:
|
||||||
|
default:
|
||||||
|
return status.length == 0;
|
||||||
|
}
|
||||||
|
} catch (FileNotFoundException nfe) {
|
||||||
|
switch (visibility) {
|
||||||
|
case APPEAR:
|
||||||
|
return false;
|
||||||
|
case DISAPPEAR:
|
||||||
|
default:
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Helper function to wait till file either appears/disappears
|
||||||
|
* @param filePath File Path
|
||||||
|
* @param visibility
|
||||||
|
* @throws TimeoutException
|
||||||
|
*/
|
||||||
|
private void waitForFileVisibility(Path filePath, FileVisibility visibility) throws TimeoutException {
|
||||||
|
long waitMs = initialDelayMs;
|
||||||
|
int attempt = 0;
|
||||||
|
while (attempt < maxAttempts) {
|
||||||
|
try {
|
||||||
|
if (checkFileVisibility(filePath, visibility)) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
} catch (IOException ioe) {
|
||||||
|
log.warn("Got IOException waiting for file visibility. Retrying", ioe);
|
||||||
|
}
|
||||||
|
|
||||||
|
sleepSafe(waitMs);
|
||||||
|
waitMs = waitMs * 2; // double check interval every attempt
|
||||||
|
waitMs = waitMs > maxDelayMs ? maxDelayMs : waitMs;
|
||||||
|
attempt++;
|
||||||
|
}
|
||||||
|
throw new TimeoutException("Timed-out waiting for the file to " + visibility.name());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Retries the predicate for condfigurable number of times till we the predicate returns success
|
||||||
|
* @param predicate Predicate Function
|
||||||
|
* @param timedOutMessage Timed-Out message for logging
|
||||||
|
* @throws TimeoutException when retries are exhausted
|
||||||
|
*/
|
||||||
|
private void retryTillSuccess(Function<Integer, Boolean> predicate, String timedOutMessage) throws TimeoutException {
|
||||||
|
long waitMs = initialDelayMs;
|
||||||
|
int attempt = 0;
|
||||||
|
log.warn("Max Attempts=" + maxAttempts);
|
||||||
|
while (attempt < maxAttempts) {
|
||||||
|
boolean success = predicate.apply(attempt);
|
||||||
|
if (success) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
sleepSafe(waitMs);
|
||||||
|
waitMs = waitMs * 2; // double check interval every attempt
|
||||||
|
waitMs = waitMs > maxDelayMs ? maxDelayMs : waitMs;
|
||||||
|
attempt++;
|
||||||
|
}
|
||||||
|
throw new TimeoutException(timedOutMessage);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
void sleepSafe(long waitMs) {
|
||||||
|
try {
|
||||||
|
Thread.sleep(waitMs);
|
||||||
|
} catch (InterruptedException e) {
|
||||||
|
// ignore & continue next attempt
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,46 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2019 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
package com.uber.hoodie.common.util;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
import org.apache.hadoop.fs.Path;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Default Consistency guard that does nothing. Used for HDFS deployments
|
||||||
|
*/
|
||||||
|
public class NoOpConsistencyGuard implements ConsistencyGuard {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void waitTillFileAppears(Path filePath) {
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void waitTillFileDisappears(Path filePath) {
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void waitTillAllFilesAppear(String dirPath, List<String> files) {
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void waitTillAllFilesDisappear(String dirPath, List<String> files) {
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -113,7 +113,6 @@ public class ParquetUtils {
|
|||||||
return footer;
|
return footer;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get the schema of the given parquet file.
|
* Get the schema of the given parquet file.
|
||||||
*/
|
*/
|
||||||
@@ -121,7 +120,6 @@ public class ParquetUtils {
|
|||||||
return readMetadata(configuration, parquetFilePath).getFileMetaData().getSchema();
|
return readMetadata(configuration, parquetFilePath).getFileMetaData().getSchema();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private static List<String> readParquetFooter(Configuration configuration, Path parquetFilePath,
|
private static List<String> readParquetFooter(Configuration configuration, Path parquetFilePath,
|
||||||
String... footerNames) {
|
String... footerNames) {
|
||||||
List<String> footerVals = new ArrayList<>();
|
List<String> footerVals = new ArrayList<>();
|
||||||
|
|||||||
@@ -79,7 +79,8 @@ public class HoodieTestUtils {
|
|||||||
|
|
||||||
public static final String TEST_EXTENSION = ".test";
|
public static final String TEST_EXTENSION = ".test";
|
||||||
public static final String RAW_TRIPS_TEST_NAME = "raw_trips";
|
public static final String RAW_TRIPS_TEST_NAME = "raw_trips";
|
||||||
public static final int DEFAULT_TASK_PARTITIONID = 1;
|
public static final String DEFAULT_WRITE_TOKEN = "1-0-1";
|
||||||
|
public static final int DEFAULT_LOG_VERSION = 1;
|
||||||
public static final String[] DEFAULT_PARTITION_PATHS = {"2016/03/15", "2015/03/16", "2015/03/17"};
|
public static final String[] DEFAULT_PARTITION_PATHS = {"2016/03/15", "2015/03/16", "2015/03/17"};
|
||||||
private static Random rand = new Random(46474747);
|
private static Random rand = new Random(46474747);
|
||||||
|
|
||||||
@@ -92,8 +93,7 @@ public class HoodieTestUtils {
|
|||||||
return init(basePath, HoodieTableType.COPY_ON_WRITE);
|
return init(basePath, HoodieTableType.COPY_ON_WRITE);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static HoodieTableMetaClient init(String basePath, HoodieTableType tableType)
|
public static HoodieTableMetaClient init(String basePath, HoodieTableType tableType) throws IOException {
|
||||||
throws IOException {
|
|
||||||
return initTableType(getDefaultHadoopConf(), basePath, tableType);
|
return initTableType(getDefaultHadoopConf(), basePath, tableType);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -163,14 +163,30 @@ public class HoodieTestUtils {
|
|||||||
return createDataFile(basePath, partitionPath, commitTime, fileID);
|
return createDataFile(basePath, partitionPath, commitTime, fileID);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static final String createNewMarkerFile(String basePath, String partitionPath, String commitTime)
|
||||||
|
throws IOException {
|
||||||
|
String fileID = UUID.randomUUID().toString();
|
||||||
|
return createMarkerFile(basePath, partitionPath, commitTime, fileID);
|
||||||
|
}
|
||||||
|
|
||||||
public static final String createDataFile(String basePath, String partitionPath, String commitTime, String fileID)
|
public static final String createDataFile(String basePath, String partitionPath, String commitTime, String fileID)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
String folderPath = basePath + "/" + partitionPath + "/";
|
String folderPath = basePath + "/" + partitionPath + "/";
|
||||||
new File(folderPath).mkdirs();
|
new File(folderPath).mkdirs();
|
||||||
new File(folderPath + FSUtils.makeDataFileName(commitTime, DEFAULT_TASK_PARTITIONID, fileID)).createNewFile();
|
new File(folderPath + FSUtils.makeDataFileName(commitTime, DEFAULT_WRITE_TOKEN, fileID)).createNewFile();
|
||||||
return fileID;
|
return fileID;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static final String createMarkerFile(String basePath, String partitionPath, String commitTime, String fileID)
|
||||||
|
throws IOException {
|
||||||
|
String folderPath = basePath + "/" + HoodieTableMetaClient.TEMPFOLDER_NAME + "/" + commitTime + "/"
|
||||||
|
+ partitionPath + "/";
|
||||||
|
new File(folderPath).mkdirs();
|
||||||
|
File f = new File(folderPath + FSUtils.makeMarkerFile(commitTime, DEFAULT_WRITE_TOKEN, fileID));
|
||||||
|
f.createNewFile();
|
||||||
|
return f.getAbsolutePath();
|
||||||
|
}
|
||||||
|
|
||||||
public static final String createNewLogFile(FileSystem fs, String basePath, String partitionPath, String commitTime,
|
public static final String createNewLogFile(FileSystem fs, String basePath, String partitionPath, String commitTime,
|
||||||
String fileID, Optional<Integer> version) throws IOException {
|
String fileID, Optional<Integer> version) throws IOException {
|
||||||
String folderPath = basePath + "/" + partitionPath + "/";
|
String folderPath = basePath + "/" + partitionPath + "/";
|
||||||
@@ -179,7 +195,9 @@ public class HoodieTestUtils {
|
|||||||
throw new IOException("cannot create directory for path " + folderPath);
|
throw new IOException("cannot create directory for path " + folderPath);
|
||||||
}
|
}
|
||||||
boolean createFile = fs.createNewFile(new Path(
|
boolean createFile = fs.createNewFile(new Path(
|
||||||
folderPath + FSUtils.makeLogFileName(fileID, ".log", commitTime, version.orElse(DEFAULT_TASK_PARTITIONID))));
|
folderPath + FSUtils
|
||||||
|
.makeLogFileName(fileID, ".log", commitTime, version.orElse(DEFAULT_LOG_VERSION),
|
||||||
|
HoodieLogFormat.UNKNOWN_WRITE_TOKEN)));
|
||||||
if (!createFile) {
|
if (!createFile) {
|
||||||
throw new IOException(
|
throw new IOException(
|
||||||
StringUtils.format("cannot create data file for commit %s and fileId %s", commitTime, fileID));
|
StringUtils.format("cannot create data file for commit %s and fileId %s", commitTime, fileID));
|
||||||
@@ -208,39 +226,38 @@ public class HoodieTestUtils {
|
|||||||
AvroUtils.serializeCompactionPlan(plan));
|
AvroUtils.serializeCompactionPlan(plan));
|
||||||
}
|
}
|
||||||
|
|
||||||
public static final String getDataFilePath(String basePath, String partitionPath, String commitTime, String fileID)
|
public static final String getDataFilePath(String basePath, String partitionPath, String commitTime, String fileID) {
|
||||||
throws IOException {
|
|
||||||
return basePath + "/" + partitionPath + "/" + FSUtils
|
return basePath + "/" + partitionPath + "/" + FSUtils
|
||||||
.makeDataFileName(commitTime, DEFAULT_TASK_PARTITIONID, fileID);
|
.makeDataFileName(commitTime, DEFAULT_WRITE_TOKEN, fileID);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static final String getLogFilePath(String basePath, String partitionPath, String commitTime, String fileID,
|
public static final String getLogFilePath(String basePath, String partitionPath, String commitTime, String fileID,
|
||||||
Optional<Integer> version) throws IOException {
|
Optional<Integer> version) {
|
||||||
return basePath + "/" + partitionPath + "/" + FSUtils.makeLogFileName(fileID, ".log", commitTime,
|
return basePath + "/" + partitionPath + "/" + FSUtils.makeLogFileName(fileID, ".log", commitTime,
|
||||||
version.orElse(DEFAULT_TASK_PARTITIONID));
|
version.orElse(DEFAULT_LOG_VERSION), HoodieLogFormat.UNKNOWN_WRITE_TOKEN);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static final String getCommitFilePath(String basePath, String commitTime) throws IOException {
|
public static final String getCommitFilePath(String basePath, String commitTime) {
|
||||||
return basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" + commitTime + HoodieTimeline.COMMIT_EXTENSION;
|
return basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" + commitTime + HoodieTimeline.COMMIT_EXTENSION;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static final String getInflightCommitFilePath(String basePath, String commitTime) throws IOException {
|
public static final String getInflightCommitFilePath(String basePath, String commitTime) {
|
||||||
return basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" + commitTime
|
return basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" + commitTime
|
||||||
+ HoodieTimeline.INFLIGHT_COMMIT_EXTENSION;
|
+ HoodieTimeline.INFLIGHT_COMMIT_EXTENSION;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static final String getRequestedCompactionFilePath(String basePath, String commitTime) throws IOException {
|
public static final String getRequestedCompactionFilePath(String basePath, String commitTime) {
|
||||||
return basePath + "/" + HoodieTableMetaClient.AUXILIARYFOLDER_NAME + "/" + commitTime
|
return basePath + "/" + HoodieTableMetaClient.AUXILIARYFOLDER_NAME + "/" + commitTime
|
||||||
+ HoodieTimeline.INFLIGHT_COMMIT_EXTENSION;
|
+ HoodieTimeline.INFLIGHT_COMMIT_EXTENSION;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static final boolean doesDataFileExist(String basePath, String partitionPath, String commitTime, String fileID)
|
public static final boolean doesDataFileExist(String basePath, String partitionPath, String commitTime,
|
||||||
throws IOException {
|
String fileID) {
|
||||||
return new File(getDataFilePath(basePath, partitionPath, commitTime, fileID)).exists();
|
return new File(getDataFilePath(basePath, partitionPath, commitTime, fileID)).exists();
|
||||||
}
|
}
|
||||||
|
|
||||||
public static final boolean doesLogFileExist(String basePath, String partitionPath, String commitTime, String fileID,
|
public static final boolean doesLogFileExist(String basePath, String partitionPath, String commitTime, String fileID,
|
||||||
Optional<Integer> version) throws IOException {
|
Optional<Integer> version) {
|
||||||
return new File(getLogFilePath(basePath, partitionPath, commitTime, fileID, version)).exists();
|
return new File(getLogFilePath(basePath, partitionPath, commitTime, fileID, version)).exists();
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -256,10 +273,6 @@ public class HoodieTestUtils {
|
|||||||
.exists();
|
.exists();
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String makeInflightTestFileName(String instant) {
|
|
||||||
return instant + TEST_EXTENSION + HoodieTimeline.INFLIGHT_EXTENSION;
|
|
||||||
}
|
|
||||||
|
|
||||||
public static void createCleanFiles(String basePath, String commitTime, Configuration configuration)
|
public static void createCleanFiles(String basePath, String commitTime, Configuration configuration)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
Path commitFile = new Path(
|
Path commitFile = new Path(
|
||||||
|
|||||||
@@ -35,27 +35,20 @@ public class TestHoodieWriteStat {
|
|||||||
String basePathString = "/data/tables/some-hoodie-table";
|
String basePathString = "/data/tables/some-hoodie-table";
|
||||||
String partitionPathString = "2017/12/31";
|
String partitionPathString = "2017/12/31";
|
||||||
String fileName = UUID.randomUUID().toString();
|
String fileName = UUID.randomUUID().toString();
|
||||||
int taskPartitionId = Integer.MAX_VALUE;
|
String writeToken = "1-0-1";
|
||||||
int stageId = Integer.MAX_VALUE;
|
|
||||||
long taskAttemptId = Long.MAX_VALUE;
|
|
||||||
|
|
||||||
Path basePath = new Path(basePathString);
|
Path basePath = new Path(basePathString);
|
||||||
Path partitionPath = new Path(basePath, partitionPathString);
|
Path partitionPath = new Path(basePath, partitionPathString);
|
||||||
Path tempPath = new Path(basePath, HoodieTableMetaClient.TEMPFOLDER_NAME);
|
Path tempPath = new Path(basePath, HoodieTableMetaClient.TEMPFOLDER_NAME);
|
||||||
|
|
||||||
Path finalizeFilePath = new Path(partitionPath, FSUtils.makeDataFileName(commitTime, taskPartitionId, fileName));
|
Path finalizeFilePath = new Path(partitionPath, FSUtils.makeDataFileName(commitTime, writeToken, fileName));
|
||||||
Path tempFilePath = new Path(tempPath, FSUtils
|
|
||||||
.makeTempDataFileName(partitionPathString, commitTime, taskPartitionId,
|
|
||||||
fileName, stageId, taskAttemptId));
|
|
||||||
|
|
||||||
HoodieWriteStat writeStat = new HoodieWriteStat();
|
HoodieWriteStat writeStat = new HoodieWriteStat();
|
||||||
writeStat.setPaths(basePath, finalizeFilePath, tempFilePath);
|
writeStat.setPath(basePath, finalizeFilePath);
|
||||||
assertEquals(finalizeFilePath, new Path(basePath, writeStat.getPath()));
|
assertEquals(finalizeFilePath, new Path(basePath, writeStat.getPath()));
|
||||||
assertEquals(tempFilePath, new Path(basePath, writeStat.getTempPath()));
|
|
||||||
|
|
||||||
// test for null tempFilePath
|
// test for null tempFilePath
|
||||||
writeStat = new HoodieWriteStat();
|
writeStat = new HoodieWriteStat();
|
||||||
writeStat.setPaths(basePath, finalizeFilePath, null);
|
writeStat.setPath(basePath, finalizeFilePath);
|
||||||
assertEquals(finalizeFilePath, new Path(basePath, writeStat.getPath()));
|
assertEquals(finalizeFilePath, new Path(basePath, writeStat.getPath()));
|
||||||
assertNull(writeStat.getTempPath());
|
assertNull(writeStat.getTempPath());
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -19,6 +19,7 @@ package com.uber.hoodie.common.table.log;
|
|||||||
import static com.uber.hoodie.common.util.SchemaTestUtil.getSimpleSchema;
|
import static com.uber.hoodie.common.util.SchemaTestUtil.getSimpleSchema;
|
||||||
import static org.junit.Assert.assertEquals;
|
import static org.junit.Assert.assertEquals;
|
||||||
import static org.junit.Assert.assertFalse;
|
import static org.junit.Assert.assertFalse;
|
||||||
|
import static org.junit.Assert.assertNotNull;
|
||||||
import static org.junit.Assert.assertTrue;
|
import static org.junit.Assert.assertTrue;
|
||||||
import static org.junit.Assert.fail;
|
import static org.junit.Assert.fail;
|
||||||
|
|
||||||
@@ -175,6 +176,63 @@ public class HoodieLogFormatTest {
|
|||||||
writer.close();
|
writer.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testConcurrentAppendOnExistingLogFileWithoutWriteToken() throws Exception {
|
||||||
|
testConcurrentAppend(true, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testConcurrentAppendOnExistingLogFileWithWriteToken() throws Exception {
|
||||||
|
testConcurrentAppend(true, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testConcurrentAppendOnFirstLogFileVersion() throws Exception {
|
||||||
|
testConcurrentAppend(false, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void testConcurrentAppend(boolean logFileExists, boolean newLogFileFormat) throws Exception {
|
||||||
|
HoodieLogFormat.WriterBuilder builder1 = HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath)
|
||||||
|
.withFileExtension(HoodieLogFile.DELTA_EXTENSION).withFileId("test-fileid1")
|
||||||
|
.overBaseCommit("100").withFs(fs);
|
||||||
|
HoodieLogFormat.WriterBuilder builder2 = HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath)
|
||||||
|
.withFileExtension(HoodieLogFile.DELTA_EXTENSION).withFileId("test-fileid1")
|
||||||
|
.overBaseCommit("100").withFs(fs);
|
||||||
|
|
||||||
|
if (newLogFileFormat && logFileExists) {
|
||||||
|
// Assume there is an existing log-file with write token
|
||||||
|
builder1 = builder1.withLogVersion(1).withLogWriteToken(HoodieLogFormat.UNKNOWN_WRITE_TOKEN)
|
||||||
|
.withRolloverLogWriteToken(HoodieLogFormat.UNKNOWN_WRITE_TOKEN);
|
||||||
|
builder2 = builder2.withLogVersion(1).withLogWriteToken(HoodieLogFormat.UNKNOWN_WRITE_TOKEN)
|
||||||
|
.withRolloverLogWriteToken(HoodieLogFormat.UNKNOWN_WRITE_TOKEN);
|
||||||
|
} else if (newLogFileFormat) {
|
||||||
|
// First log file of the file-slice
|
||||||
|
builder1 = builder1.withLogVersion(HoodieLogFile.LOGFILE_BASE_VERSION)
|
||||||
|
.withLogWriteToken(HoodieLogFormat.UNKNOWN_WRITE_TOKEN)
|
||||||
|
.withRolloverLogWriteToken(HoodieLogFormat.UNKNOWN_WRITE_TOKEN);
|
||||||
|
builder2 = builder2.withLogVersion(HoodieLogFile.LOGFILE_BASE_VERSION)
|
||||||
|
.withLogWriteToken(HoodieLogFormat.UNKNOWN_WRITE_TOKEN)
|
||||||
|
.withRolloverLogWriteToken(HoodieLogFormat.UNKNOWN_WRITE_TOKEN);
|
||||||
|
} else {
|
||||||
|
builder1 = builder1.withLogVersion(1).withRolloverLogWriteToken(HoodieLogFormat.UNKNOWN_WRITE_TOKEN);
|
||||||
|
}
|
||||||
|
Writer writer = builder1.build();
|
||||||
|
Writer writer2 = builder2.build();
|
||||||
|
HoodieLogFile logFile1 = writer.getLogFile();
|
||||||
|
HoodieLogFile logFile2 = writer2.getLogFile();
|
||||||
|
List<IndexedRecord> records = SchemaTestUtil.generateTestRecords(0, 100);
|
||||||
|
Map<HoodieLogBlock.HeaderMetadataType, String> header = Maps.newHashMap();
|
||||||
|
header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "100");
|
||||||
|
header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, getSimpleSchema().toString());
|
||||||
|
HoodieAvroDataBlock dataBlock = new HoodieAvroDataBlock(records, header);
|
||||||
|
writer = writer.appendBlock(dataBlock);
|
||||||
|
writer2 = writer2.appendBlock(dataBlock);
|
||||||
|
writer.close();
|
||||||
|
writer2.close();
|
||||||
|
assertNotNull(logFile1.getLogWriteToken());
|
||||||
|
assertEquals("Log Files must have different versions", logFile1.getLogVersion(), logFile2.getLogVersion() - 1);
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testMultipleAppend() throws IOException, URISyntaxException, InterruptedException {
|
public void testMultipleAppend() throws IOException, URISyntaxException, InterruptedException {
|
||||||
Writer writer = HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath)
|
Writer writer = HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath)
|
||||||
@@ -225,6 +283,12 @@ public class HoodieLogFormatTest {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This is actually a test on concurrent append and not recovery lease.
|
||||||
|
* Commenting this out.
|
||||||
|
* https://issues.apache.org/jira/browse/HUDI-117
|
||||||
|
*/
|
||||||
|
/**
|
||||||
@Test
|
@Test
|
||||||
public void testLeaseRecovery() throws IOException, URISyntaxException, InterruptedException {
|
public void testLeaseRecovery() throws IOException, URISyntaxException, InterruptedException {
|
||||||
Writer writer = HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath)
|
Writer writer = HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath)
|
||||||
@@ -253,6 +317,7 @@ public class HoodieLogFormatTest {
|
|||||||
fs.getFileStatus(writer.getLogFile().getPath()).getLen());
|
fs.getFileStatus(writer.getLogFile().getPath()).getLen());
|
||||||
writer.close();
|
writer.close();
|
||||||
}
|
}
|
||||||
|
**/
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testAppendNotSupported() throws IOException, URISyntaxException, InterruptedException {
|
public void testAppendNotSupported() throws IOException, URISyntaxException, InterruptedException {
|
||||||
|
|||||||
@@ -65,6 +65,8 @@ import org.junit.rules.TemporaryFolder;
|
|||||||
@SuppressWarnings("ResultOfMethodCallIgnored")
|
@SuppressWarnings("ResultOfMethodCallIgnored")
|
||||||
public class HoodieTableFileSystemViewTest {
|
public class HoodieTableFileSystemViewTest {
|
||||||
|
|
||||||
|
private static String TEST_WRITE_TOKEN = "1-0-1";
|
||||||
|
|
||||||
protected HoodieTableMetaClient metaClient;
|
protected HoodieTableMetaClient metaClient;
|
||||||
protected String basePath;
|
protected String basePath;
|
||||||
protected SyncableFileSystemView fsView;
|
protected SyncableFileSystemView fsView;
|
||||||
@@ -119,8 +121,10 @@ public class HoodieTableFileSystemViewTest {
|
|||||||
String instantTime1 = "1";
|
String instantTime1 = "1";
|
||||||
String deltaInstantTime1 = "2";
|
String deltaInstantTime1 = "2";
|
||||||
String deltaInstantTime2 = "3";
|
String deltaInstantTime2 = "3";
|
||||||
String fileName1 = FSUtils.makeLogFileName(fileId, HoodieLogFile.DELTA_EXTENSION, instantTime1, 0);
|
String fileName1 = FSUtils.makeLogFileName(fileId, HoodieLogFile.DELTA_EXTENSION,
|
||||||
String fileName2 = FSUtils.makeLogFileName(fileId, HoodieLogFile.DELTA_EXTENSION, instantTime1, 1);
|
instantTime1, 0, TEST_WRITE_TOKEN);
|
||||||
|
String fileName2 = FSUtils.makeLogFileName(fileId, HoodieLogFile.DELTA_EXTENSION,
|
||||||
|
instantTime1, 1, TEST_WRITE_TOKEN);
|
||||||
new File(basePath + "/" + partitionPath + "/" + fileName1).createNewFile();
|
new File(basePath + "/" + partitionPath + "/" + fileName1).createNewFile();
|
||||||
new File(basePath + "/" + partitionPath + "/" + fileName2).createNewFile();
|
new File(basePath + "/" + partitionPath + "/" + fileName2).createNewFile();
|
||||||
HoodieActiveTimeline commitTimeline = metaClient.getActiveTimeline();
|
HoodieActiveTimeline commitTimeline = metaClient.getActiveTimeline();
|
||||||
@@ -248,11 +252,13 @@ public class HoodieTableFileSystemViewTest {
|
|||||||
|
|
||||||
String dataFileName = null;
|
String dataFileName = null;
|
||||||
if (!skipCreatingDataFile) {
|
if (!skipCreatingDataFile) {
|
||||||
dataFileName = FSUtils.makeDataFileName(instantTime1, 1, fileId);
|
dataFileName = FSUtils.makeDataFileName(instantTime1, TEST_WRITE_TOKEN, fileId);
|
||||||
new File(basePath + "/" + partitionPath + "/" + dataFileName).createNewFile();
|
new File(basePath + "/" + partitionPath + "/" + dataFileName).createNewFile();
|
||||||
}
|
}
|
||||||
String fileName1 = FSUtils.makeLogFileName(fileId, HoodieLogFile.DELTA_EXTENSION, instantTime1, 0);
|
String fileName1 = FSUtils.makeLogFileName(fileId, HoodieLogFile.DELTA_EXTENSION,
|
||||||
String fileName2 = FSUtils.makeLogFileName(fileId, HoodieLogFile.DELTA_EXTENSION, instantTime1, 1);
|
instantTime1, 0, TEST_WRITE_TOKEN);
|
||||||
|
String fileName2 = FSUtils.makeLogFileName(fileId, HoodieLogFile.DELTA_EXTENSION,
|
||||||
|
instantTime1, 1, TEST_WRITE_TOKEN);
|
||||||
new File(basePath + "/" + partitionPath + "/" + fileName1).createNewFile();
|
new File(basePath + "/" + partitionPath + "/" + fileName1).createNewFile();
|
||||||
new File(basePath + "/" + partitionPath + "/" + fileName2).createNewFile();
|
new File(basePath + "/" + partitionPath + "/" + fileName2).createNewFile();
|
||||||
HoodieActiveTimeline commitTimeline = metaClient.getActiveTimeline();
|
HoodieActiveTimeline commitTimeline = metaClient.getActiveTimeline();
|
||||||
@@ -267,7 +273,7 @@ public class HoodieTableFileSystemViewTest {
|
|||||||
refreshFsView();
|
refreshFsView();
|
||||||
List<FileSlice> fileSlices = rtView.getLatestFileSlices(partitionPath).collect(Collectors.toList());
|
List<FileSlice> fileSlices = rtView.getLatestFileSlices(partitionPath).collect(Collectors.toList());
|
||||||
String compactionRequestedTime = "4";
|
String compactionRequestedTime = "4";
|
||||||
String compactDataFileName = FSUtils.makeDataFileName(compactionRequestedTime, 1, fileId);
|
String compactDataFileName = FSUtils.makeDataFileName(compactionRequestedTime, TEST_WRITE_TOKEN, fileId);
|
||||||
List<Pair<String, FileSlice>> partitionFileSlicesPairs = new ArrayList<>();
|
List<Pair<String, FileSlice>> partitionFileSlicesPairs = new ArrayList<>();
|
||||||
partitionFileSlicesPairs.add(Pair.of(partitionPath, fileSlices.get(0)));
|
partitionFileSlicesPairs.add(Pair.of(partitionPath, fileSlices.get(0)));
|
||||||
HoodieCompactionPlan compactionPlan = CompactionUtils.buildFromFileSlices(partitionFileSlicesPairs,
|
HoodieCompactionPlan compactionPlan = CompactionUtils.buildFromFileSlices(partitionFileSlicesPairs,
|
||||||
@@ -299,8 +305,10 @@ public class HoodieTableFileSystemViewTest {
|
|||||||
String deltaInstantTime5 = "6";
|
String deltaInstantTime5 = "6";
|
||||||
List<String> allInstantTimes = Arrays.asList(instantTime1, deltaInstantTime1, deltaInstantTime2,
|
List<String> allInstantTimes = Arrays.asList(instantTime1, deltaInstantTime1, deltaInstantTime2,
|
||||||
compactionRequestedTime, deltaInstantTime4, deltaInstantTime5);
|
compactionRequestedTime, deltaInstantTime4, deltaInstantTime5);
|
||||||
String fileName3 = FSUtils.makeLogFileName(fileId, HoodieLogFile.DELTA_EXTENSION, compactionRequestedTime, 0);
|
String fileName3 = FSUtils.makeLogFileName(fileId, HoodieLogFile.DELTA_EXTENSION,
|
||||||
String fileName4 = FSUtils.makeLogFileName(fileId, HoodieLogFile.DELTA_EXTENSION, compactionRequestedTime, 1);
|
compactionRequestedTime, 0, TEST_WRITE_TOKEN);
|
||||||
|
String fileName4 = FSUtils.makeLogFileName(fileId, HoodieLogFile.DELTA_EXTENSION,
|
||||||
|
compactionRequestedTime, 1, TEST_WRITE_TOKEN);
|
||||||
new File(basePath + "/" + partitionPath + "/" + fileName3).createNewFile();
|
new File(basePath + "/" + partitionPath + "/" + fileName3).createNewFile();
|
||||||
new File(basePath + "/" + partitionPath + "/" + fileName4).createNewFile();
|
new File(basePath + "/" + partitionPath + "/" + fileName4).createNewFile();
|
||||||
HoodieInstant deltaInstant4 = new HoodieInstant(true, HoodieTimeline.DELTA_COMMIT_ACTION, deltaInstantTime4);
|
HoodieInstant deltaInstant4 = new HoodieInstant(true, HoodieTimeline.DELTA_COMMIT_ACTION, deltaInstantTime4);
|
||||||
@@ -400,15 +408,15 @@ public class HoodieTableFileSystemViewTest {
|
|||||||
final String orphanFileId2 = UUID.randomUUID().toString();
|
final String orphanFileId2 = UUID.randomUUID().toString();
|
||||||
final String invalidInstantId = "INVALIDTIME";
|
final String invalidInstantId = "INVALIDTIME";
|
||||||
String inflightDeltaInstantTime = "7";
|
String inflightDeltaInstantTime = "7";
|
||||||
String orphanDataFileName = FSUtils.makeDataFileName(invalidInstantId, 1, orphanFileId1);
|
String orphanDataFileName = FSUtils.makeDataFileName(invalidInstantId, TEST_WRITE_TOKEN, orphanFileId1);
|
||||||
new File(basePath + "/" + partitionPath + "/" + orphanDataFileName).createNewFile();
|
new File(basePath + "/" + partitionPath + "/" + orphanDataFileName).createNewFile();
|
||||||
String orphanLogFileName =
|
String orphanLogFileName =
|
||||||
FSUtils.makeLogFileName(orphanFileId2, HoodieLogFile.DELTA_EXTENSION, invalidInstantId, 0);
|
FSUtils.makeLogFileName(orphanFileId2, HoodieLogFile.DELTA_EXTENSION, invalidInstantId, 0, TEST_WRITE_TOKEN);
|
||||||
new File(basePath + "/" + partitionPath + "/" + orphanLogFileName).createNewFile();
|
new File(basePath + "/" + partitionPath + "/" + orphanLogFileName).createNewFile();
|
||||||
String inflightDataFileName = FSUtils.makeDataFileName(inflightDeltaInstantTime, 1, inflightFileId1);
|
String inflightDataFileName = FSUtils.makeDataFileName(inflightDeltaInstantTime, TEST_WRITE_TOKEN, inflightFileId1);
|
||||||
new File(basePath + "/" + partitionPath + "/" + inflightDataFileName).createNewFile();
|
new File(basePath + "/" + partitionPath + "/" + inflightDataFileName).createNewFile();
|
||||||
String inflightLogFileName =
|
String inflightLogFileName = FSUtils.makeLogFileName(inflightFileId2, HoodieLogFile.DELTA_EXTENSION,
|
||||||
FSUtils.makeLogFileName(inflightFileId2, HoodieLogFile.DELTA_EXTENSION, inflightDeltaInstantTime, 0);
|
inflightDeltaInstantTime, 0, TEST_WRITE_TOKEN);
|
||||||
new File(basePath + "/" + partitionPath + "/" + inflightLogFileName).createNewFile();
|
new File(basePath + "/" + partitionPath + "/" + inflightLogFileName).createNewFile();
|
||||||
// Mark instant as inflight
|
// Mark instant as inflight
|
||||||
commitTimeline.saveToInflight(new HoodieInstant(State.INFLIGHT, HoodieTimeline.DELTA_COMMIT_ACTION,
|
commitTimeline.saveToInflight(new HoodieInstant(State.INFLIGHT, HoodieTimeline.DELTA_COMMIT_ACTION,
|
||||||
@@ -558,7 +566,7 @@ public class HoodieTableFileSystemViewTest {
|
|||||||
|
|
||||||
// Only one commit, but is not safe
|
// Only one commit, but is not safe
|
||||||
String commitTime1 = "1";
|
String commitTime1 = "1";
|
||||||
String fileName1 = FSUtils.makeDataFileName(commitTime1, 1, fileId);
|
String fileName1 = FSUtils.makeDataFileName(commitTime1, TEST_WRITE_TOKEN, fileId);
|
||||||
new File(basePath + "/" + partitionPath + "/" + fileName1).createNewFile();
|
new File(basePath + "/" + partitionPath + "/" + fileName1).createNewFile();
|
||||||
refreshFsView();
|
refreshFsView();
|
||||||
assertFalse("No commit, should not find any data file",
|
assertFalse("No commit, should not find any data file",
|
||||||
@@ -576,7 +584,7 @@ public class HoodieTableFileSystemViewTest {
|
|||||||
|
|
||||||
// Do another commit, but not safe
|
// Do another commit, but not safe
|
||||||
String commitTime2 = "2";
|
String commitTime2 = "2";
|
||||||
String fileName2 = FSUtils.makeDataFileName(commitTime2, 1, fileId);
|
String fileName2 = FSUtils.makeDataFileName(commitTime2, TEST_WRITE_TOKEN, fileId);
|
||||||
new File(basePath + "/" + partitionPath + "/" + fileName2).createNewFile();
|
new File(basePath + "/" + partitionPath + "/" + fileName2).createNewFile();
|
||||||
refreshFsView();
|
refreshFsView();
|
||||||
assertEquals("", fileName1,
|
assertEquals("", fileName1,
|
||||||
@@ -610,21 +618,21 @@ public class HoodieTableFileSystemViewTest {
|
|||||||
String fileId3 = UUID.randomUUID().toString();
|
String fileId3 = UUID.randomUUID().toString();
|
||||||
String fileId4 = UUID.randomUUID().toString();
|
String fileId4 = UUID.randomUUID().toString();
|
||||||
|
|
||||||
new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime1, 1, fileId1)).createNewFile();
|
new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime1, TEST_WRITE_TOKEN, fileId1)).createNewFile();
|
||||||
new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime4, 1, fileId1)).createNewFile();
|
new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime4, TEST_WRITE_TOKEN, fileId1)).createNewFile();
|
||||||
new File(fullPartitionPath + FSUtils.makeLogFileName(fileId1, HoodieLogFile.DELTA_EXTENSION, commitTime4, 0))
|
new File(fullPartitionPath + FSUtils.makeLogFileName(fileId1, HoodieLogFile.DELTA_EXTENSION,
|
||||||
.createNewFile();
|
commitTime4, 0, TEST_WRITE_TOKEN)).createNewFile();
|
||||||
new File(fullPartitionPath + FSUtils.makeLogFileName(fileId1, HoodieLogFile.DELTA_EXTENSION, commitTime4, 1))
|
new File(fullPartitionPath + FSUtils.makeLogFileName(fileId1, HoodieLogFile.DELTA_EXTENSION,
|
||||||
.createNewFile();
|
commitTime4, 1, TEST_WRITE_TOKEN)).createNewFile();
|
||||||
new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime1, 1, fileId2)).createNewFile();
|
new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime1, TEST_WRITE_TOKEN, fileId2)).createNewFile();
|
||||||
new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime2, 1, fileId2)).createNewFile();
|
new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime2, TEST_WRITE_TOKEN, fileId2)).createNewFile();
|
||||||
new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime3, 1, fileId2)).createNewFile();
|
new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime3, TEST_WRITE_TOKEN, fileId2)).createNewFile();
|
||||||
new File(fullPartitionPath + FSUtils.makeLogFileName(fileId2, HoodieLogFile.DELTA_EXTENSION, commitTime3, 0))
|
new File(fullPartitionPath + FSUtils.makeLogFileName(fileId2,
|
||||||
.createNewFile();
|
HoodieLogFile.DELTA_EXTENSION, commitTime3, 0, TEST_WRITE_TOKEN)).createNewFile();
|
||||||
new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime3, 1, fileId3)).createNewFile();
|
new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime3, TEST_WRITE_TOKEN, fileId3)).createNewFile();
|
||||||
new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime4, 1, fileId3)).createNewFile();
|
new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime4, TEST_WRITE_TOKEN, fileId3)).createNewFile();
|
||||||
new File(fullPartitionPath + FSUtils.makeLogFileName(fileId4, HoodieLogFile.DELTA_EXTENSION, commitTime4, 0))
|
new File(fullPartitionPath + FSUtils.makeLogFileName(fileId4, HoodieLogFile.DELTA_EXTENSION,
|
||||||
.createNewFile();
|
commitTime4, 0, TEST_WRITE_TOKEN)).createNewFile();
|
||||||
|
|
||||||
new File(basePath + "/.hoodie/" + commitTime1 + ".commit").createNewFile();
|
new File(basePath + "/.hoodie/" + commitTime1 + ".commit").createNewFile();
|
||||||
new File(basePath + "/.hoodie/" + commitTime2 + ".commit").createNewFile();
|
new File(basePath + "/.hoodie/" + commitTime2 + ".commit").createNewFile();
|
||||||
@@ -653,9 +661,9 @@ public class HoodieTableFileSystemViewTest {
|
|||||||
for (HoodieDataFile status : dataFileList) {
|
for (HoodieDataFile status : dataFileList) {
|
||||||
filenames.add(status.getFileName());
|
filenames.add(status.getFileName());
|
||||||
}
|
}
|
||||||
assertTrue(filenames.contains(FSUtils.makeDataFileName(commitTime4, 1, fileId1)));
|
assertTrue(filenames.contains(FSUtils.makeDataFileName(commitTime4, TEST_WRITE_TOKEN, fileId1)));
|
||||||
assertTrue(filenames.contains(FSUtils.makeDataFileName(commitTime3, 1, fileId2)));
|
assertTrue(filenames.contains(FSUtils.makeDataFileName(commitTime3, TEST_WRITE_TOKEN, fileId2)));
|
||||||
assertTrue(filenames.contains(FSUtils.makeDataFileName(commitTime4, 1, fileId3)));
|
assertTrue(filenames.contains(FSUtils.makeDataFileName(commitTime4, TEST_WRITE_TOKEN, fileId3)));
|
||||||
|
|
||||||
filenames = Sets.newHashSet();
|
filenames = Sets.newHashSet();
|
||||||
List<HoodieLogFile> logFilesList = rtView.getLatestFileSlicesBeforeOrOn("2016/05/01", commitTime4)
|
List<HoodieLogFile> logFilesList = rtView.getLatestFileSlicesBeforeOrOn("2016/05/01", commitTime4)
|
||||||
@@ -665,10 +673,14 @@ public class HoodieTableFileSystemViewTest {
|
|||||||
for (HoodieLogFile logFile : logFilesList) {
|
for (HoodieLogFile logFile : logFilesList) {
|
||||||
filenames.add(logFile.getFileName());
|
filenames.add(logFile.getFileName());
|
||||||
}
|
}
|
||||||
assertTrue(filenames.contains(FSUtils.makeLogFileName(fileId1, HoodieLogFile.DELTA_EXTENSION, commitTime4, 0)));
|
assertTrue(filenames.contains(FSUtils.makeLogFileName(fileId1, HoodieLogFile.DELTA_EXTENSION,
|
||||||
assertTrue(filenames.contains(FSUtils.makeLogFileName(fileId1, HoodieLogFile.DELTA_EXTENSION, commitTime4, 1)));
|
commitTime4, 0, TEST_WRITE_TOKEN)));
|
||||||
assertTrue(filenames.contains(FSUtils.makeLogFileName(fileId2, HoodieLogFile.DELTA_EXTENSION, commitTime3, 0)));
|
assertTrue(filenames.contains(FSUtils.makeLogFileName(fileId1, HoodieLogFile.DELTA_EXTENSION,
|
||||||
assertTrue(filenames.contains(FSUtils.makeLogFileName(fileId4, HoodieLogFile.DELTA_EXTENSION, commitTime4, 0)));
|
commitTime4, 1, TEST_WRITE_TOKEN)));
|
||||||
|
assertTrue(filenames.contains(FSUtils.makeLogFileName(fileId2, HoodieLogFile.DELTA_EXTENSION,
|
||||||
|
commitTime3, 0, TEST_WRITE_TOKEN)));
|
||||||
|
assertTrue(filenames.contains(FSUtils.makeLogFileName(fileId4, HoodieLogFile.DELTA_EXTENSION,
|
||||||
|
commitTime4, 0, TEST_WRITE_TOKEN)));
|
||||||
|
|
||||||
// Reset the max commit time
|
// Reset the max commit time
|
||||||
List<HoodieDataFile> dataFiles = roView.getLatestDataFilesBeforeOrOn("2016/05/01", commitTime3)
|
List<HoodieDataFile> dataFiles = roView.getLatestDataFilesBeforeOrOn("2016/05/01", commitTime3)
|
||||||
@@ -679,12 +691,12 @@ public class HoodieTableFileSystemViewTest {
|
|||||||
}
|
}
|
||||||
if (!isLatestFileSliceOnly) {
|
if (!isLatestFileSliceOnly) {
|
||||||
assertEquals(3, dataFiles.size());
|
assertEquals(3, dataFiles.size());
|
||||||
assertTrue(filenames.contains(FSUtils.makeDataFileName(commitTime1, 1, fileId1)));
|
assertTrue(filenames.contains(FSUtils.makeDataFileName(commitTime1, TEST_WRITE_TOKEN, fileId1)));
|
||||||
assertTrue(filenames.contains(FSUtils.makeDataFileName(commitTime3, 1, fileId2)));
|
assertTrue(filenames.contains(FSUtils.makeDataFileName(commitTime3, TEST_WRITE_TOKEN, fileId2)));
|
||||||
assertTrue(filenames.contains(FSUtils.makeDataFileName(commitTime3, 1, fileId3)));
|
assertTrue(filenames.contains(FSUtils.makeDataFileName(commitTime3, TEST_WRITE_TOKEN, fileId3)));
|
||||||
} else {
|
} else {
|
||||||
assertEquals(1, dataFiles.size());
|
assertEquals(1, dataFiles.size());
|
||||||
assertTrue(filenames.contains(FSUtils.makeDataFileName(commitTime3, 1, fileId2)));
|
assertTrue(filenames.contains(FSUtils.makeDataFileName(commitTime3, TEST_WRITE_TOKEN, fileId2)));
|
||||||
}
|
}
|
||||||
|
|
||||||
logFilesList =
|
logFilesList =
|
||||||
@@ -692,7 +704,7 @@ public class HoodieTableFileSystemViewTest {
|
|||||||
.flatMap(logFileList -> logFileList).collect(Collectors.toList());
|
.flatMap(logFileList -> logFileList).collect(Collectors.toList());
|
||||||
assertEquals(logFilesList.size(), 1);
|
assertEquals(logFilesList.size(), 1);
|
||||||
assertTrue(logFilesList.get(0).getFileName()
|
assertTrue(logFilesList.get(0).getFileName()
|
||||||
.equals(FSUtils.makeLogFileName(fileId2, HoodieLogFile.DELTA_EXTENSION, commitTime3, 0)));
|
.equals(FSUtils.makeLogFileName(fileId2, HoodieLogFile.DELTA_EXTENSION, commitTime3, 0, TEST_WRITE_TOKEN)));
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -713,13 +725,13 @@ public class HoodieTableFileSystemViewTest {
|
|||||||
String fileId2 = UUID.randomUUID().toString();
|
String fileId2 = UUID.randomUUID().toString();
|
||||||
String fileId3 = UUID.randomUUID().toString();
|
String fileId3 = UUID.randomUUID().toString();
|
||||||
|
|
||||||
new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime1, 1, fileId1)).createNewFile();
|
new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime1, TEST_WRITE_TOKEN, fileId1)).createNewFile();
|
||||||
new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime4, 1, fileId1)).createNewFile();
|
new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime4, TEST_WRITE_TOKEN, fileId1)).createNewFile();
|
||||||
new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime1, 1, fileId2)).createNewFile();
|
new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime1, TEST_WRITE_TOKEN, fileId2)).createNewFile();
|
||||||
new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime2, 1, fileId2)).createNewFile();
|
new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime2, TEST_WRITE_TOKEN, fileId2)).createNewFile();
|
||||||
new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime3, 1, fileId2)).createNewFile();
|
new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime3, TEST_WRITE_TOKEN, fileId2)).createNewFile();
|
||||||
new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime3, 1, fileId3)).createNewFile();
|
new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime3, TEST_WRITE_TOKEN, fileId3)).createNewFile();
|
||||||
new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime4, 1, fileId3)).createNewFile();
|
new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime4, TEST_WRITE_TOKEN, fileId3)).createNewFile();
|
||||||
|
|
||||||
new File(basePath + "/.hoodie/" + commitTime1 + ".commit").createNewFile();
|
new File(basePath + "/.hoodie/" + commitTime1 + ".commit").createNewFile();
|
||||||
new File(basePath + "/.hoodie/" + commitTime2 + ".commit").createNewFile();
|
new File(basePath + "/.hoodie/" + commitTime2 + ".commit").createNewFile();
|
||||||
@@ -744,22 +756,22 @@ public class HoodieTableFileSystemViewTest {
|
|||||||
Set<String> expFileNames = new HashSet<>();
|
Set<String> expFileNames = new HashSet<>();
|
||||||
if (fileId.equals(fileId1)) {
|
if (fileId.equals(fileId1)) {
|
||||||
if (!isLatestFileSliceOnly) {
|
if (!isLatestFileSliceOnly) {
|
||||||
expFileNames.add(FSUtils.makeDataFileName(commitTime1, 1, fileId1));
|
expFileNames.add(FSUtils.makeDataFileName(commitTime1, TEST_WRITE_TOKEN, fileId1));
|
||||||
}
|
}
|
||||||
expFileNames.add(FSUtils.makeDataFileName(commitTime4, 1, fileId1));
|
expFileNames.add(FSUtils.makeDataFileName(commitTime4, TEST_WRITE_TOKEN, fileId1));
|
||||||
assertEquals(expFileNames, filenames);
|
assertEquals(expFileNames, filenames);
|
||||||
} else if (fileId.equals(fileId2)) {
|
} else if (fileId.equals(fileId2)) {
|
||||||
if (!isLatestFileSliceOnly) {
|
if (!isLatestFileSliceOnly) {
|
||||||
expFileNames.add(FSUtils.makeDataFileName(commitTime1, 1, fileId2));
|
expFileNames.add(FSUtils.makeDataFileName(commitTime1, TEST_WRITE_TOKEN, fileId2));
|
||||||
expFileNames.add(FSUtils.makeDataFileName(commitTime2, 1, fileId2));
|
expFileNames.add(FSUtils.makeDataFileName(commitTime2, TEST_WRITE_TOKEN, fileId2));
|
||||||
}
|
}
|
||||||
expFileNames.add(FSUtils.makeDataFileName(commitTime3, 1, fileId2));
|
expFileNames.add(FSUtils.makeDataFileName(commitTime3, TEST_WRITE_TOKEN, fileId2));
|
||||||
assertEquals(expFileNames, filenames);
|
assertEquals(expFileNames, filenames);
|
||||||
} else {
|
} else {
|
||||||
if (!isLatestFileSliceOnly) {
|
if (!isLatestFileSliceOnly) {
|
||||||
expFileNames.add(FSUtils.makeDataFileName(commitTime3, 1, fileId3));
|
expFileNames.add(FSUtils.makeDataFileName(commitTime3, TEST_WRITE_TOKEN, fileId3));
|
||||||
}
|
}
|
||||||
expFileNames.add(FSUtils.makeDataFileName(commitTime4, 1, fileId3));
|
expFileNames.add(FSUtils.makeDataFileName(commitTime4, TEST_WRITE_TOKEN, fileId3));
|
||||||
assertEquals(expFileNames, filenames);
|
assertEquals(expFileNames, filenames);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -782,19 +794,19 @@ public class HoodieTableFileSystemViewTest {
|
|||||||
String fileId2 = UUID.randomUUID().toString();
|
String fileId2 = UUID.randomUUID().toString();
|
||||||
String fileId3 = UUID.randomUUID().toString();
|
String fileId3 = UUID.randomUUID().toString();
|
||||||
|
|
||||||
new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime1, 1, fileId1)).createNewFile();
|
new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime1, TEST_WRITE_TOKEN, fileId1)).createNewFile();
|
||||||
new File(fullPartitionPath + FSUtils.makeLogFileName(fileId1, HoodieLogFile.DELTA_EXTENSION, commitTime1, 0))
|
new File(fullPartitionPath + FSUtils.makeLogFileName(fileId1, HoodieLogFile.DELTA_EXTENSION,
|
||||||
.createNewFile();
|
commitTime1, 0, TEST_WRITE_TOKEN)).createNewFile();
|
||||||
new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime3, 1, fileId1)).createNewFile();
|
new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime3, TEST_WRITE_TOKEN, fileId1)).createNewFile();
|
||||||
|
|
||||||
new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime1, 1, fileId2)).createNewFile();
|
new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime1, TEST_WRITE_TOKEN, fileId2)).createNewFile();
|
||||||
new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime2, 1, fileId2)).createNewFile();
|
new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime2, TEST_WRITE_TOKEN, fileId2)).createNewFile();
|
||||||
new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime3, 1, fileId2)).createNewFile();
|
new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime3, TEST_WRITE_TOKEN, fileId2)).createNewFile();
|
||||||
new File(fullPartitionPath + FSUtils.makeLogFileName(fileId2, HoodieLogFile.DELTA_EXTENSION, commitTime3, 0))
|
new File(fullPartitionPath + FSUtils.makeLogFileName(fileId2, HoodieLogFile.DELTA_EXTENSION,
|
||||||
.createNewFile();
|
commitTime3, 0, TEST_WRITE_TOKEN)).createNewFile();
|
||||||
|
|
||||||
new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime3, 1, fileId3)).createNewFile();
|
new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime3, TEST_WRITE_TOKEN, fileId3)).createNewFile();
|
||||||
new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime4, 1, fileId3)).createNewFile();
|
new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime4, TEST_WRITE_TOKEN, fileId3)).createNewFile();
|
||||||
|
|
||||||
new File(basePath + "/.hoodie/" + commitTime1 + ".commit").createNewFile();
|
new File(basePath + "/.hoodie/" + commitTime1 + ".commit").createNewFile();
|
||||||
new File(basePath + "/.hoodie/" + commitTime2 + ".commit").createNewFile();
|
new File(basePath + "/.hoodie/" + commitTime2 + ".commit").createNewFile();
|
||||||
@@ -817,10 +829,10 @@ public class HoodieTableFileSystemViewTest {
|
|||||||
filenames.add(status.getFileName());
|
filenames.add(status.getFileName());
|
||||||
}
|
}
|
||||||
|
|
||||||
assertTrue(filenames.contains(FSUtils.makeDataFileName(commitTime3, 1, fileId1)));
|
assertTrue(filenames.contains(FSUtils.makeDataFileName(commitTime3, TEST_WRITE_TOKEN, fileId1)));
|
||||||
assertTrue(filenames.contains(FSUtils.makeDataFileName(commitTime3, 1, fileId2)));
|
assertTrue(filenames.contains(FSUtils.makeDataFileName(commitTime3, TEST_WRITE_TOKEN, fileId2)));
|
||||||
if (!isLatestFileSliceOnly) {
|
if (!isLatestFileSliceOnly) {
|
||||||
assertTrue(filenames.contains(FSUtils.makeDataFileName(commitTime3, 1, fileId3)));
|
assertTrue(filenames.contains(FSUtils.makeDataFileName(commitTime3, TEST_WRITE_TOKEN, fileId3)));
|
||||||
}
|
}
|
||||||
|
|
||||||
List<FileSlice> slices = rtView.getLatestFileSliceInRange(Lists.newArrayList(commitTime3, commitTime4))
|
List<FileSlice> slices = rtView.getLatestFileSliceInRange(Lists.newArrayList(commitTime3, commitTime4))
|
||||||
@@ -861,13 +873,13 @@ public class HoodieTableFileSystemViewTest {
|
|||||||
String fileId2 = UUID.randomUUID().toString();
|
String fileId2 = UUID.randomUUID().toString();
|
||||||
String fileId3 = UUID.randomUUID().toString();
|
String fileId3 = UUID.randomUUID().toString();
|
||||||
|
|
||||||
new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime1, 1, fileId1)).createNewFile();
|
new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime1, TEST_WRITE_TOKEN, fileId1)).createNewFile();
|
||||||
new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime4, 1, fileId1)).createNewFile();
|
new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime4, TEST_WRITE_TOKEN, fileId1)).createNewFile();
|
||||||
new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime1, 1, fileId2)).createNewFile();
|
new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime1, TEST_WRITE_TOKEN, fileId2)).createNewFile();
|
||||||
new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime2, 1, fileId2)).createNewFile();
|
new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime2, TEST_WRITE_TOKEN, fileId2)).createNewFile();
|
||||||
new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime3, 1, fileId2)).createNewFile();
|
new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime3, TEST_WRITE_TOKEN, fileId2)).createNewFile();
|
||||||
new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime3, 1, fileId3)).createNewFile();
|
new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime3, TEST_WRITE_TOKEN, fileId3)).createNewFile();
|
||||||
new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime4, 1, fileId3)).createNewFile();
|
new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime4, TEST_WRITE_TOKEN, fileId3)).createNewFile();
|
||||||
|
|
||||||
new File(basePath + "/.hoodie/" + commitTime1 + ".commit").createNewFile();
|
new File(basePath + "/.hoodie/" + commitTime1 + ".commit").createNewFile();
|
||||||
new File(basePath + "/.hoodie/" + commitTime2 + ".commit").createNewFile();
|
new File(basePath + "/.hoodie/" + commitTime2 + ".commit").createNewFile();
|
||||||
@@ -887,8 +899,8 @@ public class HoodieTableFileSystemViewTest {
|
|||||||
for (HoodieDataFile status : dataFiles) {
|
for (HoodieDataFile status : dataFiles) {
|
||||||
filenames.add(status.getFileName());
|
filenames.add(status.getFileName());
|
||||||
}
|
}
|
||||||
assertTrue(filenames.contains(FSUtils.makeDataFileName(commitTime1, 1, fileId1)));
|
assertTrue(filenames.contains(FSUtils.makeDataFileName(commitTime1, TEST_WRITE_TOKEN, fileId1)));
|
||||||
assertTrue(filenames.contains(FSUtils.makeDataFileName(commitTime2, 1, fileId2)));
|
assertTrue(filenames.contains(FSUtils.makeDataFileName(commitTime2, TEST_WRITE_TOKEN, fileId2)));
|
||||||
} else {
|
} else {
|
||||||
assertEquals(0, dataFiles.size());
|
assertEquals(0, dataFiles.size());
|
||||||
}
|
}
|
||||||
@@ -912,28 +924,31 @@ public class HoodieTableFileSystemViewTest {
|
|||||||
String fileId2 = UUID.randomUUID().toString();
|
String fileId2 = UUID.randomUUID().toString();
|
||||||
String fileId3 = UUID.randomUUID().toString();
|
String fileId3 = UUID.randomUUID().toString();
|
||||||
|
|
||||||
new File(fullPartitionPath + "/" + FSUtils.makeDataFileName(commitTime1, 1, fileId1)).createNewFile();
|
new File(fullPartitionPath + "/" + FSUtils.makeDataFileName(commitTime1,
|
||||||
|
TEST_WRITE_TOKEN, fileId1)).createNewFile();
|
||||||
new File(fullPartitionPath + "/"
|
new File(fullPartitionPath + "/"
|
||||||
+ FSUtils.makeLogFileName(fileId1, HoodieLogFile.DELTA_EXTENSION, commitTime1, 0)).createNewFile();
|
+ FSUtils.makeLogFileName(fileId1, HoodieLogFile.DELTA_EXTENSION,
|
||||||
|
commitTime1, 0, TEST_WRITE_TOKEN)).createNewFile();
|
||||||
new File(fullPartitionPath + "/"
|
new File(fullPartitionPath + "/"
|
||||||
+ FSUtils.makeDataFileName(commitTime4, 1, fileId1)).createNewFile();
|
+ FSUtils.makeDataFileName(commitTime4, TEST_WRITE_TOKEN, fileId1)).createNewFile();
|
||||||
new File(fullPartitionPath + "/"
|
new File(fullPartitionPath + "/"
|
||||||
+ FSUtils.makeLogFileName(fileId1, HoodieLogFile.DELTA_EXTENSION, commitTime4, 0)).createNewFile();
|
+ FSUtils.makeLogFileName(fileId1, HoodieLogFile.DELTA_EXTENSION,
|
||||||
|
commitTime4, 0, TEST_WRITE_TOKEN)).createNewFile();
|
||||||
|
|
||||||
new File(fullPartitionPath + "/"
|
new File(fullPartitionPath + "/"
|
||||||
+ FSUtils.makeDataFileName(commitTime1, 1, fileId2)).createNewFile();
|
+ FSUtils.makeDataFileName(commitTime1, TEST_WRITE_TOKEN, fileId2)).createNewFile();
|
||||||
new File(fullPartitionPath + "/"
|
new File(fullPartitionPath + "/"
|
||||||
+ FSUtils.makeDataFileName(commitTime2, 1, fileId2)).createNewFile();
|
+ FSUtils.makeDataFileName(commitTime2, TEST_WRITE_TOKEN, fileId2)).createNewFile();
|
||||||
new File(fullPartitionPath + "/"
|
new File(fullPartitionPath + "/"
|
||||||
+ FSUtils.makeLogFileName(fileId2, HoodieLogFile.DELTA_EXTENSION, commitTime2, 0))
|
+ FSUtils.makeLogFileName(fileId2, HoodieLogFile.DELTA_EXTENSION, commitTime2, 0, TEST_WRITE_TOKEN))
|
||||||
.createNewFile();
|
.createNewFile();
|
||||||
new File(fullPartitionPath + "/"
|
new File(fullPartitionPath + "/"
|
||||||
+ FSUtils.makeDataFileName(commitTime3, 1, fileId2)).createNewFile();
|
+ FSUtils.makeDataFileName(commitTime3, TEST_WRITE_TOKEN, fileId2)).createNewFile();
|
||||||
|
|
||||||
new File(fullPartitionPath + "/"
|
new File(fullPartitionPath + "/"
|
||||||
+ FSUtils.makeDataFileName(commitTime3, 1, fileId3)).createNewFile();
|
+ FSUtils.makeDataFileName(commitTime3, TEST_WRITE_TOKEN, fileId3)).createNewFile();
|
||||||
new File(fullPartitionPath + "/"
|
new File(fullPartitionPath + "/"
|
||||||
+ FSUtils.makeDataFileName(commitTime4, 1, fileId3)).createNewFile();
|
+ FSUtils.makeDataFileName(commitTime4, TEST_WRITE_TOKEN, fileId3)).createNewFile();
|
||||||
|
|
||||||
new File(basePath + "/.hoodie/" + commitTime1 + ".commit").createNewFile();
|
new File(basePath + "/.hoodie/" + commitTime1 + ".commit").createNewFile();
|
||||||
new File(basePath + "/.hoodie/" + commitTime2 + ".commit").createNewFile();
|
new File(basePath + "/.hoodie/" + commitTime2 + ".commit").createNewFile();
|
||||||
@@ -979,9 +994,9 @@ public class HoodieTableFileSystemViewTest {
|
|||||||
for (HoodieDataFile status : statuses1) {
|
for (HoodieDataFile status : statuses1) {
|
||||||
filenames.add(status.getFileName());
|
filenames.add(status.getFileName());
|
||||||
}
|
}
|
||||||
assertTrue(filenames.contains(FSUtils.makeDataFileName(commitTime4, 1, fileId1)));
|
assertTrue(filenames.contains(FSUtils.makeDataFileName(commitTime4, TEST_WRITE_TOKEN, fileId1)));
|
||||||
assertTrue(filenames.contains(FSUtils.makeDataFileName(commitTime3, 1, fileId2)));
|
assertTrue(filenames.contains(FSUtils.makeDataFileName(commitTime3, TEST_WRITE_TOKEN, fileId2)));
|
||||||
assertTrue(filenames.contains(FSUtils.makeDataFileName(commitTime4, 1, fileId3)));
|
assertTrue(filenames.contains(FSUtils.makeDataFileName(commitTime4, TEST_WRITE_TOKEN, fileId3)));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@@ -1002,16 +1017,17 @@ public class HoodieTableFileSystemViewTest {
|
|||||||
String deltaInstantTime2 = "3";
|
String deltaInstantTime2 = "3";
|
||||||
String fileId = UUID.randomUUID().toString();
|
String fileId = UUID.randomUUID().toString();
|
||||||
|
|
||||||
String dataFileName = FSUtils.makeDataFileName(instantTime1, 1, fileId);
|
String dataFileName = FSUtils.makeDataFileName(instantTime1, TEST_WRITE_TOKEN, fileId);
|
||||||
new File(fullPartitionPath1 + dataFileName).createNewFile();
|
new File(fullPartitionPath1 + dataFileName).createNewFile();
|
||||||
|
|
||||||
String fileName1 = FSUtils.makeLogFileName(fileId, HoodieLogFile.DELTA_EXTENSION, instantTime1, 0);
|
String fileName1 = FSUtils.makeLogFileName(fileId, HoodieLogFile.DELTA_EXTENSION,
|
||||||
|
instantTime1, 0, TEST_WRITE_TOKEN);
|
||||||
new File(fullPartitionPath1 + fileName1)
|
new File(fullPartitionPath1 + fileName1)
|
||||||
.createNewFile();
|
.createNewFile();
|
||||||
new File(fullPartitionPath2 + FSUtils.makeDataFileName(instantTime1, 1, fileId)).createNewFile();
|
new File(fullPartitionPath2 + FSUtils.makeDataFileName(instantTime1, TEST_WRITE_TOKEN, fileId)).createNewFile();
|
||||||
new File(fullPartitionPath2 + fileName1)
|
new File(fullPartitionPath2 + fileName1)
|
||||||
.createNewFile();
|
.createNewFile();
|
||||||
new File(fullPartitionPath3 + FSUtils.makeDataFileName(instantTime1, 1, fileId)).createNewFile();
|
new File(fullPartitionPath3 + FSUtils.makeDataFileName(instantTime1, TEST_WRITE_TOKEN, fileId)).createNewFile();
|
||||||
new File(fullPartitionPath3 + fileName1)
|
new File(fullPartitionPath3 + fileName1)
|
||||||
.createNewFile();
|
.createNewFile();
|
||||||
|
|
||||||
@@ -1052,7 +1068,7 @@ public class HoodieTableFileSystemViewTest {
|
|||||||
partitionFileSlicesPairs.add(Pair.of(partitionPath3, fileSlices.get(0)));
|
partitionFileSlicesPairs.add(Pair.of(partitionPath3, fileSlices.get(0)));
|
||||||
|
|
||||||
String compactionRequestedTime = "2";
|
String compactionRequestedTime = "2";
|
||||||
String compactDataFileName = FSUtils.makeDataFileName(compactionRequestedTime, 1, fileId);
|
String compactDataFileName = FSUtils.makeDataFileName(compactionRequestedTime, TEST_WRITE_TOKEN, fileId);
|
||||||
HoodieCompactionPlan compactionPlan = CompactionUtils.buildFromFileSlices(partitionFileSlicesPairs,
|
HoodieCompactionPlan compactionPlan = CompactionUtils.buildFromFileSlices(partitionFileSlicesPairs,
|
||||||
Optional.empty(), Optional.empty());
|
Optional.empty(), Optional.empty());
|
||||||
|
|
||||||
@@ -1072,8 +1088,10 @@ public class HoodieTableFileSystemViewTest {
|
|||||||
String deltaInstantTime5 = "6";
|
String deltaInstantTime5 = "6";
|
||||||
List<String> allInstantTimes = Arrays.asList(instantTime1, deltaInstantTime1, deltaInstantTime2,
|
List<String> allInstantTimes = Arrays.asList(instantTime1, deltaInstantTime1, deltaInstantTime2,
|
||||||
compactionRequestedTime, deltaInstantTime4, deltaInstantTime5);
|
compactionRequestedTime, deltaInstantTime4, deltaInstantTime5);
|
||||||
String fileName3 = FSUtils.makeLogFileName(fileId, HoodieLogFile.DELTA_EXTENSION, compactionRequestedTime, 0);
|
String fileName3 = FSUtils.makeLogFileName(fileId, HoodieLogFile.DELTA_EXTENSION,
|
||||||
String fileName4 = FSUtils.makeLogFileName(fileId, HoodieLogFile.DELTA_EXTENSION, compactionRequestedTime, 1);
|
compactionRequestedTime, 0, TEST_WRITE_TOKEN);
|
||||||
|
String fileName4 = FSUtils.makeLogFileName(fileId, HoodieLogFile.DELTA_EXTENSION,
|
||||||
|
compactionRequestedTime, 1, TEST_WRITE_TOKEN);
|
||||||
new File(basePath + "/" + partitionPath1 + "/" + fileName3).createNewFile();
|
new File(basePath + "/" + partitionPath1 + "/" + fileName3).createNewFile();
|
||||||
new File(basePath + "/" + partitionPath1 + "/" + fileName4).createNewFile();
|
new File(basePath + "/" + partitionPath1 + "/" + fileName4).createNewFile();
|
||||||
new File(basePath + "/" + partitionPath2 + "/" + fileName3).createNewFile();
|
new File(basePath + "/" + partitionPath2 + "/" + fileName3).createNewFile();
|
||||||
|
|||||||
@@ -69,6 +69,8 @@ import org.junit.rules.TemporaryFolder;
|
|||||||
|
|
||||||
public class IncrementalFSViewSyncTest {
|
public class IncrementalFSViewSyncTest {
|
||||||
|
|
||||||
|
private static String TEST_WRITE_TOKEN = "1-0-1";
|
||||||
|
|
||||||
protected HoodieTableMetaClient metaClient;
|
protected HoodieTableMetaClient metaClient;
|
||||||
protected String basePath;
|
protected String basePath;
|
||||||
|
|
||||||
@@ -756,8 +758,8 @@ public class IncrementalFSViewSyncTest {
|
|||||||
return fileIdsPerPartition.stream().map(f -> {
|
return fileIdsPerPartition.stream().map(f -> {
|
||||||
try {
|
try {
|
||||||
File file = new File(basePath + "/" + p + "/"
|
File file = new File(basePath + "/" + p + "/"
|
||||||
+ (deltaCommit ? FSUtils.makeLogFileName(f, ".log", baseInstant, Integer.parseInt(instant)) :
|
+ (deltaCommit ? FSUtils.makeLogFileName(f, ".log", baseInstant,
|
||||||
FSUtils.makeDataFileName(instant, 0, f)));
|
Integer.parseInt(instant), TEST_WRITE_TOKEN) : FSUtils.makeDataFileName(instant, TEST_WRITE_TOKEN, f)));
|
||||||
file.createNewFile();
|
file.createNewFile();
|
||||||
HoodieWriteStat w = new HoodieWriteStat();
|
HoodieWriteStat w = new HoodieWriteStat();
|
||||||
w.setFileId(f);
|
w.setFileId(f);
|
||||||
|
|||||||
@@ -49,6 +49,8 @@ import org.junit.Assert;
|
|||||||
|
|
||||||
public class CompactionTestUtils {
|
public class CompactionTestUtils {
|
||||||
|
|
||||||
|
private static String TEST_WRITE_TOKEN = "1-0-1";
|
||||||
|
|
||||||
public static Map<HoodieFileGroupId, Pair<String, HoodieCompactionOperation>> setupAndValidateCompactionOperations(
|
public static Map<HoodieFileGroupId, Pair<String, HoodieCompactionOperation>> setupAndValidateCompactionOperations(
|
||||||
HoodieTableMetaClient metaClient, boolean inflight,
|
HoodieTableMetaClient metaClient, boolean inflight,
|
||||||
int numEntriesInPlan1, int numEntriesInPlan2,
|
int numEntriesInPlan1, int numEntriesInPlan2,
|
||||||
@@ -151,7 +153,7 @@ public class CompactionTestUtils {
|
|||||||
FileSlice slice = new FileSlice(DEFAULT_PARTITION_PATHS[0], instantId, fileId);
|
FileSlice slice = new FileSlice(DEFAULT_PARTITION_PATHS[0], instantId, fileId);
|
||||||
if (createDataFile) {
|
if (createDataFile) {
|
||||||
slice.setDataFile(new TestHoodieDataFile(metaClient.getBasePath() + "/" + DEFAULT_PARTITION_PATHS[0]
|
slice.setDataFile(new TestHoodieDataFile(metaClient.getBasePath() + "/" + DEFAULT_PARTITION_PATHS[0]
|
||||||
+ "/" + FSUtils.makeDataFileName(instantId, 1, fileId)));
|
+ "/" + FSUtils.makeDataFileName(instantId, TEST_WRITE_TOKEN, fileId)));
|
||||||
}
|
}
|
||||||
String logFilePath1 = HoodieTestUtils
|
String logFilePath1 = HoodieTestUtils
|
||||||
.getLogFilePath(metaClient.getBasePath(), DEFAULT_PARTITION_PATHS[0], instantId, fileId,
|
.getLogFilePath(metaClient.getBasePath(), DEFAULT_PARTITION_PATHS[0], instantId, fileId,
|
||||||
|
|||||||
@@ -50,6 +50,8 @@ import org.junit.rules.TemporaryFolder;
|
|||||||
|
|
||||||
public class TestCompactionUtils {
|
public class TestCompactionUtils {
|
||||||
|
|
||||||
|
private static String TEST_WRITE_TOKEN = "1-0-1";
|
||||||
|
|
||||||
private static final Map<String, Double> metrics =
|
private static final Map<String, Double> metrics =
|
||||||
new ImmutableMap.Builder<String, Double>()
|
new ImmutableMap.Builder<String, Double>()
|
||||||
.put("key1", 1.0)
|
.put("key1", 1.0)
|
||||||
@@ -85,9 +87,9 @@ public class TestCompactionUtils {
|
|||||||
//File Slice with no data-file but log files present
|
//File Slice with no data-file but log files present
|
||||||
FileSlice noDataFileSlice = new FileSlice(DEFAULT_PARTITION_PATHS[0],"000", "noData1");
|
FileSlice noDataFileSlice = new FileSlice(DEFAULT_PARTITION_PATHS[0],"000", "noData1");
|
||||||
noDataFileSlice.addLogFile(new HoodieLogFile(new Path(
|
noDataFileSlice.addLogFile(new HoodieLogFile(new Path(
|
||||||
FSUtils.makeLogFileName("noData1", ".log", "000", 1))));
|
FSUtils.makeLogFileName("noData1", ".log", "000", 1, TEST_WRITE_TOKEN))));
|
||||||
noDataFileSlice.addLogFile(new HoodieLogFile(new Path(
|
noDataFileSlice.addLogFile(new HoodieLogFile(new Path(
|
||||||
FSUtils.makeLogFileName("noData1", ".log", "000", 2))));
|
FSUtils.makeLogFileName("noData1", ".log", "000", 2, TEST_WRITE_TOKEN))));
|
||||||
op = CompactionUtils.buildFromFileSlice(
|
op = CompactionUtils.buildFromFileSlice(
|
||||||
DEFAULT_PARTITION_PATHS[0], noDataFileSlice, Optional.of(metricsCaptureFn));
|
DEFAULT_PARTITION_PATHS[0], noDataFileSlice, Optional.of(metricsCaptureFn));
|
||||||
testFileSliceCompactionOpEquality(noDataFileSlice, op, DEFAULT_PARTITION_PATHS[0]);
|
testFileSliceCompactionOpEquality(noDataFileSlice, op, DEFAULT_PARTITION_PATHS[0]);
|
||||||
@@ -96,9 +98,9 @@ public class TestCompactionUtils {
|
|||||||
FileSlice fileSlice = new FileSlice(DEFAULT_PARTITION_PATHS[0],"000", "noData1");
|
FileSlice fileSlice = new FileSlice(DEFAULT_PARTITION_PATHS[0],"000", "noData1");
|
||||||
fileSlice.setDataFile(new TestHoodieDataFile("/tmp/noLog_1_000.parquet"));
|
fileSlice.setDataFile(new TestHoodieDataFile("/tmp/noLog_1_000.parquet"));
|
||||||
fileSlice.addLogFile(new HoodieLogFile(new Path(
|
fileSlice.addLogFile(new HoodieLogFile(new Path(
|
||||||
FSUtils.makeLogFileName("noData1", ".log", "000", 1))));
|
FSUtils.makeLogFileName("noData1", ".log", "000", 1, TEST_WRITE_TOKEN))));
|
||||||
fileSlice.addLogFile(new HoodieLogFile(new Path(
|
fileSlice.addLogFile(new HoodieLogFile(new Path(
|
||||||
FSUtils.makeLogFileName("noData1", ".log", "000", 2))));
|
FSUtils.makeLogFileName("noData1", ".log", "000", 2, TEST_WRITE_TOKEN))));
|
||||||
op = CompactionUtils.buildFromFileSlice(
|
op = CompactionUtils.buildFromFileSlice(
|
||||||
DEFAULT_PARTITION_PATHS[0], fileSlice, Optional.of(metricsCaptureFn));
|
DEFAULT_PARTITION_PATHS[0], fileSlice, Optional.of(metricsCaptureFn));
|
||||||
testFileSliceCompactionOpEquality(fileSlice, op, DEFAULT_PARTITION_PATHS[0]);
|
testFileSliceCompactionOpEquality(fileSlice, op, DEFAULT_PARTITION_PATHS[0]);
|
||||||
@@ -112,16 +114,16 @@ public class TestCompactionUtils {
|
|||||||
FileSlice fileSlice = new FileSlice(DEFAULT_PARTITION_PATHS[0],"000", "noData1");
|
FileSlice fileSlice = new FileSlice(DEFAULT_PARTITION_PATHS[0],"000", "noData1");
|
||||||
fileSlice.setDataFile(new TestHoodieDataFile("/tmp/noLog_1_000.parquet"));
|
fileSlice.setDataFile(new TestHoodieDataFile("/tmp/noLog_1_000.parquet"));
|
||||||
fileSlice.addLogFile(new HoodieLogFile(new Path(
|
fileSlice.addLogFile(new HoodieLogFile(new Path(
|
||||||
FSUtils.makeLogFileName("noData1", ".log", "000", 1))));
|
FSUtils.makeLogFileName("noData1", ".log", "000", 1, TEST_WRITE_TOKEN))));
|
||||||
fileSlice.addLogFile(new HoodieLogFile(new Path(
|
fileSlice.addLogFile(new HoodieLogFile(new Path(
|
||||||
FSUtils.makeLogFileName("noData1", ".log", "000", 2))));
|
FSUtils.makeLogFileName("noData1", ".log", "000", 2, TEST_WRITE_TOKEN))));
|
||||||
FileSlice noLogFileSlice = new FileSlice(DEFAULT_PARTITION_PATHS[0],"000", "noLog1");
|
FileSlice noLogFileSlice = new FileSlice(DEFAULT_PARTITION_PATHS[0],"000", "noLog1");
|
||||||
noLogFileSlice.setDataFile(new TestHoodieDataFile("/tmp/noLog_1_000.parquet"));
|
noLogFileSlice.setDataFile(new TestHoodieDataFile("/tmp/noLog_1_000.parquet"));
|
||||||
FileSlice noDataFileSlice = new FileSlice(DEFAULT_PARTITION_PATHS[0],"000", "noData1");
|
FileSlice noDataFileSlice = new FileSlice(DEFAULT_PARTITION_PATHS[0],"000", "noData1");
|
||||||
noDataFileSlice.addLogFile(new HoodieLogFile(new Path(
|
noDataFileSlice.addLogFile(new HoodieLogFile(new Path(
|
||||||
FSUtils.makeLogFileName("noData1", ".log", "000", 1))));
|
FSUtils.makeLogFileName("noData1", ".log", "000", 1, TEST_WRITE_TOKEN))));
|
||||||
noDataFileSlice.addLogFile(new HoodieLogFile(new Path(
|
noDataFileSlice.addLogFile(new HoodieLogFile(new Path(
|
||||||
FSUtils.makeLogFileName("noData1", ".log", "000", 2))));
|
FSUtils.makeLogFileName("noData1", ".log", "000", 2, TEST_WRITE_TOKEN))));
|
||||||
List<FileSlice> fileSliceList = Arrays.asList(emptyFileSlice, noDataFileSlice, fileSlice, noLogFileSlice);
|
List<FileSlice> fileSliceList = Arrays.asList(emptyFileSlice, noDataFileSlice, fileSlice, noLogFileSlice);
|
||||||
List<Pair<String, FileSlice>> input = fileSliceList.stream().map(f -> Pair.of(DEFAULT_PARTITION_PATHS[0], f))
|
List<Pair<String, FileSlice>> input = fileSliceList.stream().map(f -> Pair.of(DEFAULT_PARTITION_PATHS[0], f))
|
||||||
.collect(Collectors.toList());
|
.collect(Collectors.toList());
|
||||||
|
|||||||
@@ -23,14 +23,18 @@ import com.uber.hoodie.common.model.HoodieTestUtils;
|
|||||||
import java.text.SimpleDateFormat;
|
import java.text.SimpleDateFormat;
|
||||||
import java.util.Date;
|
import java.util.Date;
|
||||||
import java.util.UUID;
|
import java.util.UUID;
|
||||||
|
import java.util.regex.Pattern;
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.fs.Path;
|
import org.apache.hadoop.fs.Path;
|
||||||
|
import org.junit.Assert;
|
||||||
import org.junit.Rule;
|
import org.junit.Rule;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
import org.junit.contrib.java.lang.system.EnvironmentVariables;
|
import org.junit.contrib.java.lang.system.EnvironmentVariables;
|
||||||
|
|
||||||
public class TestFSUtils {
|
public class TestFSUtils {
|
||||||
|
|
||||||
|
private static String TEST_WRITE_TOKEN = "1-0-1";
|
||||||
|
|
||||||
@Rule
|
@Rule
|
||||||
public final EnvironmentVariables environmentVariables = new EnvironmentVariables();
|
public final EnvironmentVariables environmentVariables = new EnvironmentVariables();
|
||||||
|
|
||||||
@@ -39,22 +43,8 @@ public class TestFSUtils {
|
|||||||
String commitTime = new SimpleDateFormat("yyyyMMddHHmmss").format(new Date());
|
String commitTime = new SimpleDateFormat("yyyyMMddHHmmss").format(new Date());
|
||||||
int taskPartitionId = 2;
|
int taskPartitionId = 2;
|
||||||
String fileName = UUID.randomUUID().toString();
|
String fileName = UUID.randomUUID().toString();
|
||||||
assertTrue(FSUtils.makeDataFileName(commitTime, taskPartitionId, fileName)
|
assertTrue(FSUtils.makeDataFileName(commitTime, TEST_WRITE_TOKEN, fileName)
|
||||||
.equals(fileName + "_" + taskPartitionId + "_" + commitTime + ".parquet"));
|
.equals(fileName + "_" + TEST_WRITE_TOKEN + "_" + commitTime + ".parquet"));
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testMakeTempDataFileName() {
|
|
||||||
String commitTime = new SimpleDateFormat("yyyyMMddHHmmss").format(new Date());
|
|
||||||
String partitionPath = "2017/12/31";
|
|
||||||
int taskPartitionId = Integer.MAX_VALUE;
|
|
||||||
int stageId = Integer.MAX_VALUE;
|
|
||||||
long taskAttemptId = Long.MAX_VALUE;
|
|
||||||
String fileName = UUID.randomUUID().toString();
|
|
||||||
assertTrue(
|
|
||||||
FSUtils.makeTempDataFileName(partitionPath, commitTime, taskPartitionId, fileName, stageId, taskAttemptId)
|
|
||||||
.equals(partitionPath.replace("/", "-") + "_" + fileName + "_" + taskPartitionId + "_" + commitTime + "_"
|
|
||||||
+ stageId + "_" + taskAttemptId + ".parquet"));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@@ -70,7 +60,7 @@ public class TestFSUtils {
|
|||||||
String commitTime = new SimpleDateFormat("yyyyMMddHHmmss").format(new Date());
|
String commitTime = new SimpleDateFormat("yyyyMMddHHmmss").format(new Date());
|
||||||
int taskPartitionId = 2;
|
int taskPartitionId = 2;
|
||||||
String fileName = UUID.randomUUID().toString();
|
String fileName = UUID.randomUUID().toString();
|
||||||
String fullFileName = FSUtils.makeDataFileName(commitTime, taskPartitionId, fileName);
|
String fullFileName = FSUtils.makeDataFileName(commitTime, TEST_WRITE_TOKEN, fileName);
|
||||||
assertTrue(FSUtils.getCommitTime(fullFileName).equals(commitTime));
|
assertTrue(FSUtils.getCommitTime(fullFileName).equals(commitTime));
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -79,7 +69,7 @@ public class TestFSUtils {
|
|||||||
String commitTime = new SimpleDateFormat("yyyyMMddHHmmss").format(new Date());
|
String commitTime = new SimpleDateFormat("yyyyMMddHHmmss").format(new Date());
|
||||||
int taskPartitionId = 2;
|
int taskPartitionId = 2;
|
||||||
String fileName = UUID.randomUUID().toString();
|
String fileName = UUID.randomUUID().toString();
|
||||||
String fullFileName = FSUtils.makeDataFileName(commitTime, taskPartitionId, fileName);
|
String fullFileName = FSUtils.makeDataFileName(commitTime, TEST_WRITE_TOKEN, fileName);
|
||||||
assertTrue(FSUtils.getFileId(fullFileName).equals(fileName));
|
assertTrue(FSUtils.getFileId(fullFileName).equals(fileName));
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -121,4 +111,47 @@ public class TestFSUtils {
|
|||||||
Path partitionPath = new Path("/test/apache/apache/hudi");
|
Path partitionPath = new Path("/test/apache/apache/hudi");
|
||||||
assertEquals("apache/hudi", FSUtils.getRelativePartitionPath(basePath, partitionPath));
|
assertEquals("apache/hudi", FSUtils.getRelativePartitionPath(basePath, partitionPath));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testOldLogFileName() {
|
||||||
|
// Check if old log file names are still parseable by FSUtils method
|
||||||
|
String partitionPath = "2019/01/01/";
|
||||||
|
String fileName = UUID.randomUUID().toString();
|
||||||
|
String oldLogFile = makeOldLogFileName(fileName, ".log", "100", 1);
|
||||||
|
Path rlPath = new Path(new Path(partitionPath), oldLogFile);
|
||||||
|
Assert.assertTrue(FSUtils.isLogFile(rlPath));
|
||||||
|
Assert.assertEquals(fileName, FSUtils.getFileIdFromLogPath(rlPath));
|
||||||
|
Assert.assertEquals("100", FSUtils.getBaseCommitTimeFromLogPath(rlPath));
|
||||||
|
Assert.assertEquals(1, FSUtils.getFileVersionFromLog(rlPath));
|
||||||
|
Assert.assertNull(FSUtils.getTaskPartitionIdFromLogPath(rlPath));
|
||||||
|
Assert.assertNull(FSUtils.getStageIdFromLogPath(rlPath));
|
||||||
|
Assert.assertNull(FSUtils.getTaskAttemptIdFromLogPath(rlPath));
|
||||||
|
Assert.assertNull(FSUtils.getWriteTokenFromLogPath(rlPath));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void tesLogFileName() {
|
||||||
|
// Check if log file names are parseable by FSUtils method
|
||||||
|
String partitionPath = "2019/01/01/";
|
||||||
|
String fileName = UUID.randomUUID().toString();
|
||||||
|
String logFile = FSUtils.makeLogFileName(fileName, ".log", "100", 2, "1-0-1");
|
||||||
|
System.out.println("Log File =" + logFile);
|
||||||
|
Path rlPath = new Path(new Path(partitionPath), logFile);
|
||||||
|
Assert.assertTrue(FSUtils.isLogFile(rlPath));
|
||||||
|
Assert.assertEquals(fileName, FSUtils.getFileIdFromLogPath(rlPath));
|
||||||
|
Assert.assertEquals("100", FSUtils.getBaseCommitTimeFromLogPath(rlPath));
|
||||||
|
Assert.assertEquals(2, FSUtils.getFileVersionFromLog(rlPath));
|
||||||
|
Assert.assertEquals(new Integer(1), FSUtils.getTaskPartitionIdFromLogPath(rlPath));
|
||||||
|
Assert.assertEquals(new Integer(0), FSUtils.getStageIdFromLogPath(rlPath));
|
||||||
|
Assert.assertEquals(new Integer(1), FSUtils.getTaskAttemptIdFromLogPath(rlPath));
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
public static String makeOldLogFileName(String fileId, String logFileExtension,
|
||||||
|
String baseCommitTime, int version) {
|
||||||
|
Pattern oldLogFilePattern =
|
||||||
|
Pattern.compile("\\.(.*)_(.*)\\.(.*)\\.([0-9]*)(\\.([0-9]*))");
|
||||||
|
return "." + String
|
||||||
|
.format("%s_%s%s.%d", fileId, baseCommitTime, logFileExtension, version);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -97,4 +97,14 @@ public class HoodieRealtimeFileSplit extends FileSplit {
|
|||||||
deltaFilePaths.add(readString(in));
|
deltaFilePaths.add(readString(in));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return "HoodieRealtimeFileSplit{"
|
||||||
|
+ "DataPath=" + getPath()
|
||||||
|
+ ", deltaFilePaths=" + deltaFilePaths
|
||||||
|
+ ", maxCommitTime='" + maxCommitTime + '\''
|
||||||
|
+ ", basePath='" + basePath + '\''
|
||||||
|
+ '}';
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -36,6 +36,8 @@ import org.junit.rules.TemporaryFolder;
|
|||||||
|
|
||||||
public class InputFormatTestUtil {
|
public class InputFormatTestUtil {
|
||||||
|
|
||||||
|
private static String TEST_WRITE_TOKEN = "1-0-1";
|
||||||
|
|
||||||
public static File prepareDataset(TemporaryFolder basePath, int numberOfFiles,
|
public static File prepareDataset(TemporaryFolder basePath, int numberOfFiles,
|
||||||
String commitNumber) throws IOException {
|
String commitNumber) throws IOException {
|
||||||
basePath.create();
|
basePath.create();
|
||||||
@@ -43,7 +45,7 @@ public class InputFormatTestUtil {
|
|||||||
File partitionPath = basePath.newFolder("2016", "05", "01");
|
File partitionPath = basePath.newFolder("2016", "05", "01");
|
||||||
for (int i = 0; i < numberOfFiles; i++) {
|
for (int i = 0; i < numberOfFiles; i++) {
|
||||||
File dataFile = new File(partitionPath,
|
File dataFile = new File(partitionPath,
|
||||||
FSUtils.makeDataFileName(commitNumber, 1, "fileid" + i));
|
FSUtils.makeDataFileName(commitNumber, TEST_WRITE_TOKEN, "fileid" + i));
|
||||||
dataFile.createNewFile();
|
dataFile.createNewFile();
|
||||||
}
|
}
|
||||||
return partitionPath;
|
return partitionPath;
|
||||||
@@ -65,7 +67,7 @@ public class InputFormatTestUtil {
|
|||||||
.subList(0, Math.min(numberOfFilesUpdated, dataFiles.size()));
|
.subList(0, Math.min(numberOfFilesUpdated, dataFiles.size()));
|
||||||
for (File file : toUpdateList) {
|
for (File file : toUpdateList) {
|
||||||
String fileId = FSUtils.getFileId(file.getName());
|
String fileId = FSUtils.getFileId(file.getName());
|
||||||
File dataFile = new File(directory, FSUtils.makeDataFileName(newCommit, 1, fileId));
|
File dataFile = new File(directory, FSUtils.makeDataFileName(newCommit, TEST_WRITE_TOKEN, fileId));
|
||||||
dataFile.createNewFile();
|
dataFile.createNewFile();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -117,7 +119,7 @@ public class InputFormatTestUtil {
|
|||||||
throws IOException {
|
throws IOException {
|
||||||
AvroParquetWriter parquetWriter;
|
AvroParquetWriter parquetWriter;
|
||||||
for (int i = 0; i < numberOfFiles; i++) {
|
for (int i = 0; i < numberOfFiles; i++) {
|
||||||
String fileId = FSUtils.makeDataFileName(commitNumber, 1, "fileid" + i);
|
String fileId = FSUtils.makeDataFileName(commitNumber, TEST_WRITE_TOKEN, "fileid" + i);
|
||||||
File dataFile = new File(partitionPath, fileId);
|
File dataFile = new File(partitionPath, fileId);
|
||||||
parquetWriter = new AvroParquetWriter(new Path(dataFile.getAbsolutePath()), schema);
|
parquetWriter = new AvroParquetWriter(new Path(dataFile.getAbsolutePath()), schema);
|
||||||
try {
|
try {
|
||||||
@@ -149,7 +151,7 @@ public class InputFormatTestUtil {
|
|||||||
}
|
}
|
||||||
})[0];
|
})[0];
|
||||||
String fileId = FSUtils.getFileId(fileToUpdate.getName());
|
String fileId = FSUtils.getFileId(fileToUpdate.getName());
|
||||||
File dataFile = new File(directory, FSUtils.makeDataFileName(newCommit, 1, fileId));
|
File dataFile = new File(directory, FSUtils.makeDataFileName(newCommit, TEST_WRITE_TOKEN, fileId));
|
||||||
AvroParquetWriter parquetWriter = new AvroParquetWriter(new Path(dataFile.getAbsolutePath()),
|
AvroParquetWriter parquetWriter = new AvroParquetWriter(new Path(dataFile.getAbsolutePath()),
|
||||||
schema);
|
schema);
|
||||||
try {
|
try {
|
||||||
|
|||||||
@@ -141,7 +141,7 @@ public class HoodieRealtimeRecordReaderTest {
|
|||||||
//create a split with baseFile (parquet file written earlier) and new log file(s)
|
//create a split with baseFile (parquet file written earlier) and new log file(s)
|
||||||
String logFilePath = writer.getLogFile().getPath().toString();
|
String logFilePath = writer.getLogFile().getPath().toString();
|
||||||
HoodieRealtimeFileSplit split = new HoodieRealtimeFileSplit(
|
HoodieRealtimeFileSplit split = new HoodieRealtimeFileSplit(
|
||||||
new FileSplit(new Path(partitionDir + "/fileid0_1_" + commitTime + ".parquet"), 0, 1,
|
new FileSplit(new Path(partitionDir + "/fileid0_1-0-1_" + commitTime + ".parquet"), 0, 1,
|
||||||
jobConf), basePath.getRoot().getPath(), Arrays.asList(logFilePath), newCommitTime);
|
jobConf), basePath.getRoot().getPath(), Arrays.asList(logFilePath), newCommitTime);
|
||||||
|
|
||||||
//create a RecordReader to be used by HoodieRealtimeRecordReader
|
//create a RecordReader to be used by HoodieRealtimeRecordReader
|
||||||
@@ -203,7 +203,7 @@ public class HoodieRealtimeRecordReaderTest {
|
|||||||
//create a split with baseFile (parquet file written earlier) and new log file(s)
|
//create a split with baseFile (parquet file written earlier) and new log file(s)
|
||||||
String logFilePath = writer.getLogFile().getPath().toString();
|
String logFilePath = writer.getLogFile().getPath().toString();
|
||||||
HoodieRealtimeFileSplit split = new HoodieRealtimeFileSplit(
|
HoodieRealtimeFileSplit split = new HoodieRealtimeFileSplit(
|
||||||
new FileSplit(new Path(partitionDir + "/fileid0_1_" + commitTime + ".parquet"), 0, 1,
|
new FileSplit(new Path(partitionDir + "/fileid0_1-0-1_" + commitTime + ".parquet"), 0, 1,
|
||||||
jobConf), basePath.getRoot().getPath(), Arrays.asList(logFilePath), newCommitTime);
|
jobConf), basePath.getRoot().getPath(), Arrays.asList(logFilePath), newCommitTime);
|
||||||
|
|
||||||
//create a RecordReader to be used by HoodieRealtimeRecordReader
|
//create a RecordReader to be used by HoodieRealtimeRecordReader
|
||||||
@@ -286,7 +286,7 @@ public class HoodieRealtimeRecordReaderTest {
|
|||||||
//create a split with baseFile (parquet file written earlier) and new log file(s)
|
//create a split with baseFile (parquet file written earlier) and new log file(s)
|
||||||
String logFilePath = writer.getLogFile().getPath().toString();
|
String logFilePath = writer.getLogFile().getPath().toString();
|
||||||
HoodieRealtimeFileSplit split = new HoodieRealtimeFileSplit(
|
HoodieRealtimeFileSplit split = new HoodieRealtimeFileSplit(
|
||||||
new FileSplit(new Path(partitionDir + "/fileid0_1_" + commitTime + ".parquet"), 0, 1,
|
new FileSplit(new Path(partitionDir + "/fileid0_1-0-1_" + commitTime + ".parquet"), 0, 1,
|
||||||
jobConf), basePath.getRoot().getPath(), Arrays.asList(logFilePath), newCommitTime);
|
jobConf), basePath.getRoot().getPath(), Arrays.asList(logFilePath), newCommitTime);
|
||||||
|
|
||||||
//create a RecordReader to be used by HoodieRealtimeRecordReader
|
//create a RecordReader to be used by HoodieRealtimeRecordReader
|
||||||
|
|||||||
@@ -16,7 +16,6 @@
|
|||||||
|
|
||||||
package com.uber.hoodie.hive;
|
package com.uber.hoodie.hive;
|
||||||
|
|
||||||
import static com.uber.hoodie.common.model.HoodieTestUtils.DEFAULT_TASK_PARTITIONID;
|
|
||||||
import static org.junit.Assert.fail;
|
import static org.junit.Assert.fail;
|
||||||
|
|
||||||
import com.google.common.collect.Lists;
|
import com.google.common.collect.Lists;
|
||||||
@@ -260,7 +259,7 @@ public class TestUtil {
|
|||||||
// Create 5 files
|
// Create 5 files
|
||||||
String fileId = UUID.randomUUID().toString();
|
String fileId = UUID.randomUUID().toString();
|
||||||
Path filePath = new Path(partPath.toString() + "/" + FSUtils.makeDataFileName(commitTime,
|
Path filePath = new Path(partPath.toString() + "/" + FSUtils.makeDataFileName(commitTime,
|
||||||
DEFAULT_TASK_PARTITIONID, fileId));
|
"1-0-1", fileId));
|
||||||
generateParquetData(filePath, isParquetSchemaSimple);
|
generateParquetData(filePath, isParquetSchemaSimple);
|
||||||
HoodieWriteStat writeStat = new HoodieWriteStat();
|
HoodieWriteStat writeStat = new HoodieWriteStat();
|
||||||
writeStat.setFileId(fileId);
|
writeStat.setFileId(fileId);
|
||||||
|
|||||||
@@ -119,7 +119,7 @@
|
|||||||
<dependency>
|
<dependency>
|
||||||
<groupId>io.javalin</groupId>
|
<groupId>io.javalin</groupId>
|
||||||
<artifactId>javalin</artifactId>
|
<artifactId>javalin</artifactId>
|
||||||
<version>2.4.0</version>
|
<version>2.8.0</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.httpcomponents</groupId>
|
<groupId>org.apache.httpcomponents</groupId>
|
||||||
|
|||||||
@@ -20,6 +20,6 @@ public class RowBasedSchemaProvider extends SchemaProvider {
|
|||||||
@Override
|
@Override
|
||||||
public Schema getSourceSchema() {
|
public Schema getSourceSchema() {
|
||||||
return AvroConversionUtils.convertStructTypeToAvroSchema(rowStruct, HOODIE_RECORD_STRUCT_NAME,
|
return AvroConversionUtils.convertStructTypeToAvroSchema(rowStruct, HOODIE_RECORD_STRUCT_NAME,
|
||||||
HOODIE_RECORD_NAMESPACE);
|
HOODIE_RECORD_NAMESPACE);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -37,6 +37,8 @@ import org.junit.rules.TemporaryFolder;
|
|||||||
|
|
||||||
public class TestHoodieSnapshotCopier {
|
public class TestHoodieSnapshotCopier {
|
||||||
|
|
||||||
|
private static String TEST_WRITE_TOKEN = "1-0-1";
|
||||||
|
|
||||||
private String rootPath = null;
|
private String rootPath = null;
|
||||||
private String basePath = null;
|
private String basePath = null;
|
||||||
private String outputPath = null;
|
private String outputPath = null;
|
||||||
@@ -102,35 +104,35 @@ public class TestHoodieSnapshotCopier {
|
|||||||
basePath);
|
basePath);
|
||||||
// Make commit1
|
// Make commit1
|
||||||
File file11 = new File(
|
File file11 = new File(
|
||||||
basePath + "/2016/05/01/" + FSUtils.makeDataFileName(commitTime1, 1, "id11"));
|
basePath + "/2016/05/01/" + FSUtils.makeDataFileName(commitTime1, TEST_WRITE_TOKEN, "id11"));
|
||||||
file11.createNewFile();
|
file11.createNewFile();
|
||||||
File file12 = new File(
|
File file12 = new File(
|
||||||
basePath + "/2016/05/02/" + FSUtils.makeDataFileName(commitTime1, 1, "id12"));
|
basePath + "/2016/05/02/" + FSUtils.makeDataFileName(commitTime1, TEST_WRITE_TOKEN, "id12"));
|
||||||
file12.createNewFile();
|
file12.createNewFile();
|
||||||
File file13 = new File(
|
File file13 = new File(
|
||||||
basePath + "/2016/05/06/" + FSUtils.makeDataFileName(commitTime1, 1, "id13"));
|
basePath + "/2016/05/06/" + FSUtils.makeDataFileName(commitTime1, TEST_WRITE_TOKEN, "id13"));
|
||||||
file13.createNewFile();
|
file13.createNewFile();
|
||||||
|
|
||||||
// Make commit2
|
// Make commit2
|
||||||
File file21 = new File(
|
File file21 = new File(
|
||||||
basePath + "/2016/05/01/" + FSUtils.makeDataFileName(commitTime2, 1, "id21"));
|
basePath + "/2016/05/01/" + FSUtils.makeDataFileName(commitTime2, TEST_WRITE_TOKEN, "id21"));
|
||||||
file21.createNewFile();
|
file21.createNewFile();
|
||||||
File file22 = new File(
|
File file22 = new File(
|
||||||
basePath + "/2016/05/02/" + FSUtils.makeDataFileName(commitTime2, 1, "id22"));
|
basePath + "/2016/05/02/" + FSUtils.makeDataFileName(commitTime2, TEST_WRITE_TOKEN, "id22"));
|
||||||
file22.createNewFile();
|
file22.createNewFile();
|
||||||
File file23 = new File(
|
File file23 = new File(
|
||||||
basePath + "/2016/05/06/" + FSUtils.makeDataFileName(commitTime2, 1, "id23"));
|
basePath + "/2016/05/06/" + FSUtils.makeDataFileName(commitTime2, TEST_WRITE_TOKEN, "id23"));
|
||||||
file23.createNewFile();
|
file23.createNewFile();
|
||||||
|
|
||||||
// Make commit3
|
// Make commit3
|
||||||
File file31 = new File(
|
File file31 = new File(
|
||||||
basePath + "/2016/05/01/" + FSUtils.makeDataFileName(commitTime3, 1, "id31"));
|
basePath + "/2016/05/01/" + FSUtils.makeDataFileName(commitTime3, TEST_WRITE_TOKEN, "id31"));
|
||||||
file31.createNewFile();
|
file31.createNewFile();
|
||||||
File file32 = new File(
|
File file32 = new File(
|
||||||
basePath + "/2016/05/02/" + FSUtils.makeDataFileName(commitTime3, 1, "id32"));
|
basePath + "/2016/05/02/" + FSUtils.makeDataFileName(commitTime3, TEST_WRITE_TOKEN, "id32"));
|
||||||
file32.createNewFile();
|
file32.createNewFile();
|
||||||
File file33 = new File(
|
File file33 = new File(
|
||||||
basePath + "/2016/05/06/" + FSUtils.makeDataFileName(commitTime3, 1, "id33"));
|
basePath + "/2016/05/06/" + FSUtils.makeDataFileName(commitTime3, TEST_WRITE_TOKEN, "id33"));
|
||||||
file33.createNewFile();
|
file33.createNewFile();
|
||||||
|
|
||||||
// Do a snapshot copy
|
// Do a snapshot copy
|
||||||
|
|||||||
Reference in New Issue
Block a user