(1) Define CompactionWorkload in avro to allow storing it in instant files.
(2) Split APIs in HoodieRealtimeCompactor to separate generating compaction workload from running compaction
This commit is contained in:
committed by
vinoth chandar
parent
6d01ae8ca0
commit
1b61f04e05
@@ -0,0 +1,113 @@
|
||||
/*
|
||||
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package com.uber.hoodie.common.model;
|
||||
|
||||
import com.uber.hoodie.avro.model.HoodieCompactionOperation;
|
||||
import com.uber.hoodie.common.util.FSUtils;
|
||||
import java.io.Serializable;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Optional;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
/**
|
||||
* Encapsulates all the needed information about a compaction and make a decision whether this
|
||||
* compaction is effective or not
|
||||
*
|
||||
*/
|
||||
public class CompactionOperation implements Serializable {
|
||||
|
||||
private String baseInstantTime;
|
||||
private Optional<String> dataFileCommitTime;
|
||||
private List<String> deltaFilePaths;
|
||||
private Optional<String> dataFilePath;
|
||||
private String fileId;
|
||||
private String partitionPath;
|
||||
private Map<String, Double> metrics;
|
||||
|
||||
//Only for serialization/de-serialization
|
||||
@Deprecated
|
||||
public CompactionOperation() {
|
||||
}
|
||||
|
||||
public CompactionOperation(Optional<HoodieDataFile> dataFile, String partitionPath,
|
||||
List<HoodieLogFile> logFiles, Map<String, Double> metrics) {
|
||||
if (dataFile.isPresent()) {
|
||||
this.baseInstantTime = dataFile.get().getCommitTime();
|
||||
this.dataFilePath = Optional.of(dataFile.get().getPath());
|
||||
this.fileId = dataFile.get().getFileId();
|
||||
this.dataFileCommitTime = Optional.of(dataFile.get().getCommitTime());
|
||||
} else {
|
||||
assert logFiles.size() > 0;
|
||||
this.dataFilePath = Optional.empty();
|
||||
this.baseInstantTime = FSUtils.getBaseCommitTimeFromLogPath(logFiles.get(0).getPath());
|
||||
this.fileId = FSUtils.getFileIdFromLogPath(logFiles.get(0).getPath());
|
||||
this.dataFileCommitTime = Optional.empty();
|
||||
}
|
||||
|
||||
this.partitionPath = partitionPath;
|
||||
this.deltaFilePaths = logFiles.stream().map(s -> s.getPath().toString())
|
||||
.collect(Collectors.toList());
|
||||
this.metrics = metrics;
|
||||
}
|
||||
|
||||
public String getBaseInstantTime() {
|
||||
return baseInstantTime;
|
||||
}
|
||||
|
||||
public Optional<String> getDataFileCommitTime() {
|
||||
return dataFileCommitTime;
|
||||
}
|
||||
|
||||
public List<String> getDeltaFilePaths() {
|
||||
return deltaFilePaths;
|
||||
}
|
||||
|
||||
public Optional<String> getDataFilePath() {
|
||||
return dataFilePath;
|
||||
}
|
||||
|
||||
public String getFileId() {
|
||||
return fileId;
|
||||
}
|
||||
|
||||
public String getPartitionPath() {
|
||||
return partitionPath;
|
||||
}
|
||||
|
||||
public Map<String, Double> getMetrics() {
|
||||
return metrics;
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert Avro generated Compaction operation to POJO for Spark RDD operation
|
||||
* @param operation Hoodie Compaction Operation
|
||||
* @return
|
||||
*/
|
||||
public static CompactionOperation convertFromAvroRecordInstance(HoodieCompactionOperation operation) {
|
||||
CompactionOperation op = new CompactionOperation();
|
||||
op.baseInstantTime = operation.getBaseInstantTime();
|
||||
op.dataFilePath = Optional.ofNullable(operation.getDataFilePath());
|
||||
op.deltaFilePaths = new ArrayList<>(operation.getDeltaFilePaths());
|
||||
op.fileId = operation.getFileId();
|
||||
op.metrics = new HashMap<>(operation.getMetrics());
|
||||
op.partitionPath = operation.getPartitionPath();
|
||||
return op;
|
||||
}
|
||||
}
|
||||
@@ -21,6 +21,7 @@ import com.google.common.collect.ImmutableMap;
|
||||
import com.google.common.collect.Lists;
|
||||
import com.uber.hoodie.avro.model.HoodieCleanMetadata;
|
||||
import com.uber.hoodie.avro.model.HoodieCleanPartitionMetadata;
|
||||
import com.uber.hoodie.avro.model.HoodieCompactionPlan;
|
||||
import com.uber.hoodie.avro.model.HoodieRollbackMetadata;
|
||||
import com.uber.hoodie.avro.model.HoodieRollbackPartitionMetadata;
|
||||
import com.uber.hoodie.avro.model.HoodieSavepointMetadata;
|
||||
@@ -142,6 +143,10 @@ public class AvroUtils {
|
||||
partitionMetadataBuilder.build());
|
||||
}
|
||||
|
||||
/**
 * Serializes a compaction plan (workload) to avro-encoded bytes so it can be stored in an
 * instant file.
 *
 * NOTE(review): method is named "Workload" but the type is HoodieCompactionPlan — consider
 * aligning the names for consistency.
 *
 * @param compactionWorkload compaction plan to serialize
 * @return avro-encoded bytes wrapped in an Optional
 * @throws IOException if writing the avro payload fails
 */
public static Optional<byte[]> serializeCompactionWorkload(HoodieCompactionPlan compactionWorkload)
    throws IOException {
  return serializeAvroMetadata(compactionWorkload, HoodieCompactionPlan.class);
}
|
||||
|
||||
public static Optional<byte[]> serializeCleanMetadata(HoodieCleanMetadata metadata)
|
||||
throws IOException {
|
||||
@@ -169,6 +174,11 @@ public class AvroUtils {
|
||||
return Optional.of(baos.toByteArray());
|
||||
}
|
||||
|
||||
/**
 * Deserializes avro-encoded bytes (as produced by {@code serializeCompactionWorkload}) back
 * into a HoodieCompactionPlan.
 *
 * @param bytes avro-encoded compaction plan
 * @return the deserialized compaction plan
 * @throws IOException if the bytes cannot be read as a HoodieCompactionPlan
 */
public static HoodieCompactionPlan deserializeHoodieCompactionPlan(byte[] bytes)
    throws IOException {
  return deserializeAvroMetadata(bytes, HoodieCompactionPlan.class);
}
|
||||
|
||||
public static HoodieCleanMetadata deserializeHoodieCleanMetadata(byte[] bytes)
|
||||
throws IOException {
|
||||
return deserializeAvroMetadata(bytes, HoodieCleanMetadata.class);
|
||||
@@ -188,5 +198,4 @@ public class AvroUtils {
|
||||
.checkArgument(fileReader.hasNext(), "Could not deserialize metadata of type " + clazz);
|
||||
return fileReader.next();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -0,0 +1,100 @@
|
||||
/*
|
||||
* Copyright (c) 2018 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package com.uber.hoodie.common.util;
|
||||
|
||||
import com.uber.hoodie.avro.model.HoodieCompactionOperation;
|
||||
import com.uber.hoodie.avro.model.HoodieCompactionPlan;
|
||||
import com.uber.hoodie.common.model.CompactionOperation;
|
||||
import com.uber.hoodie.common.model.FileSlice;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Optional;
|
||||
import java.util.function.Function;
|
||||
import java.util.stream.Collectors;
|
||||
import javafx.util.Pair;
|
||||
|
||||
/**
|
||||
* Helper class to generate compaction workload from FileGroup/FileSlice abstraction
|
||||
*/
|
||||
public class CompactionUtils {
|
||||
|
||||
/**
|
||||
* Generate compaction operation from file-slice
|
||||
*
|
||||
* @param partitionPath Partition path
|
||||
* @param fileSlice File Slice
|
||||
* @param metricsCaptureFunction Metrics Capture function
|
||||
* @return Compaction Operation
|
||||
*/
|
||||
public static HoodieCompactionOperation buildFromFileSlice(String partitionPath, FileSlice fileSlice,
|
||||
Optional<Function<Pair<String, FileSlice>, Map<String, Long>>> metricsCaptureFunction) {
|
||||
HoodieCompactionOperation.Builder builder = HoodieCompactionOperation.newBuilder();
|
||||
builder.setPartitionPath(partitionPath);
|
||||
builder.setFileId(fileSlice.getFileId());
|
||||
builder.setBaseInstantTime(fileSlice.getBaseInstantTime());
|
||||
builder.setDeltaFilePaths(fileSlice.getLogFiles().map(lf -> lf.getPath().toString()).collect(Collectors.toList()));
|
||||
if (fileSlice.getDataFile().isPresent()) {
|
||||
builder.setDataFilePath(fileSlice.getDataFile().get().getPath());
|
||||
}
|
||||
|
||||
if (metricsCaptureFunction.isPresent()) {
|
||||
builder.setMetrics(metricsCaptureFunction.get().apply(new Pair(partitionPath, fileSlice)));
|
||||
}
|
||||
return builder.build();
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate compaction workload from file-slices
|
||||
*
|
||||
* @param compactorId Compactor Id to set
|
||||
* @param partitionFileSlicePairs list of partition file-slice pairs
|
||||
* @param extraMetadata Extra Metadata
|
||||
* @param metricsCaptureFunction Metrics Capture function
|
||||
*/
|
||||
public static HoodieCompactionPlan buildFromFileSlices(String compactorId,
|
||||
List<Pair<String, FileSlice>> partitionFileSlicePairs,
|
||||
Optional<Map<String, String>> extraMetadata,
|
||||
Optional<Function<Pair<String, FileSlice>, Map<String, Long>>> metricsCaptureFunction) {
|
||||
HoodieCompactionPlan.Builder builder = HoodieCompactionPlan.newBuilder();
|
||||
builder.setCompactorId(compactorId);
|
||||
extraMetadata.ifPresent(m -> builder.setExtraMetadata(m));
|
||||
builder.setOperations(partitionFileSlicePairs.stream().map(pfPair ->
|
||||
buildFromFileSlice(pfPair.getKey(), pfPair.getValue(), metricsCaptureFunction)).collect(Collectors.toList()));
|
||||
return builder.build();
|
||||
}
|
||||
|
||||
/**
|
||||
* Build Avro generated Compaction operation payload from compaction operation POJO for serialization
|
||||
*/
|
||||
public static HoodieCompactionOperation buildHoodieCompactionOperation(CompactionOperation op) {
|
||||
return HoodieCompactionOperation.newBuilder().setFileId(op.getFileId())
|
||||
.setBaseInstantTime(op.getBaseInstantTime())
|
||||
.setPartitionPath(op.getPartitionPath())
|
||||
.setDataFilePath(op.getDataFilePath().isPresent() ? op.getDataFilePath().get() : null)
|
||||
.setDeltaFilePaths(op.getDeltaFilePaths())
|
||||
.setMetrics(op.getMetrics()).build();
|
||||
}
|
||||
|
||||
/**
|
||||
* Build Compaction operation payload from Avro version for using in Spark executors
|
||||
*
|
||||
* @param hc HoodieCompactionOperation
|
||||
*/
|
||||
public static CompactionOperation buildCompactionOperation(HoodieCompactionOperation hc) {
|
||||
return CompactionOperation.convertFromAvroRecordInstance(hc);
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user