1
0

Remove the compaction action type and its associated compaction timeline operations, replacing them with the commit action type

This commit is contained in:
Nishith Agarwal
2017-12-05 00:58:53 -08:00
committed by vinoth chandar
parent a1c0d0dbad
commit 44839b88c6
34 changed files with 265 additions and 450 deletions

View File

@@ -1,108 +0,0 @@
/*
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.uber.hoodie.common.model;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import java.io.Serializable;
/**
 * Serializable holder that pairs a {@link HoodieWriteStat} with the partition path it belongs to
 * and three compaction counters (log records, log files, records to be updated).
 *
 * <p>Instances are created through the all-args constructor, through {@link Builder}, or through
 * the no-arg constructor that exists for de-serialization. Unknown JSON properties are ignored.
 */
@JsonIgnoreProperties(ignoreUnknown = true)
public class CompactionWriteStat implements Serializable {

  // Field names are kept stable because they take part in JSON (de)serialization.
  private HoodieWriteStat writeStat;
  private String partitionPath;
  private long totalLogRecords;
  private long totalLogFiles;
  private long totalRecordsToBeUpdate;

  /**
   * Creates a fully populated stat.
   *
   * @param writeStat the underlying write stat
   * @param partitionPath partition path this stat is recorded for
   * @param totalLogFiles value stored as the log-file count
   * @param totalLogRecords value stored as the log-record count
   * @param totalRecordsToUpdate value stored as the records-to-be-updated count
   */
  public CompactionWriteStat(HoodieWriteStat writeStat, String partitionPath, long totalLogFiles,
      long totalLogRecords, long totalRecordsToUpdate) {
    this.partitionPath = partitionPath;
    this.writeStat = writeStat;
    this.totalRecordsToBeUpdate = totalRecordsToUpdate;
    this.totalLogRecords = totalLogRecords;
    this.totalLogFiles = totalLogFiles;
  }

  /**
   * No-arg constructor; all fields keep their Java default values.
   */
  public CompactionWriteStat() {
    // For de-serialization
  }

  /** @return the stored log-record count */
  public long getTotalLogRecords() {
    return this.totalLogRecords;
  }

  /** @return the stored log-file count */
  public long getTotalLogFiles() {
    return this.totalLogFiles;
  }

  /** @return the stored records-to-be-updated count */
  public long getTotalRecordsToBeUpdate() {
    return this.totalRecordsToBeUpdate;
  }

  /** @return the wrapped write stat, or {@code null} if none was set */
  public HoodieWriteStat getHoodieWriteStat() {
    return this.writeStat;
  }

  /** @return the partition path, or {@code null} if none was set */
  public String getPartitionPath() {
    return this.partitionPath;
  }

  /** @return a fresh, empty {@link Builder} */
  public static Builder newBuilder() {
    return new Builder();
  }

  /**
   * Fluent builder for {@link CompactionWriteStat}. Values that are never set stay at their Java
   * defaults ({@code null} / {@code 0}).
   */
  public static class Builder {

    private HoodieWriteStat stat;
    private String partition;
    private long logFileCount;
    private long logRecordCount;
    private long recordsToUpdateCount;

    public Builder withHoodieWriteStat(HoodieWriteStat writeStat) {
      stat = writeStat;
      return this;
    }

    public Builder setTotalLogRecords(long records) {
      logRecordCount = records;
      return this;
    }

    public Builder setTotalLogFiles(long totalLogFiles) {
      logFileCount = totalLogFiles;
      return this;
    }

    public Builder setTotalRecordsToUpdate(long records) {
      recordsToUpdateCount = records;
      return this;
    }

    public Builder onPartition(String path) {
      partition = path;
      return this;
    }

    /**
     * @return a new {@link CompactionWriteStat} carrying the values collected so far
     */
    public CompactionWriteStat build() {
      // Argument order follows the constructor: files first, then records, then records-to-update.
      CompactionWriteStat built = new CompactionWriteStat(stat, partition, logFileCount,
          logRecordCount, recordsToUpdateCount);
      return built;
    }
  }
}

View File

@@ -40,12 +40,19 @@ public class HoodieCommitMetadata implements Serializable {
private static volatile Logger log = LogManager.getLogger(HoodieCommitMetadata.class);
protected Map<String, List<HoodieWriteStat>> partitionToWriteStats;
protected Boolean compacted;
private Map<String, String> extraMetadataMap;
/**
* No-argument constructor kept for serialization/de-serialization. Delegates to
* {@link #HoodieCommitMetadata(boolean)} with {@code compacted} set to {@code false}.
*/
public HoodieCommitMetadata() {
this(false);
}
/**
* Creates commit metadata with an empty extra-metadata map and an empty
* partition-to-write-stats map.
*
* @param compacted initial value of the {@code compacted} flag
*/
public HoodieCommitMetadata(boolean compacted) {
extraMetadataMap = new HashMap<>();
partitionToWriteStats = new HashMap<>();
this.compacted = compacted;
}
public void addWriteStat(String partitionPath, HoodieWriteStat stat) {
@@ -75,6 +82,14 @@ public class HoodieCommitMetadata implements Serializable {
return extraMetadataMap.get(metaKey);
}
/**
* Returns the {@code compacted} flag of this commit metadata.
*
* @return the current flag value; may be {@code null} if it was set to {@code null} through
* {@link #setCompacted(Boolean)}
*/
// NOTE(review): presumably true marks a commit produced by a compaction - confirm with callers.
public Boolean getCompacted() {
return compacted;
}
/**
* Overwrites the {@code compacted} flag. The value is stored as-is, without a null check.
*
* @param compacted new flag value
*/
public void setCompacted(Boolean compacted) {
this.compacted = compacted;
}
public HashMap<String, String> getFileIdAndRelativePaths() {
HashMap<String, String> filePaths = new HashMap<>();
// list all partitions paths
@@ -200,24 +215,21 @@ public class HoodieCommitMetadata implements Serializable {
/**
 * Two commit metadata objects are equal when they are of the same class and both their
 * per-partition write stats and their {@code compacted} flag are equal.
 *
 * <p>Both fields are compared null-safely: {@code compacted} is a boxed {@link Boolean} that
 * {@link #setCompacted(Boolean)} stores without a null check, and {@code partitionToWriteStats}
 * is a non-final field, so calling {@code equals} on either directly could throw.
 *
 * @param o the object to compare against
 * @return {@code true} if {@code o} carries the same write stats and compacted flag
 */
@Override
public boolean equals(Object o) {
  if (this == o) {
    return true;
  }
  if (o == null || getClass() != o.getClass()) {
    return false;
  }

  HoodieCommitMetadata that = (HoodieCommitMetadata) o;

  boolean sameWriteStats = partitionToWriteStats != null
      ? partitionToWriteStats.equals(that.partitionToWriteStats)
      : that.partitionToWriteStats == null;
  if (!sameWriteStats) {
    return false;
  }

  return compacted != null
      ? compacted.equals(that.compacted)
      : that.compacted == null;
}
/**
 * Hash code derived from the per-partition write stats and the {@code compacted} flag, the same
 * two fields {@link #equals(Object)} compares. A {@code null} field contributes {@code 0}
 * instead of throwing.
 *
 * @return the combined hash of {@code partitionToWriteStats} and {@code compacted}
 */
@Override
public int hashCode() {
  int result = partitionToWriteStats != null ? partitionToWriteStats.hashCode() : 0;
  result = 31 * result + (compacted != null ? compacted.hashCode() : 0);
  return result;
}
public static HoodieCommitMetadata fromBytes(byte[] bytes) throws IOException {

View File

@@ -1,86 +0,0 @@
/*
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.uber.hoodie.common.model;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.codehaus.jackson.annotate.JsonAutoDetect;
import org.codehaus.jackson.annotate.JsonMethod;
import org.codehaus.jackson.map.DeserializationConfig.Feature;
import org.codehaus.jackson.map.ObjectMapper;
/**
 * Commit metadata for compactions. On top of everything tracked by {@link HoodieCommitMetadata}
 * it keeps, per partition path, the list of {@link CompactionWriteStat}s that were added.
 */
public class HoodieCompactionMetadata extends HoodieCommitMetadata {

  private static volatile Logger log = LogManager.getLogger(HoodieCompactionMetadata.class);

  // Field name is kept stable: the JSON mapper below serializes fields directly.
  protected HashMap<String, List<CompactionWriteStat>> partitionToCompactionWriteStats;

  /**
   * Creates metadata with no compaction write stats.
   */
  public HoodieCompactionMetadata() {
    partitionToCompactionWriteStats = new HashMap<>();
  }

  /**
   * Records a compaction write stat. The wrapped {@link HoodieWriteStat} is first registered via
   * the inherited {@code addWriteStat}, then the compaction stat itself is appended to the list
   * kept for the partition (creating that list on first use).
   *
   * @param partitionPath partition the stat belongs to
   * @param stat compaction stat to record
   */
  public void addWriteStat(String partitionPath, CompactionWriteStat stat) {
    addWriteStat(partitionPath, stat.getHoodieWriteStat());

    final boolean partitionKnown = partitionToCompactionWriteStats.containsKey(partitionPath);
    if (!partitionKnown) {
      partitionToCompactionWriteStats.put(partitionPath, new ArrayList<>());
    }
    List<CompactionWriteStat> statsForPartition =
        partitionToCompactionWriteStats.get(partitionPath);
    statsForPartition.add(stat);
  }

  /**
   * @param partitionPath partition to look up
   * @return the stats recorded for the partition, or {@code null} if there are none
   */
  public List<CompactionWriteStat> getCompactionWriteStats(String partitionPath) {
    return partitionToCompactionWriteStats.get(partitionPath);
  }

  /**
   * @return the backing partition-to-stats map itself (not a copy)
   */
  public Map<String, List<CompactionWriteStat>> getPartitionToCompactionWriteStats() {
    return partitionToCompactionWriteStats;
  }

  /**
   * Serializes this object to pretty-printed JSON using field-level visibility. An entry stored
   * under a {@code null} partition path is logged and removed from the map before serializing.
   *
   * @return the JSON representation
   * @throws IOException if serialization fails
   */
  public String toJsonString() throws IOException {
    final boolean hasNullPartition = partitionToCompactionWriteStats.containsKey(null);
    if (hasNullPartition) {
      log.info("partition path is null for " + partitionToCompactionWriteStats.get(null));
      partitionToCompactionWriteStats.remove(null);
    }

    ObjectMapper jsonMapper = new ObjectMapper();
    jsonMapper.setVisibility(JsonMethod.FIELD, JsonAutoDetect.Visibility.ANY);
    return jsonMapper.defaultPrettyPrintingWriter().writeValueAsString(this);
  }

  /**
   * Parses compaction metadata from JSON. Unknown properties are ignored and fields are bound
   * directly.
   *
   * @param jsonStr JSON text; may be {@code null} or empty
   * @return the parsed metadata, or a fresh empty instance when {@code jsonStr} is null or empty
   * @throws IOException if the JSON cannot be read
   */
  public static HoodieCompactionMetadata fromJsonString(String jsonStr) throws IOException {
    // An empty commit file (no data, or something went wrong) maps to empty metadata.
    final boolean nothingToParse = jsonStr == null || jsonStr.isEmpty();
    if (nothingToParse) {
      return new HoodieCompactionMetadata();
    }

    ObjectMapper jsonMapper = new ObjectMapper();
    jsonMapper.configure(Feature.FAIL_ON_UNKNOWN_PROPERTIES, false);
    jsonMapper.setVisibility(JsonMethod.FIELD, JsonAutoDetect.Visibility.ANY);
    return jsonMapper.readValue(jsonStr, HoodieCompactionMetadata.class);
  }

  /**
   * Decodes the bytes as UTF-8 text and parses them with {@link #fromJsonString(String)}.
   *
   * @param bytes UTF-8 encoded JSON
   * @return the parsed metadata
   * @throws IOException if the JSON cannot be read
   */
  public static HoodieCompactionMetadata fromBytes(byte[] bytes) throws IOException {
    String json = new String(bytes, Charset.forName("utf-8"));
    return fromJsonString(json);
  }
}

View File

@@ -17,6 +17,8 @@
package com.uber.hoodie.common.model;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import javax.annotation.Nullable;
import java.io.Serializable;
/**
@@ -68,6 +70,34 @@ public class HoodieWriteStat implements Serializable {
*/
private long totalWriteErrors;
/**
* Following properties are associated only with the result of a Compaction Operation
*/
/**
* Partition Path associated with this writeStat
*/
@Nullable
private String partitionPath;
/**
* Total number of log records that were compacted by a compaction operation
*/
@Nullable
private Long totalLogRecords;
/**
* Total number of log files that were compacted by a compaction operation
*/
@Nullable
private Long totalLogFiles;
/**
* Total number of records updated by a compaction operation
*/
@Nullable
private Long totalRecordsToBeUpdate;
/**
* No-argument constructor; performs no initialization of its own.
*/
public HoodieWriteStat() {
// called by jackson json lib
}
@@ -136,6 +166,37 @@ public class HoodieWriteStat implements Serializable {
return path;
}
/**
* Returns the partition path associated with this write stat.
*
* @return the partition path; the backing field is {@code @Nullable} and is documented as
* applying only to the result of a compaction operation
*/
public String getPartitionPath() {
return partitionPath;
}
/**
* Sets the partition path associated with this write stat.
*
* @param partitionPath the partition path; stored as-is
*/
public void setPartitionPath(String partitionPath) {
this.partitionPath = partitionPath;
}
/**
* Returns the total number of log records that were compacted by a compaction operation.
*
* @return the boxed count; the backing field is {@code @Nullable}, so this may be {@code null}
*/
public Long getTotalLogRecords() {
return totalLogRecords;
}
/**
* Sets the total number of log records that were compacted by a compaction operation.
*
* @param totalLogRecords the count; stored as-is
*/
public void setTotalLogRecords(Long totalLogRecords) {
this.totalLogRecords = totalLogRecords;
}
/**
* Returns the total number of log files that were compacted by a compaction operation.
*
* @return the boxed count; the backing field is {@code @Nullable}, so this may be {@code null}
*/
public Long getTotalLogFiles() {
return totalLogFiles;
}
/**
* Sets the total number of log files that were compacted by a compaction operation.
*
* @param totalLogFiles the count; stored as-is
*/
public void setTotalLogFiles(Long totalLogFiles) {
this.totalLogFiles = totalLogFiles;
}
/**
* Returns the total number of records updated by a compaction operation.
*
* @return the boxed count; the backing field is {@code @Nullable}, so this may be {@code null}
*/
public Long getTotalRecordsToBeUpdate() {
return totalRecordsToBeUpdate;
}
/**
* Sets the total number of records updated by a compaction operation.
*
* @param totalRecordsToBeUpdate the count; stored as-is
*/
public void setTotalRecordsToBeUpdate(Long totalRecordsToBeUpdate) {
this.totalRecordsToBeUpdate = totalRecordsToBeUpdate;
}
@Override
public String toString() {

View File

@@ -41,7 +41,6 @@ public interface HoodieTimeline extends Serializable {
String CLEAN_ACTION = "clean";
String ROLLBACK_ACTION = "rollback";
String SAVEPOINT_ACTION = "savepoint";
String COMPACTION_ACTION = "compaction";
String INFLIGHT_EXTENSION = ".inflight";
String COMMIT_EXTENSION = "." + COMMIT_ACTION;
@@ -49,14 +48,12 @@ public interface HoodieTimeline extends Serializable {
String CLEAN_EXTENSION = "." + CLEAN_ACTION;
String ROLLBACK_EXTENSION = "." + ROLLBACK_ACTION;
String SAVEPOINT_EXTENSION = "." + SAVEPOINT_ACTION;
String COMPACTION_EXTENSION = "." + COMPACTION_ACTION;
//this is to preserve backwards compatibility on commit in-flight filenames
String INFLIGHT_COMMIT_EXTENSION = INFLIGHT_EXTENSION;
String INFLIGHT_DELTA_COMMIT_EXTENSION = "." + DELTA_COMMIT_ACTION + INFLIGHT_EXTENSION;
String INFLIGHT_CLEAN_EXTENSION = "." + CLEAN_ACTION + INFLIGHT_EXTENSION;
String INFLIGHT_ROLLBACK_EXTENSION = "." + ROLLBACK_ACTION + INFLIGHT_EXTENSION;
String INFLIGHT_SAVEPOINT_EXTENSION = "." + SAVEPOINT_ACTION + INFLIGHT_EXTENSION;
String INFLIGHT_COMPACTION_EXTENSION = "." + COMPACTION_ACTION + INFLIGHT_EXTENSION;
/**
* Filter this timeline to just include the in-flights
@@ -197,14 +194,6 @@ public interface HoodieTimeline extends Serializable {
return commitTime + HoodieTimeline.SAVEPOINT_EXTENSION;
}
/**
* Builds the file name of an in-flight compaction instant by appending
* {@link HoodieTimeline#INFLIGHT_COMPACTION_EXTENSION} to the commit time.
*
* @param commitTime commit time used as the file-name prefix
* @return {@code commitTime} followed by the in-flight compaction extension
*/
static String makeInflightCompactionFileName(String commitTime) {
return commitTime + HoodieTimeline.INFLIGHT_COMPACTION_EXTENSION;
}
/**
* Builds the file name of a compaction instant by appending
* {@link HoodieTimeline#COMPACTION_EXTENSION} to the commit time.
*
* @param commitTime commit time used as the file-name prefix
* @return {@code commitTime} followed by the compaction extension
*/
static String makeCompactionFileName(String commitTime) {
return commitTime + HoodieTimeline.COMPACTION_EXTENSION;
}
/**
* Builds the file name of an in-flight delta commit instant by appending
* {@link HoodieTimeline#INFLIGHT_DELTA_COMMIT_EXTENSION} to the commit time.
*
* @param commitTime commit time used as the file-name prefix
* @return {@code commitTime} followed by the in-flight delta commit extension
*/
static String makeInflightDeltaFileName(String commitTime) {
return commitTime + HoodieTimeline.INFLIGHT_DELTA_COMMIT_EXTENSION;
}

View File

@@ -94,8 +94,7 @@ public class HoodieActiveTimeline extends HoodieDefaultTimeline {
public HoodieActiveTimeline(FileSystem fs, String metaPath) {
this(fs, metaPath,
new String[]{COMMIT_EXTENSION, INFLIGHT_COMMIT_EXTENSION, DELTA_COMMIT_EXTENSION,
INFLIGHT_DELTA_COMMIT_EXTENSION, COMPACTION_EXTENSION,
INFLIGHT_COMPACTION_EXTENSION, SAVEPOINT_EXTENSION, INFLIGHT_SAVEPOINT_EXTENSION,
INFLIGHT_DELTA_COMMIT_EXTENSION, SAVEPOINT_EXTENSION, INFLIGHT_SAVEPOINT_EXTENSION,
CLEAN_EXTENSION, INFLIGHT_CLEAN_EXTENSION});
}
@@ -119,21 +118,21 @@ public class HoodieActiveTimeline extends HoodieDefaultTimeline {
}
/**
* Get all instants (commits, delta commits, compactions) that produce new data, in the active
* Get all instants (commits, delta commits) that produce new data, in the active
* timeline *
*/
public HoodieTimeline getCommitsAndCompactionsTimeline() {
public HoodieTimeline getCommitsTimeline() {
return getTimelineOfActions(
Sets.newHashSet(COMMIT_ACTION, COMPACTION_ACTION, DELTA_COMMIT_ACTION));
Sets.newHashSet(COMMIT_ACTION, DELTA_COMMIT_ACTION));
}
/**
* Get all instants (commits, delta commits, compactions, clean, savepoint, rollback) that result
* Get all instants (commits, delta commits, clean, savepoint, rollback) that result
* in actions, in the active timeline *
*/
public HoodieTimeline getAllCommitsTimeline() {
return getTimelineOfActions(
Sets.newHashSet(COMMIT_ACTION, COMPACTION_ACTION, DELTA_COMMIT_ACTION, CLEAN_ACTION,
Sets.newHashSet(COMMIT_ACTION, DELTA_COMMIT_ACTION, CLEAN_ACTION,
SAVEPOINT_ACTION, ROLLBACK_ACTION));
}
@@ -152,14 +151,6 @@ public class HoodieActiveTimeline extends HoodieDefaultTimeline {
(Function<HoodieInstant, Optional<byte[]>> & Serializable) this::getInstantDetails);
}
/**
* Returns a new timeline containing only the instants of this timeline whose action is
* {@code COMPACTION_ACTION}, as selected by {@code filterInstantsByAction}. Instant details of
* the returned timeline are resolved through this timeline's {@code getInstantDetails}.
*
* @return a {@link HoodieDefaultTimeline} restricted to compaction instants
*/
public HoodieTimeline getCompactionTimeline() {
return new HoodieDefaultTimeline(filterInstantsByAction(COMPACTION_ACTION),
(Function<HoodieInstant, Optional<byte[]>> & Serializable) this::getInstantDetails);
}
/**
* Get a timeline of a specific set of actions. useful to create a merged timeline of multiple
* actions

View File

@@ -94,10 +94,6 @@ public class HoodieInstant implements Serializable {
return isInflight ?
HoodieTimeline.makeInflightSavePointFileName(timestamp) :
HoodieTimeline.makeSavePointFileName(timestamp);
} else if (HoodieTimeline.COMPACTION_ACTION.equals(action)) {
return isInflight ?
HoodieTimeline.makeInflightCompactionFileName(timestamp) :
HoodieTimeline.makeCompactionFileName(timestamp);
} else if (HoodieTimeline.DELTA_COMMIT_ACTION.equals(action)) {
return isInflight ?
HoodieTimeline.makeInflightDeltaFileName(timestamp) :