1
0

1. Use HoodieLogFormat to archive commits and other actions. 2. Introduce an Avro schema for commits and compactions, and an Avro wrapper schema.

This commit is contained in:
Nishith Agarwal
2017-06-20 23:50:23 -07:00
committed by vinoth chandar
parent 616c9a68c3
commit 19c22b231e
17 changed files with 546 additions and 3854 deletions

View File

@@ -0,0 +1,55 @@
{
"type":"record",
"name":"HoodieArchivedMetaEntry",
"namespace":"com.uber.hoodie.avro.model",
"fields":[
{
"name":"hoodieCommitMetadata",
"type":[
"null",
"HoodieCommitMetadata"
],
"default": "null"
},
{
"name":"hoodieCleanMetadata",
"type":[
"null",
"HoodieCleanMetadata"
],
"default": "null"
},
{
"name":"hoodieCompactionMetadata",
"type":[
"null",
"HoodieCompactionMetadata"
],
"default": "null"
},
{
"name":"hoodieRollbackMetadata",
"type":[
"null",
"HoodieRollbackMetadata"
],
"default": "null"
},
{
"name":"hoodieSavePointMetadata",
"type":[
"null",
"HoodieSavepointMetadata"
],
"default": "null"
},
{
"name":"commitTime",
"type":["null","string"]
},
{
"name":"actionType",
"type":["null","string"]
}
]
}

View File

@@ -0,0 +1,61 @@
{
"namespace":"com.uber.hoodie.avro.model",
"type":"record",
"name":"HoodieCommitMetadata",
"fields":[
{
"name":"partitionToWriteStats",
"type":["null", {
"type":"map",
"values":{
"type":"array",
"items":{
"name":"HoodieWriteStat",
"type":"record",
"fields":[
{
"name":"fileId",
"type":["null","string"]
},
{
"name":"path",
"type":["null","string"]
},
{
"name":"prevCommit",
"type":["null","string"]
},
{
"name":"numWrites",
"type":["null","long"]
},
{
"name":"numDeletes",
"type":["null","long"]
},
{
"name":"numUpdateWrites",
"type":["null","long"]
},
{
"name":"totalWriteBytes",
"type":["null","long"]
},
{
"name":"totalWriteErrors",
"type":["null","long"]
}
]
}
}
}]
},
{
"name":"extraMetadata",
"type":["null", {
"type":"map",
"values":"string"
}]
}
]
}

View File

@@ -0,0 +1,42 @@
{
"namespace":"com.uber.hoodie.avro.model",
"type":"record",
"name":"HoodieCompactionMetadata",
"fields":[
{
"name":"partitionToCompactionWriteStats",
"type": ["null", {
"type":"map",
"values":{
"type":"array",
"items":{
"name":"HoodieCompactionWriteStat",
"type":"record",
"fields":[
{
"name":"partitionPath",
"type":["null","string"]
},
{
"name":"totalLogRecords",
"type":["null","long"]
},
{
"name":"totalLogFiles",
"type":["null","long"]
},
{
"name":"totalRecordsToBeUpdate",
"type":["null","long"]
},
{
"name":"hoodieWriteStat",
"type":["null","HoodieWriteStat"]
}
]
}
}
}]
}
]
}

View File

@@ -0,0 +1,21 @@
/*
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.uber.hoodie.common.model;
/**
 * The kinds of actions that can be recorded on a Hoodie timeline.
 *
 * <p>NOTE(review): constant names are intentionally lowercase — presumably they
 * mirror the action-type strings stored in the timeline metadata; confirm
 * against the callers before renaming.
 */
public enum ActionType {
  commit,
  savepoint,
  compaction,
  clean,
  rollback
}

View File

@@ -0,0 +1,40 @@
/*
* Copyright (c) 2017 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*
*/
package com.uber.hoodie.common.model;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
/**
 * A log file belonging to the archived timeline.
 *
 * <p>Behaves exactly like its parent {@link HoodieLogFile}; it only adds the
 * {@code .archive} extension constant and a distinct {@link #toString()}.
 */
public class HoodieArchivedLogFile extends HoodieLogFile {

  /** File extension used for archived commit log files. */
  public static final String ARCHIVE_EXTENSION = ".archive";

  /** Wraps an existing file listing entry. */
  public HoodieArchivedLogFile(FileStatus fileStatus) {
    super(fileStatus);
  }

  /** Wraps a path that may not have been listed yet. */
  public HoodieArchivedLogFile(Path logPath) {
    super(logPath);
  }

  @Override
  public String toString() {
    // Same output as "HoodieArchivedLogFile {" + path + '}'.
    return String.format("HoodieArchivedLogFile {%s}", getPath());
  }
}

View File

@@ -41,9 +41,9 @@ import java.util.Map;
@JsonIgnoreProperties(ignoreUnknown = true)
public class HoodieCommitMetadata implements Serializable {
private static volatile Logger log = LogManager.getLogger(HoodieCommitMetadata.class);
protected HashMap<String, List<HoodieWriteStat>> partitionToWriteStats;
protected Map<String, List<HoodieWriteStat>> partitionToWriteStats;
private HashMap<String, String> extraMetadataMap;
private Map<String, String> extraMetadataMap;
public HoodieCommitMetadata() {
extraMetadataMap = new HashMap<>();
@@ -65,7 +65,9 @@ public class HoodieCommitMetadata implements Serializable {
return partitionToWriteStats.get(partitionPath);
}
public HashMap<String, List<HoodieWriteStat>> getPartitionToWriteStats() {
public Map<String, String> getExtraMetadata() { return extraMetadataMap; }
public Map<String, List<HoodieWriteStat>> getPartitionToWriteStats() {
return partitionToWriteStats;
}

View File

@@ -83,11 +83,12 @@ public class HoodieLogFile {
public HoodieLogFile rollOver(FileSystem fs) throws IOException {
String fileId = getFileId();
String baseCommitTime = getBaseCommitTime();
String extension = "." + FSUtils.getFileExtensionFromLog(path);
int newVersion = FSUtils
.computeNextLogVersion(fs, path.getParent(), fileId,
DELTA_EXTENSION, baseCommitTime);
extension, baseCommitTime);
return new HoodieLogFile(new Path(path.getParent(),
FSUtils.makeLogFileName(fileId, DELTA_EXTENSION, baseCommitTime, newVersion)));
FSUtils.makeLogFileName(fileId, extension, baseCommitTime, newVersion)));
}
public static Comparator<HoodieLogFile> getLogVersionComparator() {

View File

@@ -129,6 +129,15 @@ public class HoodieActiveTimeline extends HoodieDefaultTimeline {
return getTimelineOfActions(Sets.newHashSet(COMMIT_ACTION, COMPACTION_ACTION, DELTA_COMMIT_ACTION));
}
/**
 * Get all instants (commits, delta commits, compactions, cleans, savepoints,
 * rollbacks) that result in actions, in the active timeline.
 *
 * @return the active timeline filtered down to the listed action types
 */
public HoodieTimeline getAllCommitsTimeline() {
  return getTimelineOfActions(
      Sets.newHashSet(COMMIT_ACTION, COMPACTION_ACTION, DELTA_COMMIT_ACTION,
          CLEAN_ACTION, SAVEPOINT_ACTION, ROLLBACK_ACTION));
}
/**
* Get only pure commits (inflight and completed) in the active timeline
*

View File

@@ -45,7 +45,7 @@ import java.util.stream.Collectors;
* This class can be serialized and de-serialized and on de-serialization the FileSystem is re-initialized.
*/
public class HoodieArchivedTimeline extends HoodieDefaultTimeline {
private static final String HOODIE_COMMIT_ARCHIVE_LOG_FILE = "commits.archived";
private static final String HOODIE_COMMIT_ARCHIVE_LOG_FILE = "commits";
private transient FileSystem fs;
private String metaPath;
private Map<String, byte[]> readCommits = new HashMap<>();