1. Use HoodieLogFormat to archive commits and other actions. 2. Introduce Avro schemas for commits and compactions, plus an Avro wrapper schema.
This commit is contained in:
committed by
vinoth chandar
parent
616c9a68c3
commit
19c22b231e
55
hoodie-common/src/main/avro/HoodieArchivedMetaEntry.avsc
Normal file
55
hoodie-common/src/main/avro/HoodieArchivedMetaEntry.avsc
Normal file
@@ -0,0 +1,55 @@
|
||||
{
   "type":"record",
   "name":"HoodieArchivedMetaEntry",
   "namespace":"com.uber.hoodie.avro.model",
   "doc":"Wrapper record for one archived entry: optional commit, clean, compaction, rollback and savepoint metadata, plus the commitTime and actionType strings.",
   "fields":[
      {
         "name":"hoodieCommitMetadata",
         "type":[
            "null",
            "HoodieCommitMetadata"
         ],
         "default":null
      },
      {
         "name":"hoodieCleanMetadata",
         "type":[
            "null",
            "HoodieCleanMetadata"
         ],
         "default":null
      },
      {
         "name":"hoodieCompactionMetadata",
         "type":[
            "null",
            "HoodieCompactionMetadata"
         ],
         "default":null
      },
      {
         "name":"hoodieRollbackMetadata",
         "type":[
            "null",
            "HoodieRollbackMetadata"
         ],
         "default":null
      },
      {
         "name":"hoodieSavePointMetadata",
         "type":[
            "null",
            "HoodieSavepointMetadata"
         ],
         "default":null
      },
      {
         "name":"commitTime",
         "type":["null","string"]
      },
      {
         "name":"actionType",
         "type":["null","string"]
      }
   ]
}
|
||||
61
hoodie-common/src/main/avro/HoodieCommitMetadata.avsc
Normal file
61
hoodie-common/src/main/avro/HoodieCommitMetadata.avsc
Normal file
@@ -0,0 +1,61 @@
|
||||
{
   "namespace":"com.uber.hoodie.avro.model",
   "type":"record",
   "name":"HoodieCommitMetadata",
   "doc":"Commit metadata: a map to lists of HoodieWriteStat records (partitionToWriteStats) and a map of extra string metadata.",
   "fields":[
      {
         "name":"partitionToWriteStats",
         "type":["null", {
            "type":"map",
            "values":{
               "type":"array",
               "items":{
                  "name":"HoodieWriteStat",
                  "type":"record",
                  "fields":[
                     {
                        "name":"fileId",
                        "type":["null","string"]
                     },
                     {
                        "name":"path",
                        "type":["null","string"]
                     },
                     {
                        "name":"prevCommit",
                        "type":["null","string"]
                     },
                     {
                        "name":"numWrites",
                        "type":["null","long"]
                     },
                     {
                        "name":"numDeletes",
                        "type":["null","long"]
                     },
                     {
                        "name":"numUpdateWrites",
                        "type":["null","long"]
                     },
                     {
                        "name":"totalWriteBytes",
                        "type":["null","long"]
                     },
                     {
                        "name":"totalWriteErrors",
                        "type":["null","long"]
                     }
                  ]
               }
            }
         }]
      },
      {
         "name":"extraMetadata",
         "type":["null", {
            "type":"map",
            "values":"string"
         }]
      }
   ]
}
|
||||
42
hoodie-common/src/main/avro/HoodieCompactionMetadata.avsc
Normal file
42
hoodie-common/src/main/avro/HoodieCompactionMetadata.avsc
Normal file
@@ -0,0 +1,42 @@
|
||||
{
   "namespace":"com.uber.hoodie.avro.model",
   "type":"record",
   "name":"HoodieCompactionMetadata",
   "doc":"Compaction metadata: a map to lists of HoodieCompactionWriteStat records, each of which may embed a HoodieWriteStat.",
   "fields":[
      {
         "name":"partitionToCompactionWriteStats",
         "type": ["null", {
            "type":"map",
            "values":{
               "type":"array",
               "items":{
                  "name":"HoodieCompactionWriteStat",
                  "type":"record",
                  "fields":[
                     {
                        "name":"partitionPath",
                        "type":["null","string"]
                     },
                     {
                        "name":"totalLogRecords",
                        "type":["null","long"]
                     },
                     {
                        "name":"totalLogFiles",
                        "type":["null","long"]
                     },
                     {
                        "name":"totalRecordsToBeUpdate",
                        "type":["null","long"]
                     },
                     {
                        "name":"hoodieWriteStat",
                        "type":["null","HoodieWriteStat"]
                     }
                  ]
               }
            }
         }]
      }
   ]
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,21 @@
|
||||
/*
|
||||
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package com.uber.hoodie.common.model;
|
||||
|
||||
/**
 * The kinds of action this module distinguishes: commit, savepoint, compaction, clean and
 * rollback.
 *
 * <p>NOTE(review): the constants are deliberately kept lowercase; presumably their
 * {@code name()} is matched against action strings used elsewhere in the project, so confirm
 * against callers before renaming them.
 */
public enum ActionType {
    commit, savepoint, compaction, clean, rollback;
}
|
||||
@@ -0,0 +1,40 @@
|
||||
/*
|
||||
* Copyright (c) 2017 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*
|
||||
*/
|
||||
|
||||
package com.uber.hoodie.common.model;
|
||||
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
|
||||
public class HoodieArchivedLogFile extends HoodieLogFile {
|
||||
|
||||
public static final String ARCHIVE_EXTENSION = ".archive";
|
||||
|
||||
public HoodieArchivedLogFile(FileStatus fileStatus) {
|
||||
super(fileStatus);
|
||||
}
|
||||
|
||||
public HoodieArchivedLogFile(Path logPath) {
|
||||
super(logPath);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "HoodieArchivedLogFile {" + super.getPath() + '}';
|
||||
}
|
||||
}
|
||||
@@ -41,9 +41,9 @@ import java.util.Map;
|
||||
@JsonIgnoreProperties(ignoreUnknown = true)
|
||||
public class HoodieCommitMetadata implements Serializable {
|
||||
private static volatile Logger log = LogManager.getLogger(HoodieCommitMetadata.class);
|
||||
protected HashMap<String, List<HoodieWriteStat>> partitionToWriteStats;
|
||||
protected Map<String, List<HoodieWriteStat>> partitionToWriteStats;
|
||||
|
||||
private HashMap<String, String> extraMetadataMap;
|
||||
private Map<String, String> extraMetadataMap;
|
||||
|
||||
public HoodieCommitMetadata() {
|
||||
extraMetadataMap = new HashMap<>();
|
||||
@@ -65,7 +65,9 @@ public class HoodieCommitMetadata implements Serializable {
|
||||
return partitionToWriteStats.get(partitionPath);
|
||||
}
|
||||
|
||||
public HashMap<String, List<HoodieWriteStat>> getPartitionToWriteStats() {
|
||||
public Map<String, String> getExtraMetadata() { return extraMetadataMap; }
|
||||
|
||||
public Map<String, List<HoodieWriteStat>> getPartitionToWriteStats() {
|
||||
return partitionToWriteStats;
|
||||
}
|
||||
|
||||
|
||||
@@ -83,11 +83,12 @@ public class HoodieLogFile {
|
||||
public HoodieLogFile rollOver(FileSystem fs) throws IOException {
|
||||
String fileId = getFileId();
|
||||
String baseCommitTime = getBaseCommitTime();
|
||||
String extension = "." + FSUtils.getFileExtensionFromLog(path);
|
||||
int newVersion = FSUtils
|
||||
.computeNextLogVersion(fs, path.getParent(), fileId,
|
||||
DELTA_EXTENSION, baseCommitTime);
|
||||
extension, baseCommitTime);
|
||||
return new HoodieLogFile(new Path(path.getParent(),
|
||||
FSUtils.makeLogFileName(fileId, DELTA_EXTENSION, baseCommitTime, newVersion)));
|
||||
FSUtils.makeLogFileName(fileId, extension, baseCommitTime, newVersion)));
|
||||
}
|
||||
|
||||
public static Comparator<HoodieLogFile> getLogVersionComparator() {
|
||||
|
||||
@@ -129,6 +129,15 @@ public class HoodieActiveTimeline extends HoodieDefaultTimeline {
|
||||
return getTimelineOfActions(Sets.newHashSet(COMMIT_ACTION, COMPACTION_ACTION, DELTA_COMMIT_ACTION));
|
||||
}
|
||||
|
||||
/**
|
||||
* Get all instants (commits, delta commits, compactions, clean, savepoint, rollback) that result in actions, in the active timeline
|
||||
**
|
||||
* @return
|
||||
*/
|
||||
public HoodieTimeline getAllCommitsTimeline() {
|
||||
return getTimelineOfActions(Sets.newHashSet(COMMIT_ACTION, COMPACTION_ACTION, DELTA_COMMIT_ACTION, CLEAN_ACTION, SAVEPOINT_ACTION, ROLLBACK_ACTION));
|
||||
}
|
||||
|
||||
/**
|
||||
* Get only pure commits (inflight and completed) in the active timeline
|
||||
*
|
||||
|
||||
@@ -45,7 +45,7 @@ import java.util.stream.Collectors;
|
||||
* This class can be serialized and de-serialized and on de-serialization the FileSystem is re-initialized.
|
||||
*/
|
||||
public class HoodieArchivedTimeline extends HoodieDefaultTimeline {
|
||||
private static final String HOODIE_COMMIT_ARCHIVE_LOG_FILE = "commits.archived";
|
||||
private static final String HOODIE_COMMIT_ARCHIVE_LOG_FILE = "commits";
|
||||
private transient FileSystem fs;
|
||||
private String metaPath;
|
||||
private Map<String, byte[]> readCommits = new HashMap<>();
|
||||
|
||||
@@ -22,17 +22,21 @@ import com.esotericsoftware.kryo.Kryo;
|
||||
import com.esotericsoftware.kryo.io.Input;
|
||||
import com.esotericsoftware.kryo.io.Output;
|
||||
import com.esotericsoftware.kryo.serializers.JavaSerializer;
|
||||
import com.uber.hoodie.avro.model.HoodieCleanMetadata;
|
||||
import com.uber.hoodie.common.HoodieCleanStat;
|
||||
import com.uber.hoodie.common.table.HoodieTableConfig;
|
||||
import com.uber.hoodie.common.table.HoodieTableMetaClient;
|
||||
import com.uber.hoodie.common.table.HoodieTimeline;
|
||||
import com.uber.hoodie.common.table.log.HoodieLogFormat;
|
||||
import com.uber.hoodie.common.table.log.HoodieLogFormat.Writer;
|
||||
import com.uber.hoodie.common.table.log.block.HoodieAvroDataBlock;
|
||||
import com.uber.hoodie.common.util.AvroUtils;
|
||||
import com.uber.hoodie.common.util.FSUtils;
|
||||
import com.uber.hoodie.common.util.HoodieAvroUtils;
|
||||
import org.apache.avro.Schema;
|
||||
import org.apache.avro.generic.GenericRecord;
|
||||
import org.apache.avro.generic.IndexedRecord;
|
||||
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.LocatedFileStatus;
|
||||
@@ -46,11 +50,15 @@ import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.io.Serializable;
|
||||
import java.text.SimpleDateFormat;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Date;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Optional;
|
||||
import java.util.Properties;
|
||||
import java.util.Random;
|
||||
import java.util.UUID;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
@@ -63,6 +71,8 @@ public class HoodieTestUtils {
|
||||
public static final String TEST_EXTENSION = ".test";
|
||||
public static final String RAW_TRIPS_TEST_NAME = "raw_trips";
|
||||
public static final int DEFAULT_TASK_PARTITIONID = 1;
|
||||
public static final String[] DEFAULT_PARTITION_PATHS = {"2016/03/15", "2015/03/16", "2015/03/17"};
|
||||
private static Random rand = new Random(46474747);
|
||||
|
||||
public static void resetFS() {
|
||||
HoodieTestUtils.fs = FSUtils.getFs();
|
||||
@@ -139,6 +149,26 @@ public class HoodieTestUtils {
|
||||
return instant + TEST_EXTENSION + HoodieTimeline.INFLIGHT_EXTENSION;
|
||||
}
|
||||
|
||||
public static void createCleanFiles(String basePath, String commitTime) throws IOException {
|
||||
Path commitFile =
|
||||
new Path(basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" + HoodieTimeline.makeCleanerFileName(commitTime));
|
||||
FileSystem fs = FSUtils.getFs();
|
||||
FSDataOutputStream os = fs.create(commitFile, true);
|
||||
try {
|
||||
HoodieCleanStat cleanStats = new HoodieCleanStat(HoodieCleaningPolicy.KEEP_LATEST_FILE_VERSIONS,
|
||||
DEFAULT_PARTITION_PATHS[rand.nextInt(DEFAULT_PARTITION_PATHS.length)],
|
||||
new ArrayList<>(), new ArrayList<>(),
|
||||
new ArrayList<>(), commitTime);
|
||||
// Create the clean metadata
|
||||
HoodieCleanMetadata cleanMetadata =
|
||||
AvroUtils.convertCleanMetadata(commitTime, Optional.of(0L), Arrays.asList(cleanStats));
|
||||
// Write empty clean metadata
|
||||
os.write(AvroUtils.serializeCleanMetadata(cleanMetadata).get());
|
||||
} finally {
|
||||
os.close();
|
||||
}
|
||||
}
|
||||
|
||||
public static String makeTestFileName(String instant) {
|
||||
return instant + TEST_EXTENSION;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user