1. Small file size handling for inserts into log files. In summary, the total size of the log file is compared with the parquet max file size and, if there is room for more inserts, they are added to it.
This commit is contained in:
committed by
vinoth chandar
parent
324de298bc
commit
459e523d9e
@@ -66,6 +66,18 @@
|
||||
"name":"numInserts",
|
||||
"type":["null","long"],
|
||||
"default" : null
|
||||
},
|
||||
{
|
||||
"name":"totalLogBlocks",
|
||||
"type":["null","long"]
|
||||
},
|
||||
{
|
||||
"name":"totalCorruptLogBlock",
|
||||
"type":["null","long"]
|
||||
},
|
||||
{
|
||||
"name":"totalRollbackBlocks",
|
||||
"type":["null","long"]
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
@@ -119,12 +119,12 @@ public class HoodieCommitMetadata implements Serializable {
|
||||
return getObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(this);
|
||||
}
|
||||
|
||||
public static HoodieCommitMetadata fromJsonString(String jsonStr) throws IOException {
|
||||
public static <T> T fromJsonString(String jsonStr, Class<T> clazz) throws Exception {
|
||||
if (jsonStr == null || jsonStr.isEmpty()) {
|
||||
// For an empty commit file (no data, or something bad happened).
|
||||
return new HoodieCommitMetadata();
|
||||
return clazz.newInstance();
|
||||
}
|
||||
return getObjectMapper().readValue(jsonStr, HoodieCommitMetadata.class);
|
||||
return getObjectMapper().readValue(jsonStr, clazz);
|
||||
}
|
||||
|
||||
// Here the functions are named "fetch" instead of "get", to avoid the json conversion.
|
||||
@@ -319,11 +319,15 @@ public class HoodieCommitMetadata implements Serializable {
|
||||
return result;
|
||||
}
|
||||
|
||||
public static HoodieCommitMetadata fromBytes(byte[] bytes) throws IOException {
|
||||
return fromJsonString(new String(bytes, Charset.forName("utf-8")));
|
||||
public static <T> T fromBytes(byte[] bytes, Class<T> clazz) throws IOException {
|
||||
try {
|
||||
return fromJsonString(new String(bytes, Charset.forName("utf-8")), clazz);
|
||||
} catch (Exception e) {
|
||||
throw new IOException("unable to read commit metadata", e);
|
||||
}
|
||||
}
|
||||
|
||||
private static ObjectMapper getObjectMapper() {
|
||||
protected static ObjectMapper getObjectMapper() {
|
||||
ObjectMapper mapper = new ObjectMapper();
|
||||
mapper.disable(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES);
|
||||
mapper.setVisibility(PropertyAccessor.FIELD, JsonAutoDetect.Visibility.ANY);
|
||||
|
||||
@@ -0,0 +1,98 @@
|
||||
/*
|
||||
* Copyright (c) 2018 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package com.uber.hoodie.common.model;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
|
||||
import java.io.Serializable;
|
||||
import javax.annotation.Nullable;
|
||||
|
||||
@JsonIgnoreProperties(ignoreUnknown = true)
|
||||
public class HoodieRollingStat implements Serializable {
|
||||
|
||||
private String fileId;
|
||||
private long inserts;
|
||||
private long upserts;
|
||||
private long deletes;
|
||||
// TODO
|
||||
@Nullable
|
||||
private long totalInputWriteBytesToDisk;
|
||||
@Nullable
|
||||
private long totalInputWriteBytesOnDisk;
|
||||
|
||||
public HoodieRollingStat() {
|
||||
// called by jackson json lib
|
||||
}
|
||||
|
||||
public HoodieRollingStat(String fileId, long inserts, long upserts, long deletes, long totalInputWriteBytesOnDisk) {
|
||||
this.fileId = fileId;
|
||||
this.inserts = inserts;
|
||||
this.upserts = upserts;
|
||||
this.deletes = deletes;
|
||||
this.totalInputWriteBytesOnDisk = totalInputWriteBytesOnDisk;
|
||||
}
|
||||
|
||||
public String getFileId() {
|
||||
return fileId;
|
||||
}
|
||||
|
||||
public void setFileId(String fileId) {
|
||||
this.fileId = fileId;
|
||||
}
|
||||
|
||||
public long getInserts() {
|
||||
return inserts;
|
||||
}
|
||||
|
||||
public void setInserts(long inserts) {
|
||||
this.inserts = inserts;
|
||||
}
|
||||
|
||||
public long getUpserts() {
|
||||
return upserts;
|
||||
}
|
||||
|
||||
public void setUpserts(long upserts) {
|
||||
this.upserts = upserts;
|
||||
}
|
||||
|
||||
public long getDeletes() {
|
||||
return deletes;
|
||||
}
|
||||
|
||||
public void setDeletes(long deletes) {
|
||||
this.deletes = deletes;
|
||||
}
|
||||
|
||||
public long addInserts(long inserts) {
|
||||
this.inserts += inserts;
|
||||
return this.inserts;
|
||||
}
|
||||
|
||||
public long addUpserts(long upserts) {
|
||||
this.upserts += upserts;
|
||||
return this.upserts;
|
||||
}
|
||||
|
||||
public long addDeletes(long deletes) {
|
||||
this.deletes += deletes;
|
||||
return this.deletes;
|
||||
}
|
||||
|
||||
public long getTotalInputWriteBytesOnDisk() {
|
||||
return totalInputWriteBytesOnDisk;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,101 @@
|
||||
/*
|
||||
* Copyright (c) 2018 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package com.uber.hoodie.common.model;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Serializable;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import org.apache.log4j.LogManager;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
/**
|
||||
* This class holds statistics about files belonging to a dataset
|
||||
*/
|
||||
public class HoodieRollingStatMetadata implements Serializable {
|
||||
|
||||
private static volatile Logger log = LogManager.getLogger(HoodieRollingStatMetadata.class);
|
||||
protected Map<String, Map<String, HoodieRollingStat>> partitionToRollingStats;
|
||||
private String actionType = "DUMMY_ACTION";
|
||||
public static final String ROLLING_STAT_METADATA_KEY = "ROLLING_STAT";
|
||||
|
||||
public void addRollingStat(String partitionPath, HoodieRollingStat stat) {
|
||||
if (!partitionToRollingStats.containsKey(partitionPath)) {
|
||||
partitionToRollingStats.put(partitionPath, new RollingStatsHashMap<>());
|
||||
}
|
||||
partitionToRollingStats.get(partitionPath).put(stat.getFileId(), stat);
|
||||
}
|
||||
|
||||
public HoodieRollingStatMetadata() {
|
||||
partitionToRollingStats = new HashMap<>();
|
||||
}
|
||||
|
||||
public HoodieRollingStatMetadata(String actionType) {
|
||||
this();
|
||||
this.actionType = actionType;
|
||||
}
|
||||
|
||||
class RollingStatsHashMap<K, V> extends HashMap<K, V> {
|
||||
|
||||
@Override
|
||||
public V put(K key, V value) {
|
||||
V v = this.get(key);
|
||||
if (v == null) {
|
||||
super.put(key, value);
|
||||
} else if (v instanceof HoodieRollingStat) {
|
||||
long inserts = ((HoodieRollingStat) v).getInserts();
|
||||
long upserts = ((HoodieRollingStat) v).getUpserts();
|
||||
long deletes = ((HoodieRollingStat) v).getDeletes();
|
||||
((HoodieRollingStat) value).addInserts(inserts);
|
||||
((HoodieRollingStat) value).addUpserts(upserts);
|
||||
((HoodieRollingStat) value).addDeletes(deletes);
|
||||
super.put(key, value);
|
||||
}
|
||||
return value;
|
||||
}
|
||||
}
|
||||
|
||||
public static HoodieRollingStatMetadata fromBytes(byte[] bytes) throws IOException {
|
||||
return HoodieCommitMetadata.fromBytes(bytes, HoodieRollingStatMetadata.class);
|
||||
}
|
||||
|
||||
public String toJsonString() throws IOException {
|
||||
if (partitionToRollingStats.containsKey(null)) {
|
||||
log.info("partition path is null for " + partitionToRollingStats.get(null));
|
||||
partitionToRollingStats.remove(null);
|
||||
}
|
||||
return HoodieCommitMetadata.getObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(this);
|
||||
}
|
||||
|
||||
public HoodieRollingStatMetadata merge(HoodieRollingStatMetadata rollingStatMetadata) {
|
||||
for (Map.Entry<String, Map<String, HoodieRollingStat>> stat : rollingStatMetadata.partitionToRollingStats
|
||||
.entrySet()) {
|
||||
for (Map.Entry<String, HoodieRollingStat> innerStat : stat.getValue().entrySet()) {
|
||||
this.addRollingStat(stat.getKey(), innerStat.getValue());
|
||||
}
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
||||
public Map<String, Map<String, HoodieRollingStat>> getPartitionToRollingStats() {
|
||||
return partitionToRollingStats;
|
||||
}
|
||||
|
||||
public String getActionType() {
|
||||
return actionType;
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user