1
0

[HUDI-164] Fixes incorrect averageBytesPerRecord

When the number of records written is zero, the averageBytesPerRecord computation yields a huge value (a division by zero whose result is ceiled to Long.MAX_VALUE), causing OOM. This commit fixes the issue by traversing the commits in reverse order until a more reasonable average record size can be computed; if that is not possible, it returns the default configured record size.
This commit is contained in:
Bhavani Sudha Saktheeswaran
2019-08-30 16:29:23 -07:00
committed by vinoth chandar
parent 93bc5e2153
commit 64df98fc4a
4 changed files with 165 additions and 25 deletions

View File

@@ -187,6 +187,12 @@ public interface HoodieTimeline extends Serializable {
*/
Stream<HoodieInstant> getInstants();
/**
 * Get the stream of completed instants in reverse order.
 *
 * TODO: Change code references that call getInstants() and reverse the instants later on to use this
 * method instead.
 *
 * @return the stream of completed instants in reverse order
 */
Stream<HoodieInstant> getReverseOrderedInstants();
/**
* @return true if the passed in instant is before the first completed instant in the timeline
*/

View File

@@ -18,6 +18,8 @@
package org.apache.hudi.common.table.timeline;
import static java.util.Collections.reverse;
import com.google.common.collect.Sets;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
@@ -195,6 +197,13 @@ public class HoodieDefaultTimeline implements HoodieTimeline {
return instants.stream();
}
/**
 * Returns the instants produced by {@link #getInstants()} in reverse order.
 *
 * <p>The instants are copied into a fresh list before being reversed, so the reversal operates on
 * that copy only.
 *
 * @return a stream over a reversed copy of the instants returned by {@link #getInstants()}
 */
@Override
public Stream<HoodieInstant> getReverseOrderedInstants() {
  // Collectors.toList() gives no guarantee that the resulting list is mutable, so collect into an
  // explicit ArrayList before reversing it in place.
  List<HoodieInstant> reversedInstants =
      getInstants().collect(Collectors.toCollection(java.util.ArrayList::new));
  reverse(reversedInstants);
  return reversedInstants.stream();
}
@Override
public boolean isBeforeTimelineStarts(String instant) {
Option<HoodieInstant> firstCommit = firstInstant();