[HUDI-164] Fixes incorrect averageBytesPerRecord
When the number of records written is zero, the averageBytesPerRecord computation divides by zero and the result is ceiled to Long.MAX_VALUE, yielding a huge record size that causes an OOM. This commit fixes the issue by traversing the commits in reverse order until a more reasonable average record size can be computed; if that is not possible, it returns the default configured record size.
This commit is contained in:
committed by
vinoth chandar
parent
93bc5e2153
commit
64df98fc4a
@@ -187,6 +187,12 @@ public interface HoodieTimeline extends Serializable {
|
||||
*/
|
||||
Stream<HoodieInstant> getInstants();
|
||||
|
||||
/**
|
||||
* @return the stream of completed instants in reverse order
|
||||
* TODO Change code references to getInstants() that reverse the instants later on to use this method instead.
|
||||
*/
|
||||
Stream<HoodieInstant> getReverseOrderedInstants();
|
||||
|
||||
/**
|
||||
* @return true if the passed in instant is before the first completed instant in the timeline
|
||||
*/
|
||||
|
||||
@@ -18,6 +18,8 @@
|
||||
|
||||
package org.apache.hudi.common.table.timeline;
|
||||
|
||||
import static java.util.Collections.reverse;
|
||||
|
||||
import com.google.common.collect.Sets;
|
||||
import java.security.MessageDigest;
|
||||
import java.security.NoSuchAlgorithmException;
|
||||
@@ -195,6 +197,13 @@ public class HoodieDefaultTimeline implements HoodieTimeline {
|
||||
return instants.stream();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Stream<HoodieInstant> getReverseOrderedInstants() {
|
||||
List<HoodieInstant> instants = getInstants().collect(Collectors.toList());
|
||||
reverse(instants);
|
||||
return instants.stream();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isBeforeTimelineStarts(String instant) {
|
||||
Option<HoodieInstant> firstCommit = firstInstant();
|
||||
|
||||
Reference in New Issue
Block a user