From 189d5297b8826f5345fcb132dc7318fc4253c94e Mon Sep 17 00:00:00 2001 From: Alexey Kudinkin Date: Sat, 26 Mar 2022 14:51:36 -0700 Subject: [PATCH] [HUDI-3709] Fixing `ParquetWriter` impls not respecting Parquet Max File Size limit (#5129) --- .../java/org/apache/hudi/io/storage/HoodieFileWriter.java | 2 -- .../java/org/apache/hudi/io/storage/HoodieHFileWriter.java | 5 ----- .../java/org/apache/hudi/io/storage/HoodieOrcWriter.java | 5 ----- .../org/apache/hudi/io/storage/HoodieParquetWriter.java | 7 +------ .../action/commit/TestJavaCopyOnWriteActionExecutor.java | 2 +- .../io/storage/row/HoodieInternalRowParquetWriter.java | 2 +- .../table/action/commit/TestCopyOnWriteActionExecutor.java | 2 +- 7 files changed, 4 insertions(+), 21 deletions(-) diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieFileWriter.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieFileWriter.java index a5792349c..9f749566b 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieFileWriter.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieFileWriter.java @@ -37,8 +37,6 @@ public interface HoodieFileWriter { void writeAvro(String key, R oldRecord) throws IOException; - long getBytesWritten(); - default void prepRecordWithMetadata(R avroRecord, HoodieRecord record, String instantTime, Integer partitionId, AtomicLong recordIndex, String fileName) { String seqId = HoodieRecord.generateSequenceId(instantTime, partitionId, recordIndex.getAndIncrement()); HoodieAvroUtils.addHoodieKeyToRecord((GenericRecord) avroRecord, record.getRecordKey(), record.getPartitionPath(), fileName); diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileWriter.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileWriter.java index 5dcd2e0a3..be79f5033 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileWriter.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileWriter.java @@ -187,9 +187,4 @@ public class HoodieHFileWriter @Override public boolean canWrite() { - return fs.getBytesWritten(file) < maxFileSize; + return getDataSize() < maxFileSize; } @Override diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/commit/TestCopyOnWriteActionExecutor.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/commit/TestCopyOnWriteActionExecutor.java index 0b29cf25f..4ae845636 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/commit/TestCopyOnWriteActionExecutor.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/commit/TestCopyOnWriteActionExecutor.java @@ -437,7 +437,7 @@ public class TestCopyOnWriteActionExecutor extends HoodieClientTestBase { counts++; } } - assertEquals(3, counts, "If the number of records are more than 1150, then there should be a new file"); + assertEquals(5, counts, "If the number of records are more than 1150, then there should be a new file"); } @Test