1
0

[HUDI-1681] Support object storage for Flink writer (#2662)

In order to support object storage, we need these changes:

* Use the Hadoop FileSystem abstraction so that pluggable filesystem
  implementations (e.g. object-store backends) can be discovered
* Do not fetch file size until the file handle is closed
* Do not close the opened filesystem because we want to use the
  filesystem cache
This commit is contained in:
Danny Chan
2021-03-12 16:39:24 +08:00
committed by GitHub
parent e8e6708aea
commit 20786ab8a2
18 changed files with 443 additions and 164 deletions

View File

@@ -274,6 +274,7 @@ public class HoodieAppendHandle<T extends HoodieRecordPayload, I, K, O> extends
if (!stat.getLogFiles().contains(result.logFile().getFileName())) {
stat.addLogFiles(result.logFile().getFileName());
}
stat.setFileSizeInBytes(result.size());
}
private void updateRuntimeStats(HoodieDeltaWriteStat stat) {
@@ -304,6 +305,7 @@ public class HoodieAppendHandle<T extends HoodieRecordPayload, I, K, O> extends
} else if (stat.getPath().endsWith(result.logFile().getFileName())) {
// append/continued writing to the same log file
stat.setLogOffset(Math.min(stat.getLogOffset(), result.offset()));
stat.setFileSizeInBytes(stat.getFileSizeInBytes() + result.size());
accumulateWriteCounts(stat, result);
accumulateRuntimeStats(stat);
} else {

View File

@@ -33,4 +33,6 @@ public interface HoodieFileWriter<R extends IndexedRecord> {
void close() throws IOException;
void writeAvro(String key, R oldRecord) throws IOException;
long getBytesWritten();
}

View File

@@ -156,4 +156,9 @@ public class HoodieHFileWriter<T extends HoodieRecordPayload, R extends IndexedR
writer.close();
writer = null;
}
@Override
public long getBytesWritten() {
return fs.getBytesWritten(file);
}
}

View File

@@ -94,4 +94,9 @@ public class HoodieParquetWriter<T extends HoodieRecordPayload, R extends Indexe
super.write(object);
writeSupport.add(key);
}
@Override
public long getBytesWritten() {
return fs.getBytesWritten(file);
}
}