1
0

[HUDI-2537] Fix metadata table for flink (#3774)

This commit is contained in:
Danny Chan
2021-10-10 09:30:39 +08:00
committed by GitHub
parent 2a392d8e8e
commit ad63938890
12 changed files with 231 additions and 152 deletions

View File

@@ -126,11 +126,6 @@ public class StreamWriteOperatorCoordinator
*/
private HiveSyncContext hiveSyncContext;
/**
* A single-thread executor to handle metadata table sync.
*/
private NonThrownExecutor metadataSyncExecutor;
/**
* The table state.
*/
@@ -294,7 +289,7 @@ public class StreamWriteOperatorCoordinator
}
private void initMetadataSync() {
this.metadataSyncExecutor = new NonThrownExecutor(LOG, true);
this.writeClient.initMetadataWriter();
}
private void reset() {
@@ -498,14 +493,6 @@ public class StreamWriteOperatorCoordinator
this.executor = executor;
}
@VisibleForTesting
public void setMetadataSyncExecutor(NonThrownExecutor executor) throws Exception {
if (this.metadataSyncExecutor != null) {
this.metadataSyncExecutor.close();
}
this.metadataSyncExecutor = executor;
}
// -------------------------------------------------------------------------
// Inner Class
// -------------------------------------------------------------------------

View File

@@ -72,8 +72,6 @@ public class AppendWriteFunction<I> extends AbstractStreamWriteFunction<I> {
// it would check the validity.
// wait for the buffer data flush out and request a new instant
flushData(false);
// nullify the write helper for next ckp
this.writerHelper = null;
}
@Override
@@ -133,5 +131,10 @@ public class AppendWriteFunction<I> extends AbstractStreamWriteFunction<I> {
.endInput(endInput)
.build();
this.eventGateway.sendEventToCoordinator(event);
// nullify the write helper for next ckp
this.writerHelper = null;
this.writeStatuses.addAll(writeStatus);
// blocks flushing until the coordinator starts a new instant
this.confirming = true;
}
}

View File

@@ -22,6 +22,7 @@ import org.apache.hudi.client.common.HoodieFlinkEngineContext;
import org.apache.hudi.common.config.HoodieMetadataConfig;
import org.apache.hudi.common.fs.FSUtils;
import org.apache.hudi.configuration.FlinkOptions;
import org.apache.hudi.util.StreamerUtil;
import org.apache.flink.annotation.VisibleForTesting;
import org.apache.flink.configuration.Configuration;
@@ -48,10 +49,12 @@ public class FileIndex {
private final Path path;
private final HoodieMetadataConfig metadataConfig;
private List<String> partitionPaths; // cache of partition paths
private final boolean tableExists;
private FileIndex(Path path, Configuration conf) {
this.path = path;
this.metadataConfig = metadataConfig(conf);
this.tableExists = StreamerUtil.tableExists(path.toString(), StreamerUtil.getHadoopConf());
}
public static FileIndex instance(Path path, Configuration conf) {
@@ -111,6 +114,9 @@ public class FileIndex {
* Returns all the file statuses under the table base path.
*/
public FileStatus[] getFilesInPartitions() {
if (!tableExists) {
return new FileStatus[0];
}
String[] partitions = getOrBuildPartitionPaths().stream().map(p -> fullPartitionPath(path, p)).toArray(String[]::new);
return FSUtils.getFilesInPartitions(HoodieFlinkEngineContext.DEFAULT, metadataConfig, path.toString(),
partitions, "/tmp/")
@@ -165,8 +171,9 @@ public class FileIndex {
if (this.partitionPaths != null) {
return this.partitionPaths;
}
this.partitionPaths = FSUtils.getAllPartitionPaths(HoodieFlinkEngineContext.DEFAULT,
metadataConfig, path.toString());
this.partitionPaths = this.tableExists
? FSUtils.getAllPartitionPaths(HoodieFlinkEngineContext.DEFAULT, metadataConfig, path.toString())
: Collections.emptyList();
return this.partitionPaths;
}
@@ -174,7 +181,7 @@ public class FileIndex {
Properties properties = new Properties();
// set up metadata.enabled=true in table DDL to enable metadata listing
properties.put(HoodieMetadataConfig.ENABLE, conf.getBoolean(FlinkOptions.METADATA_ENABLED));
properties.put(HoodieMetadataConfig.ENABLE.key(), conf.getBoolean(FlinkOptions.METADATA_ENABLED));
return HoodieMetadataConfig.newBuilder().fromProperties(properties).build();
}

View File

@@ -41,7 +41,6 @@ import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.io.TempDir;
@@ -181,7 +180,7 @@ public class TestStreamWriteOperatorCoordinator {
assertDoesNotThrow(() -> coordinator.notifyCheckpointComplete(1));
}
@Disabled
@Test
void testSyncMetadataTable() throws Exception {
// reset
reset();
@@ -193,7 +192,6 @@ public class TestStreamWriteOperatorCoordinator {
coordinator = new StreamWriteOperatorCoordinator(conf, context);
coordinator.start();
coordinator.setExecutor(new MockCoordinatorExecutor(context));
coordinator.setMetadataSyncExecutor(new MockCoordinatorExecutor(context));
final WriteMetadataEvent event0 = WriteMetadataEvent.emptyBootstrap(0);
@@ -209,7 +207,7 @@ public class TestStreamWriteOperatorCoordinator {
assertThat(completedTimeline.lastInstant().get().getTimestamp(), is("0000000000000"));
// test metadata table compaction
// write another 4 commits
// write another 3 commits
for (int i = 1; i < 4; i++) {
instant = mockWriteWithMetadata();
metadataTableMetaClient.reloadActiveTimeline();
@@ -247,7 +245,13 @@ public class TestStreamWriteOperatorCoordinator {
double failureFraction) {
final WriteStatus writeStatus = new WriteStatus(trackSuccessRecords, failureFraction);
writeStatus.setPartitionPath(partitionPath);
writeStatus.setStat(new HoodieWriteStat());
HoodieWriteStat writeStat = new HoodieWriteStat();
writeStat.setPartitionPath(partitionPath);
writeStat.setFileId("fileId123");
writeStat.setPath("path123");
writeStatus.setStat(writeStat);
return WriteMetadataEvent.builder()
.taskID(taskId)

View File

@@ -142,9 +142,6 @@ public class StreamWriteFunctionWrapper<I> {
public void openFunction() throws Exception {
this.coordinator.start();
this.coordinator.setExecutor(new MockCoordinatorExecutor(coordinatorContext));
if (conf.getBoolean(FlinkOptions.METADATA_ENABLED)) {
this.coordinator.setMetadataSyncExecutor(new MockCoordinatorExecutor(coordinatorContext));
}
toHoodieFunction = new RowDataToHoodieFunction<>(TestConfigurations.ROW_TYPE, conf);
toHoodieFunction.setRuntimeContext(runtimeContext);
toHoodieFunction.open(conf);

View File

@@ -27,7 +27,6 @@ import org.apache.hudi.utils.TestData;
import org.apache.flink.configuration.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.io.TempDir;
import org.junit.jupiter.params.ParameterizedTest;
@@ -90,7 +89,8 @@ public class TestFileIndex {
assertTrue(fileStatuses[0].getPath().toString().endsWith(HoodieFileFormat.PARQUET.getFileExtension()));
}
@Disabled
@ParameterizedTest
@ValueSource(booleans = {true, false})
void testFileListingEmptyTable(boolean enableMetadata) {
Configuration conf = TestConfigurations.getDefaultConf(tempFile.getAbsolutePath());
conf.setBoolean(FlinkOptions.METADATA_ENABLED, enableMetadata);