[HUDI-855] Run Cleaner async with writing (#1577)
- Cleaner can now run concurrently with write operations
- Added configs to turn this on/off

Co-authored-by: Vinoth Chandar <vinoth@apache.org>
This commit is contained in:
committed by
GitHub
parent
31247e9b34
commit
8919be6a5d
@@ -26,7 +26,6 @@ import org.apache.hudi.cli.TableHeader;
|
||||
import org.apache.hudi.cli.testutils.AbstractShellIntegrationTest;
|
||||
import org.apache.hudi.cli.testutils.HoodieTestCommitMetadataGenerator;
|
||||
import org.apache.hudi.common.model.HoodieCleaningPolicy;
|
||||
import org.apache.hudi.common.model.HoodiePartitionMetadata;
|
||||
import org.apache.hudi.common.model.HoodieTableType;
|
||||
import org.apache.hudi.common.table.HoodieTableMetaClient;
|
||||
import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
|
||||
@@ -36,6 +35,8 @@ import org.apache.hudi.common.table.timeline.TimelineMetadataUtils;
|
||||
import org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hudi.common.util.Option;
|
||||
import org.apache.hudi.testutils.HoodieTestDataGenerator;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.springframework.shell.core.CommandResult;
|
||||
@@ -43,11 +44,10 @@ import org.springframework.shell.core.CommandResult;
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.net.URL;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Paths;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.UUID;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertNotNull;
|
||||
@@ -77,6 +77,10 @@ public class TestCleansCommand extends AbstractShellIntegrationTest {
|
||||
Configuration conf = HoodieCLI.conf;
|
||||
|
||||
metaClient = HoodieCLI.getTableMetaClient();
|
||||
String fileId1 = UUID.randomUUID().toString();
|
||||
String fileId2 = UUID.randomUUID().toString();
|
||||
HoodieTestDataGenerator.writePartitionMetadata(fs, HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS, tablePath);
|
||||
|
||||
// Create four commits
|
||||
for (int i = 100; i < 104; i++) {
|
||||
String timestamp = String.valueOf(i);
|
||||
@@ -86,7 +90,8 @@ public class TestCleansCommand extends AbstractShellIntegrationTest {
|
||||
// Inflight Compaction
|
||||
HoodieTestCommitMetadataGenerator.createCompactionAuxiliaryMetadata(tablePath,
|
||||
new HoodieInstant(HoodieInstant.State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, timestamp), conf);
|
||||
HoodieTestCommitMetadataGenerator.createCommitFileWithMetadata(tablePath, timestamp, conf);
|
||||
HoodieTestCommitMetadataGenerator.createCommitFileWithMetadata(tablePath, timestamp, conf, fileId1, fileId2,
|
||||
Option.empty(), Option.empty());
|
||||
}
|
||||
|
||||
metaClient = HoodieTableMetaClient.reload(metaClient);
|
||||
@@ -103,9 +108,6 @@ public class TestCleansCommand extends AbstractShellIntegrationTest {
|
||||
assertNotNull(propsFilePath, "Not found properties file");
|
||||
|
||||
// First, run clean
|
||||
Files.createFile(Paths.get(tablePath,
|
||||
HoodieTestCommitMetadataGenerator.DEFAULT_FIRST_PARTITION_PATH,
|
||||
HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE));
|
||||
SparkMain.clean(jsc, HoodieCLI.basePath, propsFilePath.getPath(), new ArrayList<>());
|
||||
assertEquals(1, metaClient.getActiveTimeline().reload().getCleanerTimeline().getInstants().count(),
|
||||
"Loaded 1 clean and the count should match");
|
||||
@@ -125,7 +127,7 @@ public class TestCleansCommand extends AbstractShellIntegrationTest {
|
||||
|
||||
// EarliestCommandRetained should be 102, since hoodie.cleaner.commits.retained=2
|
||||
// Total Time Taken needs to be read from the clean metadata
|
||||
rows.add(new Comparable[] {clean.getTimestamp(), "102", "0", getLatestCleanTimeTakenInMillis().toString()});
|
||||
rows.add(new Comparable[] {clean.getTimestamp(), "102", "2", getLatestCleanTimeTakenInMillis().toString()});
|
||||
|
||||
String expected = HoodiePrintHelper.print(header, new HashMap<>(), "", false, -1, false, rows);
|
||||
expected = removeNonWordAndStripSpace(expected);
|
||||
@@ -142,12 +144,6 @@ public class TestCleansCommand extends AbstractShellIntegrationTest {
|
||||
assertNotNull(propsFilePath, "Not found properties file");
|
||||
|
||||
// First, run clean with two partition
|
||||
Files.createFile(Paths.get(tablePath,
|
||||
HoodieTestCommitMetadataGenerator.DEFAULT_FIRST_PARTITION_PATH,
|
||||
HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE));
|
||||
Files.createFile(Paths.get(tablePath,
|
||||
HoodieTestCommitMetadataGenerator.DEFAULT_SECOND_PARTITION_PATH,
|
||||
HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE));
|
||||
SparkMain.clean(jsc, HoodieCLI.basePath, propsFilePath.toString(), new ArrayList<>());
|
||||
assertEquals(1, metaClient.getActiveTimeline().reload().getCleanerTimeline().getInstants().count(),
|
||||
"Loaded 1 clean and the count should match");
|
||||
@@ -165,9 +161,11 @@ public class TestCleansCommand extends AbstractShellIntegrationTest {
|
||||
// There should be two partition paths
|
||||
List<Comparable[]> rows = new ArrayList<>();
|
||||
rows.add(new Comparable[] {HoodieTestCommitMetadataGenerator.DEFAULT_SECOND_PARTITION_PATH,
|
||||
HoodieCleaningPolicy.KEEP_LATEST_COMMITS, "1", "0"});
|
||||
rows.add(new Comparable[] {HoodieTestCommitMetadataGenerator.DEFAULT_THIRD_PARTITION_PATH,
|
||||
HoodieCleaningPolicy.KEEP_LATEST_COMMITS, "0", "0"});
|
||||
rows.add(new Comparable[] {HoodieTestCommitMetadataGenerator.DEFAULT_FIRST_PARTITION_PATH,
|
||||
HoodieCleaningPolicy.KEEP_LATEST_COMMITS, "0", "0"});
|
||||
HoodieCleaningPolicy.KEEP_LATEST_COMMITS, "1", "0"});
|
||||
|
||||
String expected = HoodiePrintHelper.print(header, new HashMap<>(), "", false, -1, false, rows);
|
||||
expected = removeNonWordAndStripSpace(expected);
|
||||
|
||||
@@ -1,106 +0,0 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi.cli.integ;
|
||||
|
||||
import org.apache.hudi.cli.HoodieCLI;
|
||||
import org.apache.hudi.cli.commands.TableCommand;
|
||||
import org.apache.hudi.cli.testutils.AbstractShellIntegrationTest;
|
||||
import org.apache.hudi.cli.testutils.HoodieTestCommitMetadataGenerator;
|
||||
import org.apache.hudi.common.model.HoodiePartitionMetadata;
|
||||
import org.apache.hudi.common.model.HoodieTableType;
|
||||
import org.apache.hudi.common.table.timeline.HoodieInstant;
|
||||
import org.apache.hudi.common.table.timeline.HoodieTimeline;
|
||||
import org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.springframework.shell.core.CommandResult;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.net.URL;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Paths;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertNotNull;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
|
||||
public class ITTestCleansCommand extends AbstractShellIntegrationTest {
|
||||
|
||||
private String tablePath;
|
||||
private URL propsFilePath;
|
||||
|
||||
@BeforeEach
|
||||
public void init() throws IOException {
|
||||
HoodieCLI.conf = jsc.hadoopConfiguration();
|
||||
|
||||
String tableName = "test_table";
|
||||
tablePath = basePath + File.separator + tableName;
|
||||
propsFilePath = this.getClass().getClassLoader().getResource("clean.properties");
|
||||
|
||||
// Create table and connect
|
||||
new TableCommand().createTable(
|
||||
tablePath, tableName, HoodieTableType.COPY_ON_WRITE.name(),
|
||||
"", TimelineLayoutVersion.VERSION_1, "org.apache.hudi.common.model.HoodieAvroPayload");
|
||||
|
||||
Configuration conf = HoodieCLI.conf;
|
||||
|
||||
metaClient = HoodieCLI.getTableMetaClient();
|
||||
// Create four commits
|
||||
for (int i = 100; i < 104; i++) {
|
||||
String timestamp = String.valueOf(i);
|
||||
// Requested Compaction
|
||||
HoodieTestCommitMetadataGenerator.createCompactionAuxiliaryMetadata(tablePath,
|
||||
new HoodieInstant(HoodieInstant.State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, timestamp), conf);
|
||||
// Inflight Compaction
|
||||
HoodieTestCommitMetadataGenerator.createCompactionAuxiliaryMetadata(tablePath,
|
||||
new HoodieInstant(HoodieInstant.State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, timestamp), conf);
|
||||
HoodieTestCommitMetadataGenerator.createCommitFileWithMetadata(tablePath, timestamp, conf);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test case for cleans run.
|
||||
*/
|
||||
@Test
|
||||
public void testRunClean() throws IOException {
|
||||
// First, there should none of clean instant.
|
||||
assertEquals(0, metaClient.getActiveTimeline().reload().getCleanerTimeline().getInstants().count());
|
||||
|
||||
// Check properties file exists.
|
||||
assertNotNull(propsFilePath, "Not found properties file");
|
||||
|
||||
// Create partition metadata
|
||||
Files.createFile(Paths.get(tablePath,
|
||||
HoodieTestCommitMetadataGenerator.DEFAULT_FIRST_PARTITION_PATH,
|
||||
HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE));
|
||||
Files.createFile(Paths.get(tablePath,
|
||||
HoodieTestCommitMetadataGenerator.DEFAULT_SECOND_PARTITION_PATH,
|
||||
HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE));
|
||||
|
||||
CommandResult cr = getShell().executeCommand("cleans run --sparkMaster local --propsFilePath " + propsFilePath.toString());
|
||||
assertTrue(cr.isSuccess());
|
||||
|
||||
// After run clean, there should have 1 clean instant
|
||||
assertEquals(1, metaClient.getActiveTimeline().reload().getCleanerTimeline().getInstants().count(),
|
||||
"Loaded 1 clean and the count should match");
|
||||
}
|
||||
}
|
||||
@@ -18,6 +18,7 @@
|
||||
|
||||
package org.apache.hudi.cli.testutils;
|
||||
|
||||
import java.util.UUID;
|
||||
import org.apache.hudi.common.fs.FSUtils;
|
||||
import org.apache.hudi.common.model.HoodieCommitMetadata;
|
||||
import org.apache.hudi.common.model.HoodieWriteStat;
|
||||
@@ -67,6 +68,12 @@ public class HoodieTestCommitMetadataGenerator extends HoodieTestDataGenerator {
|
||||
|
||||
public static void createCommitFileWithMetadata(String basePath, String commitTime, Configuration configuration,
|
||||
Option<Integer> writes, Option<Integer> updates) {
|
||||
createCommitFileWithMetadata(basePath, commitTime, configuration, UUID.randomUUID().toString(),
|
||||
UUID.randomUUID().toString(), writes, updates);
|
||||
}
|
||||
|
||||
public static void createCommitFileWithMetadata(String basePath, String commitTime, Configuration configuration,
|
||||
String fileId1, String fileId2, Option<Integer> writes, Option<Integer> updates) {
|
||||
Arrays.asList(HoodieTimeline.makeCommitFileName(commitTime), HoodieTimeline.makeInflightCommitFileName(commitTime),
|
||||
HoodieTimeline.makeRequestedCommitFileName(commitTime))
|
||||
.forEach(f -> {
|
||||
@@ -77,7 +84,8 @@ public class HoodieTestCommitMetadataGenerator extends HoodieTestDataGenerator {
|
||||
FileSystem fs = FSUtils.getFs(basePath, configuration);
|
||||
os = fs.create(commitFile, true);
|
||||
// Generate commitMetadata
|
||||
HoodieCommitMetadata commitMetadata = generateCommitMetadata(basePath, commitTime, writes, updates);
|
||||
HoodieCommitMetadata commitMetadata =
|
||||
generateCommitMetadata(basePath, commitTime, fileId1, fileId2, writes, updates);
|
||||
// Write empty commit metadata
|
||||
os.writeBytes(new String(commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8)));
|
||||
} catch (IOException ioe) {
|
||||
@@ -103,8 +111,14 @@ public class HoodieTestCommitMetadataGenerator extends HoodieTestDataGenerator {
|
||||
|
||||
public static HoodieCommitMetadata generateCommitMetadata(String basePath, String commitTime,
|
||||
Option<Integer> writes, Option<Integer> updates) throws IOException {
|
||||
String file1P0C0 = HoodieTestUtils.createNewDataFile(basePath, DEFAULT_FIRST_PARTITION_PATH, commitTime);
|
||||
String file1P1C0 = HoodieTestUtils.createNewDataFile(basePath, DEFAULT_SECOND_PARTITION_PATH, commitTime);
|
||||
return generateCommitMetadata(basePath, commitTime, UUID.randomUUID().toString(), UUID.randomUUID().toString(),
|
||||
writes, updates);
|
||||
}
|
||||
|
||||
public static HoodieCommitMetadata generateCommitMetadata(String basePath, String commitTime, String fileId1,
|
||||
String fileId2, Option<Integer> writes, Option<Integer> updates) throws IOException {
|
||||
String file1P0C0 = HoodieTestUtils.createDataFile(basePath, DEFAULT_FIRST_PARTITION_PATH, commitTime, fileId1);
|
||||
String file1P1C0 = HoodieTestUtils.createDataFile(basePath, DEFAULT_SECOND_PARTITION_PATH, commitTime, fileId2);
|
||||
return generateCommitMetadata(new HashMap<String, List<String>>() {
|
||||
{
|
||||
put(DEFAULT_FIRST_PARTITION_PATH, CollectionUtils.createImmutableList(file1P0C0));
|
||||
|
||||
Reference in New Issue
Block a user