1
0

[HUDI-810] Migrate ClientTestHarness to JUnit 5 (#1553)

This commit is contained in:
Raymond Xu
2020-04-28 08:38:16 -07:00
committed by GitHub
parent 6de9f5d9e5
commit 06dae30297
36 changed files with 1232 additions and 1243 deletions

View File

@@ -20,10 +20,10 @@ package org.apache.hudi.cli;
import org.apache.hudi.common.HoodieClientTestHarness; import org.apache.hudi.common.HoodieClientTestHarness;
import org.junit.After; import org.junit.jupiter.api.AfterAll;
import org.junit.AfterClass; import org.junit.jupiter.api.AfterEach;
import org.junit.Before; import org.junit.jupiter.api.BeforeAll;
import org.junit.BeforeClass; import org.junit.jupiter.api.BeforeEach;
import org.springframework.shell.Bootstrap; import org.springframework.shell.Bootstrap;
import org.springframework.shell.core.JLineShellComponent; import org.springframework.shell.core.JLineShellComponent;
@@ -31,25 +31,26 @@ import org.springframework.shell.core.JLineShellComponent;
* Class to start Bootstrap and JLineShellComponent. * Class to start Bootstrap and JLineShellComponent.
*/ */
public abstract class AbstractShellIntegrationTest extends HoodieClientTestHarness { public abstract class AbstractShellIntegrationTest extends HoodieClientTestHarness {
private static JLineShellComponent shell; private static JLineShellComponent shell;
@BeforeClass @BeforeAll
public static void startup() { public static void startup() {
Bootstrap bootstrap = new Bootstrap(); Bootstrap bootstrap = new Bootstrap();
shell = bootstrap.getJLineShellComponent(); shell = bootstrap.getJLineShellComponent();
} }
@AfterClass @AfterAll
public static void shutdown() { public static void shutdown() {
shell.stop(); shell.stop();
} }
@Before @BeforeEach
public void setup() throws Exception { public void setup() throws Exception {
initResources(); initResources();
} }
@After @AfterEach
public void teardown() throws Exception { public void teardown() throws Exception {
cleanupResources(); cleanupResources();
} }

View File

@@ -32,9 +32,9 @@ import org.apache.hudi.config.HoodieCompactionConfig;
import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.table.HoodieTimelineArchiveLog; import org.apache.hudi.table.HoodieTimelineArchiveLog;
import org.junit.After; import org.junit.jupiter.api.AfterEach;
import org.junit.Before; import org.junit.jupiter.api.BeforeEach;
import org.junit.Test; import org.junit.jupiter.api.Test;
import org.springframework.shell.core.CommandResult; import org.springframework.shell.core.CommandResult;
import java.io.File; import java.io.File;
@@ -43,8 +43,8 @@ import java.util.ArrayList;
import java.util.HashMap; import java.util.HashMap;
import java.util.List; import java.util.List;
import static org.junit.Assert.assertEquals; import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.Assert.assertTrue; import static org.junit.jupiter.api.Assertions.assertTrue;
/** /**
* Test Cases for {@link ArchivedCommitsCommand}. * Test Cases for {@link ArchivedCommitsCommand}.
@@ -53,7 +53,7 @@ public class TestArchivedCommitsCommand extends AbstractShellIntegrationTest {
private String tablePath; private String tablePath;
@Before @BeforeEach
public void init() throws IOException { public void init() throws IOException {
initDFS(); initDFS();
jsc.hadoopConfiguration().addResource(dfs.getConf()); jsc.hadoopConfiguration().addResource(dfs.getConf());
@@ -95,7 +95,7 @@ public class TestArchivedCommitsCommand extends AbstractShellIntegrationTest {
archiveLog.archiveIfRequired(jsc); archiveLog.archiveIfRequired(jsc);
} }
@After @AfterEach
public void clean() throws IOException { public void clean() throws IOException {
cleanupDFS(); cleanupDFS();
} }

View File

@@ -18,7 +18,6 @@
package org.apache.hudi.cli.commands; package org.apache.hudi.cli.commands;
import org.apache.hadoop.conf.Configuration;
import org.apache.hudi.avro.model.HoodieCleanMetadata; import org.apache.hudi.avro.model.HoodieCleanMetadata;
import org.apache.hudi.cli.AbstractShellIntegrationTest; import org.apache.hudi.cli.AbstractShellIntegrationTest;
import org.apache.hudi.cli.HoodieCLI; import org.apache.hudi.cli.HoodieCLI;
@@ -36,20 +35,23 @@ import org.apache.hudi.common.table.timeline.HoodieTimeline;
import org.apache.hudi.common.table.timeline.TimelineMetadataUtils; import org.apache.hudi.common.table.timeline.TimelineMetadataUtils;
import org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion; import org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion;
import org.junit.Before; import org.apache.hadoop.conf.Configuration;
import org.junit.Test; import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.springframework.shell.core.CommandResult; import org.springframework.shell.core.CommandResult;
import java.io.File; import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.net.URL; import java.net.URL;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.HashMap; import java.util.HashMap;
import java.util.List; import java.util.List;
import static org.junit.Assert.assertEquals; import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.Assert.assertNotNull; import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.Assert.assertTrue; import static org.junit.jupiter.api.Assertions.assertTrue;
/** /**
* Test Cases for {@link CleansCommand}. * Test Cases for {@link CleansCommand}.
@@ -59,7 +61,7 @@ public class TestCleansCommand extends AbstractShellIntegrationTest {
private String tablePath; private String tablePath;
private URL propsFilePath; private URL propsFilePath;
@Before @BeforeEach
public void init() throws IOException { public void init() throws IOException {
HoodieCLI.conf = jsc.hadoopConfiguration(); HoodieCLI.conf = jsc.hadoopConfiguration();
@@ -98,14 +100,15 @@ public class TestCleansCommand extends AbstractShellIntegrationTest {
@Test @Test
public void testShowCleans() throws Exception { public void testShowCleans() throws Exception {
// Check properties file exists. // Check properties file exists.
assertNotNull("Not found properties file", propsFilePath); assertNotNull(propsFilePath, "Not found properties file");
// First, run clean // First, run clean
new File(tablePath + File.separator + HoodieTestCommitMetadataGenerator.DEFAULT_FIRST_PARTITION_PATH Files.createFile(Paths.get(tablePath,
+ File.separator + HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE).createNewFile(); HoodieTestCommitMetadataGenerator.DEFAULT_FIRST_PARTITION_PATH,
HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE));
SparkMain.clean(jsc, HoodieCLI.basePath, propsFilePath.getPath(), new ArrayList<>()); SparkMain.clean(jsc, HoodieCLI.basePath, propsFilePath.getPath(), new ArrayList<>());
assertEquals("Loaded 1 clean and the count should match", 1, assertEquals(1, metaClient.getActiveTimeline().reload().getCleanerTimeline().getInstants().count(),
metaClient.getActiveTimeline().reload().getCleanerTimeline().getInstants().count()); "Loaded 1 clean and the count should match");
CommandResult cr = getShell().executeCommand("cleans show"); CommandResult cr = getShell().executeCommand("cleans show");
assertTrue(cr.isSuccess()); assertTrue(cr.isSuccess());
@@ -134,16 +137,18 @@ public class TestCleansCommand extends AbstractShellIntegrationTest {
@Test @Test
public void testShowCleanPartitions() throws IOException { public void testShowCleanPartitions() throws IOException {
// Check properties file exists. // Check properties file exists.
assertNotNull("Not found properties file", propsFilePath); assertNotNull(propsFilePath, "Not found properties file");
// First, run clean with two partitions // First, run clean with two partitions
new File(tablePath + File.separator + HoodieTestCommitMetadataGenerator.DEFAULT_FIRST_PARTITION_PATH Files.createFile(Paths.get(tablePath,
+ File.separator + HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE).createNewFile(); HoodieTestCommitMetadataGenerator.DEFAULT_FIRST_PARTITION_PATH,
new File(tablePath + File.separator + HoodieTestCommitMetadataGenerator.DEFAULT_SECOND_PARTITION_PATH HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE));
+ File.separator + HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE).createNewFile(); Files.createFile(Paths.get(tablePath,
HoodieTestCommitMetadataGenerator.DEFAULT_SECOND_PARTITION_PATH,
HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE));
SparkMain.clean(jsc, HoodieCLI.basePath, propsFilePath.toString(), new ArrayList<>()); SparkMain.clean(jsc, HoodieCLI.basePath, propsFilePath.toString(), new ArrayList<>());
assertEquals("Loaded 1 clean and the count should match", 1, assertEquals(1, metaClient.getActiveTimeline().reload().getCleanerTimeline().getInstants().count(),
metaClient.getActiveTimeline().reload().getCleanerTimeline().getInstants().count()); "Loaded 1 clean and the count should match");
HoodieInstant clean = metaClient.getActiveTimeline().reload().getCleanerTimeline().getInstants().findFirst().get(); HoodieInstant clean = metaClient.getActiveTimeline().reload().getCleanerTimeline().getInstants().findFirst().get();

View File

@@ -33,12 +33,13 @@ import org.apache.hudi.common.table.view.HoodieTableFileSystemView;
import org.apache.hudi.common.table.view.SyncableFileSystemView; import org.apache.hudi.common.table.view.SyncableFileSystemView;
import org.apache.hudi.common.util.NumericUtils; import org.apache.hudi.common.util.NumericUtils;
import org.junit.Before; import org.junit.jupiter.api.BeforeEach;
import org.junit.Test; import org.junit.jupiter.api.Test;
import org.springframework.shell.core.CommandResult; import org.springframework.shell.core.CommandResult;
import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.HashMap; import java.util.HashMap;
import java.util.List; import java.util.List;
@@ -48,23 +49,24 @@ import java.util.function.Function;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import java.util.stream.Stream; import java.util.stream.Stream;
import static org.junit.Assert.assertEquals; import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.Assert.assertTrue; import static org.junit.jupiter.api.Assertions.assertTrue;
/** /**
* Test class for {@link FileSystemViewCommand}. * Test class for {@link FileSystemViewCommand}.
*/ */
public class TestFileSystemViewCommand extends AbstractShellIntegrationTest { public class TestFileSystemViewCommand extends AbstractShellIntegrationTest {
private String partitionPath; private String partitionPath;
private SyncableFileSystemView fsView; private SyncableFileSystemView fsView;
@Before @BeforeEach
public void init() throws IOException { public void init() throws IOException {
HoodieCLI.conf = jsc.hadoopConfiguration(); HoodieCLI.conf = jsc.hadoopConfiguration();
// Create table and connect // Create table and connect
String tableName = "test_table"; String tableName = "test_table";
String tablePath = basePath + File.separator + tableName; String tablePath = Paths.get(basePath, tableName).toString();
new TableCommand().createTable( new TableCommand().createTable(
tablePath, tableName, tablePath, tableName,
"COPY_ON_WRITE", "", 1, "org.apache.hudi.common.model.HoodieAvroPayload"); "COPY_ON_WRITE", "", 1, "org.apache.hudi.common.model.HoodieAvroPayload");
@@ -72,8 +74,8 @@ public class TestFileSystemViewCommand extends AbstractShellIntegrationTest {
metaClient = HoodieCLI.getTableMetaClient(); metaClient = HoodieCLI.getTableMetaClient();
partitionPath = HoodieTestCommitMetadataGenerator.DEFAULT_FIRST_PARTITION_PATH; partitionPath = HoodieTestCommitMetadataGenerator.DEFAULT_FIRST_PARTITION_PATH;
String fullPartitionPath = tablePath + "/" + partitionPath; String fullPartitionPath = Paths.get(tablePath, partitionPath).toString();
new File(fullPartitionPath).mkdirs(); Files.createDirectories(Paths.get(fullPartitionPath));
// Generate 2 commits // Generate 2 commits
String commitTime1 = "1"; String commitTime1 = "1";
@@ -83,20 +85,18 @@ public class TestFileSystemViewCommand extends AbstractShellIntegrationTest {
// Write data files and log files // Write data files and log files
String testWriteToken = "1-0-1"; String testWriteToken = "1-0-1";
new File(fullPartitionPath + "/" + FSUtils.makeDataFileName(commitTime1, testWriteToken, fileId1)) Files.createFile(Paths.get(fullPartitionPath, FSUtils
.createNewFile(); .makeDataFileName(commitTime1, testWriteToken, fileId1)));
new File(fullPartitionPath + "/" Files.createFile(Paths.get(fullPartitionPath, FSUtils
+ FSUtils.makeLogFileName(fileId1, HoodieLogFile.DELTA_EXTENSION, commitTime1, 0, testWriteToken)) .makeLogFileName(fileId1, HoodieLogFile.DELTA_EXTENSION, commitTime1, 0, testWriteToken)));
.createNewFile(); Files.createFile(Paths.get(fullPartitionPath, FSUtils
new File(fullPartitionPath + "/" + FSUtils.makeDataFileName(commitTime2, testWriteToken, fileId1)) .makeDataFileName(commitTime2, testWriteToken, fileId1)));
.createNewFile(); Files.createFile(Paths.get(fullPartitionPath, FSUtils
new File(fullPartitionPath + "/" .makeLogFileName(fileId1, HoodieLogFile.DELTA_EXTENSION, commitTime2, 0, testWriteToken)));
+ FSUtils.makeLogFileName(fileId1, HoodieLogFile.DELTA_EXTENSION, commitTime2, 0, testWriteToken))
.createNewFile();
// Write commit files // Write commit files
new File(tablePath + "/.hoodie/" + commitTime1 + ".commit").createNewFile(); Files.createFile(Paths.get(tablePath, ".hoodie", commitTime1 + ".commit"));
new File(tablePath + "/.hoodie/" + commitTime2 + ".commit").createNewFile(); Files.createFile(Paths.get(tablePath, ".hoodie", commitTime2 + ".commit"));
// Reload meta client and create fsView // Reload meta client and create fsView
metaClient = HoodieTableMetaClient.reload(metaClient); metaClient = HoodieTableMetaClient.reload(metaClient);

View File

@@ -21,11 +21,11 @@ package org.apache.hudi.cli.commands;
import org.apache.hudi.cli.AbstractShellIntegrationTest; import org.apache.hudi.cli.AbstractShellIntegrationTest;
import org.apache.hudi.cli.HoodiePrintHelper; import org.apache.hudi.cli.HoodiePrintHelper;
import org.junit.Test; import org.junit.jupiter.api.Test;
import org.springframework.shell.core.CommandResult; import org.springframework.shell.core.CommandResult;
import static org.junit.Assert.assertEquals; import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.Assert.assertTrue; import static org.junit.jupiter.api.Assertions.assertTrue;
/** /**
* Test Cases for {@link SparkEnvCommand}. * Test Cases for {@link SparkEnvCommand}.

View File

@@ -24,30 +24,30 @@ import org.apache.hudi.common.fs.ConsistencyGuardConfig;
import org.apache.hudi.common.model.HoodieTableType; import org.apache.hudi.common.model.HoodieTableType;
import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.junit.Before; import org.junit.jupiter.api.BeforeEach;
import org.junit.Test; import org.junit.jupiter.api.Test;
import org.springframework.shell.core.CommandResult; import org.springframework.shell.core.CommandResult;
import java.io.File; import java.io.File;
import static org.apache.hudi.common.table.HoodieTableMetaClient.METAFOLDER_NAME; import static org.apache.hudi.common.table.HoodieTableMetaClient.METAFOLDER_NAME;
import static org.junit.Assert.assertEquals; import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.Assert.assertFalse; import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.Assert.assertTrue; import static org.junit.jupiter.api.Assertions.assertTrue;
/** /**
* Test Cases for {@link TableCommand}. * Test Cases for {@link TableCommand}.
*/ */
public class TestTableCommand extends AbstractShellIntegrationTest { public class TestTableCommand extends AbstractShellIntegrationTest {
private String tableName = "test_table"; private final String tableName = "test_table";
private String tablePath; private String tablePath;
private String metaPath; private String metaPath;
/** /**
* Init path after Mini hdfs init. * Init path after Mini hdfs init.
*/ */
@Before @BeforeEach
public void init() { public void init() {
HoodieCLI.conf = jsc.hadoopConfiguration(); HoodieCLI.conf = jsc.hadoopConfiguration();
tablePath = basePath + File.separator + tableName; tablePath = basePath + File.separator + tableName;

View File

@@ -18,7 +18,6 @@
package org.apache.hudi.cli.integ; package org.apache.hudi.cli.integ;
import org.apache.hadoop.conf.Configuration;
import org.apache.hudi.cli.AbstractShellIntegrationTest; import org.apache.hudi.cli.AbstractShellIntegrationTest;
import org.apache.hudi.cli.HoodieCLI; import org.apache.hudi.cli.HoodieCLI;
import org.apache.hudi.cli.commands.TableCommand; import org.apache.hudi.cli.commands.TableCommand;
@@ -29,23 +28,27 @@ import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.HoodieTimeline; import org.apache.hudi.common.table.timeline.HoodieTimeline;
import org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion; import org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion;
import org.junit.Before; import org.apache.hadoop.conf.Configuration;
import org.junit.Test; import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.springframework.shell.core.CommandResult; import org.springframework.shell.core.CommandResult;
import java.io.File; import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.net.URL; import java.net.URL;
import java.nio.file.Files;
import java.nio.file.Paths;
import static org.junit.Assert.assertEquals; import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.Assert.assertNotNull; import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.Assert.assertTrue; import static org.junit.jupiter.api.Assertions.assertTrue;
public class ITTestCleansCommand extends AbstractShellIntegrationTest { public class ITTestCleansCommand extends AbstractShellIntegrationTest {
private String tablePath; private String tablePath;
private URL propsFilePath; private URL propsFilePath;
@Before @BeforeEach
public void init() throws IOException { public void init() throws IOException {
HoodieCLI.conf = jsc.hadoopConfiguration(); HoodieCLI.conf = jsc.hadoopConfiguration();
@@ -83,19 +86,21 @@ public class ITTestCleansCommand extends AbstractShellIntegrationTest {
assertEquals(0, metaClient.getActiveTimeline().reload().getCleanerTimeline().getInstants().count()); assertEquals(0, metaClient.getActiveTimeline().reload().getCleanerTimeline().getInstants().count());
// Check properties file exists. // Check properties file exists.
assertNotNull("Not found properties file", propsFilePath); assertNotNull(propsFilePath, "Not found properties file");
// Create partition metadata // Create partition metadata
new File(tablePath + File.separator + HoodieTestCommitMetadataGenerator.DEFAULT_FIRST_PARTITION_PATH Files.createFile(Paths.get(tablePath,
+ File.separator + HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE).createNewFile(); HoodieTestCommitMetadataGenerator.DEFAULT_FIRST_PARTITION_PATH,
new File(tablePath + File.separator + HoodieTestCommitMetadataGenerator.DEFAULT_SECOND_PARTITION_PATH HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE));
+ File.separator + HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE).createNewFile(); Files.createFile(Paths.get(tablePath,
HoodieTestCommitMetadataGenerator.DEFAULT_SECOND_PARTITION_PATH,
HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE));
CommandResult cr = getShell().executeCommand("cleans run --sparkMaster local --propsFilePath " + propsFilePath.toString()); CommandResult cr = getShell().executeCommand("cleans run --sparkMaster local --propsFilePath " + propsFilePath.toString());
assertTrue(cr.isSuccess()); assertTrue(cr.isSuccess());
// After running clean, there should be 1 clean instant // After running clean, there should be 1 clean instant
assertEquals("Loaded 1 clean and the count should match", 1, assertEquals(1, metaClient.getActiveTimeline().reload().getCleanerTimeline().getInstants().count(),
metaClient.getActiveTimeline().reload().getCleanerTimeline().getInstants().count()); "Loaded 1 clean and the count should match");
} }
} }

View File

@@ -35,16 +35,16 @@ import org.apache.hudi.index.HoodieIndex;
import org.apache.hudi.table.HoodieTable; import org.apache.hudi.table.HoodieTable;
import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaRDD;
import org.junit.Test; import org.junit.jupiter.api.Test;
import java.io.File; import java.io.File;
import java.util.List; import java.util.List;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import static org.junit.Assert.assertEquals; import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.Assert.assertFalse; import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.Assert.assertTrue; import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.Assert.fail; import static org.junit.jupiter.api.Assertions.assertTrue;
/** /**
* Test Cases for rollback of snapshots and commits. * Test Cases for rollback of snapshots and commits.
@@ -105,12 +105,12 @@ public class TestClientRollback extends TestHoodieClientBase {
List<HoodieBaseFile> dataFiles = partitionPaths.stream().flatMap(s -> { List<HoodieBaseFile> dataFiles = partitionPaths.stream().flatMap(s -> {
return view1.getAllBaseFiles(s).filter(f -> f.getCommitTime().equals("003")); return view1.getAllBaseFiles(s).filter(f -> f.getCommitTime().equals("003"));
}).collect(Collectors.toList()); }).collect(Collectors.toList());
assertEquals("The data files for commit 003 should be present", 3, dataFiles.size()); assertEquals(3, dataFiles.size(), "The data files for commit 003 should be present");
dataFiles = partitionPaths.stream().flatMap(s -> { dataFiles = partitionPaths.stream().flatMap(s -> {
return view1.getAllBaseFiles(s).filter(f -> f.getCommitTime().equals("002")); return view1.getAllBaseFiles(s).filter(f -> f.getCommitTime().equals("002"));
}).collect(Collectors.toList()); }).collect(Collectors.toList());
assertEquals("The data files for commit 002 should be present", 3, dataFiles.size()); assertEquals(3, dataFiles.size(), "The data files for commit 002 should be present");
/** /**
* Write 4 (updates) * Write 4 (updates)
@@ -128,15 +128,12 @@ public class TestClientRollback extends TestHoodieClientBase {
final BaseFileOnlyView view2 = table.getBaseFileOnlyView(); final BaseFileOnlyView view2 = table.getBaseFileOnlyView();
dataFiles = partitionPaths.stream().flatMap(s -> view2.getAllBaseFiles(s).filter(f -> f.getCommitTime().equals("004"))).collect(Collectors.toList()); dataFiles = partitionPaths.stream().flatMap(s -> view2.getAllBaseFiles(s).filter(f -> f.getCommitTime().equals("004"))).collect(Collectors.toList());
assertEquals("The data files for commit 004 should be present", 3, dataFiles.size()); assertEquals(3, dataFiles.size(), "The data files for commit 004 should be present");
// rolling back to a non existent savepoint must not succeed // rolling back to a non existent savepoint must not succeed
try { assertThrows(HoodieRollbackException.class, () -> {
client.restoreToSavepoint("001"); client.restoreToSavepoint("001");
fail("Rolling back to non-existent savepoint should not be allowed"); }, "Rolling back to non-existent savepoint should not be allowed");
} catch (HoodieRollbackException e) {
// this is good
}
// rollback to savepoint 002 // rollback to savepoint 002
HoodieInstant savepoint = table.getCompletedSavepointTimeline().getInstants().findFirst().get(); HoodieInstant savepoint = table.getCompletedSavepointTimeline().getInstants().findFirst().get();
@@ -146,13 +143,13 @@ public class TestClientRollback extends TestHoodieClientBase {
table = HoodieTable.create(metaClient, getConfig(), jsc); table = HoodieTable.create(metaClient, getConfig(), jsc);
final BaseFileOnlyView view3 = table.getBaseFileOnlyView(); final BaseFileOnlyView view3 = table.getBaseFileOnlyView();
dataFiles = partitionPaths.stream().flatMap(s -> view3.getAllBaseFiles(s).filter(f -> f.getCommitTime().equals("002"))).collect(Collectors.toList()); dataFiles = partitionPaths.stream().flatMap(s -> view3.getAllBaseFiles(s).filter(f -> f.getCommitTime().equals("002"))).collect(Collectors.toList());
assertEquals("The data files for commit 002 be available", 3, dataFiles.size()); assertEquals(3, dataFiles.size(), "The data files for commit 002 be available");
dataFiles = partitionPaths.stream().flatMap(s -> view3.getAllBaseFiles(s).filter(f -> f.getCommitTime().equals("003"))).collect(Collectors.toList()); dataFiles = partitionPaths.stream().flatMap(s -> view3.getAllBaseFiles(s).filter(f -> f.getCommitTime().equals("003"))).collect(Collectors.toList());
assertEquals("The data files for commit 003 should be rolled back", 0, dataFiles.size()); assertEquals(0, dataFiles.size(), "The data files for commit 003 should be rolled back");
dataFiles = partitionPaths.stream().flatMap(s -> view3.getAllBaseFiles(s).filter(f -> f.getCommitTime().equals("004"))).collect(Collectors.toList()); dataFiles = partitionPaths.stream().flatMap(s -> view3.getAllBaseFiles(s).filter(f -> f.getCommitTime().equals("004"))).collect(Collectors.toList());
assertEquals("The data files for commit 004 should be rolled back", 0, dataFiles.size()); assertEquals(0, dataFiles.size(), "The data files for commit 004 should be rolled back");
} }
} }
@@ -195,12 +192,9 @@ public class TestClientRollback extends TestHoodieClientBase {
try (HoodieWriteClient client = getHoodieWriteClient(config, false);) { try (HoodieWriteClient client = getHoodieWriteClient(config, false);) {
// Rollback commit 1 (this should fail, since commit2 is still around) // Rollback commit 1 (this should fail, since commit2 is still around)
try { assertThrows(HoodieRollbackException.class, () -> {
client.rollback(commitTime1); client.rollback(commitTime1);
fail("Should have thrown an exception "); }, "Should have thrown an exception ");
} catch (HoodieRollbackException hrbe) {
// should get here
}
// Rollback commit3 // Rollback commit3
client.rollback(commitTime3); client.rollback(commitTime3);

View File

@@ -36,10 +36,9 @@ import org.apache.hudi.table.action.compact.OperationResult;
import org.apache.log4j.LogManager; import org.apache.log4j.LogManager;
import org.apache.log4j.Logger; import org.apache.log4j.Logger;
import org.junit.After; import org.junit.jupiter.api.AfterEach;
import org.junit.Assert; import org.junit.jupiter.api.BeforeEach;
import org.junit.Before; import org.junit.jupiter.api.Test;
import org.junit.Test;
import java.io.IOException; import java.io.IOException;
import java.util.HashSet; import java.util.HashSet;
@@ -52,6 +51,9 @@ import java.util.stream.Stream;
import static org.apache.hudi.client.CompactionAdminClient.getRenamingActionsToAlignWithCompactionOperation; import static org.apache.hudi.client.CompactionAdminClient.getRenamingActionsToAlignWithCompactionOperation;
import static org.apache.hudi.client.CompactionAdminClient.renameLogFile; import static org.apache.hudi.client.CompactionAdminClient.renameLogFile;
import static org.apache.hudi.common.model.HoodieTableType.MERGE_ON_READ; import static org.apache.hudi.common.model.HoodieTableType.MERGE_ON_READ;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;
public class TestCompactionAdminClient extends TestHoodieClientBase { public class TestCompactionAdminClient extends TestHoodieClientBase {
@@ -60,7 +62,7 @@ public class TestCompactionAdminClient extends TestHoodieClientBase {
private HoodieTableMetaClient metaClient; private HoodieTableMetaClient metaClient;
private CompactionAdminClient client; private CompactionAdminClient client;
@Before @BeforeEach
public void setUp() throws Exception { public void setUp() throws Exception {
initPath(); initPath();
initSparkContexts(); initSparkContexts();
@@ -68,7 +70,7 @@ public class TestCompactionAdminClient extends TestHoodieClientBase {
client = new CompactionAdminClient(jsc, basePath); client = new CompactionAdminClient(jsc, basePath);
} }
@After @AfterEach
public void tearDown() { public void tearDown() {
client.close(); client.close();
metaClient = null; metaClient = null;
@@ -137,7 +139,7 @@ public class TestCompactionAdminClient extends TestHoodieClientBase {
metaClient = new HoodieTableMetaClient(metaClient.getHadoopConf(), basePath, true); metaClient = new HoodieTableMetaClient(metaClient.getHadoopConf(), basePath, true);
List<ValidationOpResult> result = client.validateCompactionPlan(metaClient, compactionInstant, 1); List<ValidationOpResult> result = client.validateCompactionPlan(metaClient, compactionInstant, 1);
if (expNumRepairs > 0) { if (expNumRepairs > 0) {
Assert.assertTrue("Expect some failures in validation", result.stream().anyMatch(r -> !r.isSuccess())); assertTrue(result.stream().anyMatch(r -> !r.isSuccess()), "Expect some failures in validation");
} }
// Now repair // Now repair
List<Pair<HoodieLogFile, HoodieLogFile>> undoFiles = List<Pair<HoodieLogFile, HoodieLogFile>> undoFiles =
@@ -155,18 +157,18 @@ public class TestCompactionAdminClient extends TestHoodieClientBase {
Map<String, String> expRenameFiles = renameFiles.stream() Map<String, String> expRenameFiles = renameFiles.stream()
.collect(Collectors.toMap(p -> p.getLeft().getPath().toString(), x -> x.getRight().getPath().toString())); .collect(Collectors.toMap(p -> p.getLeft().getPath().toString(), x -> x.getRight().getPath().toString()));
if (expNumRepairs > 0) { if (expNumRepairs > 0) {
Assert.assertFalse("Rename Files must be non-empty", renameFiles.isEmpty()); assertFalse(renameFiles.isEmpty(), "Rename Files must be non-empty");
} else { } else {
Assert.assertTrue("Rename Files must be empty", renameFiles.isEmpty()); assertTrue(renameFiles.isEmpty(), "Rename Files must be empty");
} }
expRenameFiles.forEach((key, value) -> LOG.info("Key :" + key + " renamed to " + value + " rolled back to " expRenameFiles.forEach((key, value) -> LOG.info("Key :" + key + " renamed to " + value + " rolled back to "
+ renameFilesFromUndo.get(key))); + renameFilesFromUndo.get(key)));
Assert.assertEquals("Undo must completely rollback renames", expRenameFiles, renameFilesFromUndo); assertEquals(expRenameFiles, renameFilesFromUndo, "Undo must completely rollback renames");
// Now expect validation to succeed // Now expect validation to succeed
result = client.validateCompactionPlan(metaClient, compactionInstant, 1); result = client.validateCompactionPlan(metaClient, compactionInstant, 1);
Assert.assertTrue("Expect no failures in validation", result.stream().allMatch(OperationResult::isSuccess)); assertTrue(result.stream().allMatch(OperationResult::isSuccess), "Expect no failures in validation");
Assert.assertEquals("Expected Num Repairs", expNumRepairs, undoFiles.size()); assertEquals(expNumRepairs, undoFiles.size(), "Expected Num Repairs");
} }
/** /**
@@ -178,8 +180,8 @@ public class TestCompactionAdminClient extends TestHoodieClientBase {
metaClient = new HoodieTableMetaClient(metaClient.getHadoopConf(), basePath, true); metaClient = new HoodieTableMetaClient(metaClient.getHadoopConf(), basePath, true);
// Ensure compaction-plan is good to begin with // Ensure compaction-plan is good to begin with
List<ValidationOpResult> validationResults = client.validateCompactionPlan(metaClient, compactionInstant, 1); List<ValidationOpResult> validationResults = client.validateCompactionPlan(metaClient, compactionInstant, 1);
Assert.assertFalse("Some validations failed", assertFalse(validationResults.stream().anyMatch(v -> !v.isSuccess()),
validationResults.stream().anyMatch(v -> !v.isSuccess())); "Some validations failed");
} }
private void validateRenameFiles(List<Pair<HoodieLogFile, HoodieLogFile>> renameFiles, String ingestionInstant, private void validateRenameFiles(List<Pair<HoodieLogFile, HoodieLogFile>> renameFiles, String ingestionInstant,
@@ -189,8 +191,8 @@ public class TestCompactionAdminClient extends TestHoodieClientBase {
Set<HoodieLogFile> uniqOldLogFiles = new HashSet<>(); Set<HoodieLogFile> uniqOldLogFiles = new HashSet<>();
renameFiles.forEach(lfPair -> { renameFiles.forEach(lfPair -> {
Assert.assertFalse("Old Log File Names do not collide", uniqOldLogFiles.contains(lfPair.getKey())); assertFalse(uniqOldLogFiles.contains(lfPair.getKey()), "Old Log File Names do not collide");
Assert.assertFalse("New Log File Names do not collide", uniqNewLogFiles.contains(lfPair.getValue())); assertFalse(uniqNewLogFiles.contains(lfPair.getValue()), "New Log File Names do not collide");
uniqOldLogFiles.add(lfPair.getKey()); uniqOldLogFiles.add(lfPair.getKey());
uniqNewLogFiles.add(lfPair.getValue()); uniqNewLogFiles.add(lfPair.getValue());
}); });
@@ -198,17 +200,17 @@ public class TestCompactionAdminClient extends TestHoodieClientBase {
renameFiles.forEach(lfPair -> { renameFiles.forEach(lfPair -> {
HoodieLogFile oldLogFile = lfPair.getLeft(); HoodieLogFile oldLogFile = lfPair.getLeft();
HoodieLogFile newLogFile = lfPair.getValue(); HoodieLogFile newLogFile = lfPair.getValue();
Assert.assertEquals("Base Commit time is expected", ingestionInstant, newLogFile.getBaseCommitTime()); assertEquals(ingestionInstant, newLogFile.getBaseCommitTime(), "Base Commit time is expected");
Assert.assertEquals("Base Commit time is expected", compactionInstant, oldLogFile.getBaseCommitTime()); assertEquals(compactionInstant, oldLogFile.getBaseCommitTime(), "Base Commit time is expected");
Assert.assertEquals("File Id is expected", oldLogFile.getFileId(), newLogFile.getFileId()); assertEquals(oldLogFile.getFileId(), newLogFile.getFileId(), "File Id is expected");
HoodieLogFile lastLogFileBeforeCompaction = HoodieLogFile lastLogFileBeforeCompaction =
fsView.getLatestMergedFileSlicesBeforeOrOn(HoodieTestUtils.DEFAULT_PARTITION_PATHS[0], ingestionInstant) fsView.getLatestMergedFileSlicesBeforeOrOn(HoodieTestUtils.DEFAULT_PARTITION_PATHS[0], ingestionInstant)
.filter(fs -> fs.getFileId().equals(oldLogFile.getFileId())).map(fs -> fs.getLogFiles().findFirst().get()) .filter(fs -> fs.getFileId().equals(oldLogFile.getFileId())).map(fs -> fs.getLogFiles().findFirst().get())
.findFirst().get(); .findFirst().get();
Assert.assertEquals("Log Version expected", assertEquals(lastLogFileBeforeCompaction.getLogVersion() + oldLogFile.getLogVersion(),
lastLogFileBeforeCompaction.getLogVersion() + oldLogFile.getLogVersion(), newLogFile.getLogVersion()); newLogFile.getLogVersion(), "Log Version expected");
Assert.assertTrue("Log version does not collide", assertTrue(newLogFile.getLogVersion() > lastLogFileBeforeCompaction.getLogVersion(),
newLogFile.getLogVersion() > lastLogFileBeforeCompaction.getLogVersion()); "Log version does not collide");
}); });
} }
@@ -243,8 +245,8 @@ public class TestCompactionAdminClient extends TestHoodieClientBase {
Set<HoodieLogFile> expLogFilesToBeRenamed = fsView.getLatestFileSlices(HoodieTestUtils.DEFAULT_PARTITION_PATHS[0]) Set<HoodieLogFile> expLogFilesToBeRenamed = fsView.getLatestFileSlices(HoodieTestUtils.DEFAULT_PARTITION_PATHS[0])
.filter(fs -> fs.getBaseInstantTime().equals(compactionInstant)).flatMap(FileSlice::getLogFiles) .filter(fs -> fs.getBaseInstantTime().equals(compactionInstant)).flatMap(FileSlice::getLogFiles)
.collect(Collectors.toSet()); .collect(Collectors.toSet());
Assert.assertEquals("Log files belonging to file-slices created because of compaction request must be renamed", assertEquals(expLogFilesToBeRenamed, gotLogFilesToBeRenamed,
expLogFilesToBeRenamed, gotLogFilesToBeRenamed); "Log files belonging to file-slices created because of compaction request must be renamed");
if (skipUnSchedule) { if (skipUnSchedule) {
// Do the renaming only but do not touch the compaction plan - Needed for repair tests // Do the renaming only but do not touch the compaction plan - Needed for repair tests
@@ -274,9 +276,10 @@ public class TestCompactionAdminClient extends TestHoodieClientBase {
new HoodieTableFileSystemView(metaClient, metaClient.getCommitsAndCompactionTimeline()); new HoodieTableFileSystemView(metaClient, metaClient.getCommitsAndCompactionTimeline());
// Expect all file-slice whose base-commit is same as compaction commit to contain no new Log files // Expect all file-slice whose base-commit is same as compaction commit to contain no new Log files
newFsView.getLatestFileSlicesBeforeOrOn(HoodieTestUtils.DEFAULT_PARTITION_PATHS[0], compactionInstant, true) newFsView.getLatestFileSlicesBeforeOrOn(HoodieTestUtils.DEFAULT_PARTITION_PATHS[0], compactionInstant, true)
.filter(fs -> fs.getBaseInstantTime().equals(compactionInstant)).forEach(fs -> { .filter(fs -> fs.getBaseInstantTime().equals(compactionInstant))
Assert.assertFalse("No Data file must be present", fs.getBaseFile().isPresent()); .forEach(fs -> {
Assert.assertEquals("No Log Files", 0, fs.getLogFiles().count()); assertFalse(fs.getBaseFile().isPresent(), "No Data file must be present");
assertEquals(0, fs.getLogFiles().count(), "No Log Files");
}); });
// Ensure same number of log-files before and after renaming per fileId // Ensure same number of log-files before and after renaming per fileId
@@ -286,10 +289,10 @@ public class TestCompactionAdminClient extends TestHoodieClientBase {
.map(fs -> Pair.of(fs.getFileId(), fs.getLogFiles().count())) .map(fs -> Pair.of(fs.getFileId(), fs.getLogFiles().count()))
.collect(Collectors.toMap(Pair::getKey, Pair::getValue)); .collect(Collectors.toMap(Pair::getKey, Pair::getValue));
Assert.assertEquals("Each File Id has same number of log-files", fileIdToCountsBeforeRenaming, assertEquals(fileIdToCountsBeforeRenaming, fileIdToCountsAfterRenaming,
fileIdToCountsAfterRenaming); "Each File Id has same number of log-files");
Assert.assertEquals("Not Empty", numEntriesPerInstant, fileIdToCountsAfterRenaming.size()); assertEquals(numEntriesPerInstant, fileIdToCountsAfterRenaming.size(), "Not Empty");
Assert.assertEquals("Expected number of renames", expNumRenames, renameFiles.size()); assertEquals(expNumRenames, renameFiles.size(), "Expected number of renames");
return renameFiles; return renameFiles;
} }
@@ -315,8 +318,8 @@ public class TestCompactionAdminClient extends TestHoodieClientBase {
.filter(fs -> fs.getBaseInstantTime().equals(compactionInstant)) .filter(fs -> fs.getBaseInstantTime().equals(compactionInstant))
.filter(fs -> fs.getFileId().equals(op.getFileId())).flatMap(FileSlice::getLogFiles) .filter(fs -> fs.getFileId().equals(op.getFileId())).flatMap(FileSlice::getLogFiles)
.collect(Collectors.toSet()); .collect(Collectors.toSet());
Assert.assertEquals("Log files belonging to file-slices created because of compaction request must be renamed", assertEquals(expLogFilesToBeRenamed, gotLogFilesToBeRenamed,
expLogFilesToBeRenamed, gotLogFilesToBeRenamed); "Log files belonging to file-slices created because of compaction request must be renamed");
validateRenameFiles(renameFiles, ingestionInstant, compactionInstant, fsView); validateRenameFiles(renameFiles, ingestionInstant, compactionInstant, fsView);
Map<String, Long> fileIdToCountsBeforeRenaming = Map<String, Long> fileIdToCountsBeforeRenaming =
@@ -335,9 +338,10 @@ public class TestCompactionAdminClient extends TestHoodieClientBase {
// Expect all file-slice whose base-commit is same as compaction commit to contain no new Log files // Expect all file-slice whose base-commit is same as compaction commit to contain no new Log files
newFsView.getLatestFileSlicesBeforeOrOn(HoodieTestUtils.DEFAULT_PARTITION_PATHS[0], compactionInstant, true) newFsView.getLatestFileSlicesBeforeOrOn(HoodieTestUtils.DEFAULT_PARTITION_PATHS[0], compactionInstant, true)
.filter(fs -> fs.getBaseInstantTime().equals(compactionInstant)) .filter(fs -> fs.getBaseInstantTime().equals(compactionInstant))
.filter(fs -> fs.getFileId().equals(op.getFileId())).forEach(fs -> { .filter(fs -> fs.getFileId().equals(op.getFileId()))
Assert.assertFalse("No Data file must be present", fs.getBaseFile().isPresent()); .forEach(fs -> {
Assert.assertEquals("No Log Files", 0, fs.getLogFiles().count()); assertFalse(fs.getBaseFile().isPresent(), "No Data file must be present");
assertEquals(0, fs.getLogFiles().count(), "No Log Files");
}); });
// Ensure same number of log-files before and after renaming per fileId // Ensure same number of log-files before and after renaming per fileId
@@ -348,9 +352,9 @@ public class TestCompactionAdminClient extends TestHoodieClientBase {
.map(fs -> Pair.of(fs.getFileId(), fs.getLogFiles().count())) .map(fs -> Pair.of(fs.getFileId(), fs.getLogFiles().count()))
.collect(Collectors.toMap(Pair::getKey, Pair::getValue)); .collect(Collectors.toMap(Pair::getKey, Pair::getValue));
Assert.assertEquals("Each File Id has same number of log-files", fileIdToCountsBeforeRenaming, assertEquals(fileIdToCountsBeforeRenaming, fileIdToCountsAfterRenaming,
fileIdToCountsAfterRenaming); "Each File Id has same number of log-files");
Assert.assertEquals("Not Empty", 1, fileIdToCountsAfterRenaming.size()); assertEquals(1, fileIdToCountsAfterRenaming.size(), "Not Empty");
Assert.assertEquals("Expected number of renames", expNumRenames, renameFiles.size()); assertEquals(expNumRenames, renameFiles.size(), "Expected number of renames");
} }
} }

View File

@@ -51,9 +51,8 @@ import org.apache.log4j.LogManager;
import org.apache.log4j.Logger; import org.apache.log4j.Logger;
import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.sql.SQLContext; import org.apache.spark.sql.SQLContext;
import org.junit.After; import org.junit.jupiter.api.AfterEach;
import org.junit.Assert; import org.junit.jupiter.api.BeforeEach;
import org.junit.Before;
import java.io.IOException; import java.io.IOException;
import java.util.HashMap; import java.util.HashMap;
@@ -64,9 +63,9 @@ import java.util.Set;
import java.util.function.Function; import java.util.function.Function;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import static org.junit.Assert.assertEquals; import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.Assert.assertFalse; import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.Assert.assertTrue; import static org.junit.jupiter.api.Assertions.assertTrue;
/** /**
* Base Class providing setup/cleanup and utility methods for testing Hoodie Client facing tests. * Base Class providing setup/cleanup and utility methods for testing Hoodie Client facing tests.
@@ -75,12 +74,12 @@ public class TestHoodieClientBase extends HoodieClientTestHarness {
private static final Logger LOG = LogManager.getLogger(TestHoodieClientBase.class); private static final Logger LOG = LogManager.getLogger(TestHoodieClientBase.class);
@Before @BeforeEach
public void setUp() throws Exception { public void setUp() throws Exception {
initResources(); initResources();
} }
@After @AfterEach
public void tearDown() throws Exception { public void tearDown() throws Exception {
cleanupResources(); cleanupResources();
} }
@@ -170,7 +169,7 @@ public class TestHoodieClientBase extends HoodieClientTestHarness {
public static void assertNoWriteErrors(List<WriteStatus> statuses) { public static void assertNoWriteErrors(List<WriteStatus> statuses) {
// Verify there are no errors // Verify there are no errors
for (WriteStatus status : statuses) { for (WriteStatus status : statuses) {
assertFalse("Errors found in write of " + status.getFileId(), status.hasErrors()); assertFalse(status.hasErrors(), "Errors found in write of " + status.getFileId());
} }
} }
@@ -200,7 +199,7 @@ public class TestHoodieClientBase extends HoodieClientTestHarness {
assertTrue(HoodiePartitionMetadata.hasPartitionMetadata(fs, new Path(basePath, partitionPath))); assertTrue(HoodiePartitionMetadata.hasPartitionMetadata(fs, new Path(basePath, partitionPath)));
HoodiePartitionMetadata pmeta = new HoodiePartitionMetadata(fs, new Path(basePath, partitionPath)); HoodiePartitionMetadata pmeta = new HoodiePartitionMetadata(fs, new Path(basePath, partitionPath));
pmeta.readFromFS(); pmeta.readFromFS();
Assert.assertEquals(HoodieTestDataGenerator.DEFAULT_PARTITION_DEPTH, pmeta.getPartitionDepth()); assertEquals(HoodieTestDataGenerator.DEFAULT_PARTITION_DEPTH, pmeta.getPartitionDepth());
} }
} }
@@ -212,9 +211,9 @@ public class TestHoodieClientBase extends HoodieClientTestHarness {
*/ */
protected void checkTaggedRecords(List<HoodieRecord> taggedRecords, String instantTime) { protected void checkTaggedRecords(List<HoodieRecord> taggedRecords, String instantTime) {
for (HoodieRecord rec : taggedRecords) { for (HoodieRecord rec : taggedRecords) {
assertTrue("Record " + rec + " found with no location.", rec.isCurrentLocationKnown()); assertTrue(rec.isCurrentLocationKnown(), "Record " + rec + " found with no location.");
assertEquals("All records should have commit time " + instantTime + ", since updates were made", assertEquals(rec.getCurrentLocation().getInstantTime(), instantTime,
rec.getCurrentLocation().getInstantTime(), instantTime); "All records should have commit time " + instantTime + ", since updates were made");
} }
} }
@@ -231,7 +230,7 @@ public class TestHoodieClientBase extends HoodieClientTestHarness {
if (!partitionToKeys.containsKey(partitionPath)) { if (!partitionToKeys.containsKey(partitionPath)) {
partitionToKeys.put(partitionPath, new HashSet<>()); partitionToKeys.put(partitionPath, new HashSet<>());
} }
assertFalse("key " + key + " is duplicate within partition " + partitionPath, partitionToKeys.get(partitionPath).contains(key)); assertFalse(partitionToKeys.get(partitionPath).contains(key), "key " + key + " is duplicate within partition " + partitionPath);
partitionToKeys.get(partitionPath).add(key); partitionToKeys.get(partitionPath).add(key);
} }
} }
@@ -472,30 +471,30 @@ public class TestHoodieClientBase extends HoodieClientTestHarness {
HoodieTimeline timeline = new HoodieActiveTimeline(metaClient).getCommitTimeline(); HoodieTimeline timeline = new HoodieActiveTimeline(metaClient).getCommitTimeline();
if (assertForCommit) { if (assertForCommit) {
assertEquals("Expecting " + expTotalCommits + " commits.", expTotalCommits, assertEquals(expTotalCommits, timeline.findInstantsAfter(initCommitTime, Integer.MAX_VALUE).countInstants(),
timeline.findInstantsAfter(initCommitTime, Integer.MAX_VALUE).countInstants()); "Expecting " + expTotalCommits + " commits.");
Assert.assertEquals("Latest commit should be " + newCommitTime, newCommitTime, assertEquals(newCommitTime, timeline.lastInstant().get().getTimestamp(),
timeline.lastInstant().get().getTimestamp()); "Latest commit should be " + newCommitTime);
assertEquals("Must contain " + expRecordsInThisCommit + " records", expRecordsInThisCommit, assertEquals(expRecordsInThisCommit, HoodieClientTestUtils.readCommit(basePath, sqlContext, timeline, newCommitTime).count(),
HoodieClientTestUtils.readCommit(basePath, sqlContext, timeline, newCommitTime).count()); "Must contain " + expRecordsInThisCommit + " records");
// Check the entire dataset has all records still // Check the entire dataset has all records still
String[] fullPartitionPaths = new String[dataGen.getPartitionPaths().length]; String[] fullPartitionPaths = new String[dataGen.getPartitionPaths().length];
for (int i = 0; i < fullPartitionPaths.length; i++) { for (int i = 0; i < fullPartitionPaths.length; i++) {
fullPartitionPaths[i] = String.format("%s/%s/*", basePath, dataGen.getPartitionPaths()[i]); fullPartitionPaths[i] = String.format("%s/%s/*", basePath, dataGen.getPartitionPaths()[i]);
} }
assertEquals("Must contain " + expTotalRecords + " records", expTotalRecords, assertEquals(expTotalRecords, HoodieClientTestUtils.read(jsc, basePath, sqlContext, fs, fullPartitionPaths).count(),
HoodieClientTestUtils.read(jsc, basePath, sqlContext, fs, fullPartitionPaths).count()); "Must contain " + expTotalRecords + " records");
// Check that the incremental consumption from prevCommitTime // Check that the incremental consumption from prevCommitTime
assertEquals("Incremental consumption from " + prevCommitTime + " should give all records in latest commit", assertEquals(HoodieClientTestUtils.readCommit(basePath, sqlContext, timeline, newCommitTime).count(),
HoodieClientTestUtils.readCommit(basePath, sqlContext, timeline, newCommitTime).count(), HoodieClientTestUtils.readSince(basePath, sqlContext, timeline, prevCommitTime).count(),
HoodieClientTestUtils.readSince(basePath, sqlContext, timeline, prevCommitTime).count()); "Incremental consumption from " + prevCommitTime + " should give all records in latest commit");
if (commitTimesBetweenPrevAndNew.isPresent()) { if (commitTimesBetweenPrevAndNew.isPresent()) {
commitTimesBetweenPrevAndNew.get().forEach(ct -> { commitTimesBetweenPrevAndNew.get().forEach(ct -> {
assertEquals("Incremental consumption from " + ct + " should give all records in latest commit", assertEquals(HoodieClientTestUtils.readCommit(basePath, sqlContext, timeline, newCommitTime).count(),
HoodieClientTestUtils.readCommit(basePath, sqlContext, timeline, newCommitTime).count(), HoodieClientTestUtils.readSince(basePath, sqlContext, timeline, ct).count(),
HoodieClientTestUtils.readSince(basePath, sqlContext, timeline, ct).count()); "Incremental consumption from " + ct + " should give all records in latest commit");
}); });
} }
} }
@@ -540,26 +539,26 @@ public class TestHoodieClientBase extends HoodieClientTestHarness {
HoodieTimeline timeline = new HoodieActiveTimeline(metaClient).getCommitTimeline(); HoodieTimeline timeline = new HoodieActiveTimeline(metaClient).getCommitTimeline();
if (assertForCommit) { if (assertForCommit) {
assertEquals("Expecting 3 commits.", 3, assertEquals(3, timeline.findInstantsAfter(initCommitTime, Integer.MAX_VALUE).countInstants(),
timeline.findInstantsAfter(initCommitTime, Integer.MAX_VALUE).countInstants()); "Expecting 3 commits.");
Assert.assertEquals("Latest commit should be " + newCommitTime, newCommitTime, assertEquals(newCommitTime, timeline.lastInstant().get().getTimestamp(),
timeline.lastInstant().get().getTimestamp()); "Latest commit should be " + newCommitTime);
assertEquals("Must contain " + expRecordsInThisCommit + " records", expRecordsInThisCommit, assertEquals(expRecordsInThisCommit, HoodieClientTestUtils.readCommit(basePath, sqlContext, timeline, newCommitTime).count(),
HoodieClientTestUtils.readCommit(basePath, sqlContext, timeline, newCommitTime).count()); "Must contain " + expRecordsInThisCommit + " records");
// Check the entire dataset has all records still // Check the entire dataset has all records still
String[] fullPartitionPaths = new String[dataGen.getPartitionPaths().length]; String[] fullPartitionPaths = new String[dataGen.getPartitionPaths().length];
for (int i = 0; i < fullPartitionPaths.length; i++) { for (int i = 0; i < fullPartitionPaths.length; i++) {
fullPartitionPaths[i] = String.format("%s/%s/*", basePath, dataGen.getPartitionPaths()[i]); fullPartitionPaths[i] = String.format("%s/%s/*", basePath, dataGen.getPartitionPaths()[i]);
} }
assertEquals("Must contain " + expTotalRecords + " records", expTotalRecords, assertEquals(expTotalRecords, HoodieClientTestUtils.read(jsc, basePath, sqlContext, fs, fullPartitionPaths).count(),
HoodieClientTestUtils.read(jsc, basePath, sqlContext, fs, fullPartitionPaths).count()); "Must contain " + expTotalRecords + " records");
// Check that the incremental consumption from prevCommitTime // Check that the incremental consumption from prevCommitTime
assertEquals("Incremental consumption from " + prevCommitTime + " should give no records in latest commit," assertEquals(HoodieClientTestUtils.readCommit(basePath, sqlContext, timeline, newCommitTime).count(),
+ " since it is a delete operation", HoodieClientTestUtils.readSince(basePath, sqlContext, timeline, prevCommitTime).count(),
HoodieClientTestUtils.readCommit(basePath, sqlContext, timeline, newCommitTime).count(), "Incremental consumption from " + prevCommitTime + " should give no records in latest commit,"
HoodieClientTestUtils.readSince(basePath, sqlContext, timeline, prevCommitTime).count()); + " since it is a delete operation");
} }
return result; return result;
} }

View File

@@ -18,7 +18,6 @@
package org.apache.hudi.client; package org.apache.hudi.client;
import java.util.HashSet;
import org.apache.hudi.common.HoodieClientTestUtils; import org.apache.hudi.common.HoodieClientTestUtils;
import org.apache.hudi.common.HoodieTestDataGenerator; import org.apache.hudi.common.HoodieTestDataGenerator;
import org.apache.hudi.common.TestRawTripPayload; import org.apache.hudi.common.TestRawTripPayload;
@@ -49,15 +48,14 @@ import org.apache.hudi.exception.HoodieIOException;
import org.apache.hudi.index.HoodieIndex; import org.apache.hudi.index.HoodieIndex;
import org.apache.hudi.index.HoodieIndex.IndexType; import org.apache.hudi.index.HoodieIndex.IndexType;
import org.apache.hudi.table.HoodieTable; import org.apache.hudi.table.HoodieTable;
import org.apache.hudi.table.action.commit.WriteHelper;
import org.apache.avro.generic.GenericRecord; import org.apache.avro.generic.GenericRecord;
import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.Path;
import org.apache.hudi.table.action.commit.WriteHelper;
import org.apache.log4j.LogManager; import org.apache.log4j.LogManager;
import org.apache.log4j.Logger; import org.apache.log4j.Logger;
import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaRDD;
import org.junit.Assert; import org.junit.jupiter.api.Test;
import org.junit.Test;
import java.io.FileInputStream; import java.io.FileInputStream;
import java.io.IOException; import java.io.IOException;
@@ -65,6 +63,7 @@ import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.Collection; import java.util.Collection;
import java.util.HashMap; import java.util.HashMap;
import java.util.HashSet;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Set; import java.util.Set;
@@ -75,10 +74,10 @@ import static org.apache.hudi.common.HoodieTestDataGenerator.NULL_SCHEMA;
import static org.apache.hudi.common.HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA; import static org.apache.hudi.common.HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA;
import static org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion.VERSION_0; import static org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion.VERSION_0;
import static org.apache.hudi.common.util.ParquetUtils.readRowKeysFromParquet; import static org.apache.hudi.common.util.ParquetUtils.readRowKeysFromParquet;
import static org.junit.Assert.assertEquals; import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.Assert.assertFalse; import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.Assert.assertTrue; import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.Assert.fail; import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.mockito.Mockito.mock; import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when; import static org.mockito.Mockito.when;
@@ -154,11 +153,11 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
JavaRDD<WriteStatus> result = insertFirstBatch(cfg, client, newCommitTime, prevCommitTime, numRecords, writeFn, JavaRDD<WriteStatus> result = insertFirstBatch(cfg, client, newCommitTime, prevCommitTime, numRecords, writeFn,
isPrepped, false, numRecords); isPrepped, false, numRecords);
assertFalse("If Autocommit is false, then commit should not be made automatically", assertFalse(HoodieTestUtils.doesCommitExist(basePath, newCommitTime),
HoodieTestUtils.doesCommitExist(basePath, newCommitTime)); "If Autocommit is false, then commit should not be made automatically");
assertTrue("Commit should succeed", client.commit(newCommitTime, result)); assertTrue(client.commit(newCommitTime, result), "Commit should succeed");
assertTrue("After explicit commit, commit file should be created", assertTrue(HoodieTestUtils.doesCommitExist(basePath, newCommitTime),
HoodieTestUtils.doesCommitExist(basePath, newCommitTime)); "After explicit commit, commit file should be created");
} }
} }
@@ -251,7 +250,7 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
if (!partitionToKeys.containsKey(partitionPath)) { if (!partitionToKeys.containsKey(partitionPath)) {
partitionToKeys.put(partitionPath, new HashSet<>()); partitionToKeys.put(partitionPath, new HashSet<>());
} }
assertFalse("key " + key + " is duplicate within partition " + partitionPath, partitionToKeys.get(partitionPath).contains(key)); assertFalse(partitionToKeys.get(partitionPath).contains(key), "key " + key + " is duplicate within partition " + partitionPath);
partitionToKeys.get(partitionPath).add(key); partitionToKeys.get(partitionPath).add(key);
} }
} }
@@ -326,8 +325,8 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
for (int i = 0; i < fullPartitionPaths.length; i++) { for (int i = 0; i < fullPartitionPaths.length; i++) {
fullPartitionPaths[i] = String.format("%s/%s/*", basePath, dataGen.getPartitionPaths()[i]); fullPartitionPaths[i] = String.format("%s/%s/*", basePath, dataGen.getPartitionPaths()[i]);
} }
assertEquals("Must contain " + 200 + " records", 200, assertEquals(200, HoodieClientTestUtils.read(jsc, basePath, sqlContext, fs, fullPartitionPaths).count(),
HoodieClientTestUtils.read(jsc, basePath, sqlContext, fs, fullPartitionPaths).count()); "Must contain " + 200 + " records");
// Perform Delete again on upgraded dataset. // Perform Delete again on upgraded dataset.
prevCommitTime = newCommitTime; prevCommitTime = newCommitTime;
@@ -340,17 +339,17 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
HoodieActiveTimeline activeTimeline = new HoodieActiveTimeline(metaClient, false); HoodieActiveTimeline activeTimeline = new HoodieActiveTimeline(metaClient, false);
List<HoodieInstant> instants = activeTimeline.getCommitTimeline().getInstants().collect(Collectors.toList()); List<HoodieInstant> instants = activeTimeline.getCommitTimeline().getInstants().collect(Collectors.toList());
Assert.assertEquals(5, instants.size()); assertEquals(5, instants.size());
Assert.assertEquals(new HoodieInstant(HoodieInstant.State.COMPLETED, HoodieTimeline.COMMIT_ACTION, "001"), assertEquals(new HoodieInstant(HoodieInstant.State.COMPLETED, HoodieTimeline.COMMIT_ACTION, "001"),
instants.get(0)); instants.get(0));
Assert.assertEquals(new HoodieInstant(HoodieInstant.State.COMPLETED, HoodieTimeline.COMMIT_ACTION, "004"), assertEquals(new HoodieInstant(HoodieInstant.State.COMPLETED, HoodieTimeline.COMMIT_ACTION, "004"),
instants.get(1)); instants.get(1));
// New Format should have all states of instants // New Format should have all states of instants
Assert.assertEquals(new HoodieInstant(HoodieInstant.State.REQUESTED, HoodieTimeline.COMMIT_ACTION, "006"), assertEquals(new HoodieInstant(HoodieInstant.State.REQUESTED, HoodieTimeline.COMMIT_ACTION, "006"),
instants.get(2)); instants.get(2));
Assert.assertEquals(new HoodieInstant(HoodieInstant.State.INFLIGHT, HoodieTimeline.COMMIT_ACTION, "006"), assertEquals(new HoodieInstant(HoodieInstant.State.INFLIGHT, HoodieTimeline.COMMIT_ACTION, "006"),
instants.get(3)); instants.get(3));
Assert.assertEquals(new HoodieInstant(HoodieInstant.State.COMPLETED, HoodieTimeline.COMMIT_ACTION, "006"), assertEquals(new HoodieInstant(HoodieInstant.State.COMPLETED, HoodieTimeline.COMMIT_ACTION, "006"),
instants.get(4)); instants.get(4));
} }
@@ -425,8 +424,8 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
for (int i = 0; i < fullPartitionPaths.length; i++) { for (int i = 0; i < fullPartitionPaths.length; i++) {
fullPartitionPaths[i] = String.format("%s/%s/*", basePath, dataGen.getPartitionPaths()[i]); fullPartitionPaths[i] = String.format("%s/%s/*", basePath, dataGen.getPartitionPaths()[i]);
} }
assertEquals("Must contain 100 records", 100, assertEquals(100, HoodieClientTestUtils.read(jsc, basePath, sqlContext, fs, fullPartitionPaths).count(),
HoodieClientTestUtils.read(jsc, basePath, sqlContext, fs, fullPartitionPaths).count()); "Must contain 100 records");
/** /**
* Write 2. Updates with different partition * Write 2. Updates with different partition
@@ -448,8 +447,8 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
for (int i = 0; i < fullPartitionPaths.length; i++) { for (int i = 0; i < fullPartitionPaths.length; i++) {
fullPartitionPaths[i] = String.format("%s/%s/*", basePath, dataGen.getPartitionPaths()[i]); fullPartitionPaths[i] = String.format("%s/%s/*", basePath, dataGen.getPartitionPaths()[i]);
} }
assertEquals("Must contain 100 records", 100, assertEquals(100, HoodieClientTestUtils.read(jsc, basePath, sqlContext, fs, fullPartitionPaths).count(),
HoodieClientTestUtils.read(jsc, basePath, sqlContext, fs, fullPartitionPaths).count()); "Must contain 100 records");
} }
/** /**
@@ -476,12 +475,11 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
assertNoWriteErrors(statuses); assertNoWriteErrors(statuses);
assertEquals("Just 1 file needs to be added.", 1, statuses.size()); assertEquals(1, statuses.size(), "Just 1 file needs to be added.");
String file1 = statuses.get(0).getFileId(); String file1 = statuses.get(0).getFileId();
Assert.assertEquals("file should contain 100 records", assertEquals(100,
readRowKeysFromParquet(jsc.hadoopConfiguration(), new Path(basePath, statuses.get(0).getStat().getPath())) readRowKeysFromParquet(jsc.hadoopConfiguration(), new Path(basePath, statuses.get(0).getStat().getPath()))
.size(), .size(), "file should contain 100 records");
100);
// Update + Inserts such that they just expand file1 // Update + Inserts such that they just expand file1
String commitTime2 = "002"; String commitTime2 = "002";
@@ -496,18 +494,18 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
statuses = client.upsert(insertAndUpdatesRDD2, commitTime2).collect(); statuses = client.upsert(insertAndUpdatesRDD2, commitTime2).collect();
assertNoWriteErrors(statuses); assertNoWriteErrors(statuses);
assertEquals("Just 1 file needs to be updated.", 1, statuses.size()); assertEquals(1, statuses.size(), "Just 1 file needs to be updated.");
assertEquals("Existing file should be expanded", file1, statuses.get(0).getFileId()); assertEquals(file1, statuses.get(0).getFileId(), "Existing file should be expanded");
assertEquals("Existing file should be expanded", commitTime1, statuses.get(0).getStat().getPrevCommit()); assertEquals(commitTime1, statuses.get(0).getStat().getPrevCommit(), "Existing file should be expanded");
Path newFile = new Path(basePath, statuses.get(0).getStat().getPath()); Path newFile = new Path(basePath, statuses.get(0).getStat().getPath());
assertEquals("file should contain 140 records", readRowKeysFromParquet(jsc.hadoopConfiguration(), newFile).size(), assertEquals(140, readRowKeysFromParquet(jsc.hadoopConfiguration(), newFile).size(),
140); "file should contain 140 records");
List<GenericRecord> records = ParquetUtils.readAvroRecords(jsc.hadoopConfiguration(), newFile); List<GenericRecord> records = ParquetUtils.readAvroRecords(jsc.hadoopConfiguration(), newFile);
for (GenericRecord record : records) { for (GenericRecord record : records) {
String recordKey = record.get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString(); String recordKey = record.get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString();
assertEquals("only expect commit2", commitTime2, record.get(HoodieRecord.COMMIT_TIME_METADATA_FIELD).toString()); assertEquals(commitTime2, record.get(HoodieRecord.COMMIT_TIME_METADATA_FIELD).toString(), "only expect commit2");
assertTrue("key expected to be part of commit2", keys2.contains(recordKey) || keys1.contains(recordKey)); assertTrue(keys2.contains(recordKey) || keys1.contains(recordKey), "key expected to be part of commit2");
} }
// update + inserts such that file1 is updated and expanded, a new file2 is created. // update + inserts such that file1 is updated and expanded, a new file2 is created.
@@ -522,7 +520,7 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
statuses = client.upsert(insertAndUpdatesRDD3, commitTime3).collect(); statuses = client.upsert(insertAndUpdatesRDD3, commitTime3).collect();
assertNoWriteErrors(statuses); assertNoWriteErrors(statuses);
assertEquals("2 files needs to be committed.", 2, statuses.size()); assertEquals(2, statuses.size(), "2 files needs to be committed.");
HoodieTableMetaClient metadata = new HoodieTableMetaClient(jsc.hadoopConfiguration(), basePath); HoodieTableMetaClient metadata = new HoodieTableMetaClient(jsc.hadoopConfiguration(), basePath);
HoodieTable table = getHoodieTable(metadata, config); HoodieTable table = getHoodieTable(metadata, config);
@@ -533,7 +531,7 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
int numTotalUpdatesInCommit3 = 0; int numTotalUpdatesInCommit3 = 0;
for (HoodieBaseFile file : files) { for (HoodieBaseFile file : files) {
if (file.getFileName().contains(file1)) { if (file.getFileName().contains(file1)) {
assertEquals("Existing file should be expanded", commitTime3, file.getCommitTime()); assertEquals(commitTime3, file.getCommitTime(), "Existing file should be expanded");
records = ParquetUtils.readAvroRecords(jsc.hadoopConfiguration(), new Path(file.getPath())); records = ParquetUtils.readAvroRecords(jsc.hadoopConfiguration(), new Path(file.getPath()));
for (GenericRecord record : records) { for (GenericRecord record : records) {
String recordKey = record.get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString(); String recordKey = record.get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString();
@@ -547,21 +545,21 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
} }
} }
} }
assertEquals("All keys added in commit 2 must be updated in commit3 correctly", 0, keys2.size()); assertEquals(0, keys2.size(), "All keys added in commit 2 must be updated in commit3 correctly");
} else { } else {
assertEquals("New file must be written for commit 3", commitTime3, file.getCommitTime()); assertEquals(commitTime3, file.getCommitTime(), "New file must be written for commit 3");
records = ParquetUtils.readAvroRecords(jsc.hadoopConfiguration(), new Path(file.getPath())); records = ParquetUtils.readAvroRecords(jsc.hadoopConfiguration(), new Path(file.getPath()));
for (GenericRecord record : records) { for (GenericRecord record : records) {
String recordKey = record.get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString(); String recordKey = record.get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString();
assertEquals("only expect commit3", commitTime3, assertEquals(commitTime3, record.get(HoodieRecord.COMMIT_TIME_METADATA_FIELD).toString(),
record.get(HoodieRecord.COMMIT_TIME_METADATA_FIELD).toString()); "only expect commit3");
assertTrue("key expected to be part of commit3", keys3.contains(recordKey)); assertTrue(keys3.contains(recordKey), "key expected to be part of commit3");
} }
numTotalInsertsInCommit3 += records.size(); numTotalInsertsInCommit3 += records.size();
} }
} }
assertEquals("Total updates in commit3 must add up", inserts2.size(), numTotalUpdatesInCommit3); assertEquals(numTotalUpdatesInCommit3, inserts2.size(), "Total updates in commit3 must add up");
assertEquals("Total inserts in commit3 must add up", keys3.size(), numTotalInsertsInCommit3); assertEquals(numTotalInsertsInCommit3, keys3.size(), "Total inserts in commit3 must add up");
} }
/** /**
@@ -588,12 +586,11 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
assertNoWriteErrors(statuses); assertNoWriteErrors(statuses);
assertPartitionMetadata(new String[] {testPartitionPath}, fs); assertPartitionMetadata(new String[] {testPartitionPath}, fs);
assertEquals("Just 1 file needs to be added.", 1, statuses.size()); assertEquals(1, statuses.size(), "Just 1 file needs to be added.");
String file1 = statuses.get(0).getFileId(); String file1 = statuses.get(0).getFileId();
assertEquals("file should contain 100 records", assertEquals(100,
readRowKeysFromParquet(jsc.hadoopConfiguration(), new Path(basePath, statuses.get(0).getStat().getPath())) readRowKeysFromParquet(jsc.hadoopConfiguration(), new Path(basePath, statuses.get(0).getStat().getPath()))
.size(), .size(), "file should contain 100 records");
100);
// Second, set of Inserts should just expand file1 // Second, set of Inserts should just expand file1
String commitTime2 = "002"; String commitTime2 = "002";
@@ -604,21 +601,21 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
statuses = client.insert(insertRecordsRDD2, commitTime2).collect(); statuses = client.insert(insertRecordsRDD2, commitTime2).collect();
assertNoWriteErrors(statuses); assertNoWriteErrors(statuses);
assertEquals("Just 1 file needs to be updated.", 1, statuses.size()); assertEquals(1, statuses.size(), "Just 1 file needs to be updated.");
assertEquals("Existing file should be expanded", file1, statuses.get(0).getFileId()); assertEquals(file1, statuses.get(0).getFileId(), "Existing file should be expanded");
assertEquals("Existing file should be expanded", commitTime1, statuses.get(0).getStat().getPrevCommit()); assertEquals(commitTime1, statuses.get(0).getStat().getPrevCommit(), "Existing file should be expanded");
Path newFile = new Path(basePath, statuses.get(0).getStat().getPath()); Path newFile = new Path(basePath, statuses.get(0).getStat().getPath());
assertEquals("file should contain 140 records", readRowKeysFromParquet(jsc.hadoopConfiguration(), newFile).size(), assertEquals(140, readRowKeysFromParquet(jsc.hadoopConfiguration(), newFile).size(),
140); "file should contain 140 records");
List<GenericRecord> records = ParquetUtils.readAvroRecords(jsc.hadoopConfiguration(), newFile); List<GenericRecord> records = ParquetUtils.readAvroRecords(jsc.hadoopConfiguration(), newFile);
for (GenericRecord record : records) { for (GenericRecord record : records) {
String recordKey = record.get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString(); String recordKey = record.get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString();
String recCommitTime = record.get(HoodieRecord.COMMIT_TIME_METADATA_FIELD).toString(); String recCommitTime = record.get(HoodieRecord.COMMIT_TIME_METADATA_FIELD).toString();
assertTrue("Record expected to be part of commit 1 or commit2", assertTrue(commitTime1.equals(recCommitTime) || commitTime2.equals(recCommitTime),
commitTime1.equals(recCommitTime) || commitTime2.equals(recCommitTime)); "Record expected to be part of commit 1 or commit2");
assertTrue("key expected to be part of commit 1 or commit2", assertTrue(keys2.contains(recordKey) || keys1.contains(recordKey),
keys2.contains(recordKey) || keys1.contains(recordKey)); "key expected to be part of commit 1 or commit2");
} }
// Lots of inserts such that file1 is updated and expanded, a new file2 is created. // Lots of inserts such that file1 is updated and expanded, a new file2 is created.
@@ -628,22 +625,22 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
JavaRDD<HoodieRecord> insertRecordsRDD3 = jsc.parallelize(insert3, 1); JavaRDD<HoodieRecord> insertRecordsRDD3 = jsc.parallelize(insert3, 1);
statuses = client.insert(insertRecordsRDD3, commitTime3).collect(); statuses = client.insert(insertRecordsRDD3, commitTime3).collect();
assertNoWriteErrors(statuses); assertNoWriteErrors(statuses);
assertEquals("2 files needs to be committed.", 2, statuses.size()); assertEquals(2, statuses.size(), "2 files needs to be committed.");
HoodieTableMetaClient metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), basePath); HoodieTableMetaClient metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), basePath);
HoodieTable table = getHoodieTable(metaClient, config); HoodieTable table = getHoodieTable(metaClient, config);
List<HoodieBaseFile> files = table.getBaseFileOnlyView() List<HoodieBaseFile> files = table.getBaseFileOnlyView()
.getLatestBaseFilesBeforeOrOn(testPartitionPath, commitTime3).collect(Collectors.toList()); .getLatestBaseFilesBeforeOrOn(testPartitionPath, commitTime3).collect(Collectors.toList());
assertEquals("Total of 2 valid data files", 2, files.size()); assertEquals(2, files.size(), "Total of 2 valid data files");
int totalInserts = 0; int totalInserts = 0;
for (HoodieBaseFile file : files) { for (HoodieBaseFile file : files) {
assertEquals("All files must be at commit 3", commitTime3, file.getCommitTime()); assertEquals(commitTime3, file.getCommitTime(), "All files must be at commit 3");
records = ParquetUtils.readAvroRecords(jsc.hadoopConfiguration(), new Path(file.getPath())); records = ParquetUtils.readAvroRecords(jsc.hadoopConfiguration(), new Path(file.getPath()));
totalInserts += records.size(); totalInserts += records.size();
} }
assertEquals("Total number of records must add up", totalInserts, assertEquals(totalInserts, inserts1.size() + inserts2.size() + insert3.size(),
inserts1.size() + inserts2.size() + insert3.size()); "Total number of records must add up");
} }
/** /**
@@ -670,12 +667,11 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
assertNoWriteErrors(statuses); assertNoWriteErrors(statuses);
assertEquals("Just 1 file needs to be added.", 1, statuses.size()); assertEquals(1, statuses.size(), "Just 1 file needs to be added.");
String file1 = statuses.get(0).getFileId(); String file1 = statuses.get(0).getFileId();
Assert.assertEquals("file should contain 100 records", assertEquals(100,
readRowKeysFromParquet(jsc.hadoopConfiguration(), new Path(basePath, statuses.get(0).getStat().getPath())) readRowKeysFromParquet(jsc.hadoopConfiguration(), new Path(basePath, statuses.get(0).getStat().getPath()))
.size(), .size(), "file should contain 100 records");
100);
// Delete 20 among 100 inserted // Delete 20 among 100 inserted
testDeletes(client, inserts1, 20, file1, "002", 80, keysSoFar); testDeletes(client, inserts1, 20, file1, "002", 80, keysSoFar);
@@ -701,15 +697,16 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
JavaRDD<HoodieKey> deleteKeys3 = jsc.parallelize(hoodieKeysToDelete3, 1); JavaRDD<HoodieKey> deleteKeys3 = jsc.parallelize(hoodieKeysToDelete3, 1);
statuses = client.delete(deleteKeys3, commitTime6).collect(); statuses = client.delete(deleteKeys3, commitTime6).collect();
assertNoWriteErrors(statuses); assertNoWriteErrors(statuses);
assertEquals("Just 0 write status for delete.", 0, statuses.size()); assertEquals(0, statuses.size(), "Just 0 write status for delete.");
// Check the entire dataset has all records still // Check the entire dataset has all records still
String[] fullPartitionPaths = new String[dataGen.getPartitionPaths().length]; String[] fullPartitionPaths = new String[dataGen.getPartitionPaths().length];
for (int i = 0; i < fullPartitionPaths.length; i++) { for (int i = 0; i < fullPartitionPaths.length; i++) {
fullPartitionPaths[i] = String.format("%s/%s/*", basePath, dataGen.getPartitionPaths()[i]); fullPartitionPaths[i] = String.format("%s/%s/*", basePath, dataGen.getPartitionPaths()[i]);
} }
assertEquals("Must contain " + 150 + " records", 150, assertEquals(150,
HoodieClientTestUtils.read(jsc, basePath, sqlContext, fs, fullPartitionPaths).count()); HoodieClientTestUtils.read(jsc, basePath, sqlContext, fs, fullPartitionPaths).count(),
"Must contain " + 150 + " records");
// delete another batch. previous delete commit should have persisted the schema. If not, // delete another batch. previous delete commit should have persisted the schema. If not,
// this will throw exception // this will throw exception
@@ -735,8 +732,9 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
for (int i = 0; i < fullPartitionPaths.length; i++) { for (int i = 0; i < fullPartitionPaths.length; i++) {
fullPartitionPaths[i] = String.format("%s/%s/*", basePath, dataGen.getPartitionPaths()[i]); fullPartitionPaths[i] = String.format("%s/%s/*", basePath, dataGen.getPartitionPaths()[i]);
} }
assertEquals("Must contain " + expectedTotalRecords + " records", expectedTotalRecords, assertEquals(expectedTotalRecords,
HoodieClientTestUtils.read(jsc, basePath, sqlContext, fs, fullPartitionPaths).count()); HoodieClientTestUtils.read(jsc, basePath, sqlContext, fs, fullPartitionPaths).count(),
"Must contain " + expectedTotalRecords + " records");
return Pair.of(keys, inserts); return Pair.of(keys, inserts);
} }
@@ -751,26 +749,28 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
assertNoWriteErrors(statuses); assertNoWriteErrors(statuses);
assertEquals("Just 1 file needs to be added.", 1, statuses.size()); assertEquals(1, statuses.size(), "Just 1 file needs to be added.");
assertEquals("Existing file should be expanded", existingFile, statuses.get(0).getFileId()); assertEquals(existingFile, statuses.get(0).getFileId(), "Existing file should be expanded");
// Check the entire dataset has all records still // Check the entire dataset has all records still
String[] fullPartitionPaths = new String[dataGen.getPartitionPaths().length]; String[] fullPartitionPaths = new String[dataGen.getPartitionPaths().length];
for (int i = 0; i < fullPartitionPaths.length; i++) { for (int i = 0; i < fullPartitionPaths.length; i++) {
fullPartitionPaths[i] = String.format("%s/%s/*", basePath, dataGen.getPartitionPaths()[i]); fullPartitionPaths[i] = String.format("%s/%s/*", basePath, dataGen.getPartitionPaths()[i]);
} }
assertEquals("Must contain " + exepctedRecords + " records", exepctedRecords, assertEquals(exepctedRecords,
HoodieClientTestUtils.read(jsc, basePath, sqlContext, fs, fullPartitionPaths).count()); HoodieClientTestUtils.read(jsc, basePath, sqlContext, fs, fullPartitionPaths).count(),
"Must contain " + exepctedRecords + " records");
Path newFile = new Path(basePath, statuses.get(0).getStat().getPath()); Path newFile = new Path(basePath, statuses.get(0).getStat().getPath());
assertEquals("file should contain 110 records", readRowKeysFromParquet(jsc.hadoopConfiguration(), newFile).size(), assertEquals(exepctedRecords,
exepctedRecords); readRowKeysFromParquet(jsc.hadoopConfiguration(), newFile).size(),
"file should contain 110 records");
List<GenericRecord> records = ParquetUtils.readAvroRecords(jsc.hadoopConfiguration(), newFile); List<GenericRecord> records = ParquetUtils.readAvroRecords(jsc.hadoopConfiguration(), newFile);
for (GenericRecord record : records) { for (GenericRecord record : records) {
String recordKey = record.get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString(); String recordKey = record.get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString();
assertTrue("key expected to be part of " + instantTime, keys.contains(recordKey)); assertTrue(keys.contains(recordKey), "key expected to be part of " + instantTime);
assertFalse("Key deleted", hoodieKeysToDelete.contains(recordKey)); assertFalse(hoodieKeysToDelete.contains(recordKey), "Key deleted");
} }
} }
@@ -795,12 +795,9 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
List<HoodieKey> hoodieKeysToDelete = HoodieClientTestUtils List<HoodieKey> hoodieKeysToDelete = HoodieClientTestUtils
.getKeysToDelete(HoodieClientTestUtils.getHoodieKeys(dummyInserts), 20); .getKeysToDelete(HoodieClientTestUtils.getHoodieKeys(dummyInserts), 20);
JavaRDD<HoodieKey> deleteKeys = jsc.parallelize(hoodieKeysToDelete, 1); JavaRDD<HoodieKey> deleteKeys = jsc.parallelize(hoodieKeysToDelete, 1);
try { assertThrows(HoodieIOException.class, () -> {
client.delete(deleteKeys, commitTime1).collect(); client.delete(deleteKeys, commitTime1).collect();
fail("Should have thrown Exception"); }, "Should have thrown Exception");
} catch (HoodieIOException e) {
// ignore
}
} }
/** /**
@@ -822,9 +819,9 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
JavaRDD<WriteStatus> result = client.bulkInsert(writeRecords, instantTime); JavaRDD<WriteStatus> result = client.bulkInsert(writeRecords, instantTime);
assertTrue("Commit should succeed", client.commit(instantTime, result)); assertTrue(client.commit(instantTime, result), "Commit should succeed");
assertTrue("After explicit commit, commit file should be created", assertTrue(HoodieTestUtils.doesCommitExist(basePath, instantTime),
HoodieTestUtils.doesCommitExist(basePath, instantTime)); "After explicit commit, commit file should be created");
// Get parquet file paths from commit metadata // Get parquet file paths from commit metadata
String actionType = metaClient.getCommitActionType(); String actionType = metaClient.getCommitActionType();
@@ -868,9 +865,9 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
JavaRDD<WriteStatus> result = client.bulkInsert(writeRecords, instantTime); JavaRDD<WriteStatus> result = client.bulkInsert(writeRecords, instantTime);
assertTrue("Commit should succeed", client.commit(instantTime, result)); assertTrue(client.commit(instantTime, result), "Commit should succeed");
assertTrue("After explicit commit, commit file should be created", assertTrue(HoodieTestUtils.doesCommitExist(basePath, instantTime),
HoodieTestUtils.doesCommitExist(basePath, instantTime)); "After explicit commit, commit file should be created");
// Read from commit file // Read from commit file
String filename = HoodieTestUtils.getCommitFilePath(basePath, instantTime); String filename = HoodieTestUtils.getCommitFilePath(basePath, instantTime);
@@ -888,7 +885,7 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
inserts += stat.getValue().getInserts(); inserts += stat.getValue().getInserts();
} }
} }
Assert.assertEquals(inserts, 200); assertEquals(200, inserts);
// Update + Inserts such that they just expand file1 // Update + Inserts such that they just expand file1
instantTime = "001"; instantTime = "001";
@@ -898,9 +895,9 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
writeRecords = jsc.parallelize(records, 1); writeRecords = jsc.parallelize(records, 1);
result = client.upsert(writeRecords, instantTime); result = client.upsert(writeRecords, instantTime);
assertTrue("Commit should succeed", client.commit(instantTime, result)); assertTrue(client.commit(instantTime, result), "Commit should succeed");
assertTrue("After explicit commit, commit file should be created", assertTrue(HoodieTestUtils.doesCommitExist(basePath, instantTime),
HoodieTestUtils.doesCommitExist(basePath, instantTime)); "After explicit commit, commit file should be created");
// Read from commit file // Read from commit file
filename = HoodieTestUtils.getCommitFilePath(basePath, instantTime); filename = HoodieTestUtils.getCommitFilePath(basePath, instantTime);
@@ -919,8 +916,8 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
upserts += stat.getValue().getUpserts(); upserts += stat.getValue().getUpserts();
} }
} }
Assert.assertEquals(inserts, 200); assertEquals(200, inserts);
Assert.assertEquals(upserts, 200); assertEquals(200, upserts);
} }
@@ -937,9 +934,9 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
// Delete orphan marker and commit should succeed // Delete orphan marker and commit should succeed
metaClient.getFs().delete(result.getKey(), false); metaClient.getFs().delete(result.getKey(), false);
assertTrue("Commit should succeed", client.commit(instantTime, result.getRight())); assertTrue(client.commit(instantTime, result.getRight()), "Commit should succeed");
assertTrue("After explicit commit, commit file should be created", assertTrue(HoodieTestUtils.doesCommitExist(basePath, instantTime),
HoodieTestUtils.doesCommitExist(basePath, instantTime)); "After explicit commit, commit file should be created");
// Marker directory must be removed // Marker directory must be removed
assertFalse(metaClient.getFs().exists(new Path(metaClient.getMarkerFolderPath(instantTime)))); assertFalse(metaClient.getFs().exists(new Path(metaClient.getMarkerFolderPath(instantTime))));
} }
@@ -954,8 +951,8 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
// Rollback of this commit should succeed // Rollback of this commit should succeed
client.rollback(instantTime); client.rollback(instantTime);
assertFalse("After explicit rollback, commit file should not be present", assertFalse(HoodieTestUtils.doesCommitExist(basePath, instantTime),
HoodieTestUtils.doesCommitExist(basePath, instantTime)); "After explicit rollback, commit file should not be present");
// Marker directory must be removed after rollback // Marker directory must be removed after rollback
assertFalse(metaClient.getFs().exists(new Path(metaClient.getMarkerFolderPath(instantTime)))); assertFalse(metaClient.getFs().exists(new Path(metaClient.getMarkerFolderPath(instantTime))));
} }
@@ -984,12 +981,10 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
metaClient.getFs().create(markerFilePath); metaClient.getFs().create(markerFilePath);
LOG.info("Created a dummy marker path=" + markerFilePath); LOG.info("Created a dummy marker path=" + markerFilePath);
try { Exception e = assertThrows(HoodieCommitException.class, () -> {
client.commit(instantTime, result); client.commit(instantTime, result);
fail("Commit should fail due to consistency check"); }, "Commit should fail due to consistency check");
} catch (HoodieCommitException cme) { assertTrue(e.getCause() instanceof HoodieIOException);
assertTrue(cme.getCause() instanceof HoodieIOException);
}
return Pair.of(markerFilePath, result); return Pair.of(markerFilePath, result);
} }

View File

@@ -28,7 +28,7 @@ import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.sql.AnalysisException; import org.apache.spark.sql.AnalysisException;
import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row; import org.apache.spark.sql.Row;
import org.junit.Test; import org.junit.jupiter.api.Test;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
@@ -36,7 +36,8 @@ import java.util.Collection;
import java.util.List; import java.util.List;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import static org.junit.Assert.assertEquals; import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertThrows;
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
/** /**
@@ -79,11 +80,13 @@ public class TestHoodieReadClient extends TestHoodieClientBase {
}); });
} }
@Test(expected = IllegalStateException.class) @Test
public void testReadROViewFailsWithoutSqlContext() { public void testReadROViewFailsWithoutSqlContext() {
HoodieReadClient readClient = new HoodieReadClient(jsc, getConfig()); HoodieReadClient readClient = new HoodieReadClient(jsc, getConfig());
JavaRDD<HoodieKey> recordsRDD = jsc.parallelize(new ArrayList<>(), 1); JavaRDD<HoodieKey> recordsRDD = jsc.parallelize(new ArrayList<>(), 1);
assertThrows(IllegalStateException.class, () -> {
readClient.readROView(recordsRDD, 1); readClient.readROView(recordsRDD, 1);
});
} }
/** /**
@@ -133,12 +136,9 @@ public class TestHoodieReadClient extends TestHoodieClientBase {
JavaRDD<HoodieKey> keysWithoutPaths = keyToPathPair.filter(keyPath -> !keyPath._2.isPresent()) JavaRDD<HoodieKey> keysWithoutPaths = keyToPathPair.filter(keyPath -> !keyPath._2.isPresent())
.map(keyPath -> keyPath._1); .map(keyPath -> keyPath._1);
try { assertThrows(AnalysisException.class, () -> {
anotherReadClient.readROView(keysWithoutPaths, 1); anotherReadClient.readROView(keysWithoutPaths, 1);
} catch (Exception e) { });
// data frame reader throws exception for empty records. ignore the error.
assertEquals(e.getClass(), AnalysisException.class);
}
// Actual tests of getPendingCompactions method are in TestAsyncCompaction // Actual tests of getPendingCompactions method are in TestAsyncCompaction
// This is just testing empty list // This is just testing empty list

View File

@@ -39,13 +39,13 @@ import org.apache.log4j.Logger;
import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row; import org.apache.spark.sql.Row;
import org.junit.After; import org.junit.jupiter.api.AfterEach;
import org.junit.Before; import org.junit.jupiter.api.BeforeEach;
import org.junit.Test; import org.junit.jupiter.api.Test;
import java.util.List; import java.util.List;
import static org.junit.Assert.assertEquals; import static org.junit.jupiter.api.Assertions.assertEquals;
public class TestMultiFS extends HoodieClientTestHarness { public class TestMultiFS extends HoodieClientTestHarness {
@@ -54,14 +54,14 @@ public class TestMultiFS extends HoodieClientTestHarness {
protected String tableName = "hoodie_rt"; protected String tableName = "hoodie_rt";
private String tableType = HoodieTableType.COPY_ON_WRITE.name(); private String tableType = HoodieTableType.COPY_ON_WRITE.name();
@Before @BeforeEach
public void setUp() throws Exception { public void setUp() throws Exception {
initSparkContexts(); initSparkContexts();
initDFS(); initDFS();
initTestDataGenerator(); initTestDataGenerator();
} }
@After @AfterEach
public void tearDown() throws Exception { public void tearDown() throws Exception {
cleanupSparkContexts(); cleanupSparkContexts();
cleanupDFS(); cleanupDFS();
@@ -103,7 +103,7 @@ public class TestMultiFS extends HoodieClientTestHarness {
HoodieTableMetaClient metaClient = new HoodieTableMetaClient(fs.getConf(), dfsBasePath); HoodieTableMetaClient metaClient = new HoodieTableMetaClient(fs.getConf(), dfsBasePath);
HoodieTimeline timeline = new HoodieActiveTimeline(metaClient).getCommitTimeline(); HoodieTimeline timeline = new HoodieActiveTimeline(metaClient).getCommitTimeline();
Dataset<Row> readRecords = HoodieClientTestUtils.readCommit(dfsBasePath, sqlContext, timeline, readCommitTime); Dataset<Row> readRecords = HoodieClientTestUtils.readCommit(dfsBasePath, sqlContext, timeline, readCommitTime);
assertEquals("Should contain 100 records", readRecords.count(), records.size()); assertEquals(readRecords.count(), records.size(), "Should contain 100 records");
// Write to local // Write to local
HoodieTableMetaClient.initTableType(jsc.hadoopConfiguration(), tablePath, HoodieTableType.valueOf(tableType), HoodieTableMetaClient.initTableType(jsc.hadoopConfiguration(), tablePath, HoodieTableType.valueOf(tableType),
@@ -122,7 +122,7 @@ public class TestMultiFS extends HoodieClientTestHarness {
timeline = new HoodieActiveTimeline(metaClient).getCommitTimeline(); timeline = new HoodieActiveTimeline(metaClient).getCommitTimeline();
Dataset<Row> localReadRecords = Dataset<Row> localReadRecords =
HoodieClientTestUtils.readCommit(tablePath, sqlContext, timeline, writeCommitTime); HoodieClientTestUtils.readCommit(tablePath, sqlContext, timeline, writeCommitTime);
assertEquals("Should contain 100 records", localReadRecords.count(), localRecords.size()); assertEquals(localReadRecords.count(), localRecords.size(), "Should contain 100 records");
} }
} }
} }

View File

@@ -18,8 +18,6 @@
package org.apache.hudi.client; package org.apache.hudi.client;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericRecord;
import org.apache.hudi.avro.HoodieAvroUtils; import org.apache.hudi.avro.HoodieAvroUtils;
import org.apache.hudi.common.HoodieClientTestUtils; import org.apache.hudi.common.HoodieClientTestUtils;
import org.apache.hudi.common.HoodieTestDataGenerator; import org.apache.hudi.common.HoodieTestDataGenerator;
@@ -36,9 +34,12 @@ import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.exception.HoodieInsertException; import org.apache.hudi.exception.HoodieInsertException;
import org.apache.hudi.exception.HoodieUpsertException; import org.apache.hudi.exception.HoodieUpsertException;
import org.apache.hudi.index.HoodieIndex.IndexType; import org.apache.hudi.index.HoodieIndex.IndexType;
import org.junit.After;
import org.junit.Before; import org.apache.avro.Schema;
import org.junit.Test; import org.apache.avro.generic.GenericRecord;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import java.io.IOException; import java.io.IOException;
import java.util.List; import java.util.List;
@@ -51,10 +52,10 @@ import static org.apache.hudi.common.HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA
import static org.apache.hudi.common.HoodieTestDataGenerator.TRIP_SCHEMA_PREFIX; import static org.apache.hudi.common.HoodieTestDataGenerator.TRIP_SCHEMA_PREFIX;
import static org.apache.hudi.common.HoodieTestDataGenerator.TRIP_SCHEMA_SUFFIX; import static org.apache.hudi.common.HoodieTestDataGenerator.TRIP_SCHEMA_SUFFIX;
import static org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion.VERSION_1; import static org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion.VERSION_1;
import static org.junit.Assert.assertEquals; import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.Assert.assertFalse; import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.Assert.assertTrue; import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.junit.Assert.fail; import static org.junit.jupiter.api.Assertions.fail;
public class TestTableSchemaEvolution extends TestHoodieClientBase { public class TestTableSchemaEvolution extends TestHoodieClientBase {
private final String initCommitTime = "000"; private final String initCommitTime = "000";
@@ -73,60 +74,60 @@ public class TestTableSchemaEvolution extends TestHoodieClientBase {
public static final String TRIP_EXAMPLE_SCHEMA_DEVOLVED = TRIP_SCHEMA_PREFIX + MAP_TYPE_SCHEMA + FARE_NESTED_SCHEMA public static final String TRIP_EXAMPLE_SCHEMA_DEVOLVED = TRIP_SCHEMA_PREFIX + MAP_TYPE_SCHEMA + FARE_NESTED_SCHEMA
+ TRIP_SCHEMA_SUFFIX; + TRIP_SCHEMA_SUFFIX;
@Before @BeforeEach
public void setUp() throws Exception { public void setUp() throws Exception {
initResources(); initResources();
} }
@After @AfterEach
public void tearDown() { public void tearDown() {
cleanupSparkContexts(); cleanupSparkContexts();
} }
@Test @Test
public void testSchemaCompatibilityBasic() throws Exception { public void testSchemaCompatibilityBasic() throws Exception {
assertTrue("Same schema is compatible", assertTrue(TableSchemaResolver.isSchemaCompatible(TRIP_EXAMPLE_SCHEMA, TRIP_EXAMPLE_SCHEMA),
TableSchemaResolver.isSchemaCompatible(TRIP_EXAMPLE_SCHEMA, TRIP_EXAMPLE_SCHEMA)); "Same schema is compatible");
String reorderedSchema = TRIP_SCHEMA_PREFIX + TIP_NESTED_SCHEMA + FARE_NESTED_SCHEMA String reorderedSchema = TRIP_SCHEMA_PREFIX + TIP_NESTED_SCHEMA + FARE_NESTED_SCHEMA
+ MAP_TYPE_SCHEMA + TRIP_SCHEMA_SUFFIX; + MAP_TYPE_SCHEMA + TRIP_SCHEMA_SUFFIX;
assertTrue("Reordered fields are compatible", assertTrue(TableSchemaResolver.isSchemaCompatible(TRIP_EXAMPLE_SCHEMA, reorderedSchema),
TableSchemaResolver.isSchemaCompatible(TRIP_EXAMPLE_SCHEMA, reorderedSchema)); "Reordered fields are compatible");
assertTrue("Reordered fields are compatible", assertTrue(TableSchemaResolver.isSchemaCompatible(reorderedSchema, TRIP_EXAMPLE_SCHEMA),
TableSchemaResolver.isSchemaCompatible(reorderedSchema, TRIP_EXAMPLE_SCHEMA)); "Reordered fields are compatible");
String renamedSchema = TRIP_EXAMPLE_SCHEMA.replace("tip_history", "tip_future"); String renamedSchema = TRIP_EXAMPLE_SCHEMA.replace("tip_history", "tip_future");
assertFalse("Renamed fields are not compatible", assertFalse(TableSchemaResolver.isSchemaCompatible(TRIP_EXAMPLE_SCHEMA, renamedSchema),
TableSchemaResolver.isSchemaCompatible(TRIP_EXAMPLE_SCHEMA, renamedSchema)); "Renamed fields are not compatible");
assertFalse("Deleted single field is not compatible", assertFalse(TableSchemaResolver.isSchemaCompatible(TRIP_EXAMPLE_SCHEMA, TRIP_EXAMPLE_SCHEMA_DEVOLVED),
TableSchemaResolver.isSchemaCompatible(TRIP_EXAMPLE_SCHEMA, TRIP_EXAMPLE_SCHEMA_DEVOLVED)); "Deleted single field is not compatible");
String deletedMultipleFieldSchema = TRIP_SCHEMA_PREFIX + TIP_NESTED_SCHEMA + TRIP_SCHEMA_SUFFIX; String deletedMultipleFieldSchema = TRIP_SCHEMA_PREFIX + TIP_NESTED_SCHEMA + TRIP_SCHEMA_SUFFIX;
assertFalse("Deleted multiple fields are not compatible", assertFalse(TableSchemaResolver.isSchemaCompatible(TRIP_EXAMPLE_SCHEMA, deletedMultipleFieldSchema),
TableSchemaResolver.isSchemaCompatible(TRIP_EXAMPLE_SCHEMA, deletedMultipleFieldSchema)); "Deleted multiple fields are not compatible");
String renamedRecordSchema = TRIP_EXAMPLE_SCHEMA.replace("triprec", "triprec_renamed"); String renamedRecordSchema = TRIP_EXAMPLE_SCHEMA.replace("triprec", "triprec_renamed");
assertFalse("Renamed record name is not compatible", assertFalse(TableSchemaResolver.isSchemaCompatible(TRIP_EXAMPLE_SCHEMA, renamedRecordSchema),
TableSchemaResolver.isSchemaCompatible(TRIP_EXAMPLE_SCHEMA, renamedRecordSchema)); "Renamed record name is not compatible");
String swappedFieldSchema = TRIP_SCHEMA_PREFIX + MAP_TYPE_SCHEMA.replace("city_to_state", "fare") String swappedFieldSchema = TRIP_SCHEMA_PREFIX + MAP_TYPE_SCHEMA.replace("city_to_state", "fare")
+ FARE_NESTED_SCHEMA.replace("fare", "city_to_state") + TIP_NESTED_SCHEMA + TRIP_SCHEMA_SUFFIX; + FARE_NESTED_SCHEMA.replace("fare", "city_to_state") + TIP_NESTED_SCHEMA + TRIP_SCHEMA_SUFFIX;
assertFalse("Swapped fields are not compatible", assertFalse(TableSchemaResolver.isSchemaCompatible(TRIP_EXAMPLE_SCHEMA, swappedFieldSchema),
TableSchemaResolver.isSchemaCompatible(TRIP_EXAMPLE_SCHEMA, swappedFieldSchema)); "Swapped fields are not compatible");
String typeChangeSchema = TRIP_SCHEMA_PREFIX + MAP_TYPE_SCHEMA + FARE_NESTED_SCHEMA String typeChangeSchema = TRIP_SCHEMA_PREFIX + MAP_TYPE_SCHEMA + FARE_NESTED_SCHEMA
+ TIP_NESTED_SCHEMA.replace("string", "boolean") + TRIP_SCHEMA_SUFFIX; + TIP_NESTED_SCHEMA.replace("string", "boolean") + TRIP_SCHEMA_SUFFIX;
assertFalse("Field type change is not compatible", assertFalse(TableSchemaResolver.isSchemaCompatible(TRIP_EXAMPLE_SCHEMA, typeChangeSchema),
TableSchemaResolver.isSchemaCompatible(TRIP_EXAMPLE_SCHEMA, typeChangeSchema)); "Field type change is not compatible");
assertTrue("Added field with default is compatible (Evolved Schema)", assertTrue(TableSchemaResolver.isSchemaCompatible(TRIP_EXAMPLE_SCHEMA, TRIP_EXAMPLE_SCHEMA_EVOLVED),
TableSchemaResolver.isSchemaCompatible(TRIP_EXAMPLE_SCHEMA, TRIP_EXAMPLE_SCHEMA_EVOLVED)); "Added field with default is compatible (Evolved Schema)");
String multipleAddedFieldSchema = TRIP_SCHEMA_PREFIX + MAP_TYPE_SCHEMA + FARE_NESTED_SCHEMA String multipleAddedFieldSchema = TRIP_SCHEMA_PREFIX + MAP_TYPE_SCHEMA + FARE_NESTED_SCHEMA
+ TIP_NESTED_SCHEMA + EXTRA_FIELD_SCHEMA + EXTRA_FIELD_SCHEMA.replace("new_field", "new_new_field") + TIP_NESTED_SCHEMA + EXTRA_FIELD_SCHEMA + EXTRA_FIELD_SCHEMA.replace("new_field", "new_new_field")
+ TRIP_SCHEMA_SUFFIX; + TRIP_SCHEMA_SUFFIX;
assertTrue("Multiple added fields with defauls are compatible", assertTrue(TableSchemaResolver.isSchemaCompatible(TRIP_EXAMPLE_SCHEMA, multipleAddedFieldSchema),
TableSchemaResolver.isSchemaCompatible(TRIP_EXAMPLE_SCHEMA, multipleAddedFieldSchema)); "Multiple added fields with defauls are compatible");
} }
@Test @Test

View File

@@ -31,26 +31,26 @@ import org.apache.hudi.common.util.ParquetUtils;
import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.io.HoodieCreateHandle; import org.apache.hudi.io.HoodieCreateHandle;
import org.apache.hudi.io.HoodieMergeHandle; import org.apache.hudi.io.HoodieMergeHandle;
import org.apache.hudi.table.HoodieTable;
import org.apache.avro.generic.GenericRecord; import org.apache.avro.generic.GenericRecord;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.Path;
import org.apache.hudi.table.HoodieTable;
import org.apache.parquet.avro.AvroReadSupport; import org.apache.parquet.avro.AvroReadSupport;
import org.junit.After; import org.junit.jupiter.api.AfterEach;
import org.junit.Assert; import org.junit.jupiter.api.BeforeEach;
import org.junit.Before; import org.junit.jupiter.api.Test;
import org.junit.Test;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.List; import java.util.List;
import static org.junit.Assert.fail; import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
import static org.junit.jupiter.api.Assertions.assertEquals;
public class TestUpdateSchemaEvolution extends HoodieClientTestHarness { public class TestUpdateSchemaEvolution extends HoodieClientTestHarness {
@Before @BeforeEach
public void setUp() throws Exception { public void setUp() throws Exception {
initPath(); initPath();
HoodieTestUtils.init(HoodieTestUtils.getDefaultHadoopConf(), basePath); HoodieTestUtils.init(HoodieTestUtils.getDefaultHadoopConf(), basePath);
@@ -58,7 +58,7 @@ public class TestUpdateSchemaEvolution extends HoodieClientTestHarness {
initFileSystem(); initFileSystem();
} }
@After @AfterEach
public void tearDown() { public void tearDown() {
cleanupSparkContexts(); cleanupSparkContexts();
} }
@@ -103,7 +103,7 @@ public class TestUpdateSchemaEvolution extends HoodieClientTestHarness {
String fileId = insertResult.getFileId(); String fileId = insertResult.getFileId();
final HoodieTable table2 = HoodieTable.create(config2, jsc); final HoodieTable table2 = HoodieTable.create(config2, jsc);
Assert.assertEquals(1, jsc.parallelize(Arrays.asList(1)).map(x -> { assertEquals(1, jsc.parallelize(Arrays.asList(1)).map(x -> {
// New content with values for the newly added field // New content with values for the newly added field
String recordStr1 = "{\"_row_key\":\"8eb5b87a-1feh-4edd-87b4-6ec96dc405a0\"," String recordStr1 = "{\"_row_key\":\"8eb5b87a-1feh-4edd-87b4-6ec96dc405a0\","
+ "\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12,\"added_field\":1}"; + "\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12,\"added_field\":1}";
@@ -116,7 +116,7 @@ public class TestUpdateSchemaEvolution extends HoodieClientTestHarness {
record1.seal(); record1.seal();
updateRecords.add(record1); updateRecords.add(record1);
try { assertDoesNotThrow(() -> {
HoodieMergeHandle mergeHandle = new HoodieMergeHandle(config2, "101", table2, HoodieMergeHandle mergeHandle = new HoodieMergeHandle(config2, "101", table2,
updateRecords.iterator(), record1.getPartitionPath(), fileId, supplier); updateRecords.iterator(), record1.getPartitionPath(), fileId, supplier);
Configuration conf = new Configuration(); Configuration conf = new Configuration();
@@ -127,10 +127,9 @@ public class TestUpdateSchemaEvolution extends HoodieClientTestHarness {
mergeHandle.write(rec); mergeHandle.write(rec);
} }
mergeHandle.close(); mergeHandle.close();
} catch (ClassCastException e) { }, "UpdateFunction could not read records written with exampleSchema.txt using the "
fail("UpdateFunction could not read records written with exampleSchema.txt using the "
+ "exampleEvolvedSchema.txt"); + "exampleEvolvedSchema.txt");
}
return 1; return 1;
}).collect().size()); }).collect().size());
} }

View File

@@ -23,6 +23,7 @@ import org.apache.hudi.common.fs.FSUtils;
import org.apache.hudi.common.minicluster.HdfsTestService; import org.apache.hudi.common.minicluster.HdfsTestService;
import org.apache.hudi.common.model.HoodieTestUtils; import org.apache.hudi.common.model.HoodieTestUtils;
import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.testutils.HoodieCommonTestHarnessJunit5;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileSystem;
@@ -44,7 +45,7 @@ import java.util.concurrent.atomic.AtomicInteger;
/** /**
* The test harness for resource initialization and cleanup. * The test harness for resource initialization and cleanup.
*/ */
public abstract class HoodieClientTestHarness extends HoodieCommonTestHarness implements Serializable { public abstract class HoodieClientTestHarness extends HoodieCommonTestHarnessJunit5 implements Serializable {
private static final Logger LOG = LoggerFactory.getLogger(HoodieClientTestHarness.class); private static final Logger LOG = LoggerFactory.getLogger(HoodieClientTestHarness.class);

View File

@@ -28,16 +28,17 @@ import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.execution.LazyInsertIterable.HoodieInsertValueGenResult; import org.apache.hudi.execution.LazyInsertIterable.HoodieInsertValueGenResult;
import org.apache.avro.generic.IndexedRecord; import org.apache.avro.generic.IndexedRecord;
import org.junit.After; import org.junit.jupiter.api.AfterEach;
import org.junit.Assert; import org.junit.jupiter.api.BeforeEach;
import org.junit.Before; import org.junit.jupiter.api.Test;
import org.junit.Test;
import java.util.List; import java.util.List;
import scala.Tuple2; import scala.Tuple2;
import static org.apache.hudi.execution.LazyInsertIterable.getTransformFunction; import static org.apache.hudi.execution.LazyInsertIterable.getTransformFunction;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.mockito.Mockito.mock; import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when; import static org.mockito.Mockito.when;
@@ -45,12 +46,12 @@ public class TestBoundedInMemoryExecutor extends HoodieClientTestHarness {
private final String instantTime = HoodieActiveTimeline.createNewInstantTime(); private final String instantTime = HoodieActiveTimeline.createNewInstantTime();
@Before @BeforeEach
public void setUp() throws Exception { public void setUp() throws Exception {
initTestDataGenerator(); initTestDataGenerator();
} }
@After @AfterEach
public void tearDown() throws Exception { public void tearDown() throws Exception {
cleanupTestDataGenerator(); cleanupTestDataGenerator();
} }
@@ -73,7 +74,8 @@ public class TestBoundedInMemoryExecutor extends HoodieClientTestHarness {
} }
@Override @Override
protected void finish() {} protected void finish() {
}
@Override @Override
protected Integer getResult() { protected Integer getResult() {
@@ -87,9 +89,9 @@ public class TestBoundedInMemoryExecutor extends HoodieClientTestHarness {
getTransformFunction(HoodieTestDataGenerator.AVRO_SCHEMA)); getTransformFunction(HoodieTestDataGenerator.AVRO_SCHEMA));
int result = executor.execute(); int result = executor.execute();
// It should buffer and write 100 records // It should buffer and write 100 records
Assert.assertEquals(result, 100); assertEquals(100, result);
// There should be no remaining records in the buffer // There should be no remaining records in the buffer
Assert.assertFalse(executor.isRemaining()); assertFalse(executor.isRemaining());
} finally { } finally {
if (executor != null) { if (executor != null) {
executor.shutdownNow(); executor.shutdownNow();

View File

@@ -34,10 +34,10 @@ import org.apache.hudi.exception.HoodieException;
import org.apache.hudi.execution.LazyInsertIterable.HoodieInsertValueGenResult; import org.apache.hudi.execution.LazyInsertIterable.HoodieInsertValueGenResult;
import org.apache.avro.generic.IndexedRecord; import org.apache.avro.generic.IndexedRecord;
import org.junit.After; import org.junit.jupiter.api.AfterEach;
import org.junit.Assert; import org.junit.jupiter.api.BeforeEach;
import org.junit.Before; import org.junit.jupiter.api.Test;
import org.junit.Test; import org.junit.jupiter.api.Timeout;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.HashMap; import java.util.HashMap;
@@ -54,6 +54,9 @@ import java.util.stream.IntStream;
import scala.Tuple2; import scala.Tuple2;
import static org.apache.hudi.execution.LazyInsertIterable.getTransformFunction; import static org.apache.hudi.execution.LazyInsertIterable.getTransformFunction;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.mockito.Mockito.mock; import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when; import static org.mockito.Mockito.when;
@@ -61,13 +64,13 @@ public class TestBoundedInMemoryQueue extends HoodieClientTestHarness {
private final String instantTime = HoodieActiveTimeline.createNewInstantTime(); private final String instantTime = HoodieActiveTimeline.createNewInstantTime();
@Before @BeforeEach
public void setUp() throws Exception { public void setUp() throws Exception {
initTestDataGenerator(); initTestDataGenerator();
initExecutorServiceWithFixedThreadPool(2); initExecutorServiceWithFixedThreadPool(2);
} }
@After @AfterEach
public void tearDown() throws Exception { public void tearDown() throws Exception {
cleanupTestDataGenerator(); cleanupTestDataGenerator();
cleanupExecutorService(); cleanupExecutorService();
@@ -76,7 +79,8 @@ public class TestBoundedInMemoryQueue extends HoodieClientTestHarness {
// Test to ensure that we are reading all records from queue iterator in the same order // Test to ensure that we are reading all records from queue iterator in the same order
// without any exceptions. // without any exceptions.
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
@Test(timeout = 60000) @Test
@Timeout(value = 60)
public void testRecordReading() throws Exception { public void testRecordReading() throws Exception {
final int numRecords = 128; final int numRecords = 128;
final List<HoodieRecord> hoodieRecords = dataGen.generateInserts(instantTime, numRecords); final List<HoodieRecord> hoodieRecords = dataGen.generateInserts(instantTime, numRecords);
@@ -96,15 +100,15 @@ public class TestBoundedInMemoryQueue extends HoodieClientTestHarness {
originalRecord.getData().getInsertValue(HoodieTestDataGenerator.AVRO_SCHEMA); originalRecord.getData().getInsertValue(HoodieTestDataGenerator.AVRO_SCHEMA);
final HoodieInsertValueGenResult<HoodieRecord> payload = queue.iterator().next(); final HoodieInsertValueGenResult<HoodieRecord> payload = queue.iterator().next();
// Ensure that record ordering is guaranteed. // Ensure that record ordering is guaranteed.
Assert.assertEquals(originalRecord, payload.record); assertEquals(originalRecord, payload.record);
// cached insert value matches the expected insert value. // cached insert value matches the expected insert value.
Assert.assertEquals(originalInsertValue, assertEquals(originalInsertValue,
payload.record.getData().getInsertValue(HoodieTestDataGenerator.AVRO_SCHEMA)); payload.record.getData().getInsertValue(HoodieTestDataGenerator.AVRO_SCHEMA));
recordsRead++; recordsRead++;
} }
Assert.assertFalse(queue.iterator().hasNext() || originalRecordIterator.hasNext()); assertFalse(queue.iterator().hasNext() || originalRecordIterator.hasNext());
// all the records should be read successfully. // all the records should be read successfully.
Assert.assertEquals(numRecords, recordsRead); assertEquals(numRecords, recordsRead);
// should not throw any exceptions. // should not throw any exceptions.
resFuture.get(); resFuture.get();
} }
@@ -113,7 +117,8 @@ public class TestBoundedInMemoryQueue extends HoodieClientTestHarness {
* Test to ensure that we are reading all records from queue iterator when we have multiple producers. * Test to ensure that we are reading all records from queue iterator when we have multiple producers.
*/ */
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
@Test(timeout = 60000) @Test
@Timeout(value = 60)
public void testCompositeProducerRecordReading() throws Exception { public void testCompositeProducerRecordReading() throws Exception {
final int numRecords = 1000; final int numRecords = 1000;
final int numProducers = 40; final int numProducers = 40;
@@ -129,7 +134,7 @@ public class TestBoundedInMemoryQueue extends HoodieClientTestHarness {
List<HoodieRecord> pRecs = dataGen.generateInserts(instantTime, numRecords); List<HoodieRecord> pRecs = dataGen.generateInserts(instantTime, numRecords);
int j = 0; int j = 0;
for (HoodieRecord r : pRecs) { for (HoodieRecord r : pRecs) {
Assert.assertTrue(!keyToProducerAndIndexMap.containsKey(r.getRecordKey())); assertFalse(keyToProducerAndIndexMap.containsKey(r.getRecordKey()));
keyToProducerAndIndexMap.put(r.getRecordKey(), new Tuple2<>(i, j)); keyToProducerAndIndexMap.put(r.getRecordKey(), new Tuple2<>(i, j));
j++; j++;
} }
@@ -192,12 +197,12 @@ public class TestBoundedInMemoryQueue extends HoodieClientTestHarness {
countMap.put(producerPos._1(), countMap.get(producerPos._1()) + 1); countMap.put(producerPos._1(), countMap.get(producerPos._1()) + 1);
lastSeenMap.put(producerPos._1(), lastSeenPos + 1); lastSeenMap.put(producerPos._1(), lastSeenPos + 1);
// Ensure we are seeing the next record generated // Ensure we are seeing the next record generated
Assert.assertEquals(lastSeenPos + 1, producerPos._2().intValue()); assertEquals(lastSeenPos + 1, producerPos._2().intValue());
} }
for (int i = 0; i < numProducers; i++) { for (int i = 0; i < numProducers; i++) {
// Ensure we have seen all the records for each producer // Ensure we have seen all the records for each producer
Assert.assertEquals(Integer.valueOf(numRecords), countMap.get(i)); assertEquals(Integer.valueOf(numRecords), countMap.get(i));
} }
// Ensure Close future is done // Ensure Close future is done
@@ -206,7 +211,8 @@ public class TestBoundedInMemoryQueue extends HoodieClientTestHarness {
// Test to ensure that record queueing is throttled when we hit memory limit. // Test to ensure that record queueing is throttled when we hit memory limit.
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
@Test(timeout = 60000) @Test
@Timeout(value = 60)
public void testMemoryLimitForBuffering() throws Exception { public void testMemoryLimitForBuffering() throws Exception {
final int numRecords = 128; final int numRecords = 128;
final List<HoodieRecord> hoodieRecords = dataGen.generateInserts(instantTime, numRecords); final List<HoodieRecord> hoodieRecords = dataGen.generateInserts(instantTime, numRecords);
@@ -229,14 +235,14 @@ public class TestBoundedInMemoryQueue extends HoodieClientTestHarness {
while (!isQueueFull(queue.rateLimiter)) { while (!isQueueFull(queue.rateLimiter)) {
Thread.sleep(10); Thread.sleep(10);
} }
Assert.assertEquals(0, queue.rateLimiter.availablePermits()); assertEquals(0, queue.rateLimiter.availablePermits());
Assert.assertEquals(recordLimit, queue.currentRateLimit); assertEquals(recordLimit, queue.currentRateLimit);
Assert.assertEquals(recordLimit, queue.size()); assertEquals(recordLimit, queue.size());
Assert.assertEquals(recordLimit - 1, queue.samplingRecordCounter.get()); assertEquals(recordLimit - 1, queue.samplingRecordCounter.get());
// try to read 2 records. // try to read 2 records.
Assert.assertEquals(hoodieRecords.get(0), queue.iterator().next().record); assertEquals(hoodieRecords.get(0), queue.iterator().next().record);
Assert.assertEquals(hoodieRecords.get(1), queue.iterator().next().record); assertEquals(hoodieRecords.get(1), queue.iterator().next().record);
// waiting for permits to expire. // waiting for permits to expire.
while (!isQueueFull(queue.rateLimiter)) { while (!isQueueFull(queue.rateLimiter)) {
@@ -245,17 +251,18 @@ public class TestBoundedInMemoryQueue extends HoodieClientTestHarness {
// No change is expected in rate limit or number of queued records. We only expect // No change is expected in rate limit or number of queued records. We only expect
// queueing thread to read // queueing thread to read
// 2 more records into the queue. // 2 more records into the queue.
Assert.assertEquals(0, queue.rateLimiter.availablePermits()); assertEquals(0, queue.rateLimiter.availablePermits());
Assert.assertEquals(recordLimit, queue.currentRateLimit); assertEquals(recordLimit, queue.currentRateLimit);
Assert.assertEquals(recordLimit, queue.size()); assertEquals(recordLimit, queue.size());
Assert.assertEquals(recordLimit - 1 + 2, queue.samplingRecordCounter.get()); assertEquals(recordLimit - 1 + 2, queue.samplingRecordCounter.get());
} }
// Test to ensure that exception in either queueing thread or BufferedIterator-reader thread // Test to ensure that exception in either queueing thread or BufferedIterator-reader thread
// is propagated to // is propagated to
// another thread. // another thread.
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
@Test(timeout = 60000) @Test
@Timeout(value = 60)
public void testException() throws Exception { public void testException() throws Exception {
final int numRecords = 256; final int numRecords = 256;
final List<HoodieRecord> hoodieRecords = dataGen.generateInserts(instantTime, numRecords); final List<HoodieRecord> hoodieRecords = dataGen.generateInserts(instantTime, numRecords);
@@ -285,13 +292,10 @@ public class TestBoundedInMemoryQueue extends HoodieClientTestHarness {
// notify queueing thread of an exception and ensure that it exits. // notify queueing thread of an exception and ensure that it exits.
final Exception e = new Exception("Failing it :)"); final Exception e = new Exception("Failing it :)");
queue1.markAsFailed(e); queue1.markAsFailed(e);
try { final Throwable thrown1 = assertThrows(ExecutionException.class, resFuture::get,
resFuture.get(); "exception is expected");
Assert.fail("exception is expected"); assertEquals(HoodieException.class, thrown1.getCause().getClass());
} catch (ExecutionException e1) { assertEquals(e, thrown1.getCause().getCause());
Assert.assertEquals(HoodieException.class, e1.getCause().getClass());
Assert.assertEquals(e, e1.getCause().getCause());
}
// second let us raise an exception while doing record queueing. this exception should get // second let us raise an exception while doing record queueing. this exception should get
// propagated to // propagated to
@@ -314,19 +318,14 @@ public class TestBoundedInMemoryQueue extends HoodieClientTestHarness {
return true; return true;
}); });
try { final Throwable thrown2 = assertThrows(Exception.class, () -> {
queue2.iterator().hasNext(); queue2.iterator().hasNext();
Assert.fail("exception is expected"); }, "exception is expected");
} catch (Exception e1) { assertEquals(expectedException, thrown2.getCause());
Assert.assertEquals(expectedException, e1.getCause());
}
// queueing thread should also have exited. make sure that it is not running. // queueing thread should also have exited. make sure that it is not running.
try { final Throwable thrown3 = assertThrows(ExecutionException.class, res::get,
res.get(); "exception is expected");
Assert.fail("exception is expected"); assertEquals(expectedException, thrown3.getCause());
} catch (ExecutionException e2) {
Assert.assertEquals(expectedException, e2.getCause());
}
} }
private boolean isQueueFull(Semaphore rateLimiter) { private boolean isQueueFull(Semaphore rateLimiter) {

View File

@@ -32,19 +32,20 @@ import org.apache.hudi.index.hbase.HBaseIndexQPSResourceAllocator;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseTestingUtility; import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.junit.After; import org.junit.jupiter.api.AfterEach;
import org.junit.Assert; import org.junit.jupiter.api.BeforeEach;
import org.junit.Before; import org.junit.jupiter.api.Test;
import org.junit.Test;
import static org.junit.jupiter.api.Assertions.assertEquals;
public class TestHBaseQPSResourceAllocator extends HoodieClientTestHarness { public class TestHBaseQPSResourceAllocator extends HoodieClientTestHarness {
private static String tableName = "test_table"; private static final String TABLE_NAME = "test_table";
private static final String QPS_TEST_SUFFIX_PATH = "qps_test_suffix";
private HBaseTestingUtility utility; private HBaseTestingUtility utility;
private Configuration hbaseConfig; private Configuration hbaseConfig;
private static String QPS_TEST_SUFFIX_PATH = "qps_test_suffix";
@Before @BeforeEach
public void setUp() throws Exception { public void setUp() throws Exception {
utility = new HBaseTestingUtility(); utility = new HBaseTestingUtility();
utility.startMiniCluster(); utility.startMiniCluster();
@@ -52,12 +53,12 @@ public class TestHBaseQPSResourceAllocator extends HoodieClientTestHarness {
initSparkContexts("TestQPSResourceAllocator"); initSparkContexts("TestQPSResourceAllocator");
initPath(); initPath();
basePath = folder.getRoot().getAbsolutePath() + QPS_TEST_SUFFIX_PATH; basePath = tempDir.resolve(QPS_TEST_SUFFIX_PATH).toAbsolutePath().toString();
// Initialize table // Initialize table
initMetaClient(); initMetaClient();
} }
@After @AfterEach
public void tearDown() throws Exception { public void tearDown() throws Exception {
cleanupSparkContexts(); cleanupSparkContexts();
cleanupMetaClient(); cleanupMetaClient();
@@ -71,9 +72,9 @@ public class TestHBaseQPSResourceAllocator extends HoodieClientTestHarness {
HoodieWriteConfig config = getConfig(Option.empty()); HoodieWriteConfig config = getConfig(Option.empty());
HBaseIndex index = new HBaseIndex(config); HBaseIndex index = new HBaseIndex(config);
HBaseIndexQPSResourceAllocator hBaseIndexQPSResourceAllocator = index.createQPSResourceAllocator(config); HBaseIndexQPSResourceAllocator hBaseIndexQPSResourceAllocator = index.createQPSResourceAllocator(config);
Assert.assertEquals(hBaseIndexQPSResourceAllocator.getClass().getName(), assertEquals(hBaseIndexQPSResourceAllocator.getClass().getName(),
DefaultHBaseQPSResourceAllocator.class.getName()); DefaultHBaseQPSResourceAllocator.class.getName());
Assert.assertEquals(config.getHbaseIndexQPSFraction(), assertEquals(config.getHbaseIndexQPSFraction(),
hBaseIndexQPSResourceAllocator.acquireQPSResources(config.getHbaseIndexQPSFraction(), 100), 0.0f); hBaseIndexQPSResourceAllocator.acquireQPSResources(config.getHbaseIndexQPSFraction(), 100), 0.0f);
} }
@@ -82,9 +83,9 @@ public class TestHBaseQPSResourceAllocator extends HoodieClientTestHarness {
HoodieWriteConfig config = getConfig(Option.of(HoodieHBaseIndexConfig.DEFAULT_HBASE_INDEX_QPS_ALLOCATOR_CLASS)); HoodieWriteConfig config = getConfig(Option.of(HoodieHBaseIndexConfig.DEFAULT_HBASE_INDEX_QPS_ALLOCATOR_CLASS));
HBaseIndex index = new HBaseIndex(config); HBaseIndex index = new HBaseIndex(config);
HBaseIndexQPSResourceAllocator hBaseIndexQPSResourceAllocator = index.createQPSResourceAllocator(config); HBaseIndexQPSResourceAllocator hBaseIndexQPSResourceAllocator = index.createQPSResourceAllocator(config);
Assert.assertEquals(hBaseIndexQPSResourceAllocator.getClass().getName(), assertEquals(hBaseIndexQPSResourceAllocator.getClass().getName(),
DefaultHBaseQPSResourceAllocator.class.getName()); DefaultHBaseQPSResourceAllocator.class.getName());
Assert.assertEquals(config.getHbaseIndexQPSFraction(), assertEquals(config.getHbaseIndexQPSFraction(),
hBaseIndexQPSResourceAllocator.acquireQPSResources(config.getHbaseIndexQPSFraction(), 100), 0.0f); hBaseIndexQPSResourceAllocator.acquireQPSResources(config.getHbaseIndexQPSFraction(), 100), 0.0f);
} }
@@ -93,9 +94,9 @@ public class TestHBaseQPSResourceAllocator extends HoodieClientTestHarness {
HoodieWriteConfig config = getConfig(Option.of("InvalidResourceAllocatorClassName")); HoodieWriteConfig config = getConfig(Option.of("InvalidResourceAllocatorClassName"));
HBaseIndex index = new HBaseIndex(config); HBaseIndex index = new HBaseIndex(config);
HBaseIndexQPSResourceAllocator hBaseIndexQPSResourceAllocator = index.createQPSResourceAllocator(config); HBaseIndexQPSResourceAllocator hBaseIndexQPSResourceAllocator = index.createQPSResourceAllocator(config);
Assert.assertEquals(hBaseIndexQPSResourceAllocator.getClass().getName(), assertEquals(hBaseIndexQPSResourceAllocator.getClass().getName(),
DefaultHBaseQPSResourceAllocator.class.getName()); DefaultHBaseQPSResourceAllocator.class.getName());
Assert.assertEquals(config.getHbaseIndexQPSFraction(), assertEquals(config.getHbaseIndexQPSFraction(),
hBaseIndexQPSResourceAllocator.acquireQPSResources(config.getHbaseIndexQPSFraction(), 100), 0.0f); hBaseIndexQPSResourceAllocator.acquireQPSResources(config.getHbaseIndexQPSFraction(), 100), 0.0f);
} }
@@ -117,7 +118,7 @@ public class TestHBaseQPSResourceAllocator extends HoodieClientTestHarness {
private HoodieHBaseIndexConfig getConfigWithResourceAllocator(Option<String> resourceAllocatorClass) { private HoodieHBaseIndexConfig getConfigWithResourceAllocator(Option<String> resourceAllocatorClass) {
HoodieHBaseIndexConfig.Builder builder = new HoodieHBaseIndexConfig.Builder() HoodieHBaseIndexConfig.Builder builder = new HoodieHBaseIndexConfig.Builder()
.hbaseZkPort(Integer.parseInt(hbaseConfig.get("hbase.zookeeper.property.clientPort"))) .hbaseZkPort(Integer.parseInt(hbaseConfig.get("hbase.zookeeper.property.clientPort")))
.hbaseZkQuorum(hbaseConfig.get("hbase.zookeeper.quorum")).hbaseTableName(tableName).hbaseIndexGetBatchSize(100); .hbaseZkQuorum(hbaseConfig.get("hbase.zookeeper.quorum")).hbaseTableName(TABLE_NAME).hbaseIndexGetBatchSize(100);
if (resourceAllocatorClass.isPresent()) { if (resourceAllocatorClass.isPresent()) {
builder.withQPSResourceAllocatorType(resourceAllocatorClass.get()); builder.withQPSResourceAllocatorType(resourceAllocatorClass.get());
} }

View File

@@ -50,14 +50,13 @@ import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.Bytes;
import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaRDD;
import org.junit.After; import org.junit.jupiter.api.AfterAll;
import org.junit.AfterClass; import org.junit.jupiter.api.AfterEach;
import org.junit.Before; import org.junit.jupiter.api.BeforeAll;
import org.junit.BeforeClass; import org.junit.jupiter.api.BeforeEach;
import org.junit.FixMethodOrder; import org.junit.jupiter.api.MethodOrderer;
import org.junit.Test; import org.junit.jupiter.api.Test;
import org.junit.runners.MethodSorters; import org.junit.jupiter.api.TestMethodOrder;
import org.mockito.Mockito;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
@@ -65,12 +64,13 @@ import java.util.List;
import scala.Tuple2; import scala.Tuple2;
import static org.junit.Assert.assertEquals; import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.Assert.assertFalse; import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.Assert.assertTrue; import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.Assert.fail; import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.any;
import static org.mockito.Mockito.atMost; import static org.mockito.Mockito.atMost;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.times; import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify; import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when; import static org.mockito.Mockito.when;
@@ -78,9 +78,9 @@ import static org.mockito.Mockito.when;
/** /**
* Note :: HBaseTestingUtility is really flaky with issues where the HbaseMiniCluster fails to shutdown across tests, * Note :: HBaseTestingUtility is really flaky with issues where the HbaseMiniCluster fails to shutdown across tests,
* (see one problem here : https://issues.apache.org/jira/browse/HBASE-15835). Hence, the need to use * (see one problem here : https://issues.apache.org/jira/browse/HBASE-15835). Hence, the need to use
* MethodSorters.NAME_ASCENDING to make sure the tests run in order. Please alter the order of tests running carefully. * {@link MethodOrderer.Alphanumeric} to make sure the tests run in order. Please alter the order of tests running carefully.
*/ */
@FixMethodOrder(MethodSorters.NAME_ASCENDING) @TestMethodOrder(MethodOrderer.Alphanumeric.class)
public class TestHbaseIndex extends HoodieClientTestHarness { public class TestHbaseIndex extends HoodieClientTestHarness {
private static HBaseTestingUtility utility; private static HBaseTestingUtility utility;
@@ -89,14 +89,14 @@ public class TestHbaseIndex extends HoodieClientTestHarness {
public TestHbaseIndex() {} public TestHbaseIndex() {}
@AfterClass @AfterAll
public static void clean() throws Exception { public static void clean() throws Exception {
if (utility != null) { if (utility != null) {
utility.shutdownMiniCluster(); utility.shutdownMiniCluster();
} }
} }
@BeforeClass @BeforeAll
public static void init() throws Exception { public static void init() throws Exception {
// Initialize HbaseMiniCluster // Initialize HbaseMiniCluster
hbaseConfig = HBaseConfiguration.create(); hbaseConfig = HBaseConfiguration.create();
@@ -108,7 +108,7 @@ public class TestHbaseIndex extends HoodieClientTestHarness {
utility.createTable(TableName.valueOf(tableName), Bytes.toBytes("_s")); utility.createTable(TableName.valueOf(tableName), Bytes.toBytes("_s"));
} }
@Before @BeforeEach
public void setUp() throws Exception { public void setUp() throws Exception {
// Initialize a local spark env // Initialize a local spark env
initSparkContexts("TestHbaseIndex"); initSparkContexts("TestHbaseIndex");
@@ -120,7 +120,7 @@ public class TestHbaseIndex extends HoodieClientTestHarness {
initMetaClient(); initMetaClient();
} }
@After @AfterEach
public void tearDown() throws Exception { public void tearDown() throws Exception {
cleanupSparkContexts(); cleanupSparkContexts();
cleanupTestDataGenerator(); cleanupTestDataGenerator();
@@ -257,8 +257,8 @@ public class TestHbaseIndex extends HoodieClientTestHarness {
HBaseIndex index = new HBaseIndex(config); HBaseIndex index = new HBaseIndex(config);
// Mock hbaseConnection and related entities // Mock hbaseConnection and related entities
Connection hbaseConnection = Mockito.mock(Connection.class); Connection hbaseConnection = mock(Connection.class);
HTable table = Mockito.mock(HTable.class); HTable table = mock(HTable.class);
when(hbaseConnection.getTable(TableName.valueOf(tableName))).thenReturn(table); when(hbaseConnection.getTable(TableName.valueOf(tableName))).thenReturn(table);
when(table.get((List<Get>) any())).thenReturn(new Result[0]); when(table.get((List<Get>) any())).thenReturn(new Result[0]);
@@ -306,8 +306,8 @@ public class TestHbaseIndex extends HoodieClientTestHarness {
writeClient.commit(newCommitTime, writeStatues); writeClient.commit(newCommitTime, writeStatues);
// Mock hbaseConnection and related entities // Mock hbaseConnection and related entities
Connection hbaseConnection = Mockito.mock(Connection.class); Connection hbaseConnection = mock(Connection.class);
HTable table = Mockito.mock(HTable.class); HTable table = mock(HTable.class);
when(hbaseConnection.getTable(TableName.valueOf(tableName))).thenReturn(table); when(hbaseConnection.getTable(TableName.valueOf(tableName))).thenReturn(table);
when(table.get((List<Get>) any())).thenReturn(new Result[0]); when(table.get((List<Get>) any())).thenReturn(new Result[0]);
@@ -335,28 +335,28 @@ public class TestHbaseIndex extends HoodieClientTestHarness {
// 8 (batchSize) * 200 (parallelism) * 10 (maxReqsInOneSecond) * 10 (numRegionServers) * 0.1 (qpsFraction)) => 16000 // 8 (batchSize) * 200 (parallelism) * 10 (maxReqsInOneSecond) * 10 (numRegionServers) * 0.1 (qpsFraction)) => 16000
// We assume requests get distributed to Region Servers uniformly, so each RS gets 1600 request // We assume requests get distributed to Region Servers uniformly, so each RS gets 1600 request
// 1600 happens to be 10% of 16667 (maxQPSPerRegionServer) as expected. // 1600 happens to be 10% of 16667 (maxQPSPerRegionServer) as expected.
assertEquals(putBatchSize, 8); assertEquals(8, putBatchSize);
// Number of Region Servers are halved, total requests sent in a second are also halved, so batchSize is also halved // Number of Region Servers are halved, total requests sent in a second are also halved, so batchSize is also halved
int putBatchSize2 = batchSizeCalculator.getBatchSize(5, 16667, 1200, 200, 100, 0.1f); int putBatchSize2 = batchSizeCalculator.getBatchSize(5, 16667, 1200, 200, 100, 0.1f);
assertEquals(putBatchSize2, 4); assertEquals(4, putBatchSize2);
// If the parallelism is halved, batchSize has to double // If the parallelism is halved, batchSize has to double
int putBatchSize3 = batchSizeCalculator.getBatchSize(10, 16667, 1200, 100, 100, 0.1f); int putBatchSize3 = batchSizeCalculator.getBatchSize(10, 16667, 1200, 100, 100, 0.1f);
assertEquals(putBatchSize3, 16); assertEquals(16, putBatchSize3);
// If the parallelism is halved, batchSize has to double. // If the parallelism is halved, batchSize has to double.
// This time parallelism is driven by numTasks rather than numExecutors // This time parallelism is driven by numTasks rather than numExecutors
int putBatchSize4 = batchSizeCalculator.getBatchSize(10, 16667, 100, 200, 100, 0.1f); int putBatchSize4 = batchSizeCalculator.getBatchSize(10, 16667, 100, 200, 100, 0.1f);
assertEquals(putBatchSize4, 16); assertEquals(16, putBatchSize4);
// If sleepTimeMs is halved, batchSize has to halve // If sleepTimeMs is halved, batchSize has to halve
int putBatchSize5 = batchSizeCalculator.getBatchSize(10, 16667, 1200, 200, 100, 0.05f); int putBatchSize5 = batchSizeCalculator.getBatchSize(10, 16667, 1200, 200, 100, 0.05f);
assertEquals(putBatchSize5, 4); assertEquals(4, putBatchSize5);
// If maxQPSPerRegionServer is doubled, batchSize also doubles // If maxQPSPerRegionServer is doubled, batchSize also doubles
int putBatchSize6 = batchSizeCalculator.getBatchSize(10, 33334, 1200, 200, 100, 0.1f); int putBatchSize6 = batchSizeCalculator.getBatchSize(10, 33334, 1200, 200, 100, 0.1f);
assertEquals(putBatchSize6, 16); assertEquals(16, putBatchSize6);
} }
@Test @Test
@@ -494,19 +494,15 @@ public class TestHbaseIndex extends HoodieClientTestHarness {
} }
@Test @Test
public void testFeatureSupport() throws Exception { public void testFeatureSupport() {
HoodieWriteConfig config = getConfig(); HoodieWriteConfig config = getConfig();
HBaseIndex index = new HBaseIndex(config); HBaseIndex index = new HBaseIndex(config);
assertTrue(index.canIndexLogFiles()); assertTrue(index.canIndexLogFiles());
try { assertThrows(UnsupportedOperationException.class, () -> {
HoodieTable hoodieTable = HoodieTable.create(metaClient, config, jsc); HoodieTable hoodieTable = HoodieTable.create(metaClient, config, jsc);
index.fetchRecordLocation(jsc.parallelize(new ArrayList<HoodieKey>(), 1), jsc, hoodieTable); index.fetchRecordLocation(jsc.parallelize(new ArrayList<HoodieKey>(), 1), jsc, hoodieTable);
fail("HbaseIndex supports fetchRecordLocation"); }, "HbaseIndex supports fetchRecordLocation");
} catch (UnsupportedOperationException ex) {
// Expected so ignore
ex.getStackTrace();
}
} }
private WriteStatus getSampleWriteStatus(final int numInserts, final int numUpdateWrites) { private WriteStatus getSampleWriteStatus(final int numInserts, final int numUpdateWrites) {
@@ -521,7 +517,7 @@ public class TestHbaseIndex extends HoodieClientTestHarness {
private void assertNoWriteErrors(List<WriteStatus> statuses) { private void assertNoWriteErrors(List<WriteStatus> statuses) {
// Verify there are no errors // Verify there are no errors
for (WriteStatus status : statuses) { for (WriteStatus status : statuses) {
assertFalse("Errors found in write of " + status.getFileId(), status.hasErrors()); assertFalse(status.hasErrors(), "Errors found in write of " + status.getFileId());
} }
} }

View File

@@ -39,23 +39,28 @@ import org.apache.hudi.table.HoodieTable;
import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.JavaSparkContext;
import org.junit.After; import org.junit.jupiter.api.AfterEach;
import org.junit.Before; import org.junit.jupiter.api.BeforeEach;
import org.junit.Test; import org.junit.jupiter.api.Test;
import static org.junit.Assert.assertTrue; import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.Assert.fail; import static org.junit.jupiter.api.Assertions.assertTrue;
public class TestHoodieIndex extends HoodieClientTestHarness { public class TestHoodieIndex extends HoodieClientTestHarness {
@Before private HoodieWriteConfig.Builder clientConfigBuilder;
private HoodieIndexConfig.Builder indexConfigBuilder;
@BeforeEach
public void setUp() throws Exception { public void setUp() throws Exception {
initSparkContexts("TestHoodieIndex"); initSparkContexts("TestHoodieIndex");
initPath(); initPath();
initMetaClient(); initMetaClient();
clientConfigBuilder = HoodieWriteConfig.newBuilder();
indexConfigBuilder = HoodieIndexConfig.newBuilder();
} }
@After @AfterEach
public void tearDown() { public void tearDown() {
cleanupSparkContexts(); cleanupSparkContexts();
cleanupMetaClient(); cleanupMetaClient();
@@ -63,8 +68,6 @@ public class TestHoodieIndex extends HoodieClientTestHarness {
@Test @Test
public void testCreateIndex() { public void testCreateIndex() {
HoodieWriteConfig.Builder clientConfigBuilder = HoodieWriteConfig.newBuilder();
HoodieIndexConfig.Builder indexConfigBuilder = HoodieIndexConfig.newBuilder();
// Different types // Different types
HoodieWriteConfig config = clientConfigBuilder.withPath(basePath) HoodieWriteConfig config = clientConfigBuilder.withPath(basePath)
.withIndexConfig(indexConfigBuilder.withIndexType(HoodieIndex.IndexType.HBASE) .withIndexConfig(indexConfigBuilder.withIndexType(HoodieIndex.IndexType.HBASE)
@@ -84,27 +87,27 @@ public class TestHoodieIndex extends HoodieClientTestHarness {
config = clientConfigBuilder.withPath(basePath) config = clientConfigBuilder.withPath(basePath)
.withIndexConfig(indexConfigBuilder.withIndexClass(DummyHoodieIndex.class.getName()).build()).build(); .withIndexConfig(indexConfigBuilder.withIndexClass(DummyHoodieIndex.class.getName()).build()).build();
assertTrue(HoodieIndex.createIndex(config, jsc) instanceof DummyHoodieIndex); assertTrue(HoodieIndex.createIndex(config, jsc) instanceof DummyHoodieIndex);
}
config = clientConfigBuilder.withPath(basePath) @Test
public void testCreateIndex_withException() {
final HoodieWriteConfig config1 = clientConfigBuilder.withPath(basePath)
.withIndexConfig(indexConfigBuilder.withIndexClass(IndexWithConstructor.class.getName()).build()).build(); .withIndexConfig(indexConfigBuilder.withIndexClass(IndexWithConstructor.class.getName()).build()).build();
try { final Throwable thrown1 = assertThrows(HoodieException.class, () -> {
HoodieIndex.createIndex(config, jsc); HoodieIndex.createIndex(config1, jsc);
fail("exception is expected"); }, "exception is expected");
} catch (HoodieIndexException e) { assertTrue(thrown1.getMessage().contains("is not a subclass of HoodieIndex"));
assertTrue(e.getMessage().contains("is not a subclass of HoodieIndex"));
}
config = clientConfigBuilder.withPath(basePath) final HoodieWriteConfig config2 = clientConfigBuilder.withPath(basePath)
.withIndexConfig(indexConfigBuilder.withIndexClass(IndexWithoutConstructor.class.getName()).build()).build(); .withIndexConfig(indexConfigBuilder.withIndexClass(IndexWithoutConstructor.class.getName()).build()).build();
try { final Throwable thrown2 = assertThrows(HoodieException.class, () -> {
HoodieIndex.createIndex(config, jsc); HoodieIndex.createIndex(config2, jsc);
fail("exception is expected"); }, "exception is expected");
} catch (HoodieException e) { assertTrue(thrown2.getMessage().contains("Unable to instantiate class"));
assertTrue(e.getMessage().contains("Unable to instantiate class"));
}
} }
public static class DummyHoodieIndex<T extends HoodieRecordPayload> extends HoodieIndex<T> { public static class DummyHoodieIndex<T extends HoodieRecordPayload> extends HoodieIndex<T> {
public DummyHoodieIndex(HoodieWriteConfig config) { public DummyHoodieIndex(HoodieWriteConfig config) {
super(config); super(config);
} }
@@ -146,7 +149,9 @@ public class TestHoodieIndex extends HoodieClientTestHarness {
} }
public static class IndexWithConstructor { public static class IndexWithConstructor {
public IndexWithConstructor(HoodieWriteConfig config) {}
public IndexWithConstructor(HoodieWriteConfig config) {
}
} }
public static class IndexWithoutConstructor { public static class IndexWithoutConstructor {

View File

@@ -41,17 +41,18 @@ import org.apache.avro.Schema;
import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.Path;
import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaRDD;
import org.junit.After; import org.junit.jupiter.api.AfterEach;
import org.junit.Before; import org.junit.jupiter.api.BeforeEach;
import org.junit.Test; import org.junit.jupiter.api.Test;
import org.junit.runner.RunWith; import org.junit.jupiter.params.ParameterizedTest;
import org.junit.runners.Parameterized; import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;
import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.Collection;
import java.util.Collections; import java.util.Collections;
import java.util.HashMap; import java.util.HashMap;
import java.util.HashSet; import java.util.HashSet;
@@ -59,40 +60,31 @@ import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.UUID; import java.util.UUID;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import java.util.stream.Stream;
import scala.Tuple2; import scala.Tuple2;
import static org.junit.Assert.assertEquals; import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
import static org.junit.Assert.assertFalse; import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.Assert.assertNotNull; import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.Assert.assertNull; import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.Assert.assertTrue; import static org.junit.jupiter.api.Assertions.assertNull;
import static org.junit.Assert.fail; import static org.junit.jupiter.api.Assertions.assertTrue;
@RunWith(Parameterized.class)
public class TestHoodieBloomIndex extends HoodieClientTestHarness { public class TestHoodieBloomIndex extends HoodieClientTestHarness {
private String schemaStr; private String schemaStr;
private Schema schema; private Schema schema;
private boolean rangePruning; private static final String TEST_NAME_WITH_PARAMS = "[{index}] Test with rangePruning={0}, treeFiltering={1}, bucketizedChecking={2}";
private boolean treeFiltering;
private boolean bucketizedChecking;
@Parameterized.Parameters(name = "{index}: Test with rangePruning={0}, treeFiltering ={1}, bucketizedChecking is:{2}") public static Stream<Arguments> configParams() {
public static Collection<Object[]> data() {
Object[][] data = Object[][] data =
new Object[][] {{true, true, true}, {false, true, true}, {true, true, false}, {true, false, true}}; new Object[][] {{true, true, true}, {false, true, true}, {true, true, false}, {true, false, true}};
return Arrays.asList(data); return Stream.of(data).map(Arguments::of);
} }
public TestHoodieBloomIndex(boolean rangePruning, boolean treeFiltering, boolean bucketizedChecking) { @BeforeEach
this.rangePruning = rangePruning;
this.treeFiltering = treeFiltering;
this.bucketizedChecking = bucketizedChecking;
}
@Before
public void setUp() throws Exception { public void setUp() throws Exception {
initSparkContexts("TestHoodieBloomIndex"); initSparkContexts("TestHoodieBloomIndex");
initPath(); initPath();
@@ -103,14 +95,14 @@ public class TestHoodieBloomIndex extends HoodieClientTestHarness {
initMetaClient(); initMetaClient();
} }
@After @AfterEach
public void tearDown() throws Exception { public void tearDown() throws Exception {
cleanupSparkContexts(); cleanupSparkContexts();
cleanupFileSystem(); cleanupFileSystem();
cleanupMetaClient(); cleanupMetaClient();
} }
private HoodieWriteConfig makeConfig() { private HoodieWriteConfig makeConfig(boolean rangePruning, boolean treeFiltering, boolean bucketizedChecking) {
return HoodieWriteConfig.newBuilder().withPath(basePath) return HoodieWriteConfig.newBuilder().withPath(basePath)
.withIndexConfig(HoodieIndexConfig.newBuilder().bloomIndexPruneByRanges(rangePruning) .withIndexConfig(HoodieIndexConfig.newBuilder().bloomIndexPruneByRanges(rangePruning)
.bloomIndexTreebasedFilter(treeFiltering).bloomIndexBucketizedChecking(bucketizedChecking) .bloomIndexTreebasedFilter(treeFiltering).bloomIndexBucketizedChecking(bucketizedChecking)
@@ -118,9 +110,10 @@ public class TestHoodieBloomIndex extends HoodieClientTestHarness {
.build(); .build();
} }
@Test @ParameterizedTest(name = TEST_NAME_WITH_PARAMS)
public void testLoadInvolvedFiles() throws IOException { @MethodSource("configParams")
HoodieWriteConfig config = makeConfig(); public void testLoadInvolvedFiles(boolean rangePruning, boolean treeFiltering, boolean bucketizedChecking) throws IOException {
HoodieWriteConfig config = makeConfig(rangePruning, treeFiltering, bucketizedChecking);
HoodieBloomIndex index = new HoodieBloomIndex(config); HoodieBloomIndex index = new HoodieBloomIndex(config);
// Create some partitions, and put some files // Create some partitions, and put some files
@@ -128,9 +121,9 @@ public class TestHoodieBloomIndex extends HoodieClientTestHarness {
// "2016/04/01": 1 file (2_0_20160401010101.parquet) // "2016/04/01": 1 file (2_0_20160401010101.parquet)
// "2015/03/12": 3 files (1_0_20150312101010.parquet, 3_0_20150312101010.parquet, // "2015/03/12": 3 files (1_0_20150312101010.parquet, 3_0_20150312101010.parquet,
// 4_0_20150312101010.parquet) // 4_0_20150312101010.parquet)
new File(basePath + "/2016/01/21").mkdirs(); Files.createDirectories(Paths.get(basePath, "2016", "01", "21"));
new File(basePath + "/2016/04/01").mkdirs(); Files.createDirectories(Paths.get(basePath, "2016", "04", "01"));
new File(basePath + "/2015/03/12").mkdirs(); Files.createDirectories(Paths.get(basePath, "2015", "03", "12"));
TestRawTripPayload rowChange1 = TestRawTripPayload rowChange1 =
new TestRawTripPayload("{\"_row_key\":\"000\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}"); new TestRawTripPayload("{\"_row_key\":\"000\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}");
@@ -163,16 +156,16 @@ public class TestHoodieBloomIndex extends HoodieClientTestHarness {
HoodieTable table = HoodieTable.create(metaClient, config, jsc); HoodieTable table = HoodieTable.create(metaClient, config, jsc);
List<Tuple2<String, BloomIndexFileInfo>> filesList = index.loadInvolvedFiles(partitions, jsc, table); List<Tuple2<String, BloomIndexFileInfo>> filesList = index.loadInvolvedFiles(partitions, jsc, table);
// Still 0, as no valid commit // Still 0, as no valid commit
assertEquals(filesList.size(), 0); assertEquals(0, filesList.size());
// Add some commits // Add some commits
new File(basePath + "/.hoodie").mkdirs(); java.nio.file.Path hoodieDir = Files.createDirectories(Paths.get(basePath, ".hoodie"));
new File(basePath + "/.hoodie/20160401010101.commit").createNewFile(); Files.createFile(hoodieDir.resolve("20160401010101.commit"));
new File(basePath + "/.hoodie/20150312101010.commit").createNewFile(); Files.createFile(hoodieDir.resolve("20150312101010.commit"));
table = HoodieTable.create(metaClient, config, jsc); table = HoodieTable.create(metaClient, config, jsc);
filesList = index.loadInvolvedFiles(partitions, jsc, table); filesList = index.loadInvolvedFiles(partitions, jsc, table);
assertEquals(filesList.size(), 4); assertEquals(4, filesList.size());
if (rangePruning) { if (rangePruning) {
// these files will not have the key ranges // these files will not have the key ranges
@@ -194,9 +187,10 @@ public class TestHoodieBloomIndex extends HoodieClientTestHarness {
} }
} }
@Test @ParameterizedTest(name = TEST_NAME_WITH_PARAMS)
public void testRangePruning() { @MethodSource("configParams")
HoodieWriteConfig config = makeConfig(); public void testRangePruning(boolean rangePruning, boolean treeFiltering, boolean bucketizedChecking) {
HoodieWriteConfig config = makeConfig(rangePruning, treeFiltering, bucketizedChecking);
HoodieBloomIndex index = new HoodieBloomIndex(config); HoodieBloomIndex index = new HoodieBloomIndex(config);
final Map<String, List<BloomIndexFileInfo>> partitionToFileIndexInfo = new HashMap<>(); final Map<String, List<BloomIndexFileInfo>> partitionToFileIndexInfo = new HashMap<>();
@@ -277,27 +271,27 @@ public class TestHoodieBloomIndex extends HoodieClientTestHarness {
// assertTrue(results.get(1)._2().equals(filename)); // assertTrue(results.get(1)._2().equals(filename));
} }
@Test @ParameterizedTest(name = TEST_NAME_WITH_PARAMS)
public void testTagLocationWithEmptyRDD() { @MethodSource("configParams")
public void testTagLocationWithEmptyRDD(boolean rangePruning, boolean treeFiltering, boolean bucketizedChecking) {
// We have some records to be tagged (two different partitions) // We have some records to be tagged (two different partitions)
JavaRDD<HoodieRecord> recordRDD = jsc.emptyRDD(); JavaRDD<HoodieRecord> recordRDD = jsc.emptyRDD();
// Also create the metadata and config // Also create the metadata and config
HoodieWriteConfig config = makeConfig(); HoodieWriteConfig config = makeConfig(rangePruning, treeFiltering, bucketizedChecking);
metaClient = HoodieTableMetaClient.reload(metaClient); metaClient = HoodieTableMetaClient.reload(metaClient);
HoodieTable table = HoodieTable.create(metaClient, config, jsc); HoodieTable table = HoodieTable.create(metaClient, config, jsc);
// Let's tag // Let's tag
HoodieBloomIndex bloomIndex = new HoodieBloomIndex(config); HoodieBloomIndex bloomIndex = new HoodieBloomIndex(config);
try { assertDoesNotThrow(() -> {
bloomIndex.tagLocation(recordRDD, jsc, table); bloomIndex.tagLocation(recordRDD, jsc, table);
} catch (IllegalArgumentException e) { }, "EmptyRDD should not result in IllegalArgumentException: Positive number of slices required");
fail("EmptyRDD should not result in IllegalArgumentException: Positive number of slices required");
}
} }
@Test @ParameterizedTest(name = TEST_NAME_WITH_PARAMS)
public void testTagLocation() throws Exception { @MethodSource("configParams")
public void testTagLocation(boolean rangePruning, boolean treeFiltering, boolean bucketizedChecking) throws Exception {
// We have some records to be tagged (two different partitions) // We have some records to be tagged (two different partitions)
String rowKey1 = UUID.randomUUID().toString(); String rowKey1 = UUID.randomUUID().toString();
String rowKey2 = UUID.randomUUID().toString(); String rowKey2 = UUID.randomUUID().toString();
@@ -322,7 +316,7 @@ public class TestHoodieBloomIndex extends HoodieClientTestHarness {
JavaRDD<HoodieRecord> recordRDD = jsc.parallelize(Arrays.asList(record1, record2, record3, record4)); JavaRDD<HoodieRecord> recordRDD = jsc.parallelize(Arrays.asList(record1, record2, record3, record4));
// Also create the metadata and config // Also create the metadata and config
HoodieWriteConfig config = makeConfig(); HoodieWriteConfig config = makeConfig(rangePruning, treeFiltering, bucketizedChecking);
metaClient = HoodieTableMetaClient.reload(metaClient); metaClient = HoodieTableMetaClient.reload(metaClient);
HoodieTable table = HoodieTable.create(metaClient, config, jsc); HoodieTable table = HoodieTable.create(metaClient, config, jsc);
@@ -365,8 +359,9 @@ public class TestHoodieBloomIndex extends HoodieClientTestHarness {
} }
} }
@Test @ParameterizedTest(name = TEST_NAME_WITH_PARAMS)
public void testCheckExists() throws Exception { @MethodSource("configParams")
public void testCheckExists(boolean rangePruning, boolean treeFiltering, boolean bucketizedChecking) throws Exception {
// We have some records to be tagged (two different partitions) // We have some records to be tagged (two different partitions)
String recordStr1 = "{\"_row_key\":\"1eb5b87a-1feh-4edd-87b4-6ec96dc405a0\"," String recordStr1 = "{\"_row_key\":\"1eb5b87a-1feh-4edd-87b4-6ec96dc405a0\","
@@ -392,7 +387,7 @@ public class TestHoodieBloomIndex extends HoodieClientTestHarness {
JavaRDD<HoodieKey> keysRDD = jsc.parallelize(Arrays.asList(key1, key2, key3, key4)); JavaRDD<HoodieKey> keysRDD = jsc.parallelize(Arrays.asList(key1, key2, key3, key4));
// Also create the metadata and config // Also create the metadata and config
HoodieWriteConfig config = makeConfig(); HoodieWriteConfig config = makeConfig(rangePruning, treeFiltering, bucketizedChecking);
metaClient = HoodieTableMetaClient.reload(metaClient); metaClient = HoodieTableMetaClient.reload(metaClient);
HoodieTable table = HoodieTable.create(metaClient, config, jsc); HoodieTable table = HoodieTable.create(metaClient, config, jsc);
@@ -437,8 +432,9 @@ public class TestHoodieBloomIndex extends HoodieClientTestHarness {
} }
} }
@Test @ParameterizedTest(name = TEST_NAME_WITH_PARAMS)
public void testBloomFilterFalseError() throws IOException, InterruptedException { @MethodSource("configParams")
public void testBloomFilterFalseError(boolean rangePruning, boolean treeFiltering, boolean bucketizedChecking) throws IOException, InterruptedException {
// We have two hoodie records // We have two hoodie records
String recordStr1 = "{\"_row_key\":\"1eb5b87a-1feh-4edd-87b4-6ec96dc405a0\"," String recordStr1 = "{\"_row_key\":\"1eb5b87a-1feh-4edd-87b4-6ec96dc405a0\","
+ "\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}"; + "\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}";
@@ -463,7 +459,7 @@ public class TestHoodieBloomIndex extends HoodieClientTestHarness {
// We do the tag // We do the tag
JavaRDD<HoodieRecord> recordRDD = jsc.parallelize(Arrays.asList(record1, record2)); JavaRDD<HoodieRecord> recordRDD = jsc.parallelize(Arrays.asList(record1, record2));
HoodieWriteConfig config = makeConfig(); HoodieWriteConfig config = makeConfig(rangePruning, treeFiltering, bucketizedChecking);
metaClient = HoodieTableMetaClient.reload(metaClient); metaClient = HoodieTableMetaClient.reload(metaClient);
HoodieTable table = HoodieTable.create(metaClient, config, jsc); HoodieTable table = HoodieTable.create(metaClient, config, jsc);

View File

@@ -36,12 +36,14 @@ import org.apache.hudi.table.HoodieTable;
import org.apache.avro.Schema; import org.apache.avro.Schema;
import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaRDD;
import org.junit.After; import org.junit.jupiter.api.AfterEach;
import org.junit.Before; import org.junit.jupiter.api.BeforeEach;
import org.junit.Test; import org.junit.jupiter.api.Test;
import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.Collections; import java.util.Collections;
@@ -53,12 +55,12 @@ import java.util.stream.Collectors;
import scala.Tuple2; import scala.Tuple2;
import static org.junit.Assert.assertEquals; import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.Assert.assertFalse; import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.Assert.assertNotNull; import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.Assert.assertNull; import static org.junit.jupiter.api.Assertions.assertNull;
import static org.junit.Assert.assertTrue; import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.junit.Assert.fail; import static org.junit.jupiter.api.Assertions.fail;
public class TestHoodieGlobalBloomIndex extends HoodieClientTestHarness { public class TestHoodieGlobalBloomIndex extends HoodieClientTestHarness {
@@ -67,7 +69,7 @@ public class TestHoodieGlobalBloomIndex extends HoodieClientTestHarness {
public TestHoodieGlobalBloomIndex() { public TestHoodieGlobalBloomIndex() {
} }
@Before @BeforeEach
public void setUp() throws Exception { public void setUp() throws Exception {
initSparkContexts("TestHoodieGlobalBloomIndex"); initSparkContexts("TestHoodieGlobalBloomIndex");
initPath(); initPath();
@@ -77,7 +79,7 @@ public class TestHoodieGlobalBloomIndex extends HoodieClientTestHarness {
initMetaClient(); initMetaClient();
} }
@After @AfterEach
public void tearDown() { public void tearDown() {
cleanupSparkContexts(); cleanupSparkContexts();
cleanupMetaClient(); cleanupMetaClient();
@@ -93,12 +95,12 @@ public class TestHoodieGlobalBloomIndex extends HoodieClientTestHarness {
// "2016/04/01": 1 file (2_0_20160401010101.parquet) // "2016/04/01": 1 file (2_0_20160401010101.parquet)
// "2015/03/12": 3 files (1_0_20150312101010.parquet, 3_0_20150312101010.parquet, // "2015/03/12": 3 files (1_0_20150312101010.parquet, 3_0_20150312101010.parquet,
// 4_0_20150312101010.parquet) // 4_0_20150312101010.parquet)
new File(basePath + "/2016/01/21").mkdirs(); Path dir1 = Files.createDirectories(Paths.get(basePath, "2016", "01", "21"));
new File(basePath + "/2016/01/21/" + HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE).createNewFile(); Files.createFile(dir1.resolve(HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE));
new File(basePath + "/2016/04/01").mkdirs(); Path dir2 = Files.createDirectories(Paths.get(basePath, "2016", "04", "01"));
new File(basePath + "/2016/04/01/" + HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE).createNewFile(); Files.createFile(dir2.resolve(HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE));
new File(basePath + "/2015/03/12").mkdirs(); Path dir3 = Files.createDirectories(Paths.get(basePath, "2015", "03", "12"));
new File(basePath + "/2015/03/12/" + HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE).createNewFile(); Files.createFile(dir3.resolve(HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE));
TestRawTripPayload rowChange1 = TestRawTripPayload rowChange1 =
new TestRawTripPayload("{\"_row_key\":\"000\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}"); new TestRawTripPayload("{\"_row_key\":\"000\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}");
@@ -133,16 +135,16 @@ public class TestHoodieGlobalBloomIndex extends HoodieClientTestHarness {
// partitions will NOT be respected by this loadInvolvedFiles(...) call // partitions will NOT be respected by this loadInvolvedFiles(...) call
List<Tuple2<String, BloomIndexFileInfo>> filesList = index.loadInvolvedFiles(partitions, jsc, table); List<Tuple2<String, BloomIndexFileInfo>> filesList = index.loadInvolvedFiles(partitions, jsc, table);
// Still 0, as no valid commit // Still 0, as no valid commit
assertEquals(filesList.size(), 0); assertEquals(0, filesList.size());
// Add some commits // Add some commits
new File(basePath + "/.hoodie").mkdirs(); Path hoodieDir = Files.createDirectories(Paths.get(basePath, ".hoodie"));
new File(basePath + "/.hoodie/20160401010101.commit").createNewFile(); Files.createFile(hoodieDir.resolve("20160401010101.commit"));
new File(basePath + "/.hoodie/20150312101010.commit").createNewFile(); Files.createFile(hoodieDir.resolve("20150312101010.commit"));
table = HoodieTable.create(metaClient, config, jsc); table = HoodieTable.create(metaClient, config, jsc);
filesList = index.loadInvolvedFiles(partitions, jsc, table); filesList = index.loadInvolvedFiles(partitions, jsc, table);
assertEquals(filesList.size(), 4); assertEquals(4, filesList.size());
Map<String, BloomIndexFileInfo> filesMap = toFileMap(filesList); Map<String, BloomIndexFileInfo> filesMap = toFileMap(filesList);
// key ranges checks // key ranges checks
@@ -213,12 +215,12 @@ public class TestHoodieGlobalBloomIndex extends HoodieClientTestHarness {
// "2016/04/01": 1 file (2_0_20160401010101.parquet) // "2016/04/01": 1 file (2_0_20160401010101.parquet)
// "2015/03/12": 3 files (1_0_20150312101010.parquet, 3_0_20150312101010.parquet, // "2015/03/12": 3 files (1_0_20150312101010.parquet, 3_0_20150312101010.parquet,
// 4_0_20150312101010.parquet) // 4_0_20150312101010.parquet)
new File(basePath + "/2016/01/21").mkdirs(); Path dir1 = Files.createDirectories(Paths.get(basePath, "2016", "01", "21"));
new File(basePath + "/2016/01/21/" + HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE).createNewFile(); Files.createFile(dir1.resolve(HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE));
new File(basePath + "/2016/04/01").mkdirs(); Path dir2 = Files.createDirectories(Paths.get(basePath, "2016", "04", "01"));
new File(basePath + "/2016/04/01/" + HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE).createNewFile(); Files.createFile(dir2.resolve(HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE));
new File(basePath + "/2015/03/12").mkdirs(); Path dir3 = Files.createDirectories(Paths.get(basePath, "2015", "03", "12"));
new File(basePath + "/2015/03/12/" + HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE).createNewFile(); Files.createFile(dir3.resolve(HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE));
TestRawTripPayload rowChange1 = TestRawTripPayload rowChange1 =
new TestRawTripPayload("{\"_row_key\":\"000\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}"); new TestRawTripPayload("{\"_row_key\":\"000\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}");
@@ -262,7 +264,7 @@ public class TestHoodieGlobalBloomIndex extends HoodieClientTestHarness {
HoodieTable table = HoodieTable.create(metaClient, config, jsc); HoodieTable table = HoodieTable.create(metaClient, config, jsc);
// Add some commits // Add some commits
new File(basePath + "/.hoodie").mkdirs(); Files.createDirectories(Paths.get(basePath, ".hoodie"));
// partitions will NOT be respected by this loadInvolvedFiles(...) call // partitions will NOT be respected by this loadInvolvedFiles(...) call
JavaRDD<HoodieRecord> taggedRecordRDD = index.tagLocation(recordRDD, jsc, table); JavaRDD<HoodieRecord> taggedRecordRDD = index.tagLocation(recordRDD, jsc, table);
@@ -305,8 +307,8 @@ public class TestHoodieGlobalBloomIndex extends HoodieClientTestHarness {
// Create the original partition, and put a record, along with the meta file // Create the original partition, and put a record, along with the meta file
// "2016/01/31": 1 file (1_0_20160131101010.parquet) // "2016/01/31": 1 file (1_0_20160131101010.parquet)
new File(basePath + "/2016/01/31").mkdirs(); Path dir = Files.createDirectories(Paths.get(basePath, "2016", "01", "31"));
new File(basePath + "/2016/01/31/" + HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE).createNewFile(); Files.createFile(dir.resolve(HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE));
// this record will be saved in table and will be tagged to an empty record // this record will be saved in table and will be tagged to an empty record
TestRawTripPayload originalPayload = TestRawTripPayload originalPayload =
@@ -347,7 +349,7 @@ public class TestHoodieGlobalBloomIndex extends HoodieClientTestHarness {
HoodieTable table = HoodieTable.create(metaClient, config, jsc); HoodieTable table = HoodieTable.create(metaClient, config, jsc);
// Add some commits // Add some commits
new File(basePath + "/.hoodie").mkdirs(); Files.createDirectories(Paths.get(basePath, ".hoodie"));
// test against incoming record with a different partition // test against incoming record with a different partition
JavaRDD<HoodieRecord> recordRDD = jsc.parallelize(Collections.singletonList(incomingRecord)); JavaRDD<HoodieRecord> recordRDD = jsc.parallelize(Collections.singletonList(incomingRecord));

View File

@@ -35,9 +35,9 @@ import org.apache.hudi.table.HoodieTimelineArchiveLog;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.Path;
import org.junit.After; import org.junit.jupiter.api.AfterEach;
import org.junit.Before; import org.junit.jupiter.api.BeforeEach;
import org.junit.Test; import org.junit.jupiter.api.Test;
import java.io.IOException; import java.io.IOException;
import java.util.Arrays; import java.util.Arrays;
@@ -47,16 +47,16 @@ import java.util.List;
import java.util.Set; import java.util.Set;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import static org.junit.Assert.assertEquals; import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.Assert.assertFalse; import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.Assert.assertTrue; import static org.junit.jupiter.api.Assertions.assertTrue;
public class TestHoodieCommitArchiveLog extends HoodieClientTestHarness { public class TestHoodieCommitArchiveLog extends HoodieClientTestHarness {
private Configuration hadoopConf; private Configuration hadoopConf;
private HoodieTableMetaClient metaClient; private HoodieTableMetaClient metaClient;
@Before @BeforeEach
public void init() throws Exception { public void init() throws Exception {
initDFS(); initDFS();
initPath(); initPath();
@@ -67,7 +67,7 @@ public class TestHoodieCommitArchiveLog extends HoodieClientTestHarness {
metaClient = HoodieTestUtils.init(hadoopConf, basePath); metaClient = HoodieTestUtils.init(hadoopConf, basePath);
} }
@After @AfterEach
public void clean() throws IOException { public void clean() throws IOException {
cleanupDFS(); cleanupDFS();
cleanupSparkContexts(); cleanupSparkContexts();
@@ -137,7 +137,7 @@ public class TestHoodieCommitArchiveLog extends HoodieClientTestHarness {
metaClient = HoodieTableMetaClient.reload(metaClient); metaClient = HoodieTableMetaClient.reload(metaClient);
HoodieTimeline timeline = metaClient.getActiveTimeline().getCommitsTimeline().filterCompletedInstants(); HoodieTimeline timeline = metaClient.getActiveTimeline().getCommitsTimeline().filterCompletedInstants();
assertEquals("Loaded 6 commits and the count should match", 6, timeline.countInstants()); assertEquals(6, timeline.countInstants(), "Loaded 6 commits and the count should match");
HoodieTestUtils.createCleanFiles(metaClient, basePath, "100", dfs.getConf()); HoodieTestUtils.createCleanFiles(metaClient, basePath, "100", dfs.getConf());
HoodieTestUtils.createCleanFiles(metaClient, basePath, "101", dfs.getConf()); HoodieTestUtils.createCleanFiles(metaClient, basePath, "101", dfs.getConf());
@@ -151,7 +151,7 @@ public class TestHoodieCommitArchiveLog extends HoodieClientTestHarness {
timeline = metaClient.getActiveTimeline().reload().getAllCommitsTimeline().filterCompletedInstants(); timeline = metaClient.getActiveTimeline().reload().getAllCommitsTimeline().filterCompletedInstants();
List<HoodieInstant> originalCommits = timeline.getInstants().collect(Collectors.toList()); List<HoodieInstant> originalCommits = timeline.getInstants().collect(Collectors.toList());
assertEquals("Loaded 6 commits and the count should match", 12, timeline.countInstants()); assertEquals(12, timeline.countInstants(), "Loaded 6 commits and the count should match");
// verify in-flight instants before archive // verify in-flight instants before archive
verifyInflightInstants(metaClient, 2); verifyInflightInstants(metaClient, 2);
@@ -168,42 +168,42 @@ public class TestHoodieCommitArchiveLog extends HoodieClientTestHarness {
// Check compaction instants // Check compaction instants
List<HoodieInstant> instants = metaClient.scanHoodieInstantsFromFileSystem( List<HoodieInstant> instants = metaClient.scanHoodieInstantsFromFileSystem(
new Path(metaClient.getMetaAuxiliaryPath()), HoodieActiveTimeline.VALID_EXTENSIONS_IN_ACTIVE_TIMELINE, false); new Path(metaClient.getMetaAuxiliaryPath()), HoodieActiveTimeline.VALID_EXTENSIONS_IN_ACTIVE_TIMELINE, false);
assertEquals("Should delete all compaction instants < 104", 4, instants.size()); assertEquals(4, instants.size(), "Should delete all compaction instants < 104");
assertFalse("Requested Compaction must be absent for 100", assertFalse(instants.contains(new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "100")),
instants.contains(new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "100"))); "Requested Compaction must be absent for 100");
assertFalse("Inflight Compaction must be absent for 100", assertFalse(instants.contains(new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, "100")),
instants.contains(new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, "100"))); "Inflight Compaction must be absent for 100");
assertFalse("Requested Compaction must be absent for 101", assertFalse(instants.contains(new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "101")),
instants.contains(new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "101"))); "Requested Compaction must be absent for 101");
assertFalse("Inflight Compaction must be absent for 101", assertFalse(instants.contains(new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, "101")),
instants.contains(new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, "101"))); "Inflight Compaction must be absent for 101");
assertFalse("Requested Compaction must be absent for 102", assertFalse(instants.contains(new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "102")),
instants.contains(new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "102"))); "Requested Compaction must be absent for 102");
assertFalse("Inflight Compaction must be absent for 102", assertFalse(instants.contains(new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, "102")),
instants.contains(new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, "102"))); "Inflight Compaction must be absent for 102");
assertFalse("Requested Compaction must be absent for 103", assertFalse(instants.contains(new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "103")),
instants.contains(new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "103"))); "Requested Compaction must be absent for 103");
assertFalse("Inflight Compaction must be absent for 103", assertFalse(instants.contains(new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, "103")),
instants.contains(new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, "103"))); "Inflight Compaction must be absent for 103");
assertTrue("Requested Compaction must be present for 104", assertTrue(instants.contains(new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "104")),
instants.contains(new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "104"))); "Requested Compaction must be present for 104");
assertTrue("Inflight Compaction must be present for 104", assertTrue(instants.contains(new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, "104")),
instants.contains(new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, "104"))); "Inflight Compaction must be present for 104");
assertTrue("Requested Compaction must be present for 105", assertTrue(instants.contains(new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "105")),
instants.contains(new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "105"))); "Requested Compaction must be present for 105");
assertTrue("Inflight Compaction must be present for 105", assertTrue(instants.contains(new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, "105")),
instants.contains(new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, "105"))); "Inflight Compaction must be present for 105");
// read the file // read the file
HoodieArchivedTimeline archivedTimeline = new HoodieArchivedTimeline(metaClient); HoodieArchivedTimeline archivedTimeline = new HoodieArchivedTimeline(metaClient);
assertEquals("Total archived records and total read records are the same count", assertEquals(24, archivedTimeline.countInstants(),
24, archivedTimeline.countInstants()); "Total archived records and total read records are the same count");
//make sure the archived commits are the same as the (originalcommits - commitsleft) //make sure the archived commits are the same as the (originalcommits - commitsleft)
Set<String> readCommits = Set<String> readCommits =
archivedTimeline.getInstants().map(HoodieInstant::getTimestamp).collect(Collectors.toSet()); archivedTimeline.getInstants().map(HoodieInstant::getTimestamp).collect(Collectors.toSet());
assertEquals("Read commits map should match the originalCommits - commitsLoadedFromArchival", assertEquals(originalCommits.stream().map(HoodieInstant::getTimestamp).collect(Collectors.toSet()), readCommits,
originalCommits.stream().map(HoodieInstant::getTimestamp).collect(Collectors.toSet()), readCommits); "Read commits map should match the originalCommits - commitsLoadedFromArchival");
// verify in-flight instants after archive // verify in-flight instants after archive
verifyInflightInstants(metaClient, 2); verifyInflightInstants(metaClient, 2);
@@ -247,31 +247,31 @@ public class TestHoodieCommitArchiveLog extends HoodieClientTestHarness {
HoodieTestDataGenerator.createCommitFile(basePath, "103", dfs.getConf()); HoodieTestDataGenerator.createCommitFile(basePath, "103", dfs.getConf());
HoodieTimeline timeline = metaClient.getActiveTimeline().getCommitsTimeline().filterCompletedInstants(); HoodieTimeline timeline = metaClient.getActiveTimeline().getCommitsTimeline().filterCompletedInstants();
assertEquals("Loaded 4 commits and the count should match", 4, timeline.countInstants()); assertEquals(4, timeline.countInstants(), "Loaded 4 commits and the count should match");
boolean result = archiveLog.archiveIfRequired(jsc); boolean result = archiveLog.archiveIfRequired(jsc);
assertTrue(result); assertTrue(result);
timeline = metaClient.getActiveTimeline().reload().getCommitsTimeline().filterCompletedInstants(); timeline = metaClient.getActiveTimeline().reload().getCommitsTimeline().filterCompletedInstants();
assertEquals("Should not archive commits when maxCommitsToKeep is 5", 4, timeline.countInstants()); assertEquals(4, timeline.countInstants(), "Should not archive commits when maxCommitsToKeep is 5");
List<HoodieInstant> instants = metaClient.scanHoodieInstantsFromFileSystem( List<HoodieInstant> instants = metaClient.scanHoodieInstantsFromFileSystem(
new Path(metaClient.getMetaAuxiliaryPath()), HoodieActiveTimeline.VALID_EXTENSIONS_IN_ACTIVE_TIMELINE, false); new Path(metaClient.getMetaAuxiliaryPath()), HoodieActiveTimeline.VALID_EXTENSIONS_IN_ACTIVE_TIMELINE, false);
assertEquals("Should not delete any aux compaction files when maxCommitsToKeep is 5", 8, instants.size()); assertEquals(8, instants.size(), "Should not delete any aux compaction files when maxCommitsToKeep is 5");
assertTrue("Requested Compaction must be present for 100", assertTrue(instants.contains(new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "100")),
instants.contains(new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "100"))); "Requested Compaction must be present for 100");
assertTrue("Inflight Compaction must be present for 100", assertTrue(instants.contains(new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, "100")),
instants.contains(new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, "100"))); "Inflight Compaction must be present for 100");
assertTrue("Requested Compaction must be present for 101", assertTrue(instants.contains(new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "101")),
instants.contains(new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "101"))); "Requested Compaction must be present for 101");
assertTrue("Inflight Compaction must be present for 101", assertTrue(instants.contains(new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, "101")),
instants.contains(new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, "101"))); "Inflight Compaction must be present for 101");
assertTrue("Requested Compaction must be present for 102", assertTrue(instants.contains(new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "102")),
instants.contains(new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "102"))); "Requested Compaction must be present for 102");
assertTrue("Inflight Compaction must be present for 102", assertTrue(instants.contains(new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, "102")),
instants.contains(new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, "102"))); "Inflight Compaction must be present for 102");
assertTrue("Requested Compaction must be present for 103", assertTrue(instants.contains(new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "103")),
instants.contains(new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "103"))); "Requested Compaction must be present for 103");
assertTrue("Inflight Compaction must be present for 103", assertTrue(instants.contains(new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, "103")),
instants.contains(new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, "103"))); "Inflight Compaction must be present for 103");
} }
@Test @Test
@@ -290,14 +290,14 @@ public class TestHoodieCommitArchiveLog extends HoodieClientTestHarness {
HoodieTestDataGenerator.createCommitFile(basePath, "105", dfs.getConf()); HoodieTestDataGenerator.createCommitFile(basePath, "105", dfs.getConf());
HoodieTimeline timeline = metaClient.getActiveTimeline().getCommitsTimeline().filterCompletedInstants(); HoodieTimeline timeline = metaClient.getActiveTimeline().getCommitsTimeline().filterCompletedInstants();
assertEquals("Loaded 6 commits and the count should match", 6, timeline.countInstants()); assertEquals(6, timeline.countInstants(), "Loaded 6 commits and the count should match");
boolean result = archiveLog.archiveIfRequired(jsc); boolean result = archiveLog.archiveIfRequired(jsc);
assertTrue(result); assertTrue(result);
timeline = metaClient.getActiveTimeline().reload().getCommitsTimeline().filterCompletedInstants(); timeline = metaClient.getActiveTimeline().reload().getCommitsTimeline().filterCompletedInstants();
assertTrue("Archived commits should always be safe", timeline.containsOrBeforeTimelineStarts("100")); assertTrue(timeline.containsOrBeforeTimelineStarts("100"), "Archived commits should always be safe");
assertTrue("Archived commits should always be safe", timeline.containsOrBeforeTimelineStarts("101")); assertTrue(timeline.containsOrBeforeTimelineStarts("101"), "Archived commits should always be safe");
assertTrue("Archived commits should always be safe", timeline.containsOrBeforeTimelineStarts("102")); assertTrue(timeline.containsOrBeforeTimelineStarts("102"), "Archived commits should always be safe");
assertTrue("Archived commits should always be safe", timeline.containsOrBeforeTimelineStarts("103")); assertTrue(timeline.containsOrBeforeTimelineStarts("103"), "Archived commits should always be safe");
} }
@Test @Test
@@ -317,19 +317,18 @@ public class TestHoodieCommitArchiveLog extends HoodieClientTestHarness {
HoodieTestDataGenerator.createCommitFile(basePath, "105", dfs.getConf()); HoodieTestDataGenerator.createCommitFile(basePath, "105", dfs.getConf());
HoodieTimeline timeline = metaClient.getActiveTimeline().getCommitsTimeline().filterCompletedInstants(); HoodieTimeline timeline = metaClient.getActiveTimeline().getCommitsTimeline().filterCompletedInstants();
assertEquals("Loaded 6 commits and the count should match", 6, timeline.countInstants()); assertEquals(6, timeline.countInstants(), "Loaded 6 commits and the count should match");
boolean result = archiveLog.archiveIfRequired(jsc); boolean result = archiveLog.archiveIfRequired(jsc);
assertTrue(result); assertTrue(result);
timeline = metaClient.getActiveTimeline().reload().getCommitsTimeline().filterCompletedInstants(); timeline = metaClient.getActiveTimeline().reload().getCommitsTimeline().filterCompletedInstants();
assertEquals( assertEquals(5, timeline.countInstants(),
"Since we have a savepoint at 101, we should never archive any commit after 101 (we only archive 100)", 5, "Since we have a savepoint at 101, we should never archive any commit after 101 (we only archive 100)");
timeline.countInstants()); assertTrue(timeline.containsInstant(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "101")),
assertTrue("Archived commits should always be safe", "Archived commits should always be safe");
timeline.containsInstant(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "101"))); assertTrue(timeline.containsInstant(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "102")),
assertTrue("Archived commits should always be safe", "Archived commits should always be safe");
timeline.containsInstant(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "102"))); assertTrue(timeline.containsInstant(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "103")),
assertTrue("Archived commits should always be safe", "Archived commits should always be safe");
timeline.containsInstant(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "103")));
} }
@Test @Test
@@ -354,28 +353,29 @@ public class TestHoodieCommitArchiveLog extends HoodieClientTestHarness {
HoodieTestDataGenerator.createCommitFile(basePath, "107", dfs.getConf()); HoodieTestDataGenerator.createCommitFile(basePath, "107", dfs.getConf());
HoodieTimeline timeline = metaClient.getActiveTimeline().getCommitsAndCompactionTimeline(); HoodieTimeline timeline = metaClient.getActiveTimeline().getCommitsAndCompactionTimeline();
assertEquals("Loaded 6 commits and the count should match", 8, timeline.countInstants()); assertEquals(8, timeline.countInstants(), "Loaded 6 commits and the count should match");
boolean result = archiveLog.archiveIfRequired(jsc); boolean result = archiveLog.archiveIfRequired(jsc);
assertTrue(result); assertTrue(result);
timeline = metaClient.getActiveTimeline().reload().getCommitsAndCompactionTimeline(); timeline = metaClient.getActiveTimeline().reload().getCommitsAndCompactionTimeline();
assertFalse("Instants before oldest pending compaction can be removed", assertFalse(timeline.containsInstant(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "100")),
timeline.containsInstant(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "100"))); "Instants before oldest pending compaction can be removed");
assertEquals("Since we have a pending compaction at 101, we should never archive any commit " assertEquals(7, timeline.countInstants(),
+ "after 101 (we only archive 100)", 7, timeline.countInstants()); "Since we have a pending compaction at 101, we should never archive any commit "
assertTrue("Requested Compaction must still be present", + "after 101 (we only archive 100)");
timeline.containsInstant(new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "101"))); assertTrue(timeline.containsInstant(new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "101")),
assertTrue("Instants greater than oldest pending compaction must be present", "Requested Compaction must still be present");
timeline.containsInstant(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "102"))); assertTrue(timeline.containsInstant(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "102")),
assertTrue("Instants greater than oldest pending compaction must be present", "Instants greater than oldest pending compaction must be present");
timeline.containsInstant(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "103"))); assertTrue(timeline.containsInstant(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "103")),
assertTrue("Instants greater than oldest pending compaction must be present", "Instants greater than oldest pending compaction must be present");
timeline.containsInstant(new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "104"))); assertTrue(timeline.containsInstant(new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "104")),
assertTrue("Instants greater than oldest pending compaction must be present", "Instants greater than oldest pending compaction must be present");
timeline.containsInstant(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "105"))); assertTrue(timeline.containsInstant(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "105")),
assertTrue("Instants greater than oldest pending compaction must be present", "Instants greater than oldest pending compaction must be present");
timeline.containsInstant(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "106"))); assertTrue(timeline.containsInstant(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "106")),
assertTrue("Instants greater than oldest pending compaction must be present", "Instants greater than oldest pending compaction must be present");
timeline.containsInstant(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "107"))); assertTrue(timeline.containsInstant(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "107")),
"Instants greater than oldest pending compaction must be present");
} }
@Test @Test
@@ -412,8 +412,8 @@ public class TestHoodieCommitArchiveLog extends HoodieClientTestHarness {
private void verifyInflightInstants(HoodieTableMetaClient metaClient, int expectedTotalInstants) { private void verifyInflightInstants(HoodieTableMetaClient metaClient, int expectedTotalInstants) {
HoodieTimeline timeline = metaClient.getActiveTimeline().reload() HoodieTimeline timeline = metaClient.getActiveTimeline().reload()
.getTimelineOfActions(Collections.singleton(HoodieTimeline.CLEAN_ACTION)).filterInflights(); .getTimelineOfActions(Collections.singleton(HoodieTimeline.CLEAN_ACTION)).filterInflights();
assertEquals("Loaded inflight clean actions and the count should match", expectedTotalInstants, assertEquals(expectedTotalInstants, timeline.countInstants(),
timeline.countInstants()); "Loaded inflight clean actions and the count should match");
} }
@Test @Test

View File

@@ -39,22 +39,23 @@ import org.apache.hadoop.fs.FileSystem;
import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row; import org.apache.spark.sql.Row;
import org.junit.After; import org.junit.jupiter.api.AfterEach;
import org.junit.Assert; import org.junit.jupiter.api.BeforeEach;
import org.junit.Before; import org.junit.jupiter.api.Test;
import org.junit.Test;
import java.nio.file.Paths;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import static org.junit.Assert.assertEquals; import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.Assert.assertFalse; import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.Assert.assertNotEquals; import static org.junit.jupiter.api.Assertions.assertNotEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
public class TestHoodieMergeHandle extends HoodieClientTestHarness { public class TestHoodieMergeHandle extends HoodieClientTestHarness {
@Before @BeforeEach
public void setUp() throws Exception { public void setUp() throws Exception {
initSparkContexts("TestHoodieMergeHandle"); initSparkContexts("TestHoodieMergeHandle");
initPath(); initPath();
@@ -63,7 +64,7 @@ public class TestHoodieMergeHandle extends HoodieClientTestHarness {
initMetaClient(); initMetaClient();
} }
@After @AfterEach
public void tearDown() throws Exception { public void tearDown() throws Exception {
cleanupFileSystem(); cleanupFileSystem();
cleanupTestDataGenerator(); cleanupTestDataGenerator();
@@ -110,11 +111,12 @@ public class TestHoodieMergeHandle extends HoodieClientTestHarness {
// verify that there is a commit // verify that there is a commit
metaClient = HoodieTableMetaClient.reload(metaClient); metaClient = HoodieTableMetaClient.reload(metaClient);
HoodieTimeline timeline = new HoodieActiveTimeline(metaClient).getCommitTimeline(); HoodieTimeline timeline = new HoodieActiveTimeline(metaClient).getCommitTimeline();
assertEquals("Expecting a single commit.", 1, assertEquals(1, timeline.findInstantsAfter("000", Integer.MAX_VALUE).countInstants(),
timeline.findInstantsAfter("000", Integer.MAX_VALUE).countInstants()); "Expecting a single commit.");
Assert.assertEquals("Latest commit should be 001", newCommitTime, timeline.lastInstant().get().getTimestamp()); assertEquals(newCommitTime, timeline.lastInstant().get().getTimestamp(), "Latest commit should be 001");
assertEquals("Must contain 44 records", records.size(), assertEquals(records.size(),
HoodieClientTestUtils.readCommit(basePath, sqlContext, timeline, newCommitTime).count()); HoodieClientTestUtils.readCommit(basePath, sqlContext, timeline, newCommitTime).count(),
"Must contain 44 records");
/** /**
* Write 2 (insert) This will do a bulk insert of 1 record with the same row_key as record1 in the previous insert * Write 2 (insert) This will do a bulk insert of 1 record with the same row_key as record1 in the previous insert
@@ -135,10 +137,10 @@ public class TestHoodieMergeHandle extends HoodieClientTestHarness {
// verify that there are 2 commits // verify that there are 2 commits
metaClient = HoodieTableMetaClient.reload(metaClient); metaClient = HoodieTableMetaClient.reload(metaClient);
timeline = new HoodieActiveTimeline(metaClient).getCommitTimeline(); timeline = new HoodieActiveTimeline(metaClient).getCommitTimeline();
assertEquals("Expecting two commits.", 2, timeline.findInstantsAfter("000", Integer.MAX_VALUE).countInstants()); assertEquals(2, timeline.findInstantsAfter("000", Integer.MAX_VALUE).countInstants(), "Expecting two commits.");
Assert.assertEquals("Latest commit should be 002", newCommitTime, timeline.lastInstant().get().getTimestamp()); assertEquals(newCommitTime, timeline.lastInstant().get().getTimestamp(), "Latest commit should be 002");
Dataset<Row> dataSet = getRecords(); Dataset<Row> dataSet = getRecords();
assertEquals("Must contain 45 records", 45, dataSet.count()); assertEquals(45, dataSet.count(), "Must contain 45 records");
/** /**
* Write 3 (insert) This will bulk insert 2 new completely new records. At this point, we will have 2 files with * Write 3 (insert) This will bulk insert 2 new completely new records. At this point, we will have 2 files with
@@ -155,10 +157,10 @@ public class TestHoodieMergeHandle extends HoodieClientTestHarness {
// verify that there are now 3 commits // verify that there are now 3 commits
metaClient = HoodieTableMetaClient.reload(metaClient); metaClient = HoodieTableMetaClient.reload(metaClient);
timeline = new HoodieActiveTimeline(metaClient).getCommitTimeline(); timeline = new HoodieActiveTimeline(metaClient).getCommitTimeline();
assertEquals("Expecting three commits.", 3, timeline.findInstantsAfter("000", Integer.MAX_VALUE).countInstants()); assertEquals(3, timeline.findInstantsAfter("000", Integer.MAX_VALUE).countInstants(), "Expecting three commits.");
Assert.assertEquals("Latest commit should be 003", newCommitTime, timeline.lastInstant().get().getTimestamp()); assertEquals(newCommitTime, timeline.lastInstant().get().getTimestamp(), "Latest commit should be 003");
dataSet = getRecords(); dataSet = getRecords();
assertEquals("Must contain 47 records", 47, dataSet.count()); assertEquals(47, dataSet.count(), "Must contain 47 records");
/** /**
* Write 4 (updates) This will generate 2 upsert records with id1 and id2. The rider and driver names in the * Write 4 (updates) This will generate 2 upsert records with id1 and id2. The rider and driver names in the
@@ -185,12 +187,12 @@ public class TestHoodieMergeHandle extends HoodieClientTestHarness {
// verify there are now 4 commits // verify there are now 4 commits
timeline = new HoodieActiveTimeline(metaClient).getCommitTimeline(); timeline = new HoodieActiveTimeline(metaClient).getCommitTimeline();
assertEquals("Expecting four commits.", 4, timeline.findInstantsAfter("000", Integer.MAX_VALUE).countInstants()); assertEquals(4, timeline.findInstantsAfter("000", Integer.MAX_VALUE).countInstants(), "Expecting four commits.");
Assert.assertEquals("Latest commit should be 004", timeline.lastInstant().get().getTimestamp(), newCommitTime); assertEquals(timeline.lastInstant().get().getTimestamp(), newCommitTime, "Latest commit should be 004");
// Check the entire dataset has 47 records still // Check the entire dataset has 47 records still
dataSet = getRecords(); dataSet = getRecords();
assertEquals("Must contain 47 records", 47, dataSet.count()); assertEquals(47, dataSet.count(), "Must contain 47 records");
Row[] rows = (Row[]) dataSet.collect(); Row[] rows = (Row[]) dataSet.collect();
int record1Count = 0; int record1Count = 0;
int record2Count = 0; int record2Count = 0;
@@ -233,19 +235,18 @@ public class TestHoodieMergeHandle extends HoodieClientTestHarness {
List<WriteStatus> statuses = writeClient.insert(recordsRDD, newCommitTime).collect(); List<WriteStatus> statuses = writeClient.insert(recordsRDD, newCommitTime).collect();
// All records should be inserts into new parquet // All records should be inserts into new parquet
Assert.assertTrue(statuses.stream() assertTrue(statuses.stream()
.filter(status -> status.getStat().getPrevCommit() != HoodieWriteStat.NULL_COMMIT).count() > 0); .filter(status -> status.getStat().getPrevCommit() != HoodieWriteStat.NULL_COMMIT).count() > 0);
// Num writes should be equal to the number of records inserted // Num writes should be equal to the number of records inserted
Assert.assertEquals( assertEquals(100,
(long) statuses.stream().map(status -> status.getStat().getNumWrites()).reduce((a, b) -> a + b).get(), 100); (long) statuses.stream().map(status -> status.getStat().getNumWrites()).reduce((a, b) -> a + b).get());
// Num update writes should be equal to the number of records updated // Num update writes should be equal to the number of records updated
Assert.assertEquals( assertEquals(0,
(long) statuses.stream().map(status -> status.getStat().getNumUpdateWrites()).reduce((a, b) -> a + b).get(), (long) statuses.stream().map(status -> status.getStat().getNumUpdateWrites()).reduce((a, b) -> a + b).get());
0);
// Num update writes should be equal to the number of insert records converted to updates as part of small file // Num update writes should be equal to the number of insert records converted to updates as part of small file
// handling // handling
Assert.assertEquals( assertEquals(100,
(long) statuses.stream().map(status -> status.getStat().getNumInserts()).reduce((a, b) -> a + b).get(), 100); (long) statuses.stream().map(status -> status.getStat().getNumInserts()).reduce((a, b) -> a + b).get());
// Update all the 100 records // Update all the 100 records
metaClient = HoodieTableMetaClient.reload(metaClient); metaClient = HoodieTableMetaClient.reload(metaClient);
@@ -258,20 +259,18 @@ public class TestHoodieMergeHandle extends HoodieClientTestHarness {
statuses = writeClient.upsert(updatedRecordsRDD, newCommitTime).collect(); statuses = writeClient.upsert(updatedRecordsRDD, newCommitTime).collect();
// All records should be upserts into existing parquet // All records should be upserts into existing parquet
Assert.assertEquals( assertEquals(0,
statuses.stream().filter(status -> status.getStat().getPrevCommit() == HoodieWriteStat.NULL_COMMIT).count(), statuses.stream().filter(status -> status.getStat().getPrevCommit() == HoodieWriteStat.NULL_COMMIT).count());
0);
// Num writes should be equal to the number of records inserted // Num writes should be equal to the number of records inserted
Assert.assertEquals( assertEquals(100,
(long) statuses.stream().map(status -> status.getStat().getNumWrites()).reduce((a, b) -> a + b).get(), 100); (long) statuses.stream().map(status -> status.getStat().getNumWrites()).reduce((a, b) -> a + b).get());
// Num update writes should be equal to the number of records updated // Num update writes should be equal to the number of records updated
Assert.assertEquals( assertEquals(100,
(long) statuses.stream().map(status -> status.getStat().getNumUpdateWrites()).reduce((a, b) -> a + b).get(), (long) statuses.stream().map(status -> status.getStat().getNumUpdateWrites()).reduce((a, b) -> a + b).get());
100);
// Num update writes should be equal to the number of insert records converted to updates as part of small file // Num update writes should be equal to the number of insert records converted to updates as part of small file
// handling // handling
Assert.assertEquals( assertEquals(0,
(long) statuses.stream().map(status -> status.getStat().getNumInserts()).reduce((a, b) -> a + b).get(), 0); (long) statuses.stream().map(status -> status.getStat().getNumInserts()).reduce((a, b) -> a + b).get());
newCommitTime = "102"; newCommitTime = "102";
writeClient.startCommitWithTime(newCommitTime); writeClient.startCommitWithTime(newCommitTime);
@@ -282,24 +281,23 @@ public class TestHoodieMergeHandle extends HoodieClientTestHarness {
statuses = writeClient.upsert(allRecordsRDD, newCommitTime).collect(); statuses = writeClient.upsert(allRecordsRDD, newCommitTime).collect();
// All records should be upserts into existing parquet (with inserts as updates small file handled) // All records should be upserts into existing parquet (with inserts as updates small file handled)
Assert.assertEquals((long) statuses.stream() assertEquals(0, (long) statuses.stream()
.filter(status -> status.getStat().getPrevCommit() == HoodieWriteStat.NULL_COMMIT).count(), 0); .filter(status -> status.getStat().getPrevCommit() == HoodieWriteStat.NULL_COMMIT).count());
// Num writes should be equal to the total number of records written // Num writes should be equal to the total number of records written
Assert.assertEquals( assertEquals(200,
(long) statuses.stream().map(status -> status.getStat().getNumWrites()).reduce((a, b) -> a + b).get(), 200); (long) statuses.stream().map(status -> status.getStat().getNumWrites()).reduce((a, b) -> a + b).get());
// Num update writes should be equal to the number of records updated (including inserts converted as updates) // Num update writes should be equal to the number of records updated (including inserts converted as updates)
Assert.assertEquals( assertEquals(100,
(long) statuses.stream().map(status -> status.getStat().getNumUpdateWrites()).reduce((a, b) -> a + b).get(), (long) statuses.stream().map(status -> status.getStat().getNumUpdateWrites()).reduce((a, b) -> a + b).get());
100);
// Num update writes should be equal to the number of insert records converted to updates as part of small file // Num update writes should be equal to the number of insert records converted to updates as part of small file
// handling // handling
Assert.assertEquals( assertEquals(100,
(long) statuses.stream().map(status -> status.getStat().getNumInserts()).reduce((a, b) -> a + b).get(), 100); (long) statuses.stream().map(status -> status.getStat().getNumInserts()).reduce((a, b) -> a + b).get());
// Verify all records have location set // Verify all records have location set
statuses.forEach(writeStatus -> { statuses.forEach(writeStatus -> {
writeStatus.getWrittenRecords().forEach(r -> { writeStatus.getWrittenRecords().forEach(r -> {
// Ensure New Location is set // Ensure New Location is set
Assert.assertTrue(r.getNewLocation().isPresent()); assertTrue(r.getNewLocation().isPresent());
}); });
}); });
} }
@@ -309,7 +307,7 @@ public class TestHoodieMergeHandle extends HoodieClientTestHarness {
// Check the entire dataset has 8 records still // Check the entire dataset has 8 records still
String[] fullPartitionPaths = new String[dataGen.getPartitionPaths().length]; String[] fullPartitionPaths = new String[dataGen.getPartitionPaths().length];
for (int i = 0; i < fullPartitionPaths.length; i++) { for (int i = 0; i < fullPartitionPaths.length; i++) {
fullPartitionPaths[i] = String.format("%s/%s/*", basePath, dataGen.getPartitionPaths()[i]); fullPartitionPaths[i] = Paths.get(basePath, dataGen.getPartitionPaths()[i], "*").toString();
} }
Dataset<Row> dataSet = HoodieClientTestUtils.read(jsc, basePath, sqlContext, fs, fullPartitionPaths); Dataset<Row> dataSet = HoodieClientTestUtils.read(jsc, basePath, sqlContext, fs, fullPartitionPaths);
return dataSet; return dataSet;
@@ -323,7 +321,7 @@ public class TestHoodieMergeHandle extends HoodieClientTestHarness {
void assertNoWriteErrors(List<WriteStatus> statuses) { void assertNoWriteErrors(List<WriteStatus> statuses) {
// Verify there are no errors // Verify there are no errors
for (WriteStatus status : statuses) { for (WriteStatus status : statuses) {
assertFalse("Errors found in write of " + status.getFileId(), status.hasErrors()); assertFalse(status.hasErrors(), "Errors found in write of " + status.getFileId());
} }
} }

View File

@@ -26,12 +26,12 @@ import org.apache.hudi.table.HoodieTable;
import org.apache.avro.generic.IndexedRecord; import org.apache.avro.generic.IndexedRecord;
import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.Path;
import org.junit.Assert; import org.junit.jupiter.api.Test;
import org.junit.Test;
import java.io.IOException; import java.io.IOException;
import static org.junit.Assert.fail; import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
/** /**
* Tests for {@link HoodieStorageWriterFactory}. * Tests for {@link HoodieStorageWriterFactory}.
@@ -48,17 +48,14 @@ public class TestHoodieStorageWriterFactory extends TestHoodieClientBase {
SparkTaskContextSupplier supplier = new SparkTaskContextSupplier(); SparkTaskContextSupplier supplier = new SparkTaskContextSupplier();
HoodieStorageWriter<IndexedRecord> parquetWriter = HoodieStorageWriterFactory.getStorageWriter(instantTime, HoodieStorageWriter<IndexedRecord> parquetWriter = HoodieStorageWriterFactory.getStorageWriter(instantTime,
parquetPath, table, cfg, HoodieTestDataGenerator.AVRO_SCHEMA, supplier); parquetPath, table, cfg, HoodieTestDataGenerator.AVRO_SCHEMA, supplier);
Assert.assertTrue(parquetWriter instanceof HoodieParquetWriter); assertTrue(parquetWriter instanceof HoodieParquetWriter);
// other file format exception. // other file format exception.
final Path logPath = new Path(basePath + "/partition/path/f.b51192a8-574b-4a85-b246-bcfec03ac8bf_100.log.2_1-0-1"); final Path logPath = new Path(basePath + "/partition/path/f.b51192a8-574b-4a85-b246-bcfec03ac8bf_100.log.2_1-0-1");
try { final Throwable thrown = assertThrows(UnsupportedOperationException.class, () -> {
HoodieStorageWriter<IndexedRecord> logWriter = HoodieStorageWriterFactory.getStorageWriter(instantTime, logPath, HoodieStorageWriter<IndexedRecord> logWriter = HoodieStorageWriterFactory.getStorageWriter(instantTime, logPath,
table, cfg, HoodieTestDataGenerator.AVRO_SCHEMA, supplier); table, cfg, HoodieTestDataGenerator.AVRO_SCHEMA, supplier);
fail("should fail since log storage writer is not supported yet."); }, "should fail since log storage writer is not supported yet.");
} catch (Exception e) { assertTrue(thrown.getMessage().contains("format not supported yet."));
Assert.assertTrue(e instanceof UnsupportedOperationException);
Assert.assertTrue(e.getMessage().contains("format not supported yet."));
}
} }
} }

View File

@@ -60,8 +60,7 @@ import org.apache.hadoop.fs.RemoteIterator;
import org.apache.log4j.LogManager; import org.apache.log4j.LogManager;
import org.apache.log4j.Logger; import org.apache.log4j.Logger;
import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaRDD;
import org.junit.Assert; import org.junit.jupiter.api.Test;
import org.junit.Test;
import java.io.IOException; import java.io.IOException;
import java.nio.charset.StandardCharsets; import java.nio.charset.StandardCharsets;
@@ -82,9 +81,9 @@ import java.util.stream.Stream;
import scala.Tuple3; import scala.Tuple3;
import static org.apache.hudi.common.model.HoodieTestUtils.DEFAULT_PARTITION_PATHS; import static org.apache.hudi.common.model.HoodieTestUtils.DEFAULT_PARTITION_PATHS;
import static org.junit.Assert.assertEquals; import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.Assert.assertFalse; import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.Assert.assertTrue; import static org.junit.jupiter.api.Assertions.assertTrue;
/** /**
* Test Cleaning related logic. * Test Cleaning related logic.
@@ -123,15 +122,16 @@ public class TestCleaner extends TestHoodieClientBase {
// verify that there is a commit // verify that there is a commit
metaClient = HoodieTableMetaClient.reload(metaClient); metaClient = HoodieTableMetaClient.reload(metaClient);
HoodieTimeline timeline = new HoodieActiveTimeline(metaClient).getCommitTimeline(); HoodieTimeline timeline = new HoodieActiveTimeline(metaClient).getCommitTimeline();
assertEquals("Expecting a single commit.", 1, timeline.findInstantsAfter("000", Integer.MAX_VALUE).countInstants()); assertEquals(1, timeline.findInstantsAfter("000", Integer.MAX_VALUE).countInstants(), "Expecting a single commit.");
// Should have 100 records in table (check using Index), all in locations marked at commit // Should have 100 records in table (check using Index), all in locations marked at commit
HoodieTable table = HoodieTable.create(metaClient, client.getConfig(), jsc); HoodieTable table = HoodieTable.create(metaClient, client.getConfig(), jsc);
assertFalse(table.getCompletedCommitsTimeline().empty()); assertFalse(table.getCompletedCommitsTimeline().empty());
String instantTime = table.getCompletedCommitsTimeline().getInstants().findFirst().get().getTimestamp(); String instantTime = table.getCompletedCommitsTimeline().getInstants().findFirst().get().getTimestamp();
assertFalse(table.getCompletedCleanTimeline().empty()); assertFalse(table.getCompletedCleanTimeline().empty());
assertEquals("The clean instant should be the same as the commit instant", instantTime, assertEquals(instantTime,
table.getCompletedCleanTimeline().getInstants().findFirst().get().getTimestamp()); table.getCompletedCleanTimeline().getInstants().findFirst().get().getTimestamp(),
"The clean instant should be the same as the commit instant");
HoodieIndex index = HoodieIndex.createIndex(cfg, jsc); HoodieIndex index = HoodieIndex.createIndex(cfg, jsc);
List<HoodieRecord> taggedRecords = index.tagLocation(jsc.parallelize(records, 1), jsc, table).collect(); List<HoodieRecord> taggedRecords = index.tagLocation(jsc.parallelize(records, 1), jsc, table).collect();
@@ -272,22 +272,22 @@ public class TestCleaner extends TestHoodieClientBase {
return compactionFileIdToLatestFileSlice.get(fileGroup.getFileGroupId()).getBaseInstantTime() return compactionFileIdToLatestFileSlice.get(fileGroup.getFileGroupId()).getBaseInstantTime()
.equals(df.getCommitTime()); .equals(df.getCommitTime());
}).findAny()); }).findAny());
Assert.assertTrue("Data File selected for compaction is retained", assertTrue(dataFileForCompactionPresent.isPresent(),
dataFileForCompactionPresent.isPresent()); "Data File selected for compaction is retained");
} else { } else {
// file has no more than max versions // file has no more than max versions
String fileId = fileGroup.getFileGroupId().getFileId(); String fileId = fileGroup.getFileGroupId().getFileId();
List<HoodieBaseFile> dataFiles = fileGroup.getAllBaseFiles().collect(Collectors.toList()); List<HoodieBaseFile> dataFiles = fileGroup.getAllBaseFiles().collect(Collectors.toList());
assertTrue("fileId " + fileId + " has more than " + maxVersions + " versions", assertTrue(dataFiles.size() <= maxVersions,
dataFiles.size() <= maxVersions); "fileId " + fileId + " has more than " + maxVersions + " versions");
// Each file, has the latest N versions (i.e cleaning gets rid of older versions) // Each file, has the latest N versions (i.e cleaning gets rid of older versions)
List<String> commitedVersions = new ArrayList<>(fileIdToVersions.get(fileId)); List<String> commitedVersions = new ArrayList<>(fileIdToVersions.get(fileId));
for (int i = 0; i < dataFiles.size(); i++) { for (int i = 0; i < dataFiles.size(); i++) {
assertEquals("File " + fileId + " does not have latest versions on commits" + commitedVersions, assertEquals((dataFiles.get(i)).getCommitTime(),
(dataFiles.get(i)).getCommitTime(), commitedVersions.get(commitedVersions.size() - 1 - i),
commitedVersions.get(commitedVersions.size() - 1 - i)); "File " + fileId + " does not have latest versions on commits" + commitedVersions);
} }
} }
} }
@@ -395,8 +395,8 @@ public class TestCleaner extends TestHoodieClientBase {
LOG.debug("Data File - " + value); LOG.debug("Data File - " + value);
commitTimes.add(value.getCommitTime()); commitTimes.add(value.getCommitTime());
}); });
assertEquals("Only contain acceptable versions of file should be present", assertEquals(acceptableCommits.stream().map(HoodieInstant::getTimestamp).collect(Collectors.toSet()), commitTimes,
acceptableCommits.stream().map(HoodieInstant::getTimestamp).collect(Collectors.toSet()), commitTimes); "Only contain acceptable versions of file should be present");
} }
} }
} catch (IOException ioe) { } catch (IOException ioe) {
@@ -432,17 +432,17 @@ public class TestCleaner extends TestHoodieClientBase {
HoodieInstant completedCleanInstant = new HoodieInstant(State.COMPLETED, HoodieTimeline.CLEAN_ACTION, cleanInstantTs); HoodieInstant completedCleanInstant = new HoodieInstant(State.COMPLETED, HoodieTimeline.CLEAN_ACTION, cleanInstantTs);
metaClient.reloadActiveTimeline().revertToInflight(completedCleanInstant); metaClient.reloadActiveTimeline().revertToInflight(completedCleanInstant);
HoodieCleanMetadata cleanMetadata2 = writeClient.clean(getNextInstant()); HoodieCleanMetadata cleanMetadata2 = writeClient.clean(getNextInstant());
Assert.assertEquals(cleanMetadata1.getEarliestCommitToRetain(), cleanMetadata2.getEarliestCommitToRetain()); assertEquals(cleanMetadata1.getEarliestCommitToRetain(), cleanMetadata2.getEarliestCommitToRetain());
Assert.assertEquals(new Integer(0), cleanMetadata2.getTotalFilesDeleted()); assertEquals(new Integer(0), cleanMetadata2.getTotalFilesDeleted());
Assert.assertEquals(cleanMetadata1.getPartitionMetadata().keySet(), cleanMetadata2.getPartitionMetadata().keySet()); assertEquals(cleanMetadata1.getPartitionMetadata().keySet(), cleanMetadata2.getPartitionMetadata().keySet());
final HoodieCleanMetadata retriedCleanMetadata = CleanerUtils.getCleanerMetadata(HoodieTableMetaClient.reload(metaClient), completedCleanInstant); final HoodieCleanMetadata retriedCleanMetadata = CleanerUtils.getCleanerMetadata(HoodieTableMetaClient.reload(metaClient), completedCleanInstant);
cleanMetadata1.getPartitionMetadata().keySet().forEach(k -> { cleanMetadata1.getPartitionMetadata().keySet().forEach(k -> {
HoodieCleanPartitionMetadata p1 = cleanMetadata1.getPartitionMetadata().get(k); HoodieCleanPartitionMetadata p1 = cleanMetadata1.getPartitionMetadata().get(k);
HoodieCleanPartitionMetadata p2 = retriedCleanMetadata.getPartitionMetadata().get(k); HoodieCleanPartitionMetadata p2 = retriedCleanMetadata.getPartitionMetadata().get(k);
Assert.assertEquals(p1.getDeletePathPatterns(), p2.getDeletePathPatterns()); assertEquals(p1.getDeletePathPatterns(), p2.getDeletePathPatterns());
Assert.assertEquals(p1.getSuccessDeleteFiles(), p2.getFailedDeleteFiles()); assertEquals(p1.getSuccessDeleteFiles(), p2.getFailedDeleteFiles());
Assert.assertEquals(p1.getPartitionPath(), p2.getPartitionPath()); assertEquals(p1.getPartitionPath(), p2.getPartitionPath());
Assert.assertEquals(k, p1.getPartitionPath()); assertEquals(k, p1.getPartitionPath());
}); });
} }
@@ -478,12 +478,12 @@ public class TestCleaner extends TestHoodieClientBase {
metaClient = HoodieTableMetaClient.reload(metaClient); metaClient = HoodieTableMetaClient.reload(metaClient);
List<HoodieCleanStat> hoodieCleanStatsOne = runCleaner(config); List<HoodieCleanStat> hoodieCleanStatsOne = runCleaner(config);
assertEquals("Must not clean any files", 0, assertEquals(0,
getCleanStat(hoodieCleanStatsOne, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH).getSuccessDeleteFiles() getCleanStat(hoodieCleanStatsOne, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH).getSuccessDeleteFiles()
.size()); .size(), "Must not clean any files");
assertEquals("Must not clean any files", 0, assertEquals(0,
getCleanStat(hoodieCleanStatsOne, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH).getSuccessDeleteFiles() getCleanStat(hoodieCleanStatsOne, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH).getSuccessDeleteFiles()
.size()); .size(), "Must not clean any files");
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "000", assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "000",
file1P0C0)); file1P0C0));
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, "000", assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, "000",
@@ -501,12 +501,12 @@ public class TestCleaner extends TestHoodieClientBase {
HoodieTestUtils.createDataFile(basePath, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, "001", file1P1C0); // update HoodieTestUtils.createDataFile(basePath, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, "001", file1P1C0); // update
List<HoodieCleanStat> hoodieCleanStatsTwo = runCleaner(config); List<HoodieCleanStat> hoodieCleanStatsTwo = runCleaner(config);
assertEquals("Must clean 1 file", 1, assertEquals(1,
getCleanStat(hoodieCleanStatsTwo, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH).getSuccessDeleteFiles() getCleanStat(hoodieCleanStatsTwo, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH).getSuccessDeleteFiles()
.size()); .size(), "Must clean 1 file");
assertEquals("Must clean 1 file", 1, assertEquals(1,
getCleanStat(hoodieCleanStatsTwo, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH).getSuccessDeleteFiles() getCleanStat(hoodieCleanStatsTwo, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH).getSuccessDeleteFiles()
.size()); .size(), "Must clean 1 file");
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "001", assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "001",
file2P0C1)); file2P0C1));
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, "001", assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, "001",
@@ -526,9 +526,9 @@ public class TestCleaner extends TestHoodieClientBase {
HoodieTestUtils.createNewDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "002"); HoodieTestUtils.createNewDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "002");
List<HoodieCleanStat> hoodieCleanStatsThree = runCleaner(config); List<HoodieCleanStat> hoodieCleanStatsThree = runCleaner(config);
assertEquals("Must clean two files", 2, assertEquals(2,
getCleanStat(hoodieCleanStatsThree, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH) getCleanStat(hoodieCleanStatsThree, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH)
.getSuccessDeleteFiles().size()); .getSuccessDeleteFiles().size(), "Must clean two files");
assertFalse(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "001", assertFalse(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "001",
file1P0C0)); file1P0C0));
assertFalse(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "001", assertFalse(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "001",
@@ -539,9 +539,9 @@ public class TestCleaner extends TestHoodieClientBase {
// No cleaning on partially written file, with no commit. // No cleaning on partially written file, with no commit.
HoodieTestUtils.createDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "003", file3P0C2); // update HoodieTestUtils.createDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "003", file3P0C2); // update
List<HoodieCleanStat> hoodieCleanStatsFour = runCleaner(config); List<HoodieCleanStat> hoodieCleanStatsFour = runCleaner(config);
assertEquals("Must not clean any files", 0, assertEquals(0,
getCleanStat(hoodieCleanStatsFour, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH).getSuccessDeleteFiles() getCleanStat(hoodieCleanStatsFour, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH).getSuccessDeleteFiles()
.size()); .size(), "Must not clean any files");
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "002", assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "002",
file3P0C2)); file3P0C2));
} }
@@ -578,9 +578,9 @@ public class TestCleaner extends TestHoodieClientBase {
HoodieTestUtils.createCompactionCommitFiles(fs, basePath, "001"); HoodieTestUtils.createCompactionCommitFiles(fs, basePath, "001");
List<HoodieCleanStat> hoodieCleanStats = runCleaner(config); List<HoodieCleanStat> hoodieCleanStats = runCleaner(config);
assertEquals("Must clean three files, one parquet and 2 log files", 3, assertEquals(3,
getCleanStat(hoodieCleanStats, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH).getSuccessDeleteFiles() getCleanStat(hoodieCleanStats, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH).getSuccessDeleteFiles()
.size()); .size(), "Must clean three files, one parquet and 2 log files");
assertFalse(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "000", assertFalse(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "000",
file1P0)); file1P0));
assertFalse(HoodieTestUtils.doesLogFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "000", assertFalse(HoodieTestUtils.doesLogFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "000",
@@ -646,37 +646,37 @@ public class TestCleaner extends TestHoodieClientBase {
CleanMetadataMigrator migrator = new CleanMetadataMigrator(metaClient); CleanMetadataMigrator migrator = new CleanMetadataMigrator(metaClient);
HoodieCleanMetadata oldMetadata = HoodieCleanMetadata oldMetadata =
migrator.migrateToVersion(metadata, metadata.getVersion(), CleanerUtils.CLEAN_METADATA_VERSION_1); migrator.migrateToVersion(metadata, metadata.getVersion(), CleanerUtils.CLEAN_METADATA_VERSION_1);
Assert.assertEquals(CleanerUtils.CLEAN_METADATA_VERSION_1, oldMetadata.getVersion()); assertEquals(CleanerUtils.CLEAN_METADATA_VERSION_1, oldMetadata.getVersion());
testCleanMetadataEquality(metadata, oldMetadata); testCleanMetadataEquality(metadata, oldMetadata);
testCleanMetadataPathEquality(oldMetadata, oldExpected); testCleanMetadataPathEquality(oldMetadata, oldExpected);
HoodieCleanMetadata newMetadata = migrator.upgradeToLatest(oldMetadata, oldMetadata.getVersion()); HoodieCleanMetadata newMetadata = migrator.upgradeToLatest(oldMetadata, oldMetadata.getVersion());
Assert.assertEquals(CleanerUtils.LATEST_CLEAN_METADATA_VERSION, newMetadata.getVersion()); assertEquals(CleanerUtils.LATEST_CLEAN_METADATA_VERSION, newMetadata.getVersion());
testCleanMetadataEquality(oldMetadata, newMetadata); testCleanMetadataEquality(oldMetadata, newMetadata);
testCleanMetadataPathEquality(newMetadata, newExpected); testCleanMetadataPathEquality(newMetadata, newExpected);
testCleanMetadataPathEquality(oldMetadata, oldExpected); testCleanMetadataPathEquality(oldMetadata, oldExpected);
} }
public void testCleanMetadataEquality(HoodieCleanMetadata input1, HoodieCleanMetadata input2) { public void testCleanMetadataEquality(HoodieCleanMetadata input1, HoodieCleanMetadata input2) {
Assert.assertEquals(input1.getEarliestCommitToRetain(), input2.getEarliestCommitToRetain()); assertEquals(input1.getEarliestCommitToRetain(), input2.getEarliestCommitToRetain());
Assert.assertEquals(input1.getStartCleanTime(), input2.getStartCleanTime()); assertEquals(input1.getStartCleanTime(), input2.getStartCleanTime());
Assert.assertEquals(input1.getTimeTakenInMillis(), input2.getTimeTakenInMillis()); assertEquals(input1.getTimeTakenInMillis(), input2.getTimeTakenInMillis());
Assert.assertEquals(input1.getTotalFilesDeleted(), input2.getTotalFilesDeleted()); assertEquals(input1.getTotalFilesDeleted(), input2.getTotalFilesDeleted());
Map<String, HoodieCleanPartitionMetadata> map1 = input1.getPartitionMetadata(); Map<String, HoodieCleanPartitionMetadata> map1 = input1.getPartitionMetadata();
Map<String, HoodieCleanPartitionMetadata> map2 = input2.getPartitionMetadata(); Map<String, HoodieCleanPartitionMetadata> map2 = input2.getPartitionMetadata();
Assert.assertEquals(map1.keySet(), map2.keySet()); assertEquals(map1.keySet(), map2.keySet());
List<String> partitions1 = map1.values().stream().map(HoodieCleanPartitionMetadata::getPartitionPath).collect( List<String> partitions1 = map1.values().stream().map(HoodieCleanPartitionMetadata::getPartitionPath).collect(
Collectors.toList()); Collectors.toList());
List<String> partitions2 = map2.values().stream().map(HoodieCleanPartitionMetadata::getPartitionPath).collect( List<String> partitions2 = map2.values().stream().map(HoodieCleanPartitionMetadata::getPartitionPath).collect(
Collectors.toList()); Collectors.toList());
Assert.assertEquals(partitions1, partitions2); assertEquals(partitions1, partitions2);
List<String> policies1 = map1.values().stream().map(HoodieCleanPartitionMetadata::getPolicy).collect(Collectors.toList()); List<String> policies1 = map1.values().stream().map(HoodieCleanPartitionMetadata::getPolicy).collect(Collectors.toList());
List<String> policies2 = map2.values().stream().map(HoodieCleanPartitionMetadata::getPolicy).collect(Collectors.toList()); List<String> policies2 = map2.values().stream().map(HoodieCleanPartitionMetadata::getPolicy).collect(Collectors.toList());
Assert.assertEquals(policies1, policies2); assertEquals(policies1, policies2);
} }
private void testCleanMetadataPathEquality(HoodieCleanMetadata metadata, Map<String, Tuple3> expected) { private void testCleanMetadataPathEquality(HoodieCleanMetadata metadata, Map<String, Tuple3> expected) {
@@ -687,9 +687,9 @@ public class TestCleaner extends TestHoodieClientBase {
String partitionPath = entry.getKey(); String partitionPath = entry.getKey();
HoodieCleanPartitionMetadata partitionMetadata = entry.getValue(); HoodieCleanPartitionMetadata partitionMetadata = entry.getValue();
Assert.assertEquals(expected.get(partitionPath)._1(), partitionMetadata.getDeletePathPatterns()); assertEquals(expected.get(partitionPath)._1(), partitionMetadata.getDeletePathPatterns());
Assert.assertEquals(expected.get(partitionPath)._2(), partitionMetadata.getSuccessDeleteFiles()); assertEquals(expected.get(partitionPath)._2(), partitionMetadata.getSuccessDeleteFiles());
Assert.assertEquals(expected.get(partitionPath)._3(), partitionMetadata.getFailedDeleteFiles()); assertEquals(expected.get(partitionPath)._3(), partitionMetadata.getFailedDeleteFiles());
} }
} }
@@ -751,12 +751,12 @@ public class TestCleaner extends TestHoodieClientBase {
metaClient = HoodieTableMetaClient.reload(metaClient); metaClient = HoodieTableMetaClient.reload(metaClient);
List<HoodieCleanStat> hoodieCleanStatsOne = runCleaner(config, simulateFailureRetry); List<HoodieCleanStat> hoodieCleanStatsOne = runCleaner(config, simulateFailureRetry);
assertEquals("Must not clean any files", 0, assertEquals(0,
getCleanStat(hoodieCleanStatsOne, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH).getSuccessDeleteFiles() getCleanStat(hoodieCleanStatsOne, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH).getSuccessDeleteFiles()
.size()); .size(), "Must not clean any files");
assertEquals("Must not clean any files", 0, assertEquals(0,
getCleanStat(hoodieCleanStatsOne, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH).getSuccessDeleteFiles() getCleanStat(hoodieCleanStatsOne, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH).getSuccessDeleteFiles()
.size()); .size(), "Must not clean any files");
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "000", assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "000",
file1P0C0)); file1P0C0));
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, "000", assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, "000",
@@ -786,12 +786,12 @@ public class TestCleaner extends TestHoodieClientBase {
new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMMIT_ACTION, "001"), new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMMIT_ACTION, "001"),
Option.of(commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8))); Option.of(commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8)));
List<HoodieCleanStat> hoodieCleanStatsTwo = runCleaner(config, simulateFailureRetry); List<HoodieCleanStat> hoodieCleanStatsTwo = runCleaner(config, simulateFailureRetry);
assertEquals("Must not clean any files", 0, assertEquals(0,
getCleanStat(hoodieCleanStatsTwo, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH).getSuccessDeleteFiles() getCleanStat(hoodieCleanStatsTwo, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH).getSuccessDeleteFiles()
.size()); .size(), "Must not clean any files");
assertEquals("Must not clean any files", 0, assertEquals(0,
getCleanStat(hoodieCleanStatsTwo, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH).getSuccessDeleteFiles() getCleanStat(hoodieCleanStatsTwo, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH).getSuccessDeleteFiles()
.size()); .size(), "Must not clean any files");
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "001", assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "001",
file2P0C1)); file2P0C1));
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, "001", assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, "001",
@@ -820,9 +820,10 @@ public class TestCleaner extends TestHoodieClientBase {
Option.of(commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8))); Option.of(commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8)));
List<HoodieCleanStat> hoodieCleanStatsThree = runCleaner(config, simulateFailureRetry); List<HoodieCleanStat> hoodieCleanStatsThree = runCleaner(config, simulateFailureRetry);
assertEquals("Must not clean any file. We have to keep 1 version before the latest commit time to keep", 0, assertEquals(0,
getCleanStat(hoodieCleanStatsThree, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH) getCleanStat(hoodieCleanStatsThree, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH)
.getSuccessDeleteFiles().size()); .getSuccessDeleteFiles().size(),
"Must not clean any file. We have to keep 1 version before the latest commit time to keep");
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "000", assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "000",
file1P0C0)); file1P0C0));
@@ -844,9 +845,9 @@ public class TestCleaner extends TestHoodieClientBase {
Option.of(commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8))); Option.of(commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8)));
List<HoodieCleanStat> hoodieCleanStatsFour = runCleaner(config, simulateFailureRetry); List<HoodieCleanStat> hoodieCleanStatsFour = runCleaner(config, simulateFailureRetry);
assertEquals("Must not clean one old file", 1, assertEquals(1,
getCleanStat(hoodieCleanStatsFour, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH).getSuccessDeleteFiles() getCleanStat(hoodieCleanStatsFour, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH).getSuccessDeleteFiles()
.size()); .size(), "Must not clean one old file");
assertFalse(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "000", assertFalse(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "000",
file1P0C0)); file1P0C0));
@@ -875,8 +876,8 @@ public class TestCleaner extends TestHoodieClientBase {
Option.of(commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8))); Option.of(commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8)));
List<HoodieCleanStat> hoodieCleanStatsFive = runCleaner(config, simulateFailureRetry); List<HoodieCleanStat> hoodieCleanStatsFive = runCleaner(config, simulateFailureRetry);
HoodieCleanStat cleanStat = getCleanStat(hoodieCleanStatsFive, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH); HoodieCleanStat cleanStat = getCleanStat(hoodieCleanStatsFive, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH);
assertEquals("Must not clean any files", 0, assertEquals(0,
cleanStat != null ? cleanStat.getSuccessDeleteFiles().size() : 0); cleanStat != null ? cleanStat.getSuccessDeleteFiles().size() : 0, "Must not clean any files");
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "001", assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "001",
file1P0C0)); file1P0C0));
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "001", assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "001",
@@ -889,8 +890,8 @@ public class TestCleaner extends TestHoodieClientBase {
@Test @Test
public void testCleanMarkerDataFilesOnRollback() throws IOException { public void testCleanMarkerDataFilesOnRollback() throws IOException {
List<String> markerFiles = createMarkerFiles("000", 10); List<String> markerFiles = createMarkerFiles("000", 10);
assertEquals("Some marker files are created.", 10, markerFiles.size()); assertEquals(10, markerFiles.size(), "Some marker files are created.");
assertEquals("Some marker files are created.", markerFiles.size(), getTotalTempFiles()); assertEquals(markerFiles.size(), getTotalTempFiles(), "Some marker files are created.");
HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath).build(); HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath).build();
metaClient = HoodieTableMetaClient.reload(metaClient); metaClient = HoodieTableMetaClient.reload(metaClient);
@@ -901,7 +902,7 @@ public class TestCleaner extends TestHoodieClientBase {
new HoodieInstant(State.REQUESTED, HoodieTimeline.COMMIT_ACTION, "000"), Option.empty()); new HoodieInstant(State.REQUESTED, HoodieTimeline.COMMIT_ACTION, "000"), Option.empty());
metaClient.reloadActiveTimeline(); metaClient.reloadActiveTimeline();
table.rollback(jsc, "001", new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMMIT_ACTION, "000"), true); table.rollback(jsc, "001", new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMMIT_ACTION, "000"), true);
assertEquals("All temp files are deleted.", 0, getTotalTempFiles()); assertEquals(0, getTotalTempFiles(), "All temp files are deleted.");
} }
/** /**
@@ -922,7 +923,7 @@ public class TestCleaner extends TestHoodieClientBase {
metaClient = HoodieTableMetaClient.reload(metaClient); metaClient = HoodieTableMetaClient.reload(metaClient);
List<HoodieCleanStat> hoodieCleanStatsOne = runCleaner(config); List<HoodieCleanStat> hoodieCleanStatsOne = runCleaner(config);
assertTrue("HoodieCleanStats should be empty for a table with empty partitionPaths", hoodieCleanStatsOne.isEmpty()); assertTrue(hoodieCleanStatsOne.isEmpty(), "HoodieCleanStats should be empty for a table with empty partitionPaths");
} }
/** /**
@@ -998,7 +999,7 @@ public class TestCleaner extends TestHoodieClientBase {
metaClient = HoodieTableMetaClient.reload(metaClient); metaClient = HoodieTableMetaClient.reload(metaClient);
List<HoodieCleanStat> cleanStats = runCleaner(config); List<HoodieCleanStat> cleanStats = runCleaner(config);
assertEquals("Must not clean any files", 0, cleanStats.size()); assertEquals(0, cleanStats.size(), "Must not clean any files");
} }
/** /**
@@ -1090,9 +1091,9 @@ public class TestCleaner extends TestHoodieClientBase {
.getLatestFileSlicesBeforeOrOn(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, baseInstantForCompaction, .getLatestFileSlicesBeforeOrOn(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, baseInstantForCompaction,
true) true)
.filter(fs -> fs.getFileId().equals(fileId)).findFirst()); .filter(fs -> fs.getFileId().equals(fileId)).findFirst());
Assert.assertTrue("Base Instant for Compaction must be preserved", fileSliceForCompaction.isPresent()); assertTrue(fileSliceForCompaction.isPresent(), "Base Instant for Compaction must be preserved");
Assert.assertTrue("FileSlice has data-file", fileSliceForCompaction.get().getBaseFile().isPresent()); assertTrue(fileSliceForCompaction.get().getBaseFile().isPresent(), "FileSlice has data-file");
Assert.assertEquals("FileSlice has log-files", 2, fileSliceForCompaction.get().getLogFiles().count()); assertEquals(2, fileSliceForCompaction.get().getLogFiles().count(), "FileSlice has log-files");
}); });
// Test for progress (Did we clean some files ?) // Test for progress (Did we clean some files ?)
@@ -1100,10 +1101,10 @@ public class TestCleaner extends TestHoodieClientBase {
.flatMap(cleanStat -> convertPathToFileIdWithCommitTime(newMetaClient, cleanStat.getDeletePathPatterns()) .flatMap(cleanStat -> convertPathToFileIdWithCommitTime(newMetaClient, cleanStat.getDeletePathPatterns())
.map(fileIdWithCommitTime -> { .map(fileIdWithCommitTime -> {
if (expFileIdToPendingCompaction.containsKey(fileIdWithCommitTime.getKey())) { if (expFileIdToPendingCompaction.containsKey(fileIdWithCommitTime.getKey())) {
Assert.assertTrue("Deleted instant time must be less than pending compaction", assertTrue(HoodieTimeline.compareTimestamps(
HoodieTimeline.compareTimestamps(
fileIdToLatestInstantBeforeCompaction.get(fileIdWithCommitTime.getKey()), fileIdToLatestInstantBeforeCompaction.get(fileIdWithCommitTime.getKey()),
fileIdWithCommitTime.getValue(), HoodieTimeline.GREATER)); fileIdWithCommitTime.getValue(), HoodieTimeline.GREATER),
"Deleted instant time must be less than pending compaction");
return true; return true;
} }
return false; return false;
@@ -1111,9 +1112,9 @@ public class TestCleaner extends TestHoodieClientBase {
long numDeleted = long numDeleted =
hoodieCleanStats.stream().mapToLong(cleanStat -> cleanStat.getDeletePathPatterns().size()).sum(); hoodieCleanStats.stream().mapToLong(cleanStat -> cleanStat.getDeletePathPatterns().size()).sum();
// Tighter check for regression // Tighter check for regression
Assert.assertEquals("Correct number of files deleted", expNumFilesDeleted, numDeleted); assertEquals(expNumFilesDeleted, numDeleted, "Correct number of files deleted");
Assert.assertEquals("Correct number of files under compaction deleted", expNumFilesUnderCompactionDeleted, assertEquals(expNumFilesUnderCompactionDeleted, numFilesUnderCompactionDeleted,
numFilesUnderCompactionDeleted); "Correct number of files under compaction deleted");
} }
/** /**

View File

@@ -25,22 +25,24 @@ import org.apache.hudi.common.fs.ConsistencyGuardConfig;
import org.apache.hudi.common.fs.FailSafeConsistencyGuard; import org.apache.hudi.common.fs.FailSafeConsistencyGuard;
import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.Path;
import org.junit.After; import org.junit.jupiter.api.AfterEach;
import org.junit.Before; import org.junit.jupiter.api.BeforeEach;
import org.junit.Test; import org.junit.jupiter.api.Test;
import java.util.Arrays; import java.util.Arrays;
import java.util.concurrent.TimeoutException; import java.util.concurrent.TimeoutException;
import static org.junit.jupiter.api.Assertions.assertThrows;
public class TestConsistencyGuard extends HoodieClientTestHarness { public class TestConsistencyGuard extends HoodieClientTestHarness {
@Before @BeforeEach
public void setup() { public void setup() {
initPath(); initPath();
initFileSystemWithDefaultConfiguration(); initFileSystemWithDefaultConfiguration();
} }
@After @AfterEach
public void tearDown() throws Exception { public void tearDown() throws Exception {
cleanupFileSystem(); cleanupFileSystem();
} }
@@ -65,35 +67,43 @@ public class TestConsistencyGuard extends HoodieClientTestHarness {
.asList(basePath + "/partition/path/f1_1-0-1_000.parquet", basePath + "/partition/path/f2_1-0-1_000.parquet")); .asList(basePath + "/partition/path/f1_1-0-1_000.parquet", basePath + "/partition/path/f2_1-0-1_000.parquet"));
} }
@Test(expected = TimeoutException.class) @Test
public void testCheckFailingAppear() throws Exception { public void testCheckFailingAppear() throws Exception {
HoodieClientTestUtils.fakeDataFile(basePath, "partition/path", "000", "f1"); HoodieClientTestUtils.fakeDataFile(basePath, "partition/path", "000", "f1");
ConsistencyGuard passing = new FailSafeConsistencyGuard(fs, getConsistencyGuardConfig()); ConsistencyGuard passing = new FailSafeConsistencyGuard(fs, getConsistencyGuardConfig());
assertThrows(TimeoutException.class, () -> {
passing.waitTillAllFilesAppear(basePath + "/partition/path", Arrays passing.waitTillAllFilesAppear(basePath + "/partition/path", Arrays
.asList(basePath + "/partition/path/f1_1-0-2_000.parquet", basePath + "/partition/path/f2_1-0-2_000.parquet")); .asList(basePath + "/partition/path/f1_1-0-2_000.parquet", basePath + "/partition/path/f2_1-0-2_000.parquet"));
});
} }
@Test(expected = TimeoutException.class) @Test
public void testCheckFailingAppears() throws Exception { public void testCheckFailingAppears() throws Exception {
HoodieClientTestUtils.fakeDataFile(basePath, "partition/path", "000", "f1"); HoodieClientTestUtils.fakeDataFile(basePath, "partition/path", "000", "f1");
ConsistencyGuard passing = new FailSafeConsistencyGuard(fs, getConsistencyGuardConfig()); ConsistencyGuard passing = new FailSafeConsistencyGuard(fs, getConsistencyGuardConfig());
assertThrows(TimeoutException.class, () -> {
passing.waitTillFileAppears(new Path(basePath + "/partition/path/f1_1-0-2_000.parquet")); passing.waitTillFileAppears(new Path(basePath + "/partition/path/f1_1-0-2_000.parquet"));
});
} }
@Test(expected = TimeoutException.class) @Test
public void testCheckFailingDisappear() throws Exception { public void testCheckFailingDisappear() throws Exception {
HoodieClientTestUtils.fakeDataFile(basePath, "partition/path", "000", "f1"); HoodieClientTestUtils.fakeDataFile(basePath, "partition/path", "000", "f1");
ConsistencyGuard passing = new FailSafeConsistencyGuard(fs, getConsistencyGuardConfig()); ConsistencyGuard passing = new FailSafeConsistencyGuard(fs, getConsistencyGuardConfig());
assertThrows(TimeoutException.class, () -> {
passing.waitTillAllFilesDisappear(basePath + "/partition/path", Arrays passing.waitTillAllFilesDisappear(basePath + "/partition/path", Arrays
.asList(basePath + "/partition/path/f1_1-0-1_000.parquet", basePath + "/partition/path/f2_1-0-2_000.parquet")); .asList(basePath + "/partition/path/f1_1-0-1_000.parquet", basePath + "/partition/path/f2_1-0-2_000.parquet"));
});
} }
@Test(expected = TimeoutException.class) @Test
public void testCheckFailingDisappears() throws Exception { public void testCheckFailingDisappears() throws Exception {
HoodieClientTestUtils.fakeDataFile(basePath, "partition/path", "000", "f1"); HoodieClientTestUtils.fakeDataFile(basePath, "partition/path", "000", "f1");
HoodieClientTestUtils.fakeDataFile(basePath, "partition/path", "000", "f1"); HoodieClientTestUtils.fakeDataFile(basePath, "partition/path", "000", "f1");
ConsistencyGuard passing = new FailSafeConsistencyGuard(fs, getConsistencyGuardConfig()); ConsistencyGuard passing = new FailSafeConsistencyGuard(fs, getConsistencyGuardConfig());
assertThrows(TimeoutException.class, () -> {
passing.waitTillFileDisappears(new Path(basePath + "/partition/path/f1_1-0-1_000.parquet")); passing.waitTillFileDisappears(new Path(basePath + "/partition/path/f1_1-0-1_000.parquet"));
});
} }
private ConsistencyGuardConfig getConsistencyGuardConfig() { private ConsistencyGuardConfig getConsistencyGuardConfig() {

View File

@@ -18,8 +18,6 @@
package org.apache.hudi.table; package org.apache.hudi.table;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hudi.client.HoodieReadClient; import org.apache.hudi.client.HoodieReadClient;
import org.apache.hudi.client.HoodieWriteClient; import org.apache.hudi.client.HoodieWriteClient;
import org.apache.hudi.client.WriteStatus; import org.apache.hudi.client.WriteStatus;
@@ -57,23 +55,27 @@ import org.apache.hudi.hadoop.HoodieParquetInputFormat;
import org.apache.hudi.hadoop.realtime.HoodieParquetRealtimeInputFormat; import org.apache.hudi.hadoop.realtime.HoodieParquetRealtimeInputFormat;
import org.apache.hudi.index.HoodieIndex; import org.apache.hudi.index.HoodieIndex;
import org.apache.hudi.index.HoodieIndex.IndexType; import org.apache.hudi.index.HoodieIndex.IndexType;
import org.apache.hudi.table.action.deltacommit.DeleteDeltaCommitActionExecutor;
import org.apache.hudi.table.action.deltacommit.DeltaCommitActionExecutor;
import org.apache.avro.generic.GenericRecord; import org.apache.avro.generic.GenericRecord;
import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.Path;
import org.apache.hudi.table.action.deltacommit.DeltaCommitActionExecutor; import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hudi.table.action.deltacommit.DeleteDeltaCommitActionExecutor; import org.apache.hadoop.mapred.JobConf;
import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaRDD;
import org.junit.After; import org.junit.jupiter.api.AfterEach;
import org.junit.Assert; import org.junit.jupiter.api.BeforeEach;
import org.junit.Before; import org.junit.jupiter.api.Test;
import org.junit.Test; import org.junit.jupiter.api.io.TempDir;
import org.junit.rules.TemporaryFolder;
import java.io.File; import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap; import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
@@ -82,9 +84,9 @@ import java.util.stream.Collectors;
import java.util.stream.Stream; import java.util.stream.Stream;
import static org.apache.hudi.common.HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA; import static org.apache.hudi.common.HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA;
import static org.junit.Assert.assertEquals; import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.Assert.assertFalse; import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.Assert.assertTrue; import static org.junit.jupiter.api.Assertions.assertTrue;
public class TestMergeOnReadTable extends HoodieClientTestHarness { public class TestMergeOnReadTable extends HoodieClientTestHarness {
@@ -94,7 +96,7 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
private HoodieParquetRealtimeInputFormat rtInputFormat; private HoodieParquetRealtimeInputFormat rtInputFormat;
private JobConf rtJobConf; private JobConf rtJobConf;
@Before @BeforeEach
public void init() throws IOException { public void init() throws IOException {
initDFS(); initDFS();
initSparkContexts("TestHoodieMergeOnReadTable"); initSparkContexts("TestHoodieMergeOnReadTable");
@@ -114,7 +116,7 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
rtInputFormat.setConf(rtJobConf); rtInputFormat.setConf(rtJobConf);
} }
@After @AfterEach
public void clean() throws IOException { public void clean() throws IOException {
cleanupDFS(); cleanupDFS();
cleanupSparkContexts(); cleanupSparkContexts();
@@ -159,13 +161,13 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
// verify that there is a commit // verify that there is a commit
metaClient = HoodieTableMetaClient.reload(metaClient); metaClient = HoodieTableMetaClient.reload(metaClient);
HoodieTimeline timeline = metaClient.getCommitTimeline().filterCompletedInstants(); HoodieTimeline timeline = metaClient.getCommitTimeline().filterCompletedInstants();
assertEquals("Expecting a single commit.", 1, assertEquals(1, timeline.findInstantsAfter("000", Integer.MAX_VALUE).countInstants(),
timeline.findInstantsAfter("000", Integer.MAX_VALUE).countInstants()); "Expecting a single commit.");
String latestCompactionCommitTime = timeline.lastInstant().get().getTimestamp(); String latestCompactionCommitTime = timeline.lastInstant().get().getTimestamp();
assertTrue(HoodieTimeline.compareTimestamps("000", latestCompactionCommitTime, HoodieTimeline.LESSER)); assertTrue(HoodieTimeline.compareTimestamps("000", latestCompactionCommitTime, HoodieTimeline.LESSER));
assertEquals("Must contain 200 records", 200, assertEquals(200, HoodieClientTestUtils.readSince(basePath, sqlContext, timeline, "000").count(),
HoodieClientTestUtils.readSince(basePath, sqlContext, timeline, "000").count()); "Must contain 200 records");
} }
} }
@@ -310,7 +312,7 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
Option<HoodieInstant> deltaCommit = metaClient.getActiveTimeline().getDeltaCommitTimeline().firstInstant(); Option<HoodieInstant> deltaCommit = metaClient.getActiveTimeline().getDeltaCommitTimeline().firstInstant();
assertTrue(deltaCommit.isPresent()); assertTrue(deltaCommit.isPresent());
assertEquals("Delta commit should be 001", "001", deltaCommit.get().getTimestamp()); assertEquals("001", deltaCommit.get().getTimestamp(), "Delta commit should be 001");
Option<HoodieInstant> commit = metaClient.getActiveTimeline().getCommitTimeline().firstInstant(); Option<HoodieInstant> commit = metaClient.getActiveTimeline().getCommitTimeline().firstInstant();
assertFalse(commit.isPresent()); assertFalse(commit.isPresent());
@@ -323,8 +325,8 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
roView = new HoodieTableFileSystemView(metaClient, hoodieTable.getCompletedCommitsTimeline(), allFiles); roView = new HoodieTableFileSystemView(metaClient, hoodieTable.getCompletedCommitsTimeline(), allFiles);
dataFilesToRead = roView.getLatestBaseFiles(); dataFilesToRead = roView.getLatestBaseFiles();
assertTrue("should list the parquet files we wrote in the delta commit", assertTrue(dataFilesToRead.findAny().isPresent(),
dataFilesToRead.findAny().isPresent()); "should list the parquet files we wrote in the delta commit");
/** /**
* Write 2 (only updates, written to .log file) * Write 2 (only updates, written to .log file)
@@ -352,7 +354,7 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
metaClient = HoodieTableMetaClient.reload(metaClient); metaClient = HoodieTableMetaClient.reload(metaClient);
deltaCommit = metaClient.getActiveTimeline().getDeltaCommitTimeline().lastInstant(); deltaCommit = metaClient.getActiveTimeline().getDeltaCommitTimeline().lastInstant();
assertTrue(deltaCommit.isPresent()); assertTrue(deltaCommit.isPresent());
assertEquals("Latest Delta commit should be 004", "004", deltaCommit.get().getTimestamp()); assertEquals("004", deltaCommit.get().getTimestamp(), "Latest Delta commit should be 004");
commit = metaClient.getActiveTimeline().getCommitTimeline().firstInstant(); commit = metaClient.getActiveTimeline().getCommitTimeline().firstInstant();
assertFalse(commit.isPresent()); assertFalse(commit.isPresent());
@@ -365,7 +367,7 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
List<String> dataFiles = roView.getLatestBaseFiles().map(HoodieBaseFile::getPath).collect(Collectors.toList()); List<String> dataFiles = roView.getLatestBaseFiles().map(HoodieBaseFile::getPath).collect(Collectors.toList());
List<GenericRecord> recordsRead = HoodieMergeOnReadTestUtils.getRecordsUsingInputFormat(dataFiles, basePath); List<GenericRecord> recordsRead = HoodieMergeOnReadTestUtils.getRecordsUsingInputFormat(dataFiles, basePath);
// Wrote 20 records and deleted 20 records, so remaining 20-20 = 0 // Wrote 20 records and deleted 20 records, so remaining 20-20 = 0
assertEquals("Must contain 0 records", 0, recordsRead.size()); assertEquals(0, recordsRead.size(), "Must contain 0 records");
} }
} }
@@ -394,7 +396,7 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
HoodieTableMetaClient metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), cfg.getBasePath()); HoodieTableMetaClient metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), cfg.getBasePath());
Option<HoodieInstant> commit = metaClient.getActiveTimeline().getCommitTimeline().firstInstant(); Option<HoodieInstant> commit = metaClient.getActiveTimeline().getCommitTimeline().firstInstant();
assertTrue(commit.isPresent()); assertTrue(commit.isPresent());
assertEquals("commit should be 001", "001", commit.get().getTimestamp()); assertEquals("001", commit.get().getTimestamp(), "commit should be 001");
/** /**
* Write 2 (updates) * Write 2 (updates)
@@ -451,7 +453,7 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
Option<HoodieInstant> deltaCommit = metaClient.getActiveTimeline().getDeltaCommitTimeline().firstInstant(); Option<HoodieInstant> deltaCommit = metaClient.getActiveTimeline().getDeltaCommitTimeline().firstInstant();
assertTrue(deltaCommit.isPresent()); assertTrue(deltaCommit.isPresent());
assertEquals("Delta commit should be 001", "001", deltaCommit.get().getTimestamp()); assertEquals("001", deltaCommit.get().getTimestamp(), "Delta commit should be 001");
Option<HoodieInstant> commit = metaClient.getActiveTimeline().getCommitTimeline().firstInstant(); Option<HoodieInstant> commit = metaClient.getActiveTimeline().getCommitTimeline().firstInstant();
assertFalse(commit.isPresent()); assertFalse(commit.isPresent());
@@ -464,8 +466,8 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
roView = new HoodieTableFileSystemView(metaClient, hoodieTable.getCompletedCommitsTimeline(), allFiles); roView = new HoodieTableFileSystemView(metaClient, hoodieTable.getCompletedCommitsTimeline(), allFiles);
dataFilesToRead = roView.getLatestBaseFiles(); dataFilesToRead = roView.getLatestBaseFiles();
assertTrue("should list the parquet files we wrote in the delta commit", assertTrue(dataFilesToRead.findAny().isPresent(),
dataFilesToRead.findAny().isPresent()); "should list the parquet files we wrote in the delta commit");
/** /**
* Write 2 (inserts + updates - testing failed delta commit) * Write 2 (inserts + updates - testing failed delta commit)
@@ -491,11 +493,11 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
secondClient.rollback(commitTime1); secondClient.rollback(commitTime1);
allFiles = HoodieTestUtils.listAllDataFilesInPath(metaClient.getFs(), cfg.getBasePath()); allFiles = HoodieTestUtils.listAllDataFilesInPath(metaClient.getFs(), cfg.getBasePath());
// After rollback, there should be no parquet file with the failed commit time // After rollback, there should be no parquet file with the failed commit time
Assert.assertEquals(Arrays.stream(allFiles) assertEquals(0, Arrays.stream(allFiles)
.filter(file -> file.getPath().getName().contains(commitTime1)).count(), 0); .filter(file -> file.getPath().getName().contains(commitTime1)).count());
dataFiles = roView.getLatestBaseFiles().map(HoodieBaseFile::getPath).collect(Collectors.toList()); dataFiles = roView.getLatestBaseFiles().map(HoodieBaseFile::getPath).collect(Collectors.toList());
recordsRead = HoodieMergeOnReadTestUtils.getRecordsUsingInputFormat(dataFiles, basePath); recordsRead = HoodieMergeOnReadTestUtils.getRecordsUsingInputFormat(dataFiles, basePath);
assertEquals(recordsRead.size(), 200); assertEquals(200, recordsRead.size());
} }
/** /**
@@ -511,7 +513,7 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
List<String> dataFiles = roView.getLatestBaseFiles().map(HoodieBaseFile::getPath).collect(Collectors.toList()); List<String> dataFiles = roView.getLatestBaseFiles().map(HoodieBaseFile::getPath).collect(Collectors.toList());
List<GenericRecord> recordsRead = HoodieMergeOnReadTestUtils.getRecordsUsingInputFormat(dataFiles, basePath); List<GenericRecord> recordsRead = HoodieMergeOnReadTestUtils.getRecordsUsingInputFormat(dataFiles, basePath);
assertEquals(recordsRead.size(), 200); assertEquals(200, recordsRead.size());
writeRecords = jsc.parallelize(copyOfRecords, 1); writeRecords = jsc.parallelize(copyOfRecords, 1);
writeStatusJavaRDD = thirdClient.upsert(writeRecords, commitTime2); writeStatusJavaRDD = thirdClient.upsert(writeRecords, commitTime2);
@@ -524,8 +526,8 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
thirdClient.rollback(commitTime2); thirdClient.rollback(commitTime2);
allFiles = HoodieTestUtils.listAllDataFilesInPath(metaClient.getFs(), cfg.getBasePath()); allFiles = HoodieTestUtils.listAllDataFilesInPath(metaClient.getFs(), cfg.getBasePath());
// After rollback, there should be no parquet file with the failed commit time // After rollback, there should be no parquet file with the failed commit time
Assert.assertEquals(Arrays.stream(allFiles) assertEquals(0, Arrays.stream(allFiles)
.filter(file -> file.getPath().getName().contains(commitTime2)).count(), 0); .filter(file -> file.getPath().getName().contains(commitTime2)).count());
metaClient = HoodieTableMetaClient.reload(metaClient); metaClient = HoodieTableMetaClient.reload(metaClient);
hoodieTable = HoodieTable.create(metaClient, cfg, jsc); hoodieTable = HoodieTable.create(metaClient, cfg, jsc);
@@ -533,7 +535,7 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
dataFiles = roView.getLatestBaseFiles().map(HoodieBaseFile::getPath).collect(Collectors.toList()); dataFiles = roView.getLatestBaseFiles().map(HoodieBaseFile::getPath).collect(Collectors.toList());
recordsRead = HoodieMergeOnReadTestUtils.getRecordsUsingInputFormat(dataFiles, basePath); recordsRead = HoodieMergeOnReadTestUtils.getRecordsUsingInputFormat(dataFiles, basePath);
// check that the number of records read is still correct after rollback operation // check that the number of records read is still correct after rollback operation
assertEquals(recordsRead.size(), 200); assertEquals(200, recordsRead.size());
// Test compaction commit rollback // Test compaction commit rollback
/** /**
@@ -598,7 +600,7 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
Option<HoodieInstant> deltaCommit = metaClient.getActiveTimeline().getDeltaCommitTimeline().firstInstant(); Option<HoodieInstant> deltaCommit = metaClient.getActiveTimeline().getDeltaCommitTimeline().firstInstant();
assertTrue(deltaCommit.isPresent()); assertTrue(deltaCommit.isPresent());
assertEquals("Delta commit should be 001", "001", deltaCommit.get().getTimestamp()); assertEquals("001", deltaCommit.get().getTimestamp(), "Delta commit should be 001");
Option<HoodieInstant> commit = metaClient.getActiveTimeline().getCommitTimeline().firstInstant(); Option<HoodieInstant> commit = metaClient.getActiveTimeline().getCommitTimeline().firstInstant();
assertFalse(commit.isPresent()); assertFalse(commit.isPresent());
@@ -611,8 +613,8 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
roView = new HoodieTableFileSystemView(metaClient, hoodieTable.getCompletedCommitsTimeline(), allFiles); roView = new HoodieTableFileSystemView(metaClient, hoodieTable.getCompletedCommitsTimeline(), allFiles);
dataFilesToRead = roView.getLatestBaseFiles(); dataFilesToRead = roView.getLatestBaseFiles();
assertTrue("Should list the parquet files we wrote in the delta commit", assertTrue(dataFilesToRead.findAny().isPresent(),
dataFilesToRead.findAny().isPresent()); "Should list the parquet files we wrote in the delta commit");
/** /**
* Write 2 (inserts + updates) * Write 2 (inserts + updates)
@@ -628,7 +630,7 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
List<String> dataFiles = roView.getLatestBaseFiles().map(hf -> hf.getPath()).collect(Collectors.toList()); List<String> dataFiles = roView.getLatestBaseFiles().map(hf -> hf.getPath()).collect(Collectors.toList());
List<GenericRecord> recordsRead = HoodieMergeOnReadTestUtils.getRecordsUsingInputFormat(dataFiles, basePath); List<GenericRecord> recordsRead = HoodieMergeOnReadTestUtils.getRecordsUsingInputFormat(dataFiles, basePath);
assertEquals(recordsRead.size(), 200); assertEquals(200, recordsRead.size());
statuses = nClient.upsert(jsc.parallelize(copyOfRecords, 1), newCommitTime).collect(); statuses = nClient.upsert(jsc.parallelize(copyOfRecords, 1), newCommitTime).collect();
// Verify there are no errors // Verify there are no errors
@@ -761,7 +763,7 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
Option<HoodieInstant> deltaCommit = metaClient.getActiveTimeline().getDeltaCommitTimeline().firstInstant(); Option<HoodieInstant> deltaCommit = metaClient.getActiveTimeline().getDeltaCommitTimeline().firstInstant();
assertTrue(deltaCommit.isPresent()); assertTrue(deltaCommit.isPresent());
assertEquals("Delta commit should be 001", "001", deltaCommit.get().getTimestamp()); assertEquals("001", deltaCommit.get().getTimestamp(), "Delta commit should be 001");
Option<HoodieInstant> commit = metaClient.getActiveTimeline().getCommitTimeline().firstInstant(); Option<HoodieInstant> commit = metaClient.getActiveTimeline().getCommitTimeline().firstInstant();
assertFalse(commit.isPresent()); assertFalse(commit.isPresent());
@@ -776,8 +778,8 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
roView = new HoodieTableFileSystemView(metaClient, hoodieTable.getCompletedCommitsTimeline(), allFiles); roView = new HoodieTableFileSystemView(metaClient, hoodieTable.getCompletedCommitsTimeline(), allFiles);
dataFilesToRead = roView.getLatestBaseFiles(); dataFilesToRead = roView.getLatestBaseFiles();
List<HoodieBaseFile> dataFilesList = dataFilesToRead.collect(Collectors.toList()); List<HoodieBaseFile> dataFilesList = dataFilesToRead.collect(Collectors.toList());
assertTrue("Should list the parquet files we wrote in the delta commit", assertTrue(dataFilesList.size() > 0,
dataFilesList.size() > 0); "Should list the parquet files we wrote in the delta commit");
/** /**
* Write 2 (only updates + inserts, written to .log file + correction of existing parquet file size) * Write 2 (only updates + inserts, written to .log file + correction of existing parquet file size)
@@ -795,7 +797,7 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
metaClient = HoodieTableMetaClient.reload(metaClient); metaClient = HoodieTableMetaClient.reload(metaClient);
deltaCommit = metaClient.getActiveTimeline().getDeltaCommitTimeline().lastInstant(); deltaCommit = metaClient.getActiveTimeline().getDeltaCommitTimeline().lastInstant();
assertTrue(deltaCommit.isPresent()); assertTrue(deltaCommit.isPresent());
assertEquals("Latest Delta commit should be 002", "002", deltaCommit.get().getTimestamp()); assertEquals("002", deltaCommit.get().getTimestamp(), "Latest Delta commit should be 002");
commit = metaClient.getActiveTimeline().getCommitTimeline().firstInstant(); commit = metaClient.getActiveTimeline().getCommitTimeline().firstInstant();
assertFalse(commit.isPresent()); assertFalse(commit.isPresent());
@@ -813,7 +815,7 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
List<String> dataFiles = roView.getLatestBaseFiles().map(HoodieBaseFile::getPath).collect(Collectors.toList()); List<String> dataFiles = roView.getLatestBaseFiles().map(HoodieBaseFile::getPath).collect(Collectors.toList());
List<GenericRecord> recordsRead = HoodieMergeOnReadTestUtils.getRecordsUsingInputFormat(dataFiles, basePath); List<GenericRecord> recordsRead = HoodieMergeOnReadTestUtils.getRecordsUsingInputFormat(dataFiles, basePath);
// Wrote 20 records in 2 batches // Wrote 20 records in 2 batches
assertEquals("Must contain 40 records", 40, recordsRead.size()); assertEquals(40, recordsRead.size(), "Must contain 40 records");
} }
} }
@@ -855,7 +857,7 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
List<FileSlice> groupedLogFiles = List<FileSlice> groupedLogFiles =
table.getSliceView().getLatestFileSlices(partitionPath).collect(Collectors.toList()); table.getSliceView().getLatestFileSlices(partitionPath).collect(Collectors.toList());
for (FileSlice fileSlice : groupedLogFiles) { for (FileSlice fileSlice : groupedLogFiles) {
assertEquals("There should be 1 log file written for every data file", 1, fileSlice.getLogFiles().count()); assertEquals(1, fileSlice.getLogFiles().count(), "There should be 1 log file written for every data file");
} }
} }
@@ -874,14 +876,15 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
table = HoodieTable.create(metaClient, config, jsc); table = HoodieTable.create(metaClient, config, jsc);
HoodieActiveTimeline timeline = metaClient.getActiveTimeline(); HoodieActiveTimeline timeline = metaClient.getActiveTimeline();
assertTrue("Compaction commit should be > than last insert", HoodieTimeline assertTrue(HoodieTimeline
.compareTimestamps(timeline.lastInstant().get().getTimestamp(), newCommitTime, HoodieTimeline.GREATER)); .compareTimestamps(timeline.lastInstant().get().getTimestamp(), newCommitTime, HoodieTimeline.GREATER),
"Compaction commit should be > than last insert");
for (String partitionPath : dataGen.getPartitionPaths()) { for (String partitionPath : dataGen.getPartitionPaths()) {
List<FileSlice> groupedLogFiles = List<FileSlice> groupedLogFiles =
table.getSliceView().getLatestFileSlices(partitionPath).collect(Collectors.toList()); table.getSliceView().getLatestFileSlices(partitionPath).collect(Collectors.toList());
for (FileSlice slice : groupedLogFiles) { for (FileSlice slice : groupedLogFiles) {
assertEquals("After compaction there should be no log files visible on a full view", 0, slice.getLogFiles().count()); assertEquals(0, slice.getLogFiles().count(), "After compaction there should be no log files visible on a full view");
} }
List<WriteStatus> writeStatuses = result.collect(); List<WriteStatus> writeStatuses = result.collect();
assertTrue(writeStatuses.stream().anyMatch(writeStatus -> writeStatus.getStat().getPartitionPath().contentEquals(partitionPath))); assertTrue(writeStatuses.stream().anyMatch(writeStatus -> writeStatus.getStat().getPartitionPath().contentEquals(partitionPath)));
@@ -911,23 +914,23 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
for (String partitionPath : dataGen.getPartitionPaths()) { for (String partitionPath : dataGen.getPartitionPaths()) {
assertEquals(0, tableRTFileSystemView.getLatestFileSlices(partitionPath) assertEquals(0, tableRTFileSystemView.getLatestFileSlices(partitionPath)
.filter(fileSlice -> fileSlice.getBaseFile().isPresent()).count()); .filter(fileSlice -> fileSlice.getBaseFile().isPresent()).count());
Assert.assertTrue(tableRTFileSystemView.getLatestFileSlices(partitionPath).anyMatch(fileSlice -> fileSlice.getLogFiles().count() > 0)); assertTrue(tableRTFileSystemView.getLatestFileSlices(partitionPath).anyMatch(fileSlice -> fileSlice.getLogFiles().count() > 0));
numLogFiles += tableRTFileSystemView.getLatestFileSlices(partitionPath) numLogFiles += tableRTFileSystemView.getLatestFileSlices(partitionPath)
.filter(fileSlice -> fileSlice.getLogFiles().count() > 0).count(); .filter(fileSlice -> fileSlice.getLogFiles().count() > 0).count();
} }
Assert.assertTrue(numLogFiles > 0); assertTrue(numLogFiles > 0);
// Do a compaction // Do a compaction
String instantTime = writeClient.scheduleCompaction(Option.empty()).get().toString(); String instantTime = writeClient.scheduleCompaction(Option.empty()).get().toString();
statuses = writeClient.compact(instantTime); statuses = writeClient.compact(instantTime);
assertEquals(statuses.map(status -> status.getStat().getPath().contains("parquet")).count(), numLogFiles); assertEquals(statuses.map(status -> status.getStat().getPath().contains("parquet")).count(), numLogFiles);
Assert.assertEquals(statuses.count(), numLogFiles); assertEquals(statuses.count(), numLogFiles);
writeClient.commitCompaction(instantTime, statuses, Option.empty()); writeClient.commitCompaction(instantTime, statuses, Option.empty());
} }
} }
@Test @Test
public void testInsertsGeneratedIntoLogFilesRollback() throws Exception { public void testInsertsGeneratedIntoLogFilesRollback(@TempDir java.nio.file.Path tempFolder) throws Exception {
// insert 100 records // insert 100 records
// Setting IndexType to be InMemory to simulate Global Index nature // Setting IndexType to be InMemory to simulate Global Index nature
HoodieWriteConfig config = getConfigBuilder(false, IndexType.INMEMORY).build(); HoodieWriteConfig config = getConfigBuilder(false, IndexType.INMEMORY).build();
@@ -942,14 +945,14 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
List<WriteStatus> writeStatuses = statuses.collect(); List<WriteStatus> writeStatuses = statuses.collect();
// Ensure that inserts are written to only log files // Ensure that inserts are written to only log files
Assert.assertEquals( assertEquals(0,
writeStatuses.stream().filter(writeStatus -> !writeStatus.getStat().getPath().contains("log")).count(), 0); writeStatuses.stream().filter(writeStatus -> !writeStatus.getStat().getPath().contains("log")).count());
Assert.assertTrue( assertTrue(
writeStatuses.stream().anyMatch(writeStatus -> writeStatus.getStat().getPath().contains("log"))); writeStatuses.stream().anyMatch(writeStatus -> writeStatus.getStat().getPath().contains("log")));
// rollback a failed commit // rollback a failed commit
boolean rollback = writeClient.rollback(newCommitTime); boolean rollback = writeClient.rollback(newCommitTime);
Assert.assertTrue(rollback); assertTrue(rollback);
newCommitTime = "101"; newCommitTime = "101";
writeClient.startCommitWithTime(newCommitTime); writeClient.startCommitWithTime(newCommitTime);
@@ -972,9 +975,7 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
// Save the .commit file to local directory. // Save the .commit file to local directory.
// Rollback will be called twice to test the case where rollback failed first time and retried. // Rollback will be called twice to test the case where rollback failed first time and retried.
// We got the "BaseCommitTime cannot be null" exception before the fix // We got the "BaseCommitTime cannot be null" exception before the fix
TemporaryFolder folder = new TemporaryFolder(); File file = Files.createTempFile(tempFolder, null, null).toFile();
folder.create();
File file = folder.newFile();
metaClient.getFs().copyToLocalFile(new Path(metaClient.getMetaPath(), fileName), metaClient.getFs().copyToLocalFile(new Path(metaClient.getMetaPath(), fileName),
new Path(file.getAbsolutePath())); new Path(file.getAbsolutePath()));
writeClient.rollback(newCommitTime); writeClient.rollback(newCommitTime);
@@ -985,8 +986,8 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
long numLogFiles = 0; long numLogFiles = 0;
for (String partitionPath : dataGen.getPartitionPaths()) { for (String partitionPath : dataGen.getPartitionPaths()) {
Assert.assertTrue(tableRTFileSystemView.getLatestFileSlices(partitionPath).noneMatch(fileSlice -> fileSlice.getBaseFile().isPresent())); assertTrue(tableRTFileSystemView.getLatestFileSlices(partitionPath).noneMatch(fileSlice -> fileSlice.getBaseFile().isPresent()));
Assert.assertTrue(tableRTFileSystemView.getLatestFileSlices(partitionPath).noneMatch(fileSlice -> fileSlice.getLogFiles().count() > 0)); assertTrue(tableRTFileSystemView.getLatestFileSlices(partitionPath).noneMatch(fileSlice -> fileSlice.getLogFiles().count() > 0));
numLogFiles += tableRTFileSystemView.getLatestFileSlices(partitionPath) numLogFiles += tableRTFileSystemView.getLatestFileSlices(partitionPath)
.filter(fileSlice -> fileSlice.getLogFiles().count() > 0).count(); .filter(fileSlice -> fileSlice.getLogFiles().count() > 0).count();
} }
@@ -996,7 +997,6 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
Thread.sleep(1000); Thread.sleep(1000);
// Rollback again to pretend the first rollback failed partially. This should not error our // Rollback again to pretend the first rollback failed partially. This should not error our
writeClient.rollback(newCommitTime); writeClient.rollback(newCommitTime);
folder.delete();
} }
} }
@@ -1022,19 +1022,19 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
long numLogFiles = 0; long numLogFiles = 0;
for (String partitionPath : dataGen.getPartitionPaths()) { for (String partitionPath : dataGen.getPartitionPaths()) {
Assert.assertTrue(tableRTFileSystemView.getLatestFileSlices(partitionPath).noneMatch(fileSlice -> fileSlice.getBaseFile().isPresent())); assertTrue(tableRTFileSystemView.getLatestFileSlices(partitionPath).noneMatch(fileSlice -> fileSlice.getBaseFile().isPresent()));
Assert.assertTrue(tableRTFileSystemView.getLatestFileSlices(partitionPath).anyMatch(fileSlice -> fileSlice.getLogFiles().count() > 0)); assertTrue(tableRTFileSystemView.getLatestFileSlices(partitionPath).anyMatch(fileSlice -> fileSlice.getLogFiles().count() > 0));
numLogFiles += tableRTFileSystemView.getLatestFileSlices(partitionPath) numLogFiles += tableRTFileSystemView.getLatestFileSlices(partitionPath)
.filter(fileSlice -> fileSlice.getLogFiles().count() > 0).count(); .filter(fileSlice -> fileSlice.getLogFiles().count() > 0).count();
} }
Assert.assertTrue(numLogFiles > 0); assertTrue(numLogFiles > 0);
// Do a compaction // Do a compaction
newCommitTime = writeClient.scheduleCompaction(Option.empty()).get().toString(); newCommitTime = writeClient.scheduleCompaction(Option.empty()).get().toString();
statuses = writeClient.compact(newCommitTime); statuses = writeClient.compact(newCommitTime);
// Ensure all log files have been compacted into parquet files // Ensure all log files have been compacted into parquet files
assertEquals(statuses.map(status -> status.getStat().getPath().contains("parquet")).count(), numLogFiles); assertEquals(statuses.map(status -> status.getStat().getPath().contains("parquet")).count(), numLogFiles);
Assert.assertEquals(statuses.count(), numLogFiles); assertEquals(statuses.count(), numLogFiles);
writeClient.commitCompaction(newCommitTime, statuses, Option.empty()); writeClient.commitCompaction(newCommitTime, statuses, Option.empty());
// Trigger a rollback of compaction // Trigger a rollback of compaction
writeClient.rollback(newCommitTime); writeClient.rollback(newCommitTime);
@@ -1044,8 +1044,8 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
Option<HoodieInstant> lastInstant = ((SyncableFileSystemView) tableRTFileSystemView).getLastInstant(); Option<HoodieInstant> lastInstant = ((SyncableFileSystemView) tableRTFileSystemView).getLastInstant();
System.out.println("Last Instant =" + lastInstant); System.out.println("Last Instant =" + lastInstant);
for (String partitionPath : dataGen.getPartitionPaths()) { for (String partitionPath : dataGen.getPartitionPaths()) {
Assert.assertTrue(tableRTFileSystemView.getLatestFileSlices(partitionPath).noneMatch(fileSlice -> fileSlice.getBaseFile().isPresent())); assertTrue(tableRTFileSystemView.getLatestFileSlices(partitionPath).noneMatch(fileSlice -> fileSlice.getBaseFile().isPresent()));
Assert.assertTrue(tableRTFileSystemView.getLatestFileSlices(partitionPath).anyMatch(fileSlice -> fileSlice.getLogFiles().count() > 0)); assertTrue(tableRTFileSystemView.getLatestFileSlices(partitionPath).anyMatch(fileSlice -> fileSlice.getLogFiles().count() > 0));
} }
} }
} }
@@ -1077,7 +1077,7 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
JavaRDD<HoodieRecord> writeRecords = jsc.parallelize(records, 1); JavaRDD<HoodieRecord> writeRecords = jsc.parallelize(records, 1);
JavaRDD<WriteStatus> statuses = client.insert(writeRecords, instantTime); JavaRDD<WriteStatus> statuses = client.insert(writeRecords, instantTime);
assertTrue("Commit should succeed", client.commit(instantTime, statuses)); assertTrue(client.commit(instantTime, statuses), "Commit should succeed");
// Read from commit file // Read from commit file
table = HoodieTable.create(cfg, jsc); table = HoodieTable.create(cfg, jsc);
@@ -1094,14 +1094,14 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
inserts += stat.getValue().getInserts(); inserts += stat.getValue().getInserts();
} }
} }
Assert.assertEquals(inserts, 200); assertEquals(200, inserts);
instantTime = "002"; instantTime = "002";
client.startCommitWithTime(instantTime); client.startCommitWithTime(instantTime);
records = dataGen.generateUpdates(instantTime, records); records = dataGen.generateUpdates(instantTime, records);
writeRecords = jsc.parallelize(records, 1); writeRecords = jsc.parallelize(records, 1);
statuses = client.upsert(writeRecords, instantTime); statuses = client.upsert(writeRecords, instantTime);
assertTrue("Commit should succeed", client.commit(instantTime, statuses)); assertTrue(client.commit(instantTime, statuses), "Commit should succeed");
// Read from commit file // Read from commit file
table = HoodieTable.create(cfg, jsc); table = HoodieTable.create(cfg, jsc);
@@ -1122,8 +1122,8 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
} }
} }
Assert.assertEquals(inserts, 200); assertEquals(200, inserts);
Assert.assertEquals(upserts, 200); assertEquals(200, upserts);
client.rollback(instantTime); client.rollback(instantTime);
@@ -1145,8 +1145,8 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
upserts += stat.getValue().getUpserts(); upserts += stat.getValue().getUpserts();
} }
} }
Assert.assertEquals(inserts, 200); assertEquals(200, inserts);
Assert.assertEquals(upserts, 0); assertEquals(0, upserts);
} }
} }
@@ -1168,7 +1168,7 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
JavaRDD<HoodieRecord> writeRecords = jsc.parallelize(records, 1); JavaRDD<HoodieRecord> writeRecords = jsc.parallelize(records, 1);
JavaRDD<WriteStatus> statuses = client.insert(writeRecords, instantTime); JavaRDD<WriteStatus> statuses = client.insert(writeRecords, instantTime);
assertTrue("Commit should succeed", client.commit(instantTime, statuses)); assertTrue(client.commit(instantTime, statuses), "Commit should succeed");
// Read from commit file // Read from commit file
HoodieTable table = HoodieTable.create(cfg, jsc); HoodieTable table = HoodieTable.create(cfg, jsc);
@@ -1188,7 +1188,7 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
fileIdToUpsertsMap.put(stat.getKey(), stat.getValue().getUpserts()); fileIdToUpsertsMap.put(stat.getKey(), stat.getValue().getUpserts());
} }
} }
Assert.assertEquals(inserts, 200); assertEquals(200, inserts);
instantTime = "001"; instantTime = "001";
client.startCommitWithTime(instantTime); client.startCommitWithTime(instantTime);
@@ -1197,7 +1197,7 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
records.addAll(dataGen.generateInserts(instantTime, 200)); records.addAll(dataGen.generateInserts(instantTime, 200));
writeRecords = jsc.parallelize(records, 1); writeRecords = jsc.parallelize(records, 1);
statuses = client.upsert(writeRecords, instantTime); statuses = client.upsert(writeRecords, instantTime);
assertTrue("Commit should succeed", client.commit(instantTime, statuses)); assertTrue(client.commit(instantTime, statuses), "Commit should succeed");
// Read from commit file // Read from commit file
table = HoodieTable.create(cfg, jsc); table = HoodieTable.create(cfg, jsc);
@@ -1221,8 +1221,8 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
} }
} }
Assert.assertEquals(inserts, 400); assertEquals(400, inserts);
Assert.assertEquals(upserts, 200); assertEquals(200, upserts);
// Test small file handling after compaction // Test small file handling after compaction
instantTime = "002"; instantTime = "002";
@@ -1243,8 +1243,8 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
// Ensure that the rolling stats from the extra metadata of delta commits is copied over to the compaction commit // Ensure that the rolling stats from the extra metadata of delta commits is copied over to the compaction commit
for (Map.Entry<String, Map<String, HoodieRollingStat>> entry : rollingStatMetadata.getPartitionToRollingStats() for (Map.Entry<String, Map<String, HoodieRollingStat>> entry : rollingStatMetadata.getPartitionToRollingStats()
.entrySet()) { .entrySet()) {
Assert.assertTrue(rollingStatMetadata1.getPartitionToRollingStats().containsKey(entry.getKey())); assertTrue(rollingStatMetadata1.getPartitionToRollingStats().containsKey(entry.getKey()));
Assert.assertEquals(rollingStatMetadata1.getPartitionToRollingStats().get(entry.getKey()).size(), assertEquals(rollingStatMetadata1.getPartitionToRollingStats().get(entry.getKey()).size(),
entry.getValue().size()); entry.getValue().size());
} }
@@ -1256,7 +1256,7 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
records.addAll(dataGen.generateInserts(instantTime, 200)); records.addAll(dataGen.generateInserts(instantTime, 200));
writeRecords = jsc.parallelize(records, 1); writeRecords = jsc.parallelize(records, 1);
statuses = client.upsert(writeRecords, instantTime); statuses = client.upsert(writeRecords, instantTime);
assertTrue("Commit should succeed", client.commit(instantTime, statuses)); assertTrue(client.commit(instantTime, statuses), "Commit should succeed");
// Read from commit file // Read from commit file
table = HoodieTable.create(cfg, jsc); table = HoodieTable.create(cfg, jsc);
@@ -1279,8 +1279,8 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
} }
} }
Assert.assertEquals(inserts, 600); assertEquals(600, inserts);
Assert.assertEquals(upserts, 600); assertEquals(600, upserts);
} }
} }
@@ -1309,7 +1309,7 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
Option<HoodieInstant> deltaCommit = metaClient.getActiveTimeline().getDeltaCommitTimeline().firstInstant(); Option<HoodieInstant> deltaCommit = metaClient.getActiveTimeline().getDeltaCommitTimeline().firstInstant();
assertTrue(deltaCommit.isPresent()); assertTrue(deltaCommit.isPresent());
assertEquals("Delta commit should be 001", "001", deltaCommit.get().getTimestamp()); assertEquals("001", deltaCommit.get().getTimestamp(), "Delta commit should be 001");
Option<HoodieInstant> commit = metaClient.getActiveTimeline().getCommitTimeline().firstInstant(); Option<HoodieInstant> commit = metaClient.getActiveTimeline().getCommitTimeline().firstInstant();
assertFalse(commit.isPresent()); assertFalse(commit.isPresent());
@@ -1322,8 +1322,8 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
roView = new HoodieTableFileSystemView(metaClient, hoodieTable.getCompletedCommitsTimeline(), allFiles); roView = new HoodieTableFileSystemView(metaClient, hoodieTable.getCompletedCommitsTimeline(), allFiles);
dataFilesToRead = roView.getLatestBaseFiles(); dataFilesToRead = roView.getLatestBaseFiles();
assertTrue("should list the parquet files we wrote in the delta commit", assertTrue(dataFilesToRead.findAny().isPresent(),
dataFilesToRead.findAny().isPresent()); "should list the parquet files we wrote in the delta commit");
/** /**
* Write 2 (only updates, written to .log file) * Write 2 (only updates, written to .log file)
@@ -1386,7 +1386,7 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
private void assertNoWriteErrors(List<WriteStatus> statuses) { private void assertNoWriteErrors(List<WriteStatus> statuses) {
// Verify there are no errors // Verify there are no errors
for (WriteStatus status : statuses) { for (WriteStatus status : statuses) {
assertFalse("Errors found in write of " + status.getFileId(), status.hasErrors()); assertFalse(status.hasErrors(), "Errors found in write of " + status.getFileId());
} }
} }
@@ -1402,7 +1402,7 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
Option<HoodieInstant> deltaCommit = metaClient.getActiveTimeline().getDeltaCommitTimeline().lastInstant(); Option<HoodieInstant> deltaCommit = metaClient.getActiveTimeline().getDeltaCommitTimeline().lastInstant();
assertTrue(deltaCommit.isPresent()); assertTrue(deltaCommit.isPresent());
Assert.assertEquals("Delta commit should be specified value", commitTime, deltaCommit.get().getTimestamp()); assertEquals(commitTime, deltaCommit.get().getTimestamp(), "Delta commit should be specified value");
Option<HoodieInstant> commit = metaClient.getActiveTimeline().getCommitTimeline().lastInstant(); Option<HoodieInstant> commit = metaClient.getActiveTimeline().getCommitTimeline().lastInstant();
assertFalse(commit.isPresent()); assertFalse(commit.isPresent());
@@ -1415,8 +1415,8 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
roView = new HoodieTableFileSystemView(metaClient, hoodieTable.getCompletedCommitsTimeline(), allFiles); roView = new HoodieTableFileSystemView(metaClient, hoodieTable.getCompletedCommitsTimeline(), allFiles);
dataFilesToRead = roView.getLatestBaseFiles(); dataFilesToRead = roView.getLatestBaseFiles();
assertTrue("should list the parquet files we wrote in the delta commit", assertTrue(dataFilesToRead.findAny().isPresent(),
dataFilesToRead.findAny().isPresent()); "should list the parquet files we wrote in the delta commit");
return allFiles; return allFiles;
} }
@@ -1435,8 +1435,8 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
metaClient = HoodieTableMetaClient.reload(metaClient); metaClient = HoodieTableMetaClient.reload(metaClient);
Option<HoodieInstant> deltaCommit = metaClient.getActiveTimeline().getDeltaCommitTimeline().lastInstant(); Option<HoodieInstant> deltaCommit = metaClient.getActiveTimeline().getDeltaCommitTimeline().lastInstant();
assertTrue(deltaCommit.isPresent()); assertTrue(deltaCommit.isPresent());
assertEquals("Latest Delta commit should match specified time", assertEquals(commitTime, deltaCommit.get().getTimestamp(),
commitTime, deltaCommit.get().getTimestamp()); "Latest Delta commit should match specified time");
Option<HoodieInstant> commit = metaClient.getActiveTimeline().getCommitTimeline().firstInstant(); Option<HoodieInstant> commit = metaClient.getActiveTimeline().getCommitTimeline().firstInstant();
assertFalse(commit.isPresent()); assertFalse(commit.isPresent());
@@ -1452,7 +1452,7 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
throws Exception { throws Exception {
HoodieTestUtils.init(jsc.hadoopConfiguration(), basePath, HoodieTableType.MERGE_ON_READ); HoodieTestUtils.init(jsc.hadoopConfiguration(), basePath, HoodieTableType.MERGE_ON_READ);
setupIncremental(roJobConf, startCommitTime, numCommitsToPull, stopAtCompaction); setupIncremental(roJobConf, startCommitTime, numCommitsToPull, stopAtCompaction);
FileInputFormat.setInputPaths(roJobConf, basePath + "/" + partitionPath); FileInputFormat.setInputPaths(roJobConf, Paths.get(basePath, partitionPath).toString());
return roInputFormat.listStatus(roJobConf); return roInputFormat.listStatus(roJobConf);
} }
@@ -1465,7 +1465,7 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
throws Exception { throws Exception {
HoodieTestUtils.init(jsc.hadoopConfiguration(), basePath, HoodieTableType.MERGE_ON_READ); HoodieTestUtils.init(jsc.hadoopConfiguration(), basePath, HoodieTableType.MERGE_ON_READ);
setupIncremental(rtJobConf, startCommitTime, numCommitsToPull, false); setupIncremental(rtJobConf, startCommitTime, numCommitsToPull, false);
FileInputFormat.setInputPaths(rtJobConf, basePath + "/" + partitionPath); FileInputFormat.setInputPaths(rtJobConf, Paths.get(basePath, partitionPath).toString());
return rtInputFormat.listStatus(rtJobConf); return rtInputFormat.listStatus(rtJobConf);
} }
@@ -1492,9 +1492,9 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
JobConf jobConf, int expectedRecords, String... expectedCommits) { JobConf jobConf, int expectedRecords, String... expectedCommits) {
assertEquals(expectedNumFiles, files.length); assertEquals(expectedNumFiles, files.length);
Set<String> expectedCommitsSet = Arrays.asList(expectedCommits).stream().collect(Collectors.toSet()); Set<String> expectedCommitsSet = Arrays.stream(expectedCommits).collect(Collectors.toSet());
List<GenericRecord> records = HoodieMergeOnReadTestUtils.getRecordsUsingInputFormat( List<GenericRecord> records = HoodieMergeOnReadTestUtils.getRecordsUsingInputFormat(
Arrays.asList(basePath + "/" + partitionPath), basePath, jobConf, inputFormat); Collections.singletonList(Paths.get(basePath, partitionPath).toString()), basePath, jobConf, inputFormat);
assertEquals(expectedRecords, records.size()); assertEquals(expectedRecords, records.size());
Set<String> actualCommits = records.stream().map(r -> Set<String> actualCommits = records.stream().map(r ->
r.get(HoodieRecord.COMMIT_TIME_METADATA_FIELD).toString()).collect(Collectors.toSet()); r.get(HoodieRecord.COMMIT_TIME_METADATA_FIELD).toString()).collect(Collectors.toSet());

View File

@@ -47,26 +47,25 @@ import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat; import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.JobConf;
import org.apache.log4j.LogManager; import org.apache.log4j.LogManager;
import org.apache.log4j.Logger; import org.apache.log4j.Logger;
import org.apache.parquet.avro.AvroReadSupport; import org.apache.parquet.avro.AvroReadSupport;
import org.apache.parquet.hadoop.ParquetReader; import org.apache.parquet.hadoop.ParquetReader;
import org.apache.spark.TaskContext; import org.apache.spark.TaskContext;
import org.junit.After; import org.junit.jupiter.api.AfterEach;
import org.junit.Assert; import org.junit.jupiter.api.BeforeEach;
import org.junit.Before; import org.junit.jupiter.api.Test;
import org.junit.Test;
import java.io.File; import java.io.File;
import java.nio.file.Paths;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.UUID; import java.util.UUID;
import static org.junit.Assert.assertEquals; import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.Assert.assertTrue; import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.mockito.Mockito.mock; import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when; import static org.mockito.Mockito.when;
@@ -74,7 +73,7 @@ public class TestCopyOnWriteActionExecutor extends HoodieClientTestHarness {
private static final Logger LOG = LogManager.getLogger(TestCopyOnWriteActionExecutor.class); private static final Logger LOG = LogManager.getLogger(TestCopyOnWriteActionExecutor.class);
@Before @BeforeEach
public void setUp() throws Exception { public void setUp() throws Exception {
initSparkContexts("TestCopyOnWriteActionExecutor"); initSparkContexts("TestCopyOnWriteActionExecutor");
initPath(); initPath();
@@ -83,7 +82,7 @@ public class TestCopyOnWriteActionExecutor extends HoodieClientTestHarness {
initFileSystem(); initFileSystem();
} }
@After @AfterEach
public void tearDown() throws Exception { public void tearDown() throws Exception {
cleanupSparkContexts(); cleanupSparkContexts();
cleanupMetaClient(); cleanupMetaClient();
@@ -110,8 +109,8 @@ public class TestCopyOnWriteActionExecutor extends HoodieClientTestHarness {
return Pair.of(io.makeNewPath(record.getPartitionPath()), writeToken); return Pair.of(io.makeNewPath(record.getPartitionPath()), writeToken);
}).collect().get(0); }).collect().get(0);
Assert.assertEquals(newPathWithWriteToken.getKey().toString(), this.basePath + "/" + partitionPath + "/" assertEquals(newPathWithWriteToken.getKey().toString(), Paths.get(this.basePath, partitionPath,
+ FSUtils.makeDataFileName(instantTime, newPathWithWriteToken.getRight(), fileName)); FSUtils.makeDataFileName(instantTime, newPathWithWriteToken.getRight(), fileName)).toString());
} }
private HoodieWriteConfig makeHoodieClientConfig() throws Exception { private HoodieWriteConfig makeHoodieClientConfig() throws Exception {
@@ -134,7 +133,7 @@ public class TestCopyOnWriteActionExecutor extends HoodieClientTestHarness {
writeClient.startCommitWithTime(firstCommitTime); writeClient.startCommitWithTime(firstCommitTime);
metaClient = HoodieTableMetaClient.reload(metaClient); metaClient = HoodieTableMetaClient.reload(metaClient);
String partitionPath = "/2016/01/31"; String partitionPath = "2016/01/31";
HoodieCopyOnWriteTable table = (HoodieCopyOnWriteTable) HoodieTable.create(metaClient, config, jsc); HoodieCopyOnWriteTable table = (HoodieCopyOnWriteTable) HoodieTable.create(metaClient, config, jsc);
// Get some records belong to the same partition (2016/01/31) // Get some records belong to the same partition (2016/01/31)
@@ -227,7 +226,7 @@ public class TestCopyOnWriteActionExecutor extends HoodieClientTestHarness {
updatedReader.close(); updatedReader.close();
// Also check the numRecordsWritten // Also check the numRecordsWritten
WriteStatus writeStatus = statuses.get(0); WriteStatus writeStatus = statuses.get(0);
assertEquals("Should be only one file generated", 1, statuses.size()); assertEquals(1, statuses.size(), "Should be only one file generated");
assertEquals(4, writeStatus.getStat().getNumWrites());// 3 rewritten records + 1 new record assertEquals(4, writeStatus.getStat().getNumWrites());// 3 rewritten records + 1 new record
} }
@@ -239,7 +238,7 @@ public class TestCopyOnWriteActionExecutor extends HoodieClientTestHarness {
hoodieInputFormat.setConf(jobConf); hoodieInputFormat.setConf(jobConf);
HoodieTestUtils.init(jsc.hadoopConfiguration(), basePath, HoodieTableType.COPY_ON_WRITE); HoodieTestUtils.init(jsc.hadoopConfiguration(), basePath, HoodieTableType.COPY_ON_WRITE);
setupIncremental(jobConf, startCommitTime, numCommitsToPull); setupIncremental(jobConf, startCommitTime, numCommitsToPull);
FileInputFormat.setInputPaths(jobConf, basePath + partitionPath); FileInputFormat.setInputPaths(jobConf, Paths.get(basePath, partitionPath).toString());
return hoodieInputFormat.listStatus(jobConf); return hoodieInputFormat.listStatus(jobConf);
} }
@@ -390,13 +389,13 @@ public class TestCopyOnWriteActionExecutor extends HoodieClientTestHarness {
// Check the updated file // Check the updated file
int counts = 0; int counts = 0;
for (File file : new File(basePath + "/2016/01/31").listFiles()) { for (File file : Paths.get(basePath, "2016/01/31").toFile().listFiles()) {
if (file.getName().endsWith(".parquet") && FSUtils.getCommitTime(file.getName()).equals(instantTime)) { if (file.getName().endsWith(".parquet") && FSUtils.getCommitTime(file.getName()).equals(instantTime)) {
LOG.info(file.getName() + "-" + file.length()); LOG.info(file.getName() + "-" + file.length());
counts++; counts++;
} }
} }
assertEquals("If the number of records are more than 1150, then there should be a new file", 3, counts); assertEquals(3, counts, "If the number of records are more than 1150, then there should be a new file");
} }
@Test @Test
@@ -416,7 +415,7 @@ public class TestCopyOnWriteActionExecutor extends HoodieClientTestHarness {
WriteStatus writeStatus = ws.get(0).get(0); WriteStatus writeStatus = ws.get(0).get(0);
String fileId = writeStatus.getFileId(); String fileId = writeStatus.getFileId();
metaClient.getFs().create(new Path(basePath + "/.hoodie/000.commit")).close(); metaClient.getFs().create(new Path(Paths.get(basePath, ".hoodie", "000.commit").toString())).close();
final List<HoodieRecord> updates = dataGen.generateUpdatesWithHoodieAvroPayload(instantTime, inserts); final List<HoodieRecord> updates = dataGen.generateUpdatesWithHoodieAvroPayload(instantTime, inserts);
String partitionPath = updates.get(0).getPartitionPath(); String partitionPath = updates.get(0).getPartitionPath();
@@ -429,11 +428,8 @@ public class TestCopyOnWriteActionExecutor extends HoodieClientTestHarness {
assertEquals(updates.size() - numRecordsInPartition, updateStatus.get(0).get(0).getTotalErrorRecords()); assertEquals(updates.size() - numRecordsInPartition, updateStatus.get(0).get(0).getTotalErrorRecords());
} }
@After @AfterEach
public void cleanup() { public void cleanup() {
if (basePath != null) {
new File(basePath).delete();
}
if (jsc != null) { if (jsc != null) {
jsc.stop(); jsc.stop();
} }

View File

@@ -18,10 +18,6 @@
package org.apache.hudi.table.action.commit; package org.apache.hudi.table.action.commit;
import static org.junit.Assert.assertEquals;
import java.util.ArrayList;
import java.util.List;
import org.apache.hudi.common.HoodieClientTestHarness; import org.apache.hudi.common.HoodieClientTestHarness;
import org.apache.hudi.common.HoodieClientTestUtils; import org.apache.hudi.common.HoodieClientTestUtils;
import org.apache.hudi.common.HoodieTestDataGenerator; import org.apache.hudi.common.HoodieTestDataGenerator;
@@ -36,18 +32,25 @@ import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.table.HoodieCopyOnWriteTable; import org.apache.hudi.table.HoodieCopyOnWriteTable;
import org.apache.hudi.table.HoodieTable; import org.apache.hudi.table.HoodieTable;
import org.apache.hudi.table.WorkloadProfile; import org.apache.hudi.table.WorkloadProfile;
import org.apache.log4j.LogManager; import org.apache.log4j.LogManager;
import org.apache.log4j.Logger; import org.apache.log4j.Logger;
import org.junit.After; import org.junit.jupiter.api.AfterEach;
import org.junit.Before; import org.junit.jupiter.api.BeforeEach;
import org.junit.Test; import org.junit.jupiter.api.Test;
import java.util.ArrayList;
import java.util.List;
import scala.Tuple2; import scala.Tuple2;
import static org.junit.jupiter.api.Assertions.assertEquals;
public class TestUpsertPartitioner extends HoodieClientTestHarness { public class TestUpsertPartitioner extends HoodieClientTestHarness {
private static final Logger LOG = LogManager.getLogger(TestUpsertPartitioner.class); private static final Logger LOG = LogManager.getLogger(TestUpsertPartitioner.class);
@Before @BeforeEach
public void setUp() throws Exception { public void setUp() throws Exception {
initSparkContexts("TestUpsertPartitioner"); initSparkContexts("TestUpsertPartitioner");
initPath(); initPath();
@@ -56,7 +59,7 @@ public class TestUpsertPartitioner extends HoodieClientTestHarness {
initFileSystem(); initFileSystem();
} }
@After @AfterEach
public void tearDown() throws Exception { public void tearDown() throws Exception {
cleanupSparkContexts(); cleanupSparkContexts();
cleanupMetaClient(); cleanupMetaClient();
@@ -89,8 +92,9 @@ public class TestUpsertPartitioner extends HoodieClientTestHarness {
records.addAll(updateRecords); records.addAll(updateRecords);
WorkloadProfile profile = new WorkloadProfile(jsc.parallelize(records)); WorkloadProfile profile = new WorkloadProfile(jsc.parallelize(records));
UpsertPartitioner partitioner = new UpsertPartitioner(profile, jsc, table, config); UpsertPartitioner partitioner = new UpsertPartitioner(profile, jsc, table, config);
assertEquals("Update record should have gone to the 1 update partition", 0, partitioner.getPartition( assertEquals(0, partitioner.getPartition(
new Tuple2<>(updateRecords.get(0).getKey(), Option.ofNullable(updateRecords.get(0).getCurrentLocation())))); new Tuple2<>(updateRecords.get(0).getKey(), Option.ofNullable(updateRecords.get(0).getCurrentLocation()))),
"Update record should have gone to the 1 update partition");
return partitioner; return partitioner;
} }
@@ -100,7 +104,7 @@ public class TestUpsertPartitioner extends HoodieClientTestHarness {
// Inserts + Updates... Check all updates go together & inserts subsplit // Inserts + Updates... Check all updates go together & inserts subsplit
UpsertPartitioner partitioner = getUpsertPartitioner(0, 200, 100, 1024, testPartitionPath, false); UpsertPartitioner partitioner = getUpsertPartitioner(0, 200, 100, 1024, testPartitionPath, false);
List<InsertBucket> insertBuckets = partitioner.getInsertBuckets(testPartitionPath); List<InsertBucket> insertBuckets = partitioner.getInsertBuckets(testPartitionPath);
assertEquals("Total of 2 insert buckets", 2, insertBuckets.size()); assertEquals(2, insertBuckets.size(), "Total of 2 insert buckets");
} }
@Test @Test
@@ -111,33 +115,33 @@ public class TestUpsertPartitioner extends HoodieClientTestHarness {
UpsertPartitioner partitioner = getUpsertPartitioner(1000 * 1024, 400, 100, 800 * 1024, testPartitionPath, false); UpsertPartitioner partitioner = getUpsertPartitioner(1000 * 1024, 400, 100, 800 * 1024, testPartitionPath, false);
List<InsertBucket> insertBuckets = partitioner.getInsertBuckets(testPartitionPath); List<InsertBucket> insertBuckets = partitioner.getInsertBuckets(testPartitionPath);
assertEquals("Should have 3 partitions", 3, partitioner.numPartitions()); assertEquals(3, partitioner.numPartitions(), "Should have 3 partitions");
assertEquals("Bucket 0 is UPDATE", BucketType.UPDATE, assertEquals(BucketType.UPDATE, partitioner.getBucketInfo(0).bucketType,
partitioner.getBucketInfo(0).bucketType); "Bucket 0 is UPDATE");
assertEquals("Bucket 1 is INSERT", BucketType.INSERT, assertEquals(BucketType.INSERT, partitioner.getBucketInfo(1).bucketType,
partitioner.getBucketInfo(1).bucketType); "Bucket 1 is INSERT");
assertEquals("Bucket 2 is INSERT", BucketType.INSERT, assertEquals(BucketType.INSERT, partitioner.getBucketInfo(2).bucketType,
partitioner.getBucketInfo(2).bucketType); "Bucket 2 is INSERT");
assertEquals("Total of 3 insert buckets", 3, insertBuckets.size()); assertEquals(3, insertBuckets.size(), "Total of 3 insert buckets");
assertEquals("First insert bucket must be same as update bucket", 0, insertBuckets.get(0).bucketNumber); assertEquals(0, insertBuckets.get(0).bucketNumber, "First insert bucket must be same as update bucket");
assertEquals("First insert bucket should have weight 0.5", 0.5, insertBuckets.get(0).weight, 0.01); assertEquals(0.5, insertBuckets.get(0).weight, 0.01, "First insert bucket should have weight 0.5");
// Now with insert split size auto tuned // Now with insert split size auto tuned
partitioner = getUpsertPartitioner(1000 * 1024, 2400, 100, 800 * 1024, testPartitionPath, true); partitioner = getUpsertPartitioner(1000 * 1024, 2400, 100, 800 * 1024, testPartitionPath, true);
insertBuckets = partitioner.getInsertBuckets(testPartitionPath); insertBuckets = partitioner.getInsertBuckets(testPartitionPath);
assertEquals("Should have 4 partitions", 4, partitioner.numPartitions()); assertEquals(4, partitioner.numPartitions(), "Should have 4 partitions");
assertEquals("Bucket 0 is UPDATE", BucketType.UPDATE, assertEquals(BucketType.UPDATE, partitioner.getBucketInfo(0).bucketType,
partitioner.getBucketInfo(0).bucketType); "Bucket 0 is UPDATE");
assertEquals("Bucket 1 is INSERT", BucketType.INSERT, assertEquals(BucketType.INSERT, partitioner.getBucketInfo(1).bucketType,
partitioner.getBucketInfo(1).bucketType); "Bucket 1 is INSERT");
assertEquals("Bucket 2 is INSERT", BucketType.INSERT, assertEquals(BucketType.INSERT, partitioner.getBucketInfo(2).bucketType,
partitioner.getBucketInfo(2).bucketType); "Bucket 2 is INSERT");
assertEquals("Bucket 3 is INSERT", BucketType.INSERT, assertEquals(BucketType.INSERT, partitioner.getBucketInfo(3).bucketType,
partitioner.getBucketInfo(3).bucketType); "Bucket 3 is INSERT");
assertEquals("Total of 4 insert buckets", 4, insertBuckets.size()); assertEquals(4, insertBuckets.size(), "Total of 4 insert buckets");
assertEquals("First insert bucket must be same as update bucket", 0, insertBuckets.get(0).bucketNumber); assertEquals(0, insertBuckets.get(0).bucketNumber, "First insert bucket must be same as update bucket");
assertEquals("First insert bucket should have weight 0.5", 200.0 / 2400, insertBuckets.get(0).weight, 0.01); assertEquals(200.0 / 2400, insertBuckets.get(0).weight, 0.01, "First insert bucket should have weight 0.5");
} }
private HoodieWriteConfig.Builder makeHoodieClientConfigBuilder() throws Exception { private HoodieWriteConfig.Builder makeHoodieClientConfigBuilder() throws Exception {

View File

@@ -52,8 +52,7 @@ import org.apache.hudi.table.HoodieTable;
import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.Path;
import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaRDD;
import org.junit.Assert; import org.junit.jupiter.api.Test;
import org.junit.Test;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
@@ -63,9 +62,10 @@ import java.util.Map;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import static org.apache.hudi.common.HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA; import static org.apache.hudi.common.HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA;
import static org.junit.Assert.assertEquals; import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.Assert.assertFalse; import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.Assert.assertTrue; import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
/** /**
* Test Cases for Async Compaction and Ingestion interaction. * Test Cases for Async Compaction and Ingestion interaction.
@@ -111,9 +111,9 @@ public class TestAsyncCompaction extends TestHoodieClientBase {
HoodieInstant pendingCompactionInstant = HoodieInstant pendingCompactionInstant =
metaClient.getActiveTimeline().filterPendingCompactionTimeline().firstInstant().get(); metaClient.getActiveTimeline().filterPendingCompactionTimeline().firstInstant().get();
assertEquals("Pending Compaction instant has expected instant time", pendingCompactionInstant.getTimestamp(), assertEquals(compactionInstantTime, pendingCompactionInstant.getTimestamp(),
compactionInstantTime); "Pending Compaction instant has expected instant time");
assertEquals("Pending Compaction instant has expected state", pendingCompactionInstant.getState(), State.REQUESTED); assertEquals(State.REQUESTED, pendingCompactionInstant.getState(), "Pending Compaction instant has expected state");
moveCompactionFromRequestedToInflight(compactionInstantTime, cfg); moveCompactionFromRequestedToInflight(compactionInstantTime, cfg);
@@ -169,11 +169,11 @@ public class TestAsyncCompaction extends TestHoodieClientBase {
metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), cfg.getBasePath()); metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), cfg.getBasePath());
HoodieInstant pendingCompactionInstant = HoodieInstant pendingCompactionInstant =
metaClient.getActiveTimeline().filterPendingCompactionTimeline().firstInstant().get(); metaClient.getActiveTimeline().filterPendingCompactionTimeline().firstInstant().get();
assertEquals("Pending Compaction instant has expected instant time", pendingCompactionInstant.getTimestamp(), assertEquals(compactionInstantTime, pendingCompactionInstant.getTimestamp(),
compactionInstantTime); "Pending Compaction instant has expected instant time");
HoodieInstant inflightInstant = HoodieInstant inflightInstant =
metaClient.getActiveTimeline().filterPendingExcludingCompaction().firstInstant().get(); metaClient.getActiveTimeline().filterPendingExcludingCompaction().firstInstant().get();
assertEquals("inflight instant has expected instant time", inflightInstant.getTimestamp(), inflightInstantTime); assertEquals(inflightInstantTime, inflightInstant.getTimestamp(), "inflight instant has expected instant time");
// This should rollback // This should rollback
client.startCommitWithTime(nextInflightInstantTime); client.startCommitWithTime(nextInflightInstantTime);
@@ -181,13 +181,14 @@ public class TestAsyncCompaction extends TestHoodieClientBase {
// Validate // Validate
metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), cfg.getBasePath()); metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), cfg.getBasePath());
inflightInstant = metaClient.getActiveTimeline().filterPendingExcludingCompaction().firstInstant().get(); inflightInstant = metaClient.getActiveTimeline().filterPendingExcludingCompaction().firstInstant().get();
assertEquals("inflight instant has expected instant time", inflightInstant.getTimestamp(), nextInflightInstantTime); assertEquals(inflightInstant.getTimestamp(), nextInflightInstantTime, "inflight instant has expected instant time");
assertEquals("Expect only one inflight instant", 1, metaClient.getActiveTimeline() assertEquals(1, metaClient.getActiveTimeline()
.filterPendingExcludingCompaction().getInstants().count()); .filterPendingExcludingCompaction().getInstants().count(),
"Expect only one inflight instant");
// Expect pending Compaction to be present // Expect pending Compaction to be present
pendingCompactionInstant = metaClient.getActiveTimeline().filterPendingCompactionTimeline().firstInstant().get(); pendingCompactionInstant = metaClient.getActiveTimeline().filterPendingCompactionTimeline().firstInstant().get();
assertEquals("Pending Compaction instant has expected instant time", pendingCompactionInstant.getTimestamp(), assertEquals(compactionInstantTime, pendingCompactionInstant.getTimestamp(),
compactionInstantTime); "Pending Compaction instant has expected instant time");
} }
} }
@@ -237,8 +238,8 @@ public class TestAsyncCompaction extends TestHoodieClientBase {
String compactionInstantTime = "006"; String compactionInstantTime = "006";
int numRecs = 2000; int numRecs = 2000;
List<HoodieRecord> records = dataGen.generateInserts(firstInstantTime, numRecs); final List<HoodieRecord> initalRecords = dataGen.generateInserts(firstInstantTime, numRecs);
records = runNextDeltaCommits(client, readClient, Arrays.asList(firstInstantTime, secondInstantTime), records, cfg, true, final List<HoodieRecord> records = runNextDeltaCommits(client, readClient, Arrays.asList(firstInstantTime, secondInstantTime), initalRecords, cfg, true,
new ArrayList<>()); new ArrayList<>());
// Schedule compaction but do not run them // Schedule compaction but do not run them
@@ -246,17 +247,12 @@ public class TestAsyncCompaction extends TestHoodieClientBase {
HoodieTableMetaClient metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), cfg.getBasePath()); HoodieTableMetaClient metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), cfg.getBasePath());
HoodieInstant pendingCompactionInstant = HoodieInstant pendingCompactionInstant =
metaClient.getActiveTimeline().filterPendingCompactionTimeline().firstInstant().get(); metaClient.getActiveTimeline().filterPendingCompactionTimeline().firstInstant().get();
assertEquals("Pending Compaction instant has expected instant time", pendingCompactionInstant.getTimestamp(), compactionInstantTime); assertEquals(compactionInstantTime, pendingCompactionInstant.getTimestamp(), "Pending Compaction instant has expected instant time");
boolean gotException = false; assertThrows(IllegalArgumentException.class, () -> {
try {
runNextDeltaCommits(client, readClient, Arrays.asList(failedInstantTime), records, cfg, false, runNextDeltaCommits(client, readClient, Arrays.asList(failedInstantTime), records, cfg, false,
Arrays.asList(compactionInstantTime)); Arrays.asList(compactionInstantTime));
} catch (IllegalArgumentException iex) { }, "Latest pending compaction instant time must be earlier than this instant time");
// Latest pending compaction instant time must be earlier than this instant time. Should fail here
gotException = true;
}
assertTrue("Latest pending compaction instant time must be earlier than this instant time", gotException);
} }
@Test @Test
@@ -283,17 +279,12 @@ public class TestAsyncCompaction extends TestHoodieClientBase {
metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), cfg.getBasePath()); metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), cfg.getBasePath());
HoodieInstant inflightInstant = HoodieInstant inflightInstant =
metaClient.getActiveTimeline().filterPendingExcludingCompaction().firstInstant().get(); metaClient.getActiveTimeline().filterPendingExcludingCompaction().firstInstant().get();
assertEquals("inflight instant has expected instant time", inflightInstant.getTimestamp(), inflightInstantTime); assertEquals(inflightInstantTime, inflightInstant.getTimestamp(), "inflight instant has expected instant time");
boolean gotException = false; assertThrows(IllegalArgumentException.class, () -> {
try {
// Schedule compaction but do not run them // Schedule compaction but do not run them
scheduleCompaction(compactionInstantTime, client, cfg); scheduleCompaction(compactionInstantTime, client, cfg);
} catch (IllegalArgumentException iex) { }, "Earliest ingestion inflight instant time must be later than compaction time");
// Earliest ingestion inflight instant time must be later than compaction time. Should fail here
gotException = true;
}
assertTrue("Earliest ingestion inflight instant time must be later than compaction time", gotException);
} }
@Test @Test
@@ -304,44 +295,32 @@ public class TestAsyncCompaction extends TestHoodieClientBase {
HoodieWriteClient client = getHoodieWriteClient(cfg, true); HoodieWriteClient client = getHoodieWriteClient(cfg, true);
HoodieReadClient readClient = getHoodieReadClient(cfg.getBasePath()); HoodieReadClient readClient = getHoodieReadClient(cfg.getBasePath());
String firstInstantTime = "001"; final String firstInstantTime = "001";
String secondInstantTime = "004"; final String secondInstantTime = "004";
String compactionInstantTime = "002"; final String compactionInstantTime = "002";
int numRecs = 2000; int numRecs = 2000;
List<HoodieRecord> records = dataGen.generateInserts(firstInstantTime, numRecs); List<HoodieRecord> records = dataGen.generateInserts(firstInstantTime, numRecs);
runNextDeltaCommits(client, readClient, Arrays.asList(firstInstantTime, secondInstantTime), records, cfg, true, runNextDeltaCommits(client, readClient, Arrays.asList(firstInstantTime, secondInstantTime), records, cfg, true,
new ArrayList<>()); new ArrayList<>());
boolean gotException = false; assertThrows(IllegalArgumentException.class, () -> {
try {
// Schedule compaction but do not run them // Schedule compaction but do not run them
scheduleCompaction(compactionInstantTime, client, cfg); scheduleCompaction(compactionInstantTime, client, cfg);
} catch (IllegalArgumentException iex) { }, "Compaction Instant to be scheduled cannot have older timestamp");
gotException = true;
}
assertTrue("Compaction Instant to be scheduled cannot have older timestamp", gotException);
// Schedule with timestamp same as that of committed instant // Schedule with timestamp same as that of committed instant
gotException = false; assertThrows(IllegalArgumentException.class, () -> {
try {
// Schedule compaction but do not run them // Schedule compaction but do not run them
scheduleCompaction(secondInstantTime, client, cfg); scheduleCompaction(secondInstantTime, client, cfg);
} catch (IllegalArgumentException iex) { }, "Compaction Instant to be scheduled cannot have same timestamp as committed instant");
gotException = true;
}
assertTrue("Compaction Instant to be scheduled cannot have same timestamp as committed instant", gotException);
compactionInstantTime = "006"; final String compactionInstantTime2 = "006";
scheduleCompaction(compactionInstantTime, client, cfg); scheduleCompaction(compactionInstantTime2, client, cfg);
gotException = false; assertThrows(IllegalArgumentException.class, () -> {
try {
// Schedule compaction with the same times as a pending compaction // Schedule compaction with the same times as a pending compaction
scheduleCompaction(secondInstantTime, client, cfg); scheduleCompaction(secondInstantTime, client, cfg);
} catch (IllegalArgumentException iex) { }, "Compaction Instant to be scheduled cannot have same timestamp as a pending compaction");
gotException = true;
}
assertTrue("Compaction Instant to be scheduled cannot have same timestamp as a pending compaction", gotException);
} }
@Test @Test
@@ -406,13 +385,13 @@ public class TestAsyncCompaction extends TestHoodieClientBase {
fileSliceList.forEach(fileSlice -> { fileSliceList.forEach(fileSlice -> {
Pair<String, HoodieCompactionOperation> opPair = fgIdToCompactionOperation.get(fileSlice.getFileGroupId()); Pair<String, HoodieCompactionOperation> opPair = fgIdToCompactionOperation.get(fileSlice.getFileGroupId());
if (opPair != null) { if (opPair != null) {
assertEquals("Expect baseInstant to match compaction Instant", fileSlice.getBaseInstantTime(), opPair.getKey()); assertEquals(fileSlice.getBaseInstantTime(), opPair.getKey(), "Expect baseInstant to match compaction Instant");
assertTrue("Expect atleast one log file to be present where the latest delta commit was written", assertTrue(fileSlice.getLogFiles().count() > 0,
fileSlice.getLogFiles().count() > 0); "Expect atleast one log file to be present where the latest delta commit was written");
assertFalse("Expect no data-file to be present", fileSlice.getBaseFile().isPresent()); assertFalse(fileSlice.getBaseFile().isPresent(), "Expect no data-file to be present");
} else { } else {
assertTrue("Expect baseInstant to be less than or equal to latestDeltaCommit", assertTrue(fileSlice.getBaseInstantTime().compareTo(latestDeltaCommit) <= 0,
fileSlice.getBaseInstantTime().compareTo(latestDeltaCommit) <= 0); "Expect baseInstant to be less than or equal to latestDeltaCommit");
} }
}); });
} }
@@ -446,8 +425,8 @@ public class TestAsyncCompaction extends TestHoodieClientBase {
metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), cfg.getBasePath()); metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), cfg.getBasePath());
HoodieTable hoodieTable = getHoodieTable(metaClient, cfg); HoodieTable hoodieTable = getHoodieTable(metaClient, cfg);
List<HoodieBaseFile> dataFilesToRead = getCurrentLatestDataFiles(hoodieTable, cfg); List<HoodieBaseFile> dataFilesToRead = getCurrentLatestDataFiles(hoodieTable, cfg);
assertTrue("should list the parquet files we wrote in the delta commit", assertTrue(dataFilesToRead.stream().findAny().isPresent(),
dataFilesToRead.stream().findAny().isPresent()); "should list the parquet files we wrote in the delta commit");
validateDeltaCommit(firstInstant, fgIdToCompactionOperation, cfg); validateDeltaCommit(firstInstant, fgIdToCompactionOperation, cfg);
} }
@@ -467,7 +446,7 @@ public class TestAsyncCompaction extends TestHoodieClientBase {
metaClient.getActiveTimeline().transitionCompactionRequestedToInflight(compactionInstant); metaClient.getActiveTimeline().transitionCompactionRequestedToInflight(compactionInstant);
HoodieInstant instant = metaClient.getActiveTimeline().reload().filterPendingCompactionTimeline().getInstants() HoodieInstant instant = metaClient.getActiveTimeline().reload().filterPendingCompactionTimeline().getInstants()
.filter(in -> in.getTimestamp().equals(compactionInstantTime)).findAny().get(); .filter(in -> in.getTimestamp().equals(compactionInstantTime)).findAny().get();
assertTrue("Instant must be marked inflight", instant.isInflight()); assertTrue(instant.isInflight(), "Instant must be marked inflight");
} }
private void scheduleCompaction(String compactionInstantTime, HoodieWriteClient client, HoodieWriteConfig cfg) private void scheduleCompaction(String compactionInstantTime, HoodieWriteClient client, HoodieWriteConfig cfg)
@@ -475,7 +454,7 @@ public class TestAsyncCompaction extends TestHoodieClientBase {
client.scheduleCompactionAtInstant(compactionInstantTime, Option.empty()); client.scheduleCompactionAtInstant(compactionInstantTime, Option.empty());
HoodieTableMetaClient metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), cfg.getBasePath()); HoodieTableMetaClient metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), cfg.getBasePath());
HoodieInstant instant = metaClient.getActiveTimeline().filterPendingCompactionTimeline().lastInstant().get(); HoodieInstant instant = metaClient.getActiveTimeline().filterPendingCompactionTimeline().lastInstant().get();
assertEquals("Last compaction instant must be the one set", instant.getTimestamp(), compactionInstantTime); assertEquals(compactionInstantTime, instant.getTimestamp(), "Last compaction instant must be the one set");
} }
private void scheduleAndExecuteCompaction(String compactionInstantTime, HoodieWriteClient client, HoodieTable table, private void scheduleAndExecuteCompaction(String compactionInstantTime, HoodieWriteClient client, HoodieTable table,
@@ -489,28 +468,30 @@ public class TestAsyncCompaction extends TestHoodieClientBase {
client.compact(compactionInstantTime); client.compact(compactionInstantTime);
List<FileSlice> fileSliceList = getCurrentLatestFileSlices(table); List<FileSlice> fileSliceList = getCurrentLatestFileSlices(table);
assertTrue("Ensure latest file-slices are not empty", fileSliceList.stream().findAny().isPresent()); assertTrue(fileSliceList.stream().findAny().isPresent(), "Ensure latest file-slices are not empty");
assertFalse("Verify all file-slices have base-instant same as compaction instant", fileSliceList.stream() assertFalse(fileSliceList.stream()
.anyMatch(fs -> !fs.getBaseInstantTime().equals(compactionInstantTime))); .anyMatch(fs -> !fs.getBaseInstantTime().equals(compactionInstantTime)),
assertFalse("Verify all file-slices have data-files", "Verify all file-slices have base-instant same as compaction instant");
fileSliceList.stream().anyMatch(fs -> !fs.getBaseFile().isPresent())); assertFalse(fileSliceList.stream().anyMatch(fs -> !fs.getBaseFile().isPresent()),
"Verify all file-slices have data-files");
if (hasDeltaCommitAfterPendingCompaction) { if (hasDeltaCommitAfterPendingCompaction) {
assertFalse("Verify all file-slices have atleast one log-file", assertFalse(fileSliceList.stream().anyMatch(fs -> fs.getLogFiles().count() == 0),
fileSliceList.stream().anyMatch(fs -> fs.getLogFiles().count() == 0)); "Verify all file-slices have atleast one log-file");
} else { } else {
assertFalse("Verify all file-slices have no log-files", assertFalse(fileSliceList.stream().anyMatch(fs -> fs.getLogFiles().count() > 0),
fileSliceList.stream().anyMatch(fs -> fs.getLogFiles().count() > 0)); "Verify all file-slices have no log-files");
} }
// verify that there is a commit // verify that there is a commit
table = getHoodieTable(new HoodieTableMetaClient(jsc.hadoopConfiguration(), cfg.getBasePath(), true), cfg); table = getHoodieTable(new HoodieTableMetaClient(jsc.hadoopConfiguration(), cfg.getBasePath(), true), cfg);
HoodieTimeline timeline = table.getMetaClient().getCommitTimeline().filterCompletedInstants(); HoodieTimeline timeline = table.getMetaClient().getCommitTimeline().filterCompletedInstants();
String latestCompactionCommitTime = timeline.lastInstant().get().getTimestamp(); String latestCompactionCommitTime = timeline.lastInstant().get().getTimestamp();
assertEquals("Expect compaction instant time to be the latest commit time", latestCompactionCommitTime, assertEquals(latestCompactionCommitTime, compactionInstantTime,
compactionInstantTime); "Expect compaction instant time to be the latest commit time");
Assert.assertEquals("Must contain expected records", expectedNumRecs, assertEquals(expectedNumRecs,
HoodieClientTestUtils.readSince(basePath, sqlContext, timeline, "000").count()); HoodieClientTestUtils.readSince(basePath, sqlContext, timeline, "000").count(),
"Must contain expected records");
} }
@@ -530,11 +511,11 @@ public class TestAsyncCompaction extends TestHoodieClientBase {
Option<HoodieInstant> deltaCommit = Option<HoodieInstant> deltaCommit =
metaClient.getActiveTimeline().reload().getDeltaCommitTimeline().filterCompletedInstants().lastInstant(); metaClient.getActiveTimeline().reload().getDeltaCommitTimeline().filterCompletedInstants().lastInstant();
if (skipCommit && !cfg.shouldAutoCommit()) { if (skipCommit && !cfg.shouldAutoCommit()) {
assertTrue("Delta commit should not be latest instant", assertTrue(deltaCommit.get().getTimestamp().compareTo(instantTime) < 0,
deltaCommit.get().getTimestamp().compareTo(instantTime) < 0); "Delta commit should not be latest instant");
} else { } else {
assertTrue(deltaCommit.isPresent()); assertTrue(deltaCommit.isPresent());
assertEquals("Delta commit should be latest instant", instantTime, deltaCommit.get().getTimestamp()); assertEquals(instantTime, deltaCommit.get().getTimestamp(), "Delta commit should be latest instant");
} }
return statusList; return statusList;
} }

View File

@@ -43,23 +43,24 @@ import org.apache.hudi.table.HoodieTable;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaRDD;
import org.junit.After; import org.junit.jupiter.api.AfterEach;
import org.junit.Before; import org.junit.jupiter.api.BeforeEach;
import org.junit.Test; import org.junit.jupiter.api.Test;
import java.util.List; import java.util.List;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import static org.junit.Assert.assertEquals; import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.Assert.assertTrue; import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.Assert.assertFalse; import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
public class TestHoodieCompactor extends HoodieClientTestHarness { public class TestHoodieCompactor extends HoodieClientTestHarness {
private Configuration hadoopConf; private Configuration hadoopConf;
private HoodieTableMetaClient metaClient; private HoodieTableMetaClient metaClient;
@Before @BeforeEach
public void setUp() throws Exception { public void setUp() throws Exception {
// Initialize a local spark env // Initialize a local spark env
initSparkContexts("TestHoodieCompactor"); initSparkContexts("TestHoodieCompactor");
@@ -72,7 +73,7 @@ public class TestHoodieCompactor extends HoodieClientTestHarness {
initTestDataGenerator(); initTestDataGenerator();
} }
@After @AfterEach
public void tearDown() throws Exception { public void tearDown() throws Exception {
cleanupFileSystem(); cleanupFileSystem();
cleanupTestDataGenerator(); cleanupTestDataGenerator();
@@ -100,13 +101,15 @@ public class TestHoodieCompactor extends HoodieClientTestHarness {
.withIndexConfig(HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build()); .withIndexConfig(HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build());
} }
@Test(expected = HoodieNotSupportedException.class) @Test
public void testCompactionOnCopyOnWriteFail() throws Exception { public void testCompactionOnCopyOnWriteFail() throws Exception {
metaClient = HoodieTestUtils.init(hadoopConf, basePath, HoodieTableType.COPY_ON_WRITE); metaClient = HoodieTestUtils.init(hadoopConf, basePath, HoodieTableType.COPY_ON_WRITE);
HoodieTable<?> table = HoodieTable.create(metaClient, getConfig(), jsc); HoodieTable<?> table = HoodieTable.create(metaClient, getConfig(), jsc);
String compactionInstantTime = HoodieActiveTimeline.createNewInstantTime(); String compactionInstantTime = HoodieActiveTimeline.createNewInstantTime();
assertThrows(HoodieNotSupportedException.class, () -> {
table.scheduleCompaction(jsc, compactionInstantTime, Option.empty()); table.scheduleCompaction(jsc, compactionInstantTime, Option.empty());
table.compact(jsc, compactionInstantTime); table.compact(jsc, compactionInstantTime);
});
} }
@Test @Test
@@ -123,7 +126,7 @@ public class TestHoodieCompactor extends HoodieClientTestHarness {
String compactionInstantTime = HoodieActiveTimeline.createNewInstantTime(); String compactionInstantTime = HoodieActiveTimeline.createNewInstantTime();
Option<HoodieCompactionPlan> plan = table.scheduleCompaction(jsc, compactionInstantTime, Option.empty()); Option<HoodieCompactionPlan> plan = table.scheduleCompaction(jsc, compactionInstantTime, Option.empty());
assertFalse("If there is nothing to compact, result will be empty", plan.isPresent()); assertFalse(plan.isPresent(), "If there is nothing to compact, result will be empty");
} }
} }
@@ -159,7 +162,7 @@ public class TestHoodieCompactor extends HoodieClientTestHarness {
List<FileSlice> groupedLogFiles = List<FileSlice> groupedLogFiles =
table.getSliceView().getLatestFileSlices(partitionPath).collect(Collectors.toList()); table.getSliceView().getLatestFileSlices(partitionPath).collect(Collectors.toList());
for (FileSlice fileSlice : groupedLogFiles) { for (FileSlice fileSlice : groupedLogFiles) {
assertEquals("There should be 1 log file written for every data file", 1, fileSlice.getLogFiles().count()); assertEquals(1, fileSlice.getLogFiles().count(), "There should be 1 log file written for every data file");
} }
} }
HoodieTestUtils.createDeltaCommitFiles(basePath, newCommitTime); HoodieTestUtils.createDeltaCommitFiles(basePath, newCommitTime);

View File

@@ -50,9 +50,11 @@ public class TestHoodieTableMetaClient extends HoodieCommonTestHarnessJunit5 {
@Test @Test
public void checkMetadata() { public void checkMetadata() {
assertEquals(HoodieTestUtils.RAW_TRIPS_TEST_NAME, metaClient.getTableConfig().getTableName(), "Table name should be raw_trips"); assertEquals(HoodieTestUtils.RAW_TRIPS_TEST_NAME, metaClient.getTableConfig().getTableName(),
"Table name should be raw_trips");
assertEquals(basePath, metaClient.getBasePath(), "Basepath should be the one assigned"); assertEquals(basePath, metaClient.getBasePath(), "Basepath should be the one assigned");
assertEquals(basePath + "/.hoodie", metaClient.getMetaPath(), "Metapath should be ${basepath}/.hoodie"); assertEquals(basePath + "/.hoodie", metaClient.getMetaPath(),
"Metapath should be ${basepath}/.hoodie");
} }
@Test @Test
@@ -67,8 +69,10 @@ public class TestHoodieTableMetaClient extends HoodieCommonTestHarnessJunit5 {
commitTimeline.saveAsComplete(instant, Option.of("test-detail".getBytes())); commitTimeline.saveAsComplete(instant, Option.of("test-detail".getBytes()));
commitTimeline = commitTimeline.reload(); commitTimeline = commitTimeline.reload();
HoodieInstant completedInstant = HoodieTimeline.getCompletedInstant(instant); HoodieInstant completedInstant = HoodieTimeline.getCompletedInstant(instant);
assertEquals(completedInstant, commitTimeline.getInstants().findFirst().get(), "Commit should be 1 and completed"); assertEquals(completedInstant, commitTimeline.getInstants().findFirst().get(),
assertArrayEquals("test-detail".getBytes(), commitTimeline.getInstantDetails(completedInstant).get(), "Commit value should be \"test-detail\""); "Commit should be 1 and completed");
assertArrayEquals("test-detail".getBytes(), commitTimeline.getInstantDetails(completedInstant).get(),
"Commit value should be \"test-detail\"");
} }
@Test @Test
@@ -90,8 +94,10 @@ public class TestHoodieTableMetaClient extends HoodieCommonTestHarnessJunit5 {
activeTimeline = activeTimeline.reload(); activeTimeline = activeTimeline.reload();
activeCommitTimeline = activeTimeline.getCommitTimeline(); activeCommitTimeline = activeTimeline.getCommitTimeline();
assertFalse(activeCommitTimeline.empty(), "Should be the 1 commit we made"); assertFalse(activeCommitTimeline.empty(), "Should be the 1 commit we made");
assertEquals(completedInstant, activeCommitTimeline.getInstants().findFirst().get(), "Commit should be 1"); assertEquals(completedInstant, activeCommitTimeline.getInstants().findFirst().get(),
assertArrayEquals("test-detail".getBytes(), activeCommitTimeline.getInstantDetails(completedInstant).get(), "Commit value should be \"test-detail\""); "Commit should be 1");
assertArrayEquals("test-detail".getBytes(), activeCommitTimeline.getInstantDetails(completedInstant).get(),
"Commit value should be \"test-detail\"");
} }
@Test @Test

View File

@@ -288,7 +288,8 @@ public class TestHoodieTableFileSystemView extends HoodieCommonTestHarnessJunit5
refreshFsView(); refreshFsView();
List<FileSlice> slices = rtView.getLatestFileSlices(partitionPath).collect(Collectors.toList()); List<FileSlice> slices = rtView.getLatestFileSlices(partitionPath).collect(Collectors.toList());
assertEquals(1, slices.size(), "Expected latest file-slices"); assertEquals(1, slices.size(), "Expected latest file-slices");
assertEquals(compactionRequestedTime, slices.get(0).getBaseInstantTime(), "Base-Instant must be compaction Instant"); assertEquals(compactionRequestedTime, slices.get(0).getBaseInstantTime(),
"Base-Instant must be compaction Instant");
assertFalse(slices.get(0).getBaseFile().isPresent(), "Latest File Slice must not have data-file"); assertFalse(slices.get(0).getBaseFile().isPresent(), "Latest File Slice must not have data-file");
assertEquals(0, slices.get(0).getLogFiles().count(), "Latest File Slice must not have any log-files"); assertEquals(0, slices.get(0).getLogFiles().count(), "Latest File Slice must not have any log-files");
@@ -328,7 +329,8 @@ public class TestHoodieTableFileSystemView extends HoodieCommonTestHarnessJunit5
} else { } else {
assertFalse(fileSlice.getBaseFile().isPresent(), "No data-file expected as it was not created"); assertFalse(fileSlice.getBaseFile().isPresent(), "No data-file expected as it was not created");
} }
assertEquals(instantTime1, fileSlice.getBaseInstantTime(), "Base Instant of penultimate file-slice must be base instant"); assertEquals(instantTime1, fileSlice.getBaseInstantTime(),
"Base Instant of penultimate file-slice must be base instant");
List<HoodieLogFile> logFiles = fileSlice.getLogFiles().collect(Collectors.toList()); List<HoodieLogFile> logFiles = fileSlice.getLogFiles().collect(Collectors.toList());
assertEquals(4, logFiles.size(), "Log files must include those after compaction request"); assertEquals(4, logFiles.size(), "Log files must include those after compaction request");
assertEquals(fileName4, logFiles.get(0).getFileName(), "Log File Order check"); assertEquals(fileName4, logFiles.get(0).getFileName(), "Log File Order check");
@@ -342,7 +344,8 @@ public class TestHoodieTableFileSystemView extends HoodieCommonTestHarnessJunit5
fileSlice = fileSliceList.get(0); fileSlice = fileSliceList.get(0);
assertEquals(fileId, fileSlice.getFileId()); assertEquals(fileId, fileSlice.getFileId());
assertFalse(fileSlice.getBaseFile().isPresent(), "No data-file expected in latest file-slice"); assertFalse(fileSlice.getBaseFile().isPresent(), "No data-file expected in latest file-slice");
assertEquals(compactionRequestedTime, fileSlice.getBaseInstantTime(), "Compaction requested instant must be base instant"); assertEquals(compactionRequestedTime, fileSlice.getBaseInstantTime(),
"Compaction requested instant must be base instant");
logFiles = fileSlice.getLogFiles().collect(Collectors.toList()); logFiles = fileSlice.getLogFiles().collect(Collectors.toList());
assertEquals(2, logFiles.size(), "Log files must include only those after compaction request"); assertEquals(2, logFiles.size(), "Log files must include only those after compaction request");
assertEquals(fileName4, logFiles.get(0).getFileName(), "Log File Order check"); assertEquals(fileName4, logFiles.get(0).getFileName(), "Log File Order check");
@@ -457,7 +460,8 @@ public class TestHoodieTableFileSystemView extends HoodieCommonTestHarnessJunit5
"Orphan File Slice with log-file check data-file"); "Orphan File Slice with log-file check data-file");
logFiles = orphanFileSliceWithLogFile.getLogFiles().collect(Collectors.toList()); logFiles = orphanFileSliceWithLogFile.getLogFiles().collect(Collectors.toList());
assertEquals(1, logFiles.size(), "Orphan File Slice with log-file check data-file"); assertEquals(1, logFiles.size(), "Orphan File Slice with log-file check data-file");
assertEquals(orphanLogFileName, logFiles.get(0).getFileName(), "Orphan File Slice with log-file check data-file"); assertEquals(orphanLogFileName, logFiles.get(0).getFileName(),
"Orphan File Slice with log-file check data-file");
assertEquals(inflightDeltaInstantTime, inflightFileSliceWithLogFile.getBaseInstantTime(), assertEquals(inflightDeltaInstantTime, inflightFileSliceWithLogFile.getBaseInstantTime(),
"Inflight File Slice with log-file check base-commit"); "Inflight File Slice with log-file check base-commit");
assertFalse(inflightFileSliceWithLogFile.getBaseFile().isPresent(), assertFalse(inflightFileSliceWithLogFile.getBaseFile().isPresent(),
@@ -1115,7 +1119,8 @@ public class TestHoodieTableFileSystemView extends HoodieCommonTestHarnessJunit5
fileSlice = fileSliceList.get(0); fileSlice = fileSliceList.get(0);
assertEquals(fileId, fileSlice.getFileId()); assertEquals(fileId, fileSlice.getFileId());
assertFalse(fileSlice.getBaseFile().isPresent(), "No data-file expected in latest file-slice"); assertFalse(fileSlice.getBaseFile().isPresent(), "No data-file expected in latest file-slice");
assertEquals(compactionRequestedTime, fileSlice.getBaseInstantTime(), "Compaction requested instant must be base instant"); assertEquals(compactionRequestedTime, fileSlice.getBaseInstantTime(),
"Compaction requested instant must be base instant");
logFiles = fileSlice.getLogFiles().collect(Collectors.toList()); logFiles = fileSlice.getLogFiles().collect(Collectors.toList());
assertEquals(2, logFiles.size(), "Log files must include only those after compaction request"); assertEquals(2, logFiles.size(), "Log files must include only those after compaction request");
assertEquals(fileName4, logFiles.get(0).getFileName(), "Log File Order check"); assertEquals(fileName4, logFiles.get(0).getFileName(), "Log File Order check");

View File

@@ -45,32 +45,27 @@ import org.apache.spark.sql.Column;
import org.apache.spark.sql.DataFrameWriter; import org.apache.spark.sql.DataFrameWriter;
import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row; import org.apache.spark.sql.Row;
import org.junit.After; import org.junit.jupiter.api.AfterEach;
import org.junit.Before; import org.junit.jupiter.api.BeforeEach;
import org.junit.Rule; import org.junit.jupiter.api.Nested;
import org.junit.Test; import org.junit.jupiter.api.Test;
import org.junit.experimental.runners.Enclosed; import org.junit.jupiter.params.ParameterizedTest;
import org.junit.rules.ExpectedException; import org.junit.jupiter.params.provider.NullSource;
import org.junit.runner.RunWith; import org.junit.jupiter.params.provider.ValueSource;
import org.junit.runners.Parameterized;
import org.junit.runners.Parameterized.Parameter;
import org.junit.runners.Parameterized.Parameters;
import java.io.IOException; import java.io.IOException;
import java.util.Arrays; import java.util.Arrays;
import java.util.List; import java.util.List;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import static org.junit.Assert.assertEquals; import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
import static org.junit.Assert.assertNull; import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.Assert.assertTrue; import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
@RunWith(Enclosed.class) public class TestHoodieSnapshotExporter extends HoodieClientTestHarness {
public class TestHoodieSnapshotExporter {
static class ExporterTestHarness extends HoodieClientTestHarness { static final Logger LOG = LogManager.getLogger(TestHoodieSnapshotExporter.class);
static final Logger LOG = LogManager.getLogger(ExporterTestHarness.class);
static final int NUM_RECORDS = 100; static final int NUM_RECORDS = 100;
static final String COMMIT_TIME = "20200101000000"; static final String COMMIT_TIME = "20200101000000";
static final String PARTITION_PATH = "2020"; static final String PARTITION_PATH = "2020";
@@ -78,7 +73,7 @@ public class TestHoodieSnapshotExporter {
String sourcePath; String sourcePath;
String targetPath; String targetPath;
@Before @BeforeEach
public void setUp() throws Exception { public void setUp() throws Exception {
initSparkContexts(); initSparkContexts();
initDFS(); initDFS();
@@ -107,7 +102,7 @@ public class TestHoodieSnapshotExporter {
} }
} }
@After @AfterEach
public void tearDown() throws Exception { public void tearDown() throws Exception {
cleanupSparkContexts(); cleanupSparkContexts();
cleanupDFS(); cleanupDFS();
@@ -125,15 +120,14 @@ public class TestHoodieSnapshotExporter {
.withIndexConfig(HoodieIndexConfig.newBuilder().withIndexType(IndexType.BLOOM).build()) .withIndexConfig(HoodieIndexConfig.newBuilder().withIndexType(IndexType.BLOOM).build())
.build(); .build();
} }
}
public static class TestHoodieSnapshotExporterForHudi extends ExporterTestHarness { @Nested
public class TestHoodieSnapshotExporterForHudi {
private HoodieSnapshotExporter.Config cfg; private HoodieSnapshotExporter.Config cfg;
@Before @BeforeEach
public void setUp() throws Exception { public void setUp() throws Exception {
super.setUp();
cfg = new Config(); cfg = new Config();
cfg.sourceBasePath = sourcePath; cfg.sourceBasePath = sourcePath;
cfg.targetOutputPath = targetPath; cfg.targetOutputPath = targetPath;
@@ -156,23 +150,20 @@ public class TestHoodieSnapshotExporter {
long numParquetFiles = Arrays.stream(dfs.listStatus(new Path(partition))) long numParquetFiles = Arrays.stream(dfs.listStatus(new Path(partition)))
.filter(fileStatus -> fileStatus.getPath().toString().endsWith(".parquet")) .filter(fileStatus -> fileStatus.getPath().toString().endsWith(".parquet"))
.count(); .count();
assertTrue("There should exist at least 1 parquet file.", numParquetFiles >= 1); assertTrue(numParquetFiles >= 1, "There should exist at least 1 parquet file.");
assertEquals(NUM_RECORDS, sqlContext.read().parquet(partition).count()); assertEquals(NUM_RECORDS, sqlContext.read().parquet(partition).count());
assertTrue(dfs.exists(new Path(partition + "/.hoodie_partition_metadata"))); assertTrue(dfs.exists(new Path(partition + "/.hoodie_partition_metadata")));
assertTrue(dfs.exists(new Path(targetPath + "/_SUCCESS"))); assertTrue(dfs.exists(new Path(targetPath + "/_SUCCESS")));
} }
} }
public static class TestHoodieSnapshotExporterForEarlyAbort extends ExporterTestHarness { @Nested
public class TestHoodieSnapshotExporterForEarlyAbort {
private HoodieSnapshotExporter.Config cfg; private HoodieSnapshotExporter.Config cfg;
@Rule @BeforeEach
public ExpectedException exceptionRule = ExpectedException.none();
@Before
public void setUp() throws Exception { public void setUp() throws Exception {
super.setUp();
cfg = new Config(); cfg = new Config();
cfg.sourceBasePath = sourcePath; cfg.sourceBasePath = sourcePath;
cfg.targetOutputPath = targetPath; cfg.targetOutputPath = targetPath;
@@ -185,9 +176,10 @@ public class TestHoodieSnapshotExporter {
dfs.mkdirs(new Path(targetPath)); dfs.mkdirs(new Path(targetPath));
// export // export
exceptionRule.expect(HoodieSnapshotExporterException.class); final Throwable thrown = assertThrows(HoodieSnapshotExporterException.class, () -> {
exceptionRule.expectMessage("The target output path already exists.");
new HoodieSnapshotExporter().export(jsc, cfg); new HoodieSnapshotExporter().export(jsc, cfg);
});
assertEquals("The target output path already exists.", thrown.getMessage());
} }
@Test @Test
@@ -202,9 +194,10 @@ public class TestHoodieSnapshotExporter {
} }
// export // export
exceptionRule.expect(HoodieSnapshotExporterException.class); final Throwable thrown = assertThrows(HoodieSnapshotExporterException.class, () -> {
exceptionRule.expectMessage("No commits present. Nothing to snapshot.");
new HoodieSnapshotExporter().export(jsc, cfg); new HoodieSnapshotExporter().export(jsc, cfg);
});
assertEquals("No commits present. Nothing to snapshot.", thrown.getMessage());
} }
@Test @Test
@@ -213,25 +206,19 @@ public class TestHoodieSnapshotExporter {
dfs.delete(new Path(sourcePath + "/" + PARTITION_PATH), true); dfs.delete(new Path(sourcePath + "/" + PARTITION_PATH), true);
// export // export
exceptionRule.expect(HoodieSnapshotExporterException.class); final Throwable thrown = assertThrows(HoodieSnapshotExporterException.class, () -> {
exceptionRule.expectMessage("The source dataset has 0 partition to snapshot.");
new HoodieSnapshotExporter().export(jsc, cfg); new HoodieSnapshotExporter().export(jsc, cfg);
});
assertEquals("The source dataset has 0 partition to snapshot.", thrown.getMessage());
} }
} }
@RunWith(Parameterized.class) @Nested
public static class TestHoodieSnapshotExporterForNonHudi extends ExporterTestHarness { public class TestHoodieSnapshotExporterForNonHudi {
@Parameters @ParameterizedTest
public static Iterable<String[]> formats() { @ValueSource(strings = {"json", "parquet"})
return Arrays.asList(new String[][] {{"json"}, {"parquet"}}); public void testExportAsNonHudi(String format) throws IOException {
}
@Parameter
public String format;
@Test
public void testExportAsNonHudi() throws IOException {
HoodieSnapshotExporter.Config cfg = new Config(); HoodieSnapshotExporter.Config cfg = new Config();
cfg.sourceBasePath = sourcePath; cfg.sourceBasePath = sourcePath;
cfg.targetOutputPath = targetPath; cfg.targetOutputPath = targetPath;
@@ -242,12 +229,10 @@ public class TestHoodieSnapshotExporter {
} }
} }
public static class TestHoodieSnapshotExporterForRepartitioning extends ExporterTestHarness {
private static final String PARTITION_NAME = "year";
public static class UserDefinedPartitioner implements Partitioner { public static class UserDefinedPartitioner implements Partitioner {
public static final String PARTITION_NAME = "year";
@Override @Override
public DataFrameWriter<Row> partition(Dataset<Row> source) { public DataFrameWriter<Row> partition(Dataset<Row> source) {
return source return source
@@ -258,11 +243,13 @@ public class TestHoodieSnapshotExporter {
} }
} }
@Nested
public class TestHoodieSnapshotExporterForRepartitioning {
private HoodieSnapshotExporter.Config cfg; private HoodieSnapshotExporter.Config cfg;
@Before @BeforeEach
public void setUp() throws Exception { public void setUp() throws Exception {
super.setUp();
cfg = new Config(); cfg = new Config();
cfg.sourceBasePath = sourcePath; cfg.sourceBasePath = sourcePath;
cfg.targetOutputPath = targetPath; cfg.targetOutputPath = targetPath;
@@ -287,39 +274,35 @@ public class TestHoodieSnapshotExporter {
assertEquals(NUM_RECORDS, sqlContext.read().format("json").load(targetPath).count()); assertEquals(NUM_RECORDS, sqlContext.read().format("json").load(targetPath).count());
assertTrue(dfs.exists(new Path(targetPath + "/_SUCCESS"))); assertTrue(dfs.exists(new Path(targetPath + "/_SUCCESS")));
assertTrue(dfs.exists(new Path(String.format("%s/%s=%s", targetPath, PARTITION_NAME, PARTITION_PATH)))); assertTrue(dfs.exists(new Path(String.format("%s/%s=%s", targetPath, UserDefinedPartitioner.PARTITION_NAME, PARTITION_PATH))));
} }
} }
@RunWith(Parameterized.class) @Nested
public static class TestHoodieSnapshotExporterInputValidation { public class TestHoodieSnapshotExporterInputValidation {
@Parameters @ParameterizedTest
public static Iterable<Object[]> data() { @ValueSource(strings = {"json", "parquet", "hudi"})
return Arrays.asList(new Object[][] { public void testValidateOutputFormat_withValidFormat(String format) {
{"json", true}, {"parquet", true}, {"hudi", true}, assertDoesNotThrow(() -> {
{"JSON", false}, {"foo", false}, {null, false}, {"", false} new OutputFormatValidator().validate(null, format);
}); });
} }
@Parameter @ParameterizedTest
public String format; @ValueSource(strings = {"", "JSON"})
@Parameter(1) public void testValidateOutputFormat_withInvalidFormat(String format) {
public boolean isValid; assertThrows(ParameterException.class, () -> {
@Test
public void testValidateOutputFormat() {
Throwable t = null;
try {
new OutputFormatValidator().validate(null, format); new OutputFormatValidator().validate(null, format);
} catch (Exception e) { });
t = e;
}
if (isValid) {
assertNull(t);
} else {
assertTrue(t instanceof ParameterException);
} }
@ParameterizedTest
@NullSource
public void testValidateOutputFormat_withNullFormat(String format) {
assertThrows(ParameterException.class, () -> {
new OutputFormatValidator().validate(null, format);
});
} }
} }
} }