1
0

[HUDI-296] Explore use of spotless to auto fix formatting errors (#945)

- Add spotless format fixing to project
- One time reformatting for conformity
- Build fails on formatting violations; running mvn spotless:apply auto-fixes them
This commit is contained in:
leesf
2019-10-10 20:19:40 +08:00
committed by vinoth chandar
parent 834c591955
commit b19bed442d
381 changed files with 7350 additions and 9064 deletions

View File

@@ -41,7 +41,8 @@ import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
/**
* Driver program that uses the Hoodie client with synthetic workload, and performs basic operations. <p>
* Driver program that uses the Hoodie client with synthetic workload, and performs basic operations.
* <p>
*/
public class HoodieClientExample {
@@ -82,18 +83,15 @@ public class HoodieClientExample {
Path path = new Path(tablePath);
FileSystem fs = FSUtils.getFs(tablePath, jsc.hadoopConfiguration());
if (!fs.exists(path)) {
HoodieTableMetaClient
.initTableType(jsc.hadoopConfiguration(), tablePath, HoodieTableType.valueOf(tableType), tableName,
HoodieAvroPayload.class.getName());
HoodieTableMetaClient.initTableType(jsc.hadoopConfiguration(), tablePath, HoodieTableType.valueOf(tableType),
tableName, HoodieAvroPayload.class.getName());
}
// Create the write client to write some records in
HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder().withPath(tablePath)
.withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA).withParallelism(2, 2)
.forTable(tableName)
.withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA).withParallelism(2, 2).forTable(tableName)
.withIndexConfig(HoodieIndexConfig.newBuilder().withIndexType(IndexType.BLOOM).build())
.withCompactionConfig(
HoodieCompactionConfig.newBuilder().archiveCommitsWith(2, 3).build()).build();
.withCompactionConfig(HoodieCompactionConfig.newBuilder().archiveCommitsWith(2, 3).build()).build();
HoodieWriteClient client = new HoodieWriteClient(jsc, cfg);
/**

View File

@@ -53,7 +53,7 @@ public abstract class HoodieClientTestHarness extends HoodieCommonTestHarness im
protected transient ExecutorService executorService;
protected transient HoodieTableMetaClient metaClient;
//dfs
// dfs
protected String dfsBasePath;
protected transient HdfsTestService hdfsTestService;
protected transient MiniDFSCluster dfsCluster;
@@ -74,6 +74,7 @@ public abstract class HoodieClientTestHarness extends HoodieCommonTestHarness im
/**
* Cleans up the resource group for the subclasses of {@link TestHoodieClientBase}.
*
* @throws IOException
*/
public void cleanupResources() throws IOException {
@@ -84,8 +85,7 @@ public abstract class HoodieClientTestHarness extends HoodieCommonTestHarness im
}
/**
* Initializes the Spark contexts ({@link JavaSparkContext} and {@link SQLContext})
* with the given application name.
* Initializes the Spark contexts ({@link JavaSparkContext} and {@link SQLContext}) with the given application name.
*
* @param appName The specified application name.
*/
@@ -94,13 +94,13 @@ public abstract class HoodieClientTestHarness extends HoodieCommonTestHarness im
jsc = new JavaSparkContext(HoodieClientTestUtils.getSparkConfForTest(appName));
jsc.setLogLevel("ERROR");
//SQLContext stuff
// SQLContext stuff
sqlContext = new SQLContext(jsc);
}
/**
* Initializes the Spark contexts ({@link JavaSparkContext} and {@link SQLContext})
* with a default name <b>TestHoodieClient</b>.
* Initializes the Spark contexts ({@link JavaSparkContext} and {@link SQLContext}) with a default name
* <b>TestHoodieClient</b>.
*/
protected void initSparkContexts() {
initSparkContexts("TestHoodieClient");
@@ -155,8 +155,8 @@ public abstract class HoodieClientTestHarness extends HoodieCommonTestHarness im
}
/**
* Initializes an instance of {@link HoodieTableMetaClient} with a special table type
* specified by {@code getTableType()}.
* Initializes an instance of {@link HoodieTableMetaClient} with a special table type specified by
* {@code getTableType()}.
*
* @throws IOException
*/

View File

@@ -73,15 +73,14 @@ public class TestAsyncCompaction extends TestHoodieClientBase {
private HoodieWriteConfig.Builder getConfigBuilder(Boolean autoCommit) {
return HoodieWriteConfig.newBuilder().withPath(basePath).withSchema(TRIP_EXAMPLE_SCHEMA).withParallelism(2, 2)
.withAutoCommit(autoCommit).withAssumeDatePartitioning(true).withCompactionConfig(
HoodieCompactionConfig.newBuilder().compactionSmallFileSize(1024 * 1024 * 1024).withInlineCompaction(false)
.withMaxNumDeltaCommitsBeforeCompaction(1).build())
.withAutoCommit(autoCommit).withAssumeDatePartitioning(true)
.withCompactionConfig(HoodieCompactionConfig.newBuilder().compactionSmallFileSize(1024 * 1024 * 1024)
.withInlineCompaction(false).withMaxNumDeltaCommitsBeforeCompaction(1).build())
.withStorageConfig(HoodieStorageConfig.newBuilder().limitFileSize(1024 * 1024 * 1024).build())
.forTable("test-trip-table")
.withIndexConfig(HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build())
.withEmbeddedTimelineServerEnabled(true).withFileSystemViewConfig(
FileSystemViewStorageConfig.newBuilder().withStorageType(FileSystemViewStorageType.EMBEDDED_KV_STORE)
.build());
.withEmbeddedTimelineServerEnabled(true).withFileSystemViewConfig(FileSystemViewStorageConfig.newBuilder()
.withStorageType(FileSystemViewStorageType.EMBEDDED_KV_STORE).build());
}
@Test
@@ -97,8 +96,8 @@ public class TestAsyncCompaction extends TestHoodieClientBase {
int numRecs = 2000;
List<HoodieRecord> records = dataGen.generateInserts(firstInstantTime, numRecs);
runNextDeltaCommits(client, Arrays.asList(firstInstantTime, secondInstantTime),
records, cfg, true, new ArrayList<>());
runNextDeltaCommits(client, Arrays.asList(firstInstantTime, secondInstantTime), records, cfg, true,
new ArrayList<>());
// Schedule compaction but do not run them
scheduleCompaction(compactionInstantTime, client, cfg);
@@ -158,8 +157,8 @@ public class TestAsyncCompaction extends TestHoodieClientBase {
try (HoodieWriteClient client = getHoodieWriteClient(cfg, true);) {
List<HoodieRecord> records = dataGen.generateInserts(firstInstantTime, numRecs);
records = runNextDeltaCommits(client, Arrays.asList(firstInstantTime, secondInstantTime),
records, cfg, true, new ArrayList<>());
records = runNextDeltaCommits(client, Arrays.asList(firstInstantTime, secondInstantTime), records, cfg, true,
new ArrayList<>());
// Schedule compaction but do not run them
scheduleCompaction(compactionInstantTime, client, cfg);
@@ -177,20 +176,18 @@ public class TestAsyncCompaction extends TestHoodieClientBase {
assertTrue("inflight instant has expected instant time",
inflightInstant.getTimestamp().equals(inflightInstantTime));
//This should rollback
// This should rollback
client.startCommitWithTime(nextInflightInstantTime);
//Validate
// Validate
metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), cfg.getBasePath());
inflightInstant =
metaClient.getActiveTimeline().filterInflightsExcludingCompaction().firstInstant().get();
inflightInstant = metaClient.getActiveTimeline().filterInflightsExcludingCompaction().firstInstant().get();
assertTrue("inflight instant has expected instant time",
inflightInstant.getTimestamp().equals(nextInflightInstantTime));
assertTrue("Expect only one inflight instant",
metaClient.getActiveTimeline().filterInflightsExcludingCompaction().getInstants().count() == 1);
//Expect pending Compaction to be present
pendingCompactionInstant =
metaClient.getActiveTimeline().filterPendingCompactionTimeline().firstInstant().get();
// Expect pending Compaction to be present
pendingCompactionInstant = metaClient.getActiveTimeline().filterPendingCompactionTimeline().firstInstant().get();
assertTrue("Pending Compaction instant has expected instant time",
pendingCompactionInstant.getTimestamp().equals(compactionInstantTime));
}
@@ -211,8 +208,8 @@ public class TestAsyncCompaction extends TestHoodieClientBase {
int numRecs = 2000;
List<HoodieRecord> records = dataGen.generateInserts(firstInstantTime, numRecs);
records = runNextDeltaCommits(client, Arrays.asList(firstInstantTime, secondInstantTime),
records, cfg, true, new ArrayList<>());
records = runNextDeltaCommits(client, Arrays.asList(firstInstantTime, secondInstantTime), records, cfg, true,
new ArrayList<>());
// Schedule and mark compaction instant as inflight
HoodieTableMetaClient metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), cfg.getBasePath());
@@ -221,8 +218,8 @@ public class TestAsyncCompaction extends TestHoodieClientBase {
moveCompactionFromRequestedToInflight(compactionInstantTime, client, cfg);
// Complete ingestions
runNextDeltaCommits(client, Arrays.asList(thirdInstantTime, fourthInstantTime),
records, cfg, false, Arrays.asList(compactionInstantTime));
runNextDeltaCommits(client, Arrays.asList(thirdInstantTime, fourthInstantTime), records, cfg, false,
Arrays.asList(compactionInstantTime));
// execute inflight compaction
executeCompaction(compactionInstantTime, client, hoodieTable, cfg, numRecs, true);
@@ -242,8 +239,8 @@ public class TestAsyncCompaction extends TestHoodieClientBase {
int numRecs = 2000;
List<HoodieRecord> records = dataGen.generateInserts(firstInstantTime, numRecs);
records = runNextDeltaCommits(client, Arrays.asList(firstInstantTime, secondInstantTime),
records, cfg, true, new ArrayList<>());
records = runNextDeltaCommits(client, Arrays.asList(firstInstantTime, secondInstantTime), records, cfg, true,
new ArrayList<>());
// Schedule compaction but do not run them
HoodieTableMetaClient metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), cfg.getBasePath());
@@ -256,8 +253,8 @@ public class TestAsyncCompaction extends TestHoodieClientBase {
boolean gotException = false;
try {
runNextDeltaCommits(client, Arrays.asList(failedInstantTime),
records, cfg, false, Arrays.asList(compactionInstantTime));
runNextDeltaCommits(client, Arrays.asList(failedInstantTime), records, cfg, false,
Arrays.asList(compactionInstantTime));
} catch (IllegalArgumentException iex) {
// Latest pending compaction instant time must be earlier than this instant time. Should fail here
gotException = true;
@@ -279,8 +276,8 @@ public class TestAsyncCompaction extends TestHoodieClientBase {
int numRecs = 2000;
List<HoodieRecord> records = dataGen.generateInserts(firstInstantTime, numRecs);
records = runNextDeltaCommits(client, Arrays.asList(firstInstantTime, secondInstantTime),
records, cfg, true, new ArrayList<>());
records = runNextDeltaCommits(client, Arrays.asList(firstInstantTime, secondInstantTime), records, cfg, true,
new ArrayList<>());
HoodieTableMetaClient metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), cfg.getBasePath());
createNextDeltaCommit(inflightInstantTime, records, client, metaClient, cfg, true);
@@ -315,8 +312,8 @@ public class TestAsyncCompaction extends TestHoodieClientBase {
int numRecs = 2000;
List<HoodieRecord> records = dataGen.generateInserts(firstInstantTime, numRecs);
records = runNextDeltaCommits(client, Arrays.asList(firstInstantTime, secondInstantTime),
records, cfg, true, new ArrayList<>());
records = runNextDeltaCommits(client, Arrays.asList(firstInstantTime, secondInstantTime), records, cfg, true,
new ArrayList<>());
HoodieTableMetaClient metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), cfg.getBasePath());
boolean gotException = false;
@@ -337,8 +334,7 @@ public class TestAsyncCompaction extends TestHoodieClientBase {
} catch (IllegalArgumentException iex) {
gotException = true;
}
assertTrue("Compaction Instant to be scheduled cannot have same timestamp as committed instant",
gotException);
assertTrue("Compaction Instant to be scheduled cannot have same timestamp as committed instant", gotException);
compactionInstantTime = "006";
scheduleCompaction(compactionInstantTime, client, cfg);
@@ -349,8 +345,7 @@ public class TestAsyncCompaction extends TestHoodieClientBase {
} catch (IllegalArgumentException iex) {
gotException = true;
}
assertTrue("Compaction Instant to be scheduled cannot have same timestamp as a pending compaction",
gotException);
assertTrue("Compaction Instant to be scheduled cannot have same timestamp as a pending compaction", gotException);
}
@Test
@@ -365,8 +360,8 @@ public class TestAsyncCompaction extends TestHoodieClientBase {
int numRecs = 2000;
List<HoodieRecord> records = dataGen.generateInserts(firstInstantTime, numRecs);
records = runNextDeltaCommits(client, Arrays.asList(firstInstantTime, secondInstantTime),
records, cfg, true, new ArrayList<>());
records = runNextDeltaCommits(client, Arrays.asList(firstInstantTime, secondInstantTime), records, cfg, true,
new ArrayList<>());
HoodieTableMetaClient metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), cfg.getBasePath());
HoodieTable hoodieTable = getHoodieTable(metaClient, cfg);
@@ -376,7 +371,7 @@ public class TestAsyncCompaction extends TestHoodieClientBase {
@Test
public void testInterleavedCompaction() throws Exception {
//Case: Two delta commits before and after compaction schedule
// Case: Two delta commits before and after compaction schedule
HoodieWriteConfig cfg = getConfig(true);
try (HoodieWriteClient client = getHoodieWriteClient(cfg, true);) {
@@ -389,15 +384,15 @@ public class TestAsyncCompaction extends TestHoodieClientBase {
int numRecs = 2000;
List<HoodieRecord> records = dataGen.generateInserts(firstInstantTime, numRecs);
records = runNextDeltaCommits(client, Arrays.asList(firstInstantTime, secondInstantTime),
records, cfg, true, new ArrayList<>());
records = runNextDeltaCommits(client, Arrays.asList(firstInstantTime, secondInstantTime), records, cfg, true,
new ArrayList<>());
HoodieTableMetaClient metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), cfg.getBasePath());
HoodieTable hoodieTable = getHoodieTable(metaClient, cfg);
scheduleCompaction(compactionInstantTime, client, cfg);
runNextDeltaCommits(client, Arrays.asList(thirdInstantTime, fourthInstantTime),
records, cfg, false, Arrays.asList(compactionInstantTime));
runNextDeltaCommits(client, Arrays.asList(thirdInstantTime, fourthInstantTime), records, cfg, false,
Arrays.asList(compactionInstantTime));
executeCompaction(compactionInstantTime, client, hoodieTable, cfg, numRecs, true);
}
}
@@ -428,8 +423,8 @@ public class TestAsyncCompaction extends TestHoodieClientBase {
}
private List<HoodieRecord> runNextDeltaCommits(HoodieWriteClient client, List<String> deltaInstants,
List<HoodieRecord> records, HoodieWriteConfig cfg, boolean insertFirst,
List<String> expPendingCompactionInstants) throws Exception {
List<HoodieRecord> records, HoodieWriteConfig cfg, boolean insertFirst, List<String> expPendingCompactionInstants)
throws Exception {
HoodieTableMetaClient metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), cfg.getBasePath());
List<Pair<HoodieInstant, HoodieCompactionPlan>> pendingCompactions =
@@ -476,8 +471,8 @@ public class TestAsyncCompaction extends TestHoodieClientBase {
HoodieWriteConfig cfg) throws IOException {
HoodieTableMetaClient metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), cfg.getBasePath());
HoodieInstant compactionInstant = HoodieTimeline.getCompactionRequestedInstant(compactionInstantTime);
HoodieCompactionPlan workload = AvroUtils.deserializeCompactionPlan(
metaClient.getActiveTimeline().getInstantAuxiliaryDetails(compactionInstant).get());
HoodieCompactionPlan workload = AvroUtils
.deserializeCompactionPlan(metaClient.getActiveTimeline().getInstantAuxiliaryDetails(compactionInstant).get());
metaClient.getActiveTimeline().transitionCompactionRequestedToInflight(compactionInstant);
HoodieInstant instant = metaClient.getActiveTimeline().reload().filterPendingCompactionTimeline().getInstants()
.filter(in -> in.getTimestamp().equals(compactionInstantTime)).findAny().get();
@@ -489,27 +484,23 @@ public class TestAsyncCompaction extends TestHoodieClientBase {
client.scheduleCompactionAtInstant(compactionInstantTime, Option.empty());
HoodieTableMetaClient metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), cfg.getBasePath());
HoodieInstant instant = metaClient.getActiveTimeline().filterPendingCompactionTimeline().lastInstant().get();
assertEquals("Last compaction instant must be the one set",
instant.getTimestamp(), compactionInstantTime);
assertEquals("Last compaction instant must be the one set", instant.getTimestamp(), compactionInstantTime);
}
private void scheduleAndExecuteCompaction(String compactionInstantTime,
HoodieWriteClient client, HoodieTable table, HoodieWriteConfig cfg, int expectedNumRecs,
boolean hasDeltaCommitAfterPendingCompaction) throws IOException {
private void scheduleAndExecuteCompaction(String compactionInstantTime, HoodieWriteClient client, HoodieTable table,
HoodieWriteConfig cfg, int expectedNumRecs, boolean hasDeltaCommitAfterPendingCompaction) throws IOException {
scheduleCompaction(compactionInstantTime, client, cfg);
executeCompaction(compactionInstantTime, client, table, cfg, expectedNumRecs, hasDeltaCommitAfterPendingCompaction);
}
private void executeCompaction(String compactionInstantTime,
HoodieWriteClient client, HoodieTable table, HoodieWriteConfig cfg, int expectedNumRecs,
boolean hasDeltaCommitAfterPendingCompaction) throws IOException {
private void executeCompaction(String compactionInstantTime, HoodieWriteClient client, HoodieTable table,
HoodieWriteConfig cfg, int expectedNumRecs, boolean hasDeltaCommitAfterPendingCompaction) throws IOException {
client.compact(compactionInstantTime);
List<FileSlice> fileSliceList = getCurrentLatestFileSlices(table, cfg);
assertTrue("Ensure latest file-slices are not empty", fileSliceList.stream().findAny().isPresent());
assertFalse("Verify all file-slices have base-instant same as compaction instant",
fileSliceList.stream().filter(fs -> !fs.getBaseInstantTime().equals(compactionInstantTime))
.findAny().isPresent());
assertFalse("Verify all file-slices have base-instant same as compaction instant", fileSliceList.stream()
.filter(fs -> !fs.getBaseInstantTime().equals(compactionInstantTime)).findAny().isPresent());
assertFalse("Verify all file-slices have data-files",
fileSliceList.stream().filter(fs -> !fs.getDataFile().isPresent()).findAny().isPresent());
@@ -522,12 +513,11 @@ public class TestAsyncCompaction extends TestHoodieClientBase {
}
// verify that there is a commit
table = getHoodieTable(
new HoodieTableMetaClient(jsc.hadoopConfiguration(), cfg.getBasePath(), true), cfg);
table = getHoodieTable(new HoodieTableMetaClient(jsc.hadoopConfiguration(), cfg.getBasePath(), true), cfg);
HoodieTimeline timeline = table.getMetaClient().getCommitTimeline().filterCompletedInstants();
String latestCompactionCommitTime = timeline.lastInstant().get().getTimestamp();
assertEquals("Expect compaction instant time to be the latest commit time",
latestCompactionCommitTime, compactionInstantTime);
assertEquals("Expect compaction instant time to be the latest commit time", latestCompactionCommitTime,
compactionInstantTime);
Assert.assertEquals("Must contain expected records", expectedNumRecs,
HoodieClientTestUtils.readSince(basePath, sqlContext, timeline, "000").count());
@@ -546,8 +536,8 @@ public class TestAsyncCompaction extends TestHoodieClientBase {
client.commit(instantTime, statuses);
}
Option<HoodieInstant> deltaCommit = metaClient.getActiveTimeline().reload().getDeltaCommitTimeline()
.filterCompletedInstants().lastInstant();
Option<HoodieInstant> deltaCommit =
metaClient.getActiveTimeline().reload().getDeltaCommitTimeline().filterCompletedInstants().lastInstant();
if (skipCommit && !cfg.shouldAutoCommit()) {
assertTrue("Delta commit should not be latest instant",
deltaCommit.get().getTimestamp().compareTo(instantTime) < 0);
@@ -560,8 +550,8 @@ public class TestAsyncCompaction extends TestHoodieClientBase {
private List<HoodieDataFile> getCurrentLatestDataFiles(HoodieTable table, HoodieWriteConfig cfg) throws IOException {
FileStatus[] allFiles = HoodieTestUtils.listAllDataFilesInPath(table.getMetaClient().getFs(), cfg.getBasePath());
HoodieTableFileSystemView
view = new HoodieTableFileSystemView(table.getMetaClient(), table.getCompletedCommitsTimeline(), allFiles);
HoodieTableFileSystemView view =
new HoodieTableFileSystemView(table.getMetaClient(), table.getCompletedCommitsTimeline(), allFiles);
List<HoodieDataFile> dataFilesToRead = view.getLatestDataFiles().collect(Collectors.toList());
return dataFilesToRead;
}
@@ -569,9 +559,8 @@ public class TestAsyncCompaction extends TestHoodieClientBase {
private List<FileSlice> getCurrentLatestFileSlices(HoodieTable table, HoodieWriteConfig cfg) throws IOException {
HoodieTableFileSystemView view = new HoodieTableFileSystemView(table.getMetaClient(),
table.getMetaClient().getActiveTimeline().reload().getCommitsAndCompactionTimeline());
List<FileSlice> fileSliceList =
Arrays.asList(HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS).stream().flatMap(partition ->
view.getLatestFileSlices(partition)).collect(Collectors.toList());
List<FileSlice> fileSliceList = Arrays.asList(HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS).stream()
.flatMap(partition -> view.getLatestFileSlices(partition)).collect(Collectors.toList());
return fileSliceList;
}

View File

@@ -93,16 +93,13 @@ public class TestCleaner extends TestHoodieClientBase {
* @param insertFn Insertion API for testing
* @throws Exception in case of error
*/
private String insertFirstBigBatchForClientCleanerTest(
HoodieWriteConfig cfg,
HoodieWriteClient client,
private String insertFirstBigBatchForClientCleanerTest(HoodieWriteConfig cfg, HoodieWriteClient client,
Function2<List<HoodieRecord>, String, Integer> recordGenFunction,
Function3<JavaRDD<WriteStatus>, HoodieWriteClient, JavaRDD<HoodieRecord>, String> insertFn) throws Exception {
/**
* do a big insert
* (this is basically same as insert part of upsert, just adding it here so we can
* catch breakages in insert(), if the implementation diverges.)
* do a big insert (this is basically same as insert part of upsert, just adding it here so we can catch breakages
* in insert(), if the implementation diverges.)
*/
String newCommitTime = client.startCommit();
@@ -145,8 +142,8 @@ public class TestCleaner extends TestHoodieClientBase {
*/
@Test
public void testInsertPreppedAndCleanByVersions() throws Exception {
testInsertAndCleanByVersions(HoodieWriteClient::insertPreppedRecords,
HoodieWriteClient::upsertPreppedRecords, true);
testInsertAndCleanByVersions(HoodieWriteClient::insertPreppedRecords, HoodieWriteClient::upsertPreppedRecords,
true);
}
/**
@@ -173,20 +170,18 @@ public class TestCleaner extends TestHoodieClientBase {
* @param insertFn Insert API to be tested
* @param upsertFn Upsert API to be tested
* @param isPreppedAPI Flag to indicate if a prepped-version is used. If true, a wrapper function will be used during
* record generation to also tag the records (de-dupe is implicit as we use unique record-gen APIs)
* record generation to also tag the records (de-dupe is implicit as we use unique record-gen APIs)
* @throws Exception in case of errors
*/
private void testInsertAndCleanByVersions(
Function3<JavaRDD<WriteStatus>, HoodieWriteClient, JavaRDD<HoodieRecord>, String> insertFn,
Function3<JavaRDD<WriteStatus>, HoodieWriteClient, JavaRDD<HoodieRecord>, String> upsertFn,
boolean isPreppedAPI
) throws Exception {
Function3<JavaRDD<WriteStatus>, HoodieWriteClient, JavaRDD<HoodieRecord>, String> upsertFn, boolean isPreppedAPI)
throws Exception {
int maxVersions = 2; // keep upto 2 versions for each file
HoodieWriteConfig cfg = getConfigBuilder().withCompactionConfig(
HoodieCompactionConfig.newBuilder().withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_FILE_VERSIONS)
.retainFileVersions(maxVersions).build())
.withParallelism(1, 1).withBulkInsertParallelism(1)
.withFinalizeWriteParallelism(1)
HoodieWriteConfig cfg = getConfigBuilder()
.withCompactionConfig(HoodieCompactionConfig.newBuilder()
.withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_FILE_VERSIONS).retainFileVersions(maxVersions).build())
.withParallelism(1, 1).withBulkInsertParallelism(1).withFinalizeWriteParallelism(1)
.withConsistencyGuardConfig(ConsistencyGuardConfig.newBuilder().withConsistencyCheckEnabled(true).build())
.build();
try (HoodieWriteClient client = getHoodieWriteClient(cfg);) {
@@ -204,11 +199,10 @@ public class TestCleaner extends TestHoodieClientBase {
HoodieTable table = HoodieTable.getHoodieTable(metaClient, getConfig(), jsc);
for (String partitionPath : dataGen.getPartitionPaths()) {
TableFileSystemView fsView = table.getFileSystemView();
Option<Boolean> added = Option.fromJavaOptional(fsView.getAllFileGroups(partitionPath).findFirst()
.map(fg -> {
fg.getLatestFileSlice().map(fs -> compactionFileIdToLatestFileSlice.put(fg.getFileGroupId(), fs));
return true;
}));
Option<Boolean> added = Option.fromJavaOptional(fsView.getAllFileGroups(partitionPath).findFirst().map(fg -> {
fg.getLatestFileSlice().map(fs -> compactionFileIdToLatestFileSlice.put(fg.getFileGroupId(), fs));
return true;
}));
if (added.isPresent()) {
// Select only one file-group for compaction
break;
@@ -234,8 +228,7 @@ public class TestCleaner extends TestHoodieClientBase {
client.startCommitWithTime(newInstantTime);
List<HoodieRecord> records = recordUpsertGenWrappedFunction.apply(newInstantTime, 100);
List<WriteStatus> statuses =
upsertFn.apply(client, jsc.parallelize(records, 1), newInstantTime).collect();
List<WriteStatus> statuses = upsertFn.apply(client, jsc.parallelize(records, 1), newInstantTime).collect();
// Verify there are no errors
assertNoWriteErrors(statuses);
@@ -249,8 +242,8 @@ public class TestCleaner extends TestHoodieClientBase {
// compute all the versions of all files, from time 0
HashMap<String, TreeSet<String>> fileIdToVersions = new HashMap<>();
for (HoodieInstant entry : timeline.getInstants().collect(Collectors.toList())) {
HoodieCommitMetadata commitMetadata = HoodieCommitMetadata
.fromBytes(timeline.getInstantDetails(entry).get(), HoodieCommitMetadata.class);
HoodieCommitMetadata commitMetadata =
HoodieCommitMetadata.fromBytes(timeline.getInstantDetails(entry).get(), HoodieCommitMetadata.class);
for (HoodieWriteStat wstat : commitMetadata.getWriteStats(partitionPath)) {
if (!fileIdToVersions.containsKey(wstat.getFileId())) {
@@ -267,8 +260,8 @@ public class TestCleaner extends TestHoodieClientBase {
// Ensure latest file-slice selected for compaction is retained
Option<HoodieDataFile> dataFileForCompactionPresent =
Option.fromJavaOptional(fileGroup.getAllDataFiles().filter(df -> {
return compactionFileIdToLatestFileSlice.get(fileGroup.getFileGroupId())
.getBaseInstantTime().equals(df.getCommitTime());
return compactionFileIdToLatestFileSlice.get(fileGroup.getFileGroupId()).getBaseInstantTime()
.equals(df.getCommitTime());
}).findAny());
Assert.assertTrue("Data File selected for compaction is retained",
dataFileForCompactionPresent.isPresent());
@@ -310,8 +303,7 @@ public class TestCleaner extends TestHoodieClientBase {
*/
@Test
public void testInsertPreppedAndCleanByCommits() throws Exception {
testInsertAndCleanByCommits(HoodieWriteClient::insertPreppedRecords,
HoodieWriteClient::upsertPreppedRecords, true);
testInsertAndCleanByCommits(HoodieWriteClient::insertPreppedRecords, HoodieWriteClient::upsertPreppedRecords, true);
}
/**
@@ -338,20 +330,18 @@ public class TestCleaner extends TestHoodieClientBase {
* @param insertFn Insert API to be tested
* @param upsertFn Upsert API to be tested
* @param isPreppedAPI Flag to indicate if a prepped-version is used. If true, a wrapper function will be used during
* record generation to also tag the records (de-dupe is implicit as we use unique record-gen APIs)
* record generation to also tag the records (de-dupe is implicit as we use unique record-gen APIs)
* @throws Exception in case of errors
*/
private void testInsertAndCleanByCommits(
Function3<JavaRDD<WriteStatus>, HoodieWriteClient, JavaRDD<HoodieRecord>, String> insertFn,
Function3<JavaRDD<WriteStatus>, HoodieWriteClient, JavaRDD<HoodieRecord>, String> upsertFn,
boolean isPreppedAPI
) throws Exception {
Function3<JavaRDD<WriteStatus>, HoodieWriteClient, JavaRDD<HoodieRecord>, String> upsertFn, boolean isPreppedAPI)
throws Exception {
int maxCommits = 3; // keep upto 3 commits from the past
HoodieWriteConfig cfg = getConfigBuilder().withCompactionConfig(
HoodieCompactionConfig.newBuilder()
HoodieWriteConfig cfg = getConfigBuilder()
.withCompactionConfig(HoodieCompactionConfig.newBuilder()
.withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_FILE_VERSIONS).retainCommits(maxCommits).build())
.withParallelism(1, 1).withBulkInsertParallelism(1)
.withFinalizeWriteParallelism(1)
.withParallelism(1, 1).withBulkInsertParallelism(1).withFinalizeWriteParallelism(1)
.withConsistencyGuardConfig(ConsistencyGuardConfig.newBuilder().withConsistencyCheckEnabled(true).build())
.build();
HoodieWriteClient client = getHoodieWriteClient(cfg);
@@ -370,8 +360,7 @@ public class TestCleaner extends TestHoodieClientBase {
client.startCommitWithTime(newCommitTime);
List<HoodieRecord> records = recordUpsertGenWrappedFunction.apply(newCommitTime, 100);
List<WriteStatus> statuses =
upsertFn.apply(client, jsc.parallelize(records, 1), newCommitTime).collect();
List<WriteStatus> statuses = upsertFn.apply(client, jsc.parallelize(records, 1), newCommitTime).collect();
// Verify there are no errors
assertNoWriteErrors(statuses);
@@ -381,9 +370,9 @@ public class TestCleaner extends TestHoodieClientBase {
Option<HoodieInstant> earliestRetainedCommit = activeTimeline.nthFromLastInstant(maxCommits - 1);
Set<HoodieInstant> acceptableCommits = activeTimeline.getInstants().collect(Collectors.toSet());
if (earliestRetainedCommit.isPresent()) {
acceptableCommits.removeAll(
activeTimeline.findInstantsInRange("000", earliestRetainedCommit.get().getTimestamp()).getInstants()
.collect(Collectors.toSet()));
acceptableCommits
.removeAll(activeTimeline.findInstantsInRange("000", earliestRetainedCommit.get().getTimestamp())
.getInstants().collect(Collectors.toSet()));
acceptableCommits.add(earliestRetainedCommit.get());
}
@@ -412,18 +401,19 @@ public class TestCleaner extends TestHoodieClientBase {
*/
@Test
public void testKeepLatestFileVersions() throws IOException {
HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath).withAssumeDatePartitioning(true)
.withCompactionConfig(HoodieCompactionConfig.newBuilder().withCleanerPolicy(
HoodieCleaningPolicy.KEEP_LATEST_FILE_VERSIONS).retainFileVersions(1).build())
.build();
HoodieWriteConfig config =
HoodieWriteConfig.newBuilder().withPath(basePath).withAssumeDatePartitioning(true)
.withCompactionConfig(HoodieCompactionConfig.newBuilder()
.withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_FILE_VERSIONS).retainFileVersions(1).build())
.build();
// make 1 commit, with 1 file per partition
HoodieTestUtils.createCommitFiles(basePath, "000");
String file1P0C0 = HoodieTestUtils
.createNewDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "000");
String file1P1C0 = HoodieTestUtils
.createNewDataFile(basePath, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, "000");
String file1P0C0 =
HoodieTestUtils.createNewDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "000");
String file1P1C0 =
HoodieTestUtils.createNewDataFile(basePath, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, "000");
metaClient = HoodieTableMetaClient.reload(metaClient);
HoodieTable table = HoodieTable.getHoodieTable(metaClient, config, jsc);
@@ -434,24 +424,22 @@ public class TestCleaner extends TestHoodieClientBase {
assertEquals("Must not clean any files", 0,
getCleanStat(hoodieCleanStatsOne, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH).getSuccessDeleteFiles()
.size());
assertTrue(HoodieTestUtils
.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "000", file1P0C0));
assertTrue(HoodieTestUtils
.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, "000", file1P1C0));
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "000",
file1P0C0));
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, "000",
file1P1C0));
// make next commit, with 1 insert & 1 update per partition
HoodieTestUtils.createCommitFiles(basePath, "001");
metaClient = HoodieTableMetaClient.reload(metaClient);
table = HoodieTable.getHoodieTable(metaClient, config, jsc);
String file2P0C1 = HoodieTestUtils
.createNewDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "001"); // insert
String file2P1C1 = HoodieTestUtils
.createNewDataFile(basePath, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, "001"); // insert
HoodieTestUtils
.createDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "001", file1P0C0); // update
HoodieTestUtils
.createDataFile(basePath, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, "001", file1P1C0); // update
String file2P0C1 =
HoodieTestUtils.createNewDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "001"); // insert
String file2P1C1 =
HoodieTestUtils.createNewDataFile(basePath, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, "001"); // insert
HoodieTestUtils.createDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "001", file1P0C0); // update
HoodieTestUtils.createDataFile(basePath, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, "001", file1P1C0); // update
List<HoodieCleanStat> hoodieCleanStatsTwo = table.clean(jsc);
assertEquals("Must clean 1 file", 1,
@@ -460,47 +448,44 @@ public class TestCleaner extends TestHoodieClientBase {
assertEquals("Must clean 1 file", 1,
getCleanStat(hoodieCleanStatsTwo, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH).getSuccessDeleteFiles()
.size());
assertTrue(HoodieTestUtils
.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "001", file2P0C1));
assertTrue(HoodieTestUtils
.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, "001", file2P1C1));
assertFalse(HoodieTestUtils
.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "000", file1P0C0));
assertFalse(HoodieTestUtils
.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, "000", file1P1C0));
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "001",
file2P0C1));
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, "001",
file2P1C1));
assertFalse(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "000",
file1P0C0));
assertFalse(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH,
"000", file1P1C0));
// make next commit, with 2 updates to existing files, and 1 insert
HoodieTestUtils.createCommitFiles(basePath, "002");
metaClient = HoodieTableMetaClient.reload(metaClient);
table = HoodieTable.getHoodieTable(metaClient, config, jsc);
HoodieTestUtils
.createDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "002", file1P0C0); // update
HoodieTestUtils
.createDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "002", file2P0C1); // update
String file3P0C2 = HoodieTestUtils
.createNewDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "002");
HoodieTestUtils.createDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "002", file1P0C0); // update
HoodieTestUtils.createDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "002", file2P0C1); // update
String file3P0C2 =
HoodieTestUtils.createNewDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "002");
List<HoodieCleanStat> hoodieCleanStatsThree = table.clean(jsc);
assertEquals("Must clean two files", 2,
getCleanStat(hoodieCleanStatsThree, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH)
.getSuccessDeleteFiles().size());
assertFalse(HoodieTestUtils
.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "001", file1P0C0));
assertFalse(HoodieTestUtils
.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "001", file2P0C1));
assertTrue(HoodieTestUtils
.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "002", file3P0C2));
assertFalse(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "001",
file1P0C0));
assertFalse(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "001",
file2P0C1));
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "002",
file3P0C2));
// No cleaning on partially written file, with no commit.
HoodieTestUtils
.createDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "003", file3P0C2); // update
HoodieTestUtils.createDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "003", file3P0C2); // update
List<HoodieCleanStat> hoodieCleanStatsFour = table.clean(jsc);
assertEquals("Must not clean any files", 0,
getCleanStat(hoodieCleanStatsFour, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH).getSuccessDeleteFiles()
.size());
assertTrue(HoodieTestUtils
.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "002", file3P0C2));
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "002",
file3P0C2));
}
/**
@@ -509,37 +494,33 @@ public class TestCleaner extends TestHoodieClientBase {
@Test
public void testKeepLatestFileVersionsMOR() throws IOException {
HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath).withAssumeDatePartitioning(true)
.withCompactionConfig(HoodieCompactionConfig.newBuilder().withCleanerPolicy(
HoodieCleaningPolicy.KEEP_LATEST_FILE_VERSIONS).retainFileVersions(1).build())
.build();
HoodieWriteConfig config =
HoodieWriteConfig.newBuilder().withPath(basePath).withAssumeDatePartitioning(true)
.withCompactionConfig(HoodieCompactionConfig.newBuilder()
.withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_FILE_VERSIONS).retainFileVersions(1).build())
.build();
HoodieTableMetaClient metaClient = HoodieTestUtils.init(jsc.hadoopConfiguration(), basePath,
HoodieTableType.MERGE_ON_READ);
HoodieTableMetaClient metaClient =
HoodieTestUtils.init(jsc.hadoopConfiguration(), basePath, HoodieTableType.MERGE_ON_READ);
// Make 3 files, one base file and 2 log files associated with base file
String file1P0 = HoodieTestUtils
.createNewDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "000");
String file2P0L0 = HoodieTestUtils
.createNewLogFile(fs, basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "000", file1P0,
Option.empty());
String file2P0L1 = HoodieTestUtils
.createNewLogFile(fs, basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "000", file1P0,
Option.of(2));
String file1P0 =
HoodieTestUtils.createNewDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "000");
String file2P0L0 = HoodieTestUtils.createNewLogFile(fs, basePath,
HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "000", file1P0, Option.empty());
String file2P0L1 = HoodieTestUtils.createNewLogFile(fs, basePath,
HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "000", file1P0, Option.of(2));
// make 1 compaction commit
HoodieTestUtils.createCompactionCommitFiles(fs, basePath, "000");
// Make 4 files, one base file and 3 log files associated with base file
HoodieTestUtils.createDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "001", file1P0);
file2P0L0 = HoodieTestUtils
.createNewLogFile(fs, basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "001", file1P0,
Option.empty());
file2P0L0 = HoodieTestUtils
.createNewLogFile(fs, basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "001", file1P0,
Option.of(2));
file2P0L0 = HoodieTestUtils
.createNewLogFile(fs, basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "001", file1P0,
Option.of(3));
file2P0L0 = HoodieTestUtils.createNewLogFile(fs, basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH,
"001", file1P0, Option.empty());
file2P0L0 = HoodieTestUtils.createNewLogFile(fs, basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH,
"001", file1P0, Option.of(2));
file2P0L0 = HoodieTestUtils.createNewLogFile(fs, basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH,
"001", file1P0, Option.of(3));
// make 1 compaction commit
HoodieTestUtils.createCompactionCommitFiles(fs, basePath, "001");
@@ -548,16 +529,12 @@ public class TestCleaner extends TestHoodieClientBase {
assertEquals("Must clean three files, one parquet and 2 log files", 3,
getCleanStat(hoodieCleanStats, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH).getSuccessDeleteFiles()
.size());
assertFalse(HoodieTestUtils
.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "000", file1P0));
assertFalse(
HoodieTestUtils
.doesLogFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "000", file2P0L0,
Option.empty()));
assertFalse(
HoodieTestUtils
.doesLogFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "000", file2P0L0,
Option.of(2)));
assertFalse(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "000",
file1P0));
assertFalse(HoodieTestUtils.doesLogFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "000",
file2P0L0, Option.empty()));
assertFalse(HoodieTestUtils.doesLogFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "000",
file2P0L0, Option.of(2)));
}
/**
@@ -566,16 +543,17 @@ public class TestCleaner extends TestHoodieClientBase {
@Test
public void testKeepLatestCommits() throws IOException {
HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath).withAssumeDatePartitioning(true)
.withCompactionConfig(HoodieCompactionConfig.newBuilder().withCleanerPolicy(
HoodieCleaningPolicy.KEEP_LATEST_COMMITS).retainCommits(2).build()).build();
.withCompactionConfig(HoodieCompactionConfig.newBuilder()
.withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS).retainCommits(2).build())
.build();
// make 1 commit, with 1 file per partition
HoodieTestUtils.createCommitFiles(basePath, "000");
String file1P0C0 = HoodieTestUtils
.createNewDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "000");
String file1P1C0 = HoodieTestUtils
.createNewDataFile(basePath, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, "000");
String file1P0C0 =
HoodieTestUtils.createNewDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "000");
String file1P1C0 =
HoodieTestUtils.createNewDataFile(basePath, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, "000");
metaClient = HoodieTableMetaClient.reload(metaClient);
HoodieTable table = HoodieTable.getHoodieTable(metaClient, config, jsc);
@@ -587,24 +565,22 @@ public class TestCleaner extends TestHoodieClientBase {
assertEquals("Must not clean any files", 0,
getCleanStat(hoodieCleanStatsOne, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH).getSuccessDeleteFiles()
.size());
assertTrue(HoodieTestUtils
.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "000", file1P0C0));
assertTrue(HoodieTestUtils
.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, "000", file1P1C0));
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "000",
file1P0C0));
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, "000",
file1P1C0));
// make next commit, with 1 insert & 1 update per partition
HoodieTestUtils.createCommitFiles(basePath, "001");
metaClient = HoodieTableMetaClient.reload(metaClient);
table = HoodieTable.getHoodieTable(metaClient, config, jsc);
String file2P0C1 = HoodieTestUtils
.createNewDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "001"); // insert
String file2P1C1 = HoodieTestUtils
.createNewDataFile(basePath, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, "001"); // insert
HoodieTestUtils
.createDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "001", file1P0C0); // update
HoodieTestUtils
.createDataFile(basePath, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, "001", file1P1C0); // update
String file2P0C1 =
HoodieTestUtils.createNewDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "001"); // insert
String file2P1C1 =
HoodieTestUtils.createNewDataFile(basePath, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, "001"); // insert
HoodieTestUtils.createDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "001", file1P0C0); // update
HoodieTestUtils.createDataFile(basePath, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, "001", file1P1C0); // update
List<HoodieCleanStat> hoodieCleanStatsTwo = table.clean(jsc);
assertEquals("Must not clean any files", 0,
@@ -613,78 +589,73 @@ public class TestCleaner extends TestHoodieClientBase {
assertEquals("Must not clean any files", 0,
getCleanStat(hoodieCleanStatsTwo, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH).getSuccessDeleteFiles()
.size());
assertTrue(HoodieTestUtils
.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "001", file2P0C1));
assertTrue(HoodieTestUtils
.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, "001", file2P1C1));
assertTrue(HoodieTestUtils
.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "000", file1P0C0));
assertTrue(HoodieTestUtils
.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, "000", file1P1C0));
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "001",
file2P0C1));
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, "001",
file2P1C1));
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "000",
file1P0C0));
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, "000",
file1P1C0));
// make next commit, with 2 updates to existing files, and 1 insert
HoodieTestUtils.createCommitFiles(basePath, "002");
metaClient = HoodieTableMetaClient.reload(metaClient);
table = HoodieTable.getHoodieTable(metaClient, config, jsc);
HoodieTestUtils
.createDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "002", file1P0C0); // update
HoodieTestUtils
.createDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "002", file2P0C1); // update
String file3P0C2 = HoodieTestUtils
.createNewDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "002");
HoodieTestUtils.createDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "002", file1P0C0); // update
HoodieTestUtils.createDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "002", file2P0C1); // update
String file3P0C2 =
HoodieTestUtils.createNewDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "002");
List<HoodieCleanStat> hoodieCleanStatsThree = table.clean(jsc);
assertEquals("Must not clean any file. We have to keep 1 version before the latest commit time to keep", 0,
getCleanStat(hoodieCleanStatsThree, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH)
.getSuccessDeleteFiles().size());
assertTrue(HoodieTestUtils
.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "000", file1P0C0));
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "000",
file1P0C0));
// make next commit, with 2 updates to existing files, and 1 insert
HoodieTestUtils.createCommitFiles(basePath, "003");
metaClient = HoodieTableMetaClient.reload(metaClient);
table = HoodieTable.getHoodieTable(metaClient, config, jsc);
HoodieTestUtils
.createDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "003", file1P0C0); // update
HoodieTestUtils
.createDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "003", file2P0C1); // update
String file4P0C3 = HoodieTestUtils
.createNewDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "003");
HoodieTestUtils.createDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "003", file1P0C0); // update
HoodieTestUtils.createDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "003", file2P0C1); // update
String file4P0C3 =
HoodieTestUtils.createNewDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "003");
List<HoodieCleanStat> hoodieCleanStatsFour = table.clean(jsc);
assertEquals("Must not clean one old file", 1,
getCleanStat(hoodieCleanStatsFour, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH).getSuccessDeleteFiles()
.size());
assertFalse(HoodieTestUtils
.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "000", file1P0C0));
assertTrue(HoodieTestUtils
.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "001", file1P0C0));
assertTrue(HoodieTestUtils
.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "002", file1P0C0));
assertTrue(HoodieTestUtils
.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "001", file2P0C1));
assertTrue(HoodieTestUtils
.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "002", file2P0C1));
assertTrue(HoodieTestUtils
.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "002", file3P0C2));
assertTrue(HoodieTestUtils
.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "003", file4P0C3));
assertFalse(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "000",
file1P0C0));
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "001",
file1P0C0));
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "002",
file1P0C0));
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "001",
file2P0C1));
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "002",
file2P0C1));
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "002",
file3P0C2));
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "003",
file4P0C3));
// No cleaning on partially written file, with no commit.
HoodieTestUtils
.createDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "004", file3P0C2); // update
HoodieTestUtils.createDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "004", file3P0C2); // update
List<HoodieCleanStat> hoodieCleanStatsFive = table.clean(jsc);
assertEquals("Must not clean any files", 0,
getCleanStat(hoodieCleanStatsFive, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH).getSuccessDeleteFiles()
.size());
assertTrue(HoodieTestUtils
.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "001", file1P0C0));
assertTrue(HoodieTestUtils
.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "001", file2P0C1));
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "001",
file1P0C0));
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "001",
file2P0C1));
}
/**
@@ -711,8 +682,9 @@ public class TestCleaner extends TestHoodieClientBase {
@Test
public void testCleaningWithZeroPartitonPaths() throws IOException {
HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath).withAssumeDatePartitioning(true)
.withCompactionConfig(HoodieCompactionConfig.newBuilder().withCleanerPolicy(
HoodieCleaningPolicy.KEEP_LATEST_COMMITS).retainCommits(2).build()).build();
.withCompactionConfig(HoodieCompactionConfig.newBuilder()
.withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS).retainCommits(2).build())
.build();
// Make a commit, although there are no partitionPaths.
// Example use-case of this is when a client wants to create a table
@@ -732,8 +704,9 @@ public class TestCleaner extends TestHoodieClientBase {
@Test
public void testCleaningSkewedPartitons() throws IOException {
HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath).withAssumeDatePartitioning(true)
.withCompactionConfig(HoodieCompactionConfig.newBuilder().withCleanerPolicy(
HoodieCleaningPolicy.KEEP_LATEST_COMMITS).retainCommits(2).build()).build();
.withCompactionConfig(HoodieCompactionConfig.newBuilder()
.withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS).retainCommits(2).build())
.build();
Map<Long, Long> stageOneShuffleReadTaskRecordsCountMap = new HashMap<>();
// Since clean involves repartition in order to uniformly distribute data,
@@ -783,22 +756,20 @@ public class TestCleaner extends TestHoodieClientBase {
HoodieTable table = HoodieTable.getHoodieTable(metaClient, config, jsc);
List<HoodieCleanStat> hoodieCleanStats = table.clean(jsc);
assertEquals(100,
getCleanStat(hoodieCleanStats, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH).getSuccessDeleteFiles()
.size());
assertEquals(10,
getCleanStat(hoodieCleanStats, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH).getSuccessDeleteFiles()
.size());
assertEquals(10,
getCleanStat(hoodieCleanStats, HoodieTestDataGenerator.DEFAULT_THIRD_PARTITION_PATH).getSuccessDeleteFiles()
.size());
assertEquals(100, getCleanStat(hoodieCleanStats, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH)
.getSuccessDeleteFiles().size());
assertEquals(10, getCleanStat(hoodieCleanStats, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH)
.getSuccessDeleteFiles().size());
assertEquals(10, getCleanStat(hoodieCleanStats, HoodieTestDataGenerator.DEFAULT_THIRD_PARTITION_PATH)
.getSuccessDeleteFiles().size());
// 3 tasks are expected since the number of partitions is 3
assertEquals(3, stageOneShuffleReadTaskRecordsCountMap.keySet().size());
// Sum of all records processed = total number of files to clean
assertEquals(120,
stageOneShuffleReadTaskRecordsCountMap.values().stream().reduce((a, b) -> a + b).get().intValue());
assertTrue("The skew in handling files to clean is not removed. "
assertTrue(
"The skew in handling files to clean is not removed. "
+ "Each task should handle more records than the partitionPath with least files "
+ "and less records than the partitionPath with most files.",
stageOneShuffleReadTaskRecordsCountMap.values().stream().filter(a -> a > 10 && a < 100).count() == 3);
@@ -811,17 +782,18 @@ public class TestCleaner extends TestHoodieClientBase {
@Test
public void testKeepLatestCommitsWithPendingCompactions() throws IOException {
// Cleaner configuration under test: KEEP_LATEST_COMMITS policy with retainCommits(2).
HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath).withAssumeDatePartitioning(true)
.withCompactionConfig(HoodieCompactionConfig.newBuilder().withCleanerPolicy(
HoodieCleaningPolicy.KEEP_LATEST_COMMITS).retainCommits(2).build()).build();
.withCompactionConfig(HoodieCompactionConfig.newBuilder()
.withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS).retainCommits(2).build())
.build();
// Deletions:
// . FileId Parquet Logs Total Retained Commits
// FileId7 5 10 15 009, 011
// FileId6 5 10 15 009
// FileId5 3 6 9 005
// FileId4 2 4 6 003
// FileId3 1 2 3 001
// FileId2 0 0 0 000
// FileId1 0 0 0 000
// . FileId Parquet Logs Total Retained Commits
// FileId7 5 10 15 009, 011
// FileId6 5 10 15 009
// FileId5 3 6 9 005
// FileId4 2 4 6 003
// FileId3 1 2 3 001
// FileId2 0 0 0 000
// FileId1 0 0 0 000
// Second argument (expNumFilesDeleted) = 48, the sum of the Total column above: 15 + 15 + 9 + 6 + 3 + 0 + 0.
// Third argument (expNumFilesUnderCompactionDeleted) = 18; presumably the Total rows of FileId2..FileId5
// (0 + 3 + 6 + 9), the file ids testPendingCompactions puts under compaction - TODO confirm.
testPendingCompactions(config, 48, 18);
}
@@ -830,18 +802,20 @@ public class TestCleaner extends TestHoodieClientBase {
*/
@Test
public void testKeepLatestVersionsWithPendingCompactions() throws IOException {
// Cleaner configuration under test: KEEP_LATEST_FILE_VERSIONS policy with retainFileVersions(2).
HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath).withAssumeDatePartitioning(true)
.withCompactionConfig(HoodieCompactionConfig.newBuilder().withCleanerPolicy(
HoodieCleaningPolicy.KEEP_LATEST_FILE_VERSIONS).retainFileVersions(2).build()).build();
HoodieWriteConfig config =
HoodieWriteConfig.newBuilder().withPath(basePath).withAssumeDatePartitioning(true)
.withCompactionConfig(HoodieCompactionConfig.newBuilder()
.withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_FILE_VERSIONS).retainFileVersions(2).build())
.build();
// Deletions:
// . FileId Parquet Logs Total Retained Commits
// FileId7 5 10 15 009, 011
// FileId6 4 8 12 007, 009
// FileId5 2 4 6 003 005
// FileId4 1 2 3 001, 003
// FileId3 0 0 0 000, 001
// FileId2 0 0 0 000
// FileId1 0 0 0 000
// . FileId Parquet Logs Total Retained Commits
// FileId7 5 10 15 009, 011
// FileId6 4 8 12 007, 009
// FileId5 2 4 6 003 005
// FileId4 1 2 3 001, 003
// FileId3 0 0 0 000, 001
// FileId2 0 0 0 000
// FileId1 0 0 0 000
// Second argument (expNumFilesDeleted) = 36, the sum of the Total column above: 15 + 12 + 6 + 3 + 0 + 0 + 0.
// Third argument (expNumFilesUnderCompactionDeleted) = 9; presumably the Total rows of FileId2..FileId5
// (0 + 0 + 3 + 6), the file ids testPendingCompactions puts under compaction - TODO confirm.
testPendingCompactions(config, 36, 9);
}
@@ -853,10 +827,10 @@ public class TestCleaner extends TestHoodieClientBase {
*/
public void testPendingCompactions(HoodieWriteConfig config, int expNumFilesDeleted,
int expNumFilesUnderCompactionDeleted) throws IOException {
HoodieTableMetaClient metaClient = HoodieTestUtils.init(jsc.hadoopConfiguration(), basePath,
HoodieTableType.MERGE_ON_READ);
String[] instants = new String[]{"000", "001", "003", "005", "007", "009", "011", "013"};
String[] compactionInstants = new String[]{"002", "004", "006", "008", "010"};
HoodieTableMetaClient metaClient =
HoodieTestUtils.init(jsc.hadoopConfiguration(), basePath, HoodieTableType.MERGE_ON_READ);
String[] instants = new String[] {"000", "001", "003", "005", "007", "009", "011", "013"};
String[] compactionInstants = new String[] {"002", "004", "006", "008", "010"};
Map<String, String> expFileIdToPendingCompaction = new HashMap<>();
Map<String, String> fileIdToLatestInstantBeforeCompaction = new HashMap<>();
Map<String, List<FileSlice>> compactionInstantsToFileSlices = new HashMap<>();
@@ -870,13 +844,11 @@ public class TestCleaner extends TestHoodieClientBase {
// compactions
// FileIds 2-5 will be under compaction
int maxNumFileIds = 7;
String[] fileIds = new String[]
{"fileId1", "fileId2", "fileId3", "fileId4", "fileId5", "fileId6", "fileId7"};
String[] fileIds = new String[] {"fileId1", "fileId2", "fileId3", "fileId4", "fileId5", "fileId6", "fileId7"};
int maxNumFileIdsForCompaction = 4;
for (int i = 0; i < maxNumFileIds; i++) {
final String fileId = HoodieTestUtils
.createDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, instants[0],
fileIds[i]);
final String fileId = HoodieTestUtils.createDataFile(basePath,
HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, instants[0], fileIds[i]);
HoodieTestUtils.createNewLogFile(fs, basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, instants[0],
fileId, Option.empty());
HoodieTestUtils.createNewLogFile(fs, basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, instants[0],
@@ -887,9 +859,9 @@ public class TestCleaner extends TestHoodieClientBase {
expFileIdToPendingCompaction.put(fileId, compactionInstants[j]);
metaClient = HoodieTableMetaClient.reload(metaClient);
HoodieTable table = HoodieTable.getHoodieTable(metaClient, config, jsc);
FileSlice slice = table.getRTFileSystemView().getLatestFileSlices(
HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH)
.filter(fs -> fs.getFileId().equals(fileId)).findFirst().get();
FileSlice slice =
table.getRTFileSystemView().getLatestFileSlices(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH)
.filter(fs -> fs.getFileId().equals(fileId)).findFirst().get();
List<FileSlice> slices = new ArrayList<>();
if (compactionInstantsToFileSlices.containsKey(compactionInstants[j])) {
slices = compactionInstantsToFileSlices.get(compactionInstants[j]);
@@ -898,20 +870,16 @@ public class TestCleaner extends TestHoodieClientBase {
compactionInstantsToFileSlices.put(compactionInstants[j], slices);
// Add log-files to simulate delta-commits after pending compaction
HoodieTestUtils.createNewLogFile(fs, basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH,
compactionInstants[j],
fileId, Option.empty());
compactionInstants[j], fileId, Option.empty());
HoodieTestUtils.createNewLogFile(fs, basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH,
compactionInstants[j],
fileId, Option.of(2));
compactionInstants[j], fileId, Option.of(2));
} else {
HoodieTestUtils
.createDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, instants[j], fileId);
HoodieTestUtils
.createNewLogFile(fs, basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, instants[j], fileId,
Option.empty());
HoodieTestUtils
.createNewLogFile(fs, basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, instants[j], fileId,
Option.of(2));
HoodieTestUtils.createDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, instants[j],
fileId);
HoodieTestUtils.createNewLogFile(fs, basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH,
instants[j], fileId, Option.empty());
HoodieTestUtils.createNewLogFile(fs, basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH,
instants[j], fileId, Option.of(2));
fileIdToLatestInstantBeforeCompaction.put(fileId, instants[j]);
}
}
@@ -921,9 +889,8 @@ public class TestCleaner extends TestHoodieClientBase {
for (String instant : compactionInstants) {
List<FileSlice> fileSliceList = compactionInstantsToFileSlices.get(instant);
if (null != fileSliceList) {
HoodieTestUtils.createCompactionRequest(metaClient, instant,
fileSliceList.stream().map(fs -> Pair.of(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, fs))
.collect(Collectors.toList()));
HoodieTestUtils.createCompactionRequest(metaClient, instant, fileSliceList.stream()
.map(fs -> Pair.of(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, fs)).collect(Collectors.toList()));
}
}
@@ -939,38 +906,35 @@ public class TestCleaner extends TestHoodieClientBase {
expFileIdToPendingCompaction.entrySet().stream().forEach(entry -> {
String fileId = entry.getKey();
String baseInstantForCompaction = fileIdToLatestInstantBeforeCompaction.get(fileId);
Option<FileSlice> fileSliceForCompaction =
Option.fromJavaOptional(
hoodieTable.getRTFileSystemView().getLatestFileSlicesBeforeOrOn(
HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH,
baseInstantForCompaction, true).filter(fs -> fs.getFileId().equals(fileId)).findFirst());
Option<FileSlice> fileSliceForCompaction = Option.fromJavaOptional(hoodieTable.getRTFileSystemView()
.getLatestFileSlicesBeforeOrOn(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, baseInstantForCompaction,
true)
.filter(fs -> fs.getFileId().equals(fileId)).findFirst());
Assert.assertTrue("Base Instant for Compaction must be preserved", fileSliceForCompaction.isPresent());
Assert.assertTrue("FileSlice has data-file", fileSliceForCompaction.get().getDataFile().isPresent());
Assert.assertEquals("FileSlice has log-files", 2,
fileSliceForCompaction.get().getLogFiles().count());
Assert.assertEquals("FileSlice has log-files", 2, fileSliceForCompaction.get().getLogFiles().count());
});
// Test for progress (Did we clean some files ?)
long numFilesUnderCompactionDeleted =
hoodieCleanStats.stream().flatMap(cleanStat -> {
return convertPathToFileIdWithCommitTime(newMetaClient, cleanStat.getDeletePathPatterns()).map(
fileIdWithCommitTime -> {
if (expFileIdToPendingCompaction.containsKey(fileIdWithCommitTime.getKey())) {
Assert.assertTrue("Deleted instant time must be less than pending compaction",
HoodieTimeline.compareTimestamps(
fileIdToLatestInstantBeforeCompaction.get(fileIdWithCommitTime.getKey()),
fileIdWithCommitTime.getValue(), HoodieTimeline.GREATER));
return true;
}
return false;
});
}).filter(x -> x).count();
long numDeleted = hoodieCleanStats.stream()
.flatMap(cleanStat -> cleanStat.getDeletePathPatterns().stream()).count();
long numFilesUnderCompactionDeleted = hoodieCleanStats.stream().flatMap(cleanStat -> {
return convertPathToFileIdWithCommitTime(newMetaClient, cleanStat.getDeletePathPatterns())
.map(fileIdWithCommitTime -> {
if (expFileIdToPendingCompaction.containsKey(fileIdWithCommitTime.getKey())) {
Assert.assertTrue("Deleted instant time must be less than pending compaction",
HoodieTimeline.compareTimestamps(
fileIdToLatestInstantBeforeCompaction.get(fileIdWithCommitTime.getKey()),
fileIdWithCommitTime.getValue(), HoodieTimeline.GREATER));
return true;
}
return false;
});
}).filter(x -> x).count();
long numDeleted =
hoodieCleanStats.stream().flatMap(cleanStat -> cleanStat.getDeletePathPatterns().stream()).count();
// Tighter check for regression
Assert.assertEquals("Correct number of files deleted", expNumFilesDeleted, numDeleted);
Assert.assertEquals("Correct number of files under compaction deleted",
expNumFilesUnderCompactionDeleted, numFilesUnderCompactionDeleted);
Assert.assertEquals("Correct number of files under compaction deleted", expNumFilesUnderCompactionDeleted,
numFilesUnderCompactionDeleted);
}
/**
@@ -991,6 +955,7 @@ public class TestCleaner extends TestHoodieClientBase {
/***
* Helper method to return temporary files count
*
* @return Number of temporary files found
* @throws IOException in case of error
*/
@@ -1004,22 +969,20 @@ public class TestCleaner extends TestHoodieClientBase {
return count;
}
/**
 * Converts a list of file paths into a stream of (file id, commit time) pairs.
 *
 * <p>Paths whose text contains the table's RO file-format extension are parsed from their file name via
 * {@code FSUtils.getFileId} / {@code FSUtils.getCommitTime}. Paths whose text contains the RT file-format extension
 * are parsed as log paths via {@code FSUtils.getFileIdFromLogPath} / {@code FSUtils.getBaseCommitTimeFromLogPath}.
 * The match is a plain substring check, so a path matching neither extension is dropped and a path matching both
 * contributes two pairs.
 *
 * @param metaClient meta client whose table config supplies the RO and RT file extensions
 * @param paths paths to convert
 * @return RO-derived pairs followed by RT-derived pairs
 */
private Stream<Pair<String, String>> convertPathToFileIdWithCommitTime(
final HoodieTableMetaClient metaClient, List<String> paths) {
Predicate<String> roFilePredicate = path ->
path.contains(metaClient.getTableConfig().getROFileFormat().getFileExtension());
Predicate<String> rtFilePredicate = path ->
path.contains(metaClient.getTableConfig().getRTFileFormat().getFileExtension());
Stream<Pair<String, String>> stream1 = paths.stream().filter(roFilePredicate)
.map(fullPath -> {
String fileName = Paths.get(fullPath).getFileName().toString();
return Pair.of(FSUtils.getFileId(fileName), FSUtils.getCommitTime(fileName));
});
Stream<Pair<String, String>> stream2 = paths.stream().filter(rtFilePredicate)
.map(path -> {
return Pair.of(FSUtils.getFileIdFromLogPath(new Path(path)),
FSUtils.getBaseCommitTimeFromLogPath(new Path(path)));
});
private Stream<Pair<String, String>> convertPathToFileIdWithCommitTime(final HoodieTableMetaClient metaClient,
List<String> paths) {
// Substring predicates selecting RO-format and RT-format paths respectively.
Predicate<String> roFilePredicate =
path -> path.contains(metaClient.getTableConfig().getROFileFormat().getFileExtension());
Predicate<String> rtFilePredicate =
path -> path.contains(metaClient.getTableConfig().getRTFileFormat().getFileExtension());
// RO paths: file id and commit time are both derived from the bare file name.
Stream<Pair<String, String>> stream1 = paths.stream().filter(roFilePredicate).map(fullPath -> {
String fileName = Paths.get(fullPath).getFileName().toString();
return Pair.of(FSUtils.getFileId(fileName), FSUtils.getCommitTime(fileName));
});
// RT paths: file id and base commit time are derived from the full log path.
Stream<Pair<String, String>> stream2 = paths.stream().filter(rtFilePredicate).map(path -> {
return Pair.of(FSUtils.getFileIdFromLogPath(new Path(path)),
FSUtils.getBaseCommitTimeFromLogPath(new Path(path)));
});
return Stream.concat(stream1, stream2);
}
}

View File

@@ -54,9 +54,8 @@ public class TestClientRollback extends TestHoodieClientBase {
*/
@Test
public void testSavepointAndRollback() throws Exception {
HoodieWriteConfig cfg = getConfigBuilder().withCompactionConfig(
HoodieCompactionConfig.newBuilder().withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS).retainCommits(1)
.build()).build();
HoodieWriteConfig cfg = getConfigBuilder().withCompactionConfig(HoodieCompactionConfig.newBuilder()
.withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS).retainCommits(1).build()).build();
try (HoodieWriteClient client = getHoodieWriteClient(cfg);) {
HoodieTestDataGenerator.writePartitionMetadata(fs, HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS, basePath);
@@ -95,8 +94,8 @@ public class TestClientRollback extends TestHoodieClientBase {
statuses = client.upsert(jsc.parallelize(records, 1), newCommitTime).collect();
// Verify there are no errors
assertNoWriteErrors(statuses);
List<String> partitionPaths = FSUtils.getAllPartitionPaths(fs, cfg.getBasePath(),
getConfig().shouldAssumeDatePartitioning());
List<String> partitionPaths =
FSUtils.getAllPartitionPaths(fs, cfg.getBasePath(), getConfig().shouldAssumeDatePartitioning());
metaClient = HoodieTableMetaClient.reload(metaClient);
HoodieTable table = HoodieTable.getHoodieTable(metaClient, getConfig(), jsc);
final ReadOptimizedView view1 = table.getROFileSystemView();
@@ -173,8 +172,8 @@ public class TestClientRollback extends TestHoodieClientBase {
String commitTime2 = "20160502020601";
String commitTime3 = "20160506030611";
new File(basePath + "/.hoodie").mkdirs();
HoodieTestDataGenerator
.writePartitionMetadata(fs, new String[]{"2016/05/01", "2016/05/02", "2016/05/06"}, basePath);
HoodieTestDataGenerator.writePartitionMetadata(fs, new String[] {"2016/05/01", "2016/05/02", "2016/05/06"},
basePath);
// Only first two have commit files
HoodieTestUtils.createCommitFiles(basePath, commitTime1, commitTime2);
@@ -196,8 +195,8 @@ public class TestClientRollback extends TestHoodieClientBase {
String file32 = HoodieTestUtils.createDataFile(basePath, "2016/05/02", commitTime3, "id32");
String file33 = HoodieTestUtils.createDataFile(basePath, "2016/05/06", commitTime3, "id33");
HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath).withIndexConfig(
HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.INMEMORY).build()).build();
HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath)
.withIndexConfig(HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.INMEMORY).build()).build();
try (HoodieWriteClient client = getHoodieWriteClient(config, false);) {
@@ -263,8 +262,8 @@ public class TestClientRollback extends TestHoodieClientBase {
String commitTime2 = "20160502020601";
String commitTime3 = "20160506030611";
new File(basePath + "/.hoodie").mkdirs();
HoodieTestDataGenerator
.writePartitionMetadata(fs, new String[]{"2016/05/01", "2016/05/02", "2016/05/06"}, basePath);
HoodieTestDataGenerator.writePartitionMetadata(fs, new String[] {"2016/05/01", "2016/05/02", "2016/05/06"},
basePath);
// One good commit
HoodieTestUtils.createCommitFiles(basePath, commitTime1);
@@ -287,8 +286,8 @@ public class TestClientRollback extends TestHoodieClientBase {
String file33 = HoodieTestUtils.createDataFile(basePath, "2016/05/06", commitTime3, "id33");
// Turn auto rollback off
HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath).withIndexConfig(
HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.INMEMORY).build()).build();
HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath)
.withIndexConfig(HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.INMEMORY).build()).build();
try (HoodieWriteClient client = getHoodieWriteClient(config, false);) {

View File

@@ -67,29 +67,23 @@ public class TestCompactionAdminClient extends TestHoodieClientBase {
/**
 * Sets up compaction operations with 10 entries per instant, then validates unscheduling of the compaction plans at
 * instants "001", "003", "005" and "007". The first two expect {@code 2 * numEntriesPerInstant} renames; the last two
 * expect none.
 */
@Test
public void testUnscheduleCompactionPlan() throws Exception {
int numEntriesPerInstant = 10;
CompactionTestUtils
.setupAndValidateCompactionOperations(metaClient, false, numEntriesPerInstant, numEntriesPerInstant,
numEntriesPerInstant, numEntriesPerInstant);
CompactionTestUtils.setupAndValidateCompactionOperations(metaClient, false, numEntriesPerInstant,
numEntriesPerInstant, numEntriesPerInstant, numEntriesPerInstant);
// There are delta-commits after the compaction instant, so renames are expected
validateUnSchedulePlan(client,
"000", "001", numEntriesPerInstant, 2 * numEntriesPerInstant);
validateUnSchedulePlan(client, "000", "001", numEntriesPerInstant, 2 * numEntriesPerInstant);
// There are delta-commits after the compaction instant, so renames are expected
validateUnSchedulePlan(client,
"002", "003", numEntriesPerInstant, 2 * numEntriesPerInstant);
validateUnSchedulePlan(client, "002", "003", numEntriesPerInstant, 2 * numEntriesPerInstant);
// There are no delta-commits after the compaction instant, so no renames are expected
validateUnSchedulePlan(client,
"004", "005", numEntriesPerInstant, 0);
validateUnSchedulePlan(client, "004", "005", numEntriesPerInstant, 0);
// There are no delta-commits after the compaction instant, so no renames are expected
validateUnSchedulePlan(client,
"006", "007", numEntriesPerInstant, 0);
validateUnSchedulePlan(client, "006", "007", numEntriesPerInstant, 0);
}
/**
 * Sets up compaction operations with 10 entries per instant, picks the first compaction operation of each of the
 * instants "001", "003", "005" and "007", and validates unscheduling of that single operation. The first two expect
 * 2 renames; the last two expect none.
 *
 * NOTE(review): the statement inside the {@code try} that produces the (instant, plan) pair is elided in this view -
 * confirm against the full file.
 */
@Test
public void testUnscheduleCompactionFileId() throws Exception {
int numEntriesPerInstant = 10;
CompactionTestUtils
.setupAndValidateCompactionOperations(metaClient, false, numEntriesPerInstant, numEntriesPerInstant,
numEntriesPerInstant, numEntriesPerInstant);
CompactionTestUtils.setupAndValidateCompactionOperations(metaClient, false, numEntriesPerInstant,
numEntriesPerInstant, numEntriesPerInstant, numEntriesPerInstant);
Map<String, CompactionOperation> instantsWithOp =
Arrays.asList("001", "003", "005", "007").stream().map(instant -> {
try {
@@ -97,29 +91,24 @@ public class TestCompactionAdminClient extends TestHoodieClientBase {
} catch (IOException ioe) {
throw new HoodieException(ioe);
}
}).map(instantWithPlan -> instantWithPlan.getRight().getOperations().stream().map(op -> Pair.of(
instantWithPlan.getLeft(), CompactionOperation.convertFromAvroRecordInstance(op))).findFirst().get())
.collect(Collectors.toMap(Pair::getLeft, Pair::getRight));
}).map(instantWithPlan -> instantWithPlan.getRight().getOperations().stream()
.map(op -> Pair.of(instantWithPlan.getLeft(), CompactionOperation.convertFromAvroRecordInstance(op)))
.findFirst().get()).collect(Collectors.toMap(Pair::getLeft, Pair::getRight));
// There are delta-commits after the compaction instant, so renames are expected
validateUnScheduleFileId(client,
"000", "001", instantsWithOp.get("001"), 2);
validateUnScheduleFileId(client, "000", "001", instantsWithOp.get("001"), 2);
// There are delta-commits after the compaction instant, so renames are expected
validateUnScheduleFileId(client,
"002", "003", instantsWithOp.get("003"), 2);
validateUnScheduleFileId(client, "002", "003", instantsWithOp.get("003"), 2);
// There are no delta-commits after the compaction instant, so no renames are expected
validateUnScheduleFileId(client,
"004", "005", instantsWithOp.get("005"), 0);
validateUnScheduleFileId(client, "004", "005", instantsWithOp.get("005"), 0);
// There are no delta-commits after the compaction instant, so no renames are expected
validateUnScheduleFileId(client,
"006", "007", instantsWithOp.get("007"), 0);
validateUnScheduleFileId(client, "006", "007", instantsWithOp.get("007"), 0);
}
@Test
public void testRepairCompactionPlan() throws Exception {
int numEntriesPerInstant = 10;
CompactionTestUtils
.setupAndValidateCompactionOperations(metaClient, false, numEntriesPerInstant, numEntriesPerInstant,
numEntriesPerInstant, numEntriesPerInstant);
CompactionTestUtils.setupAndValidateCompactionOperations(metaClient, false, numEntriesPerInstant,
numEntriesPerInstant, numEntriesPerInstant, numEntriesPerInstant);
// THere are delta-commits after compaction instant
validateRepair("000", "001", numEntriesPerInstant, 2 * numEntriesPerInstant);
// THere are delta-commits after compaction instant
@@ -140,23 +129,20 @@ public class TestCompactionAdminClient extends TestHoodieClientBase {
Assert.assertTrue("Expect some failures in validation", result.stream().filter(r -> !r.isSuccess()).count() > 0);
}
// Now repair
List<Pair<HoodieLogFile, HoodieLogFile>> undoFiles = result.stream().flatMap(r ->
client.getRenamingActionsToAlignWithCompactionOperation(metaClient,
compactionInstant, r.getOperation(), Option.empty()).stream())
.map(rn -> {
try {
client.renameLogFile(metaClient, rn.getKey(), rn.getValue());
} catch (IOException e) {
throw new HoodieIOException(e.getMessage(), e);
}
return rn;
}).collect(Collectors.toList());
Map<String, String> renameFilesFromUndo =
undoFiles.stream().collect(Collectors.toMap(p -> p.getRight().getPath().toString(),
x -> x.getLeft().getPath().toString()));
Map<String, String> expRenameFiles =
renameFiles.stream().collect(Collectors.toMap(p -> p.getLeft().getPath().toString(),
x -> x.getRight().getPath().toString()));
List<Pair<HoodieLogFile, HoodieLogFile>> undoFiles =
result.stream().flatMap(r -> client.getRenamingActionsToAlignWithCompactionOperation(metaClient,
compactionInstant, r.getOperation(), Option.empty()).stream()).map(rn -> {
try {
client.renameLogFile(metaClient, rn.getKey(), rn.getValue());
} catch (IOException e) {
throw new HoodieIOException(e.getMessage(), e);
}
return rn;
}).collect(Collectors.toList());
Map<String, String> renameFilesFromUndo = undoFiles.stream()
.collect(Collectors.toMap(p -> p.getRight().getPath().toString(), x -> x.getLeft().getPath().toString()));
Map<String, String> expRenameFiles = renameFiles.stream()
.collect(Collectors.toMap(p -> p.getLeft().getPath().toString(), x -> x.getRight().getPath().toString()));
if (expNumRepairs > 0) {
Assert.assertFalse("Rename Files must be non-empty", renameFiles.isEmpty());
} else {
@@ -182,14 +168,13 @@ public class TestCompactionAdminClient extends TestHoodieClientBase {
/**
 * Re-creates {@code metaClient} for {@code basePath} and asserts that validating the compaction plan at the given
 * instant reports no unsuccessful result.
 *
 * @param compactionInstant compaction instant whose plan is validated
 * @throws Exception propagated from the meta client or the plan validation
 */
private void ensureValidCompactionPlan(String compactionInstant) throws Exception {
// Replace the shared meta client with a freshly constructed one before validating.
metaClient = new HoodieTableMetaClient(metaClient.getHadoopConf(), basePath, true);
// Ensure compaction-plan is good to begin with
List<ValidationOpResult> validationResults = client.validateCompactionPlan(metaClient,
compactionInstant, 1);
List<ValidationOpResult> validationResults = client.validateCompactionPlan(metaClient, compactionInstant, 1);
// Fail if any single validation result is unsuccessful.
Assert.assertFalse("Some validations failed",
validationResults.stream().filter(v -> !v.isSuccess()).findAny().isPresent());
}
private void validateRenameFiles(List<Pair<HoodieLogFile, HoodieLogFile>> renameFiles,
String ingestionInstant, String compactionInstant, HoodieTableFileSystemView fsView) {
private void validateRenameFiles(List<Pair<HoodieLogFile, HoodieLogFile>> renameFiles, String ingestionInstant,
String compactionInstant, HoodieTableFileSystemView fsView) {
// Ensure new names of log-files are on expected lines
Set<HoodieLogFile> uniqNewLogFiles = new HashSet<>();
Set<HoodieLogFile> uniqOldLogFiles = new HashSet<>();
@@ -209,11 +194,10 @@ public class TestCompactionAdminClient extends TestHoodieClientBase {
Assert.assertEquals("File Id is expected", oldLogFile.getFileId(), newLogFile.getFileId());
HoodieLogFile lastLogFileBeforeCompaction =
fsView.getLatestMergedFileSlicesBeforeOrOn(HoodieTestUtils.DEFAULT_PARTITION_PATHS[0], ingestionInstant)
.filter(fs -> fs.getFileId().equals(oldLogFile.getFileId()))
.map(fs -> fs.getLogFiles().findFirst().get()).findFirst().get();
.filter(fs -> fs.getFileId().equals(oldLogFile.getFileId())).map(fs -> fs.getLogFiles().findFirst().get())
.findFirst().get();
Assert.assertEquals("Log Version expected",
lastLogFileBeforeCompaction.getLogVersion() + oldLogFile.getLogVersion(),
newLogFile.getLogVersion());
lastLogFileBeforeCompaction.getLogVersion() + oldLogFile.getLogVersion(), newLogFile.getLogVersion());
Assert.assertTrue("Log version does not collide",
newLogFile.getLogVersion() > lastLogFileBeforeCompaction.getLogVersion());
});
@@ -223,10 +207,9 @@ public class TestCompactionAdminClient extends TestHoodieClientBase {
* Validate Unschedule operations
*/
private List<Pair<HoodieLogFile, HoodieLogFile>> validateUnSchedulePlan(CompactionAdminClient client,
String ingestionInstant, String compactionInstant, int numEntriesPerInstant, int expNumRenames)
throws Exception {
return validateUnSchedulePlan(client, ingestionInstant, compactionInstant, numEntriesPerInstant,
expNumRenames, false);
String ingestionInstant, String compactionInstant, int numEntriesPerInstant, int expNumRenames) throws Exception {
// Convenience overload: forwards to the six-argument variant with its trailing boolean set to false.
// NOTE(review): the meaning of that flag is defined by the six-argument overload, not visible here.
return validateUnSchedulePlan(client, ingestionInstant, compactionInstant, numEntriesPerInstant, expNumRenames,
false);
}
/**
@@ -240,8 +223,7 @@ public class TestCompactionAdminClient extends TestHoodieClientBase {
// Check suggested rename operations
List<Pair<HoodieLogFile, HoodieLogFile>> renameFiles =
client.getRenamingActionsForUnschedulingCompactionPlan(metaClient, compactionInstant, 1,
Option.empty(), false);
client.getRenamingActionsForUnschedulingCompactionPlan(metaClient, compactionInstant, 1, Option.empty(), false);
metaClient = new HoodieTableMetaClient(metaClient.getHadoopConf(), basePath, true);
// Log files belonging to file-slices created because of compaction request must be renamed
@@ -250,8 +232,7 @@ public class TestCompactionAdminClient extends TestHoodieClientBase {
final HoodieTableFileSystemView fsView =
new HoodieTableFileSystemView(metaClient, metaClient.getCommitsAndCompactionTimeline());
Set<HoodieLogFile> expLogFilesToBeRenamed = fsView.getLatestFileSlices(HoodieTestUtils.DEFAULT_PARTITION_PATHS[0])
.filter(fs -> fs.getBaseInstantTime().equals(compactionInstant))
.flatMap(fs -> fs.getLogFiles())
.filter(fs -> fs.getBaseInstantTime().equals(compactionInstant)).flatMap(fs -> fs.getLogFiles())
.collect(Collectors.toSet());
Assert.assertEquals("Log files belonging to file-slices created because of compaction request must be renamed",
expLogFilesToBeRenamed, gotLogFilesToBeRenamed);
@@ -286,7 +267,8 @@ public class TestCompactionAdminClient extends TestHoodieClientBase {
newFsView.getLatestFileSlicesBeforeOrOn(HoodieTestUtils.DEFAULT_PARTITION_PATHS[0], compactionInstant, true)
.filter(fs -> fs.getBaseInstantTime().equals(compactionInstant)).forEach(fs -> {
Assert.assertFalse("No Data file must be present", fs.getDataFile().isPresent());
Assert.assertTrue("No Log Files", fs.getLogFiles().count() == 0); });
Assert.assertTrue("No Log Files", fs.getLogFiles().count() == 0);
});
// Ensure same number of log-files before and after renaming per fileId
Map<String, Long> fileIdToCountsAfterRenaming =
@@ -295,8 +277,8 @@ public class TestCompactionAdminClient extends TestHoodieClientBase {
.map(fs -> Pair.of(fs.getFileId(), fs.getLogFiles().count()))
.collect(Collectors.toMap(Pair::getKey, Pair::getValue));
Assert.assertEquals("Each File Id has same number of log-files",
fileIdToCountsBeforeRenaming, fileIdToCountsAfterRenaming);
Assert.assertEquals("Each File Id has same number of log-files", fileIdToCountsBeforeRenaming,
fileIdToCountsAfterRenaming);
Assert.assertEquals("Not Empty", numEntriesPerInstant, fileIdToCountsAfterRenaming.size());
Assert.assertEquals("Expected number of renames", expNumRenames, renameFiles.size());
return renameFiles;
@@ -305,15 +287,14 @@ public class TestCompactionAdminClient extends TestHoodieClientBase {
/**
* Validate Unschedule operations
*/
private void validateUnScheduleFileId(CompactionAdminClient client, String ingestionInstant,
String compactionInstant, CompactionOperation op, int expNumRenames) throws Exception {
private void validateUnScheduleFileId(CompactionAdminClient client, String ingestionInstant, String compactionInstant,
CompactionOperation op, int expNumRenames) throws Exception {
ensureValidCompactionPlan(compactionInstant);
// Check suggested rename operations
List<Pair<HoodieLogFile, HoodieLogFile>> renameFiles =
client.getRenamingActionsForUnschedulingCompactionOperation(metaClient, compactionInstant, op,
Option.empty(), false);
List<Pair<HoodieLogFile, HoodieLogFile>> renameFiles = client
.getRenamingActionsForUnschedulingCompactionOperation(metaClient, compactionInstant, op, Option.empty(), false);
metaClient = new HoodieTableMetaClient(metaClient.getHadoopConf(), basePath, true);
// Log files belonging to file-slices created because of compaction request must be renamed
@@ -323,8 +304,7 @@ public class TestCompactionAdminClient extends TestHoodieClientBase {
new HoodieTableFileSystemView(metaClient, metaClient.getCommitsAndCompactionTimeline());
Set<HoodieLogFile> expLogFilesToBeRenamed = fsView.getLatestFileSlices(HoodieTestUtils.DEFAULT_PARTITION_PATHS[0])
.filter(fs -> fs.getBaseInstantTime().equals(compactionInstant))
.filter(fs -> fs.getFileId().equals(op.getFileId()))
.flatMap(fs -> fs.getLogFiles())
.filter(fs -> fs.getFileId().equals(op.getFileId())).flatMap(fs -> fs.getLogFiles())
.collect(Collectors.toSet());
Assert.assertEquals("Log files belonging to file-slices created because of compaction request must be renamed",
expLogFilesToBeRenamed, gotLogFilesToBeRenamed);
@@ -359,8 +339,8 @@ public class TestCompactionAdminClient extends TestHoodieClientBase {
.map(fs -> Pair.of(fs.getFileId(), fs.getLogFiles().count()))
.collect(Collectors.toMap(Pair::getKey, Pair::getValue));
Assert.assertEquals("Each File Id has same number of log-files",
fileIdToCountsBeforeRenaming, fileIdToCountsAfterRenaming);
Assert.assertEquals("Each File Id has same number of log-files", fileIdToCountsBeforeRenaming,
fileIdToCountsAfterRenaming);
Assert.assertEquals("Not Empty", 1, fileIdToCountsAfterRenaming.size());
Assert.assertEquals("Expected number of renames", expNumRenames, renameFiles.size());
}

View File

@@ -51,26 +51,23 @@ public class TestConsistencyGuard extends HoodieClientTestHarness {
ConsistencyGuard passing = new FailSafeConsistencyGuard(fs, getConsistencyGuardConfig(1, 1000, 1000));
passing.waitTillFileAppears(new Path(basePath + "/partition/path/f1_1-0-1_000.parquet"));
passing.waitTillFileAppears(new Path(basePath + "/partition/path/f2_1-0-1_000.parquet"));
passing.waitTillAllFilesAppear(basePath + "/partition/path",
Arrays.asList(basePath + "/partition/path/f1_1-0-1_000.parquet",
basePath + "/partition/path/f2_1-0-1_000.parquet"));
passing.waitTillAllFilesAppear(basePath + "/partition/path", Arrays
.asList(basePath + "/partition/path/f1_1-0-1_000.parquet", basePath + "/partition/path/f2_1-0-1_000.parquet"));
fs.delete(new Path(basePath + "/partition/path/f1_1-0-1_000.parquet"), false);
fs.delete(new Path(basePath + "/partition/path/f2_1-0-1_000.parquet"), false);
passing.waitTillFileDisappears(new Path(basePath + "/partition/path/f1_1-0-1_000.parquet"));
passing.waitTillFileDisappears(new Path(basePath + "/partition/path/f2_1-0-1_000.parquet"));
passing.waitTillAllFilesDisappear(basePath + "/partition/path",
Arrays.asList(basePath + "/partition/path/f1_1-0-1_000.parquet",
basePath + "/partition/path/f2_1-0-1_000.parquet"));
passing.waitTillAllFilesDisappear(basePath + "/partition/path", Arrays
.asList(basePath + "/partition/path/f1_1-0-1_000.parquet", basePath + "/partition/path/f2_1-0-1_000.parquet"));
}
/**
 * Expects a {@code TimeoutException} when waiting for two files to appear under "partition/path". The test only
 * fakes a data file for file id "f1" at commit "000"; nothing is created for "f2".
 */
@Test(expected = TimeoutException.class)
public void testCheckFailingAppear() throws Exception {
HoodieClientTestUtils.fakeDataFile(basePath, "partition/path", "000", "f1");
ConsistencyGuard passing = new FailSafeConsistencyGuard(fs, getConsistencyGuardConfig());
passing.waitTillAllFilesAppear(basePath + "/partition/path",
Arrays.asList(basePath + "/partition/path/f1_1-0-2_000.parquet",
basePath + "/partition/path/f2_1-0-2_000.parquet"));
// Waits on both parquet paths; the test passes only if this call throws TimeoutException.
passing.waitTillAllFilesAppear(basePath + "/partition/path", Arrays
.asList(basePath + "/partition/path/f1_1-0-2_000.parquet", basePath + "/partition/path/f2_1-0-2_000.parquet"));
}
@@ -85,9 +82,8 @@ public class TestConsistencyGuard extends HoodieClientTestHarness {
public void testCheckFailingDisappear() throws Exception {
HoodieClientTestUtils.fakeDataFile(basePath, "partition/path", "000", "f1");
ConsistencyGuard passing = new FailSafeConsistencyGuard(fs, getConsistencyGuardConfig());
passing.waitTillAllFilesDisappear(basePath + "/partition/path",
Arrays.asList(basePath + "/partition/path/f1_1-0-1_000.parquet",
basePath + "/partition/path/f2_1-0-2_000.parquet"));
passing.waitTillAllFilesDisappear(basePath + "/partition/path", Arrays
.asList(basePath + "/partition/path/f1_1-0-1_000.parquet", basePath + "/partition/path/f2_1-0-2_000.parquet"));
}
@Test(expected = TimeoutException.class)

View File

@@ -110,17 +110,15 @@ public class TestHoodieClientBase extends HoodieClientTestHarness {
*/
HoodieWriteConfig.Builder getConfigBuilder() {
// Test configuration for table "test-trip-table" at basePath using the trip example schema:
// - parallelism of 2 for every configured operation (general, bulk insert, finalize write)
// - MetadataMergeWriteStatus as the write-status class
// - consistency check enabled
// - compaction small-file size and storage file-size limit both set to 1024 * 1024
// - BLOOM index
// - embedded timeline server enabled, with an EMBEDDED_KV_STORE file-system view
// The builder is returned un-built so callers can override settings before calling build().
return HoodieWriteConfig.newBuilder().withPath(basePath).withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA)
.withParallelism(2, 2)
.withBulkInsertParallelism(2).withFinalizeWriteParallelism(2)
.withParallelism(2, 2).withBulkInsertParallelism(2).withFinalizeWriteParallelism(2)
.withWriteStatusClass(MetadataMergeWriteStatus.class)
.withConsistencyGuardConfig(ConsistencyGuardConfig.newBuilder().withConsistencyCheckEnabled(true).build())
.withCompactionConfig(HoodieCompactionConfig.newBuilder().compactionSmallFileSize(1024 * 1024).build())
.withStorageConfig(HoodieStorageConfig.newBuilder().limitFileSize(1024 * 1024).build())
.forTable("test-trip-table")
.withIndexConfig(HoodieIndexConfig.newBuilder().withIndexType(IndexType.BLOOM).build())
.withEmbeddedTimelineServerEnabled(true).withFileSystemViewConfig(
FileSystemViewStorageConfig.newBuilder().withStorageType(FileSystemViewStorageType.EMBEDDED_KV_STORE)
.build());
.withEmbeddedTimelineServerEnabled(true).withFileSystemViewConfig(FileSystemViewStorageConfig.newBuilder()
.withStorageType(FileSystemViewStorageType.EMBEDDED_KV_STORE).build());
}
protected HoodieTable getHoodieTable(HoodieTableMetaClient metaClient, HoodieWriteConfig config) {
@@ -200,15 +198,13 @@ public class TestHoodieClientBase extends HoodieClientTestHarness {
* @return Wrapped function
*/
private Function2<List<HoodieRecord>, String, Integer> wrapRecordsGenFunctionForPreppedCalls(
final HoodieWriteConfig writeConfig,
final Function2<List<HoodieRecord>, String, Integer> recordGenFunction) {
final HoodieWriteConfig writeConfig, final Function2<List<HoodieRecord>, String, Integer> recordGenFunction) {
// The returned function generates records with recordGenFunction, then passes them through
// index.tagLocation and returns the collected, tagged records.
return (commit, numRecords) -> {
// Index, meta client and table are created on every invocation rather than captured once.
final HoodieIndex index = HoodieIndex.createIndex(writeConfig, jsc);
List<HoodieRecord> records = recordGenFunction.apply(commit, numRecords);
final HoodieTableMetaClient metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), basePath, true);
HoodieTable table = HoodieTable.getHoodieTable(metaClient, writeConfig, jsc);
JavaRDD<HoodieRecord> taggedRecords =
index.tagLocation(jsc.parallelize(records, 1), jsc, table);
// Records are parallelized into a single partition before tagging.
JavaRDD<HoodieRecord> taggedRecords = index.tagLocation(jsc.parallelize(records, 1), jsc, table);
return taggedRecords.collect();
};
}
@@ -221,10 +217,8 @@ public class TestHoodieClientBase extends HoodieClientTestHarness {
* @param wrapped Actual Records Generation function
* @return Wrapped Function
*/
Function2<List<HoodieRecord>, String, Integer> generateWrapRecordsFn(
boolean isPreppedAPI,
HoodieWriteConfig writeConfig,
Function2<List<HoodieRecord>, String, Integer> wrapped) {
Function2<List<HoodieRecord>, String, Integer> generateWrapRecordsFn(boolean isPreppedAPI,
HoodieWriteConfig writeConfig, Function2<List<HoodieRecord>, String, Integer> wrapped) {
if (isPreppedAPI) {
return wrapRecordsGenFunctionForPreppedCalls(writeConfig, wrapped);
} else {
@@ -247,22 +241,15 @@ public class TestHoodieClientBase extends HoodieClientTestHarness {
* @return RDD of write-status
* @throws Exception in case of error
*/
JavaRDD<WriteStatus> insertFirstBatch(
HoodieWriteConfig writeConfig,
HoodieWriteClient client,
String newCommitTime,
String initCommitTime,
int numRecordsInThisCommit,
Function3<JavaRDD<WriteStatus>, HoodieWriteClient, JavaRDD<HoodieRecord>, String> writeFn,
boolean isPreppedAPI,
boolean assertForCommit,
int expRecordsInThisCommit) throws Exception {
JavaRDD<WriteStatus> insertFirstBatch(HoodieWriteConfig writeConfig, HoodieWriteClient client, String newCommitTime,
String initCommitTime, int numRecordsInThisCommit,
Function3<JavaRDD<WriteStatus>, HoodieWriteClient, JavaRDD<HoodieRecord>, String> writeFn, boolean isPreppedAPI,
boolean assertForCommit, int expRecordsInThisCommit) throws Exception {
// Records come from dataGen::generateInserts, wrapped according to isPreppedAPI.
final Function2<List<HoodieRecord>, String, Integer> recordGenFunction =
generateWrapRecordsFn(isPreppedAPI, writeConfig, dataGen::generateInserts);
return writeBatch(client, newCommitTime, initCommitTime, Option.empty(), initCommitTime,
numRecordsInThisCommit, recordGenFunction, writeFn, assertForCommit,
expRecordsInThisCommit, expRecordsInThisCommit, 1);
// First batch: the init commit doubles as the previous commit, there are no commits in between,
// the expected total record count equals this commit's count, and exactly 1 commit is expected.
return writeBatch(client, newCommitTime, initCommitTime, Option.empty(), initCommitTime, numRecordsInThisCommit,
recordGenFunction, writeFn, assertForCommit, expRecordsInThisCommit, expRecordsInThisCommit, 1);
}
/**
@@ -284,27 +271,17 @@ public class TestHoodieClientBase extends HoodieClientTestHarness {
* @return RDD of write-status
* @throws Exception in case of error
*/
JavaRDD<WriteStatus> updateBatch(
HoodieWriteConfig writeConfig,
HoodieWriteClient client,
String newCommitTime,
String prevCommitTime,
Option<List<String>> commitTimesBetweenPrevAndNew,
String initCommitTime,
JavaRDD<WriteStatus> updateBatch(HoodieWriteConfig writeConfig, HoodieWriteClient client, String newCommitTime,
String prevCommitTime, Option<List<String>> commitTimesBetweenPrevAndNew, String initCommitTime,
int numRecordsInThisCommit,
Function3<JavaRDD<WriteStatus>, HoodieWriteClient, JavaRDD<HoodieRecord>, String> writeFn,
boolean isPreppedAPI,
boolean assertForCommit,
int expRecordsInThisCommit,
int expTotalRecords,
int expTotalCommits)
throws Exception {
Function3<JavaRDD<WriteStatus>, HoodieWriteClient, JavaRDD<HoodieRecord>, String> writeFn, boolean isPreppedAPI,
boolean assertForCommit, int expRecordsInThisCommit, int expTotalRecords, int expTotalCommits) throws Exception {
// Records come from dataGen::generateUniqueUpdates, wrapped according to isPreppedAPI.
final Function2<List<HoodieRecord>, String, Integer> recordGenFunction =
generateWrapRecordsFn(isPreppedAPI, writeConfig, dataGen::generateUniqueUpdates);
// All commit times and expectations are forwarded to writeBatch unchanged.
return writeBatch(client, newCommitTime, prevCommitTime, commitTimesBetweenPrevAndNew, initCommitTime,
numRecordsInThisCommit, recordGenFunction, writeFn, assertForCommit,
expRecordsInThisCommit, expTotalRecords, expTotalCommits);
numRecordsInThisCommit, recordGenFunction, writeFn, assertForCommit, expRecordsInThisCommit, expTotalRecords,
expTotalCommits);
}
/**
@@ -324,22 +301,13 @@ public class TestHoodieClientBase extends HoodieClientTestHarness {
* @param expTotalCommits Expected number of commits (including this commit)
* @throws Exception in case of error
*/
JavaRDD<WriteStatus> writeBatch(
HoodieWriteClient client,
String newCommitTime,
String prevCommitTime,
Option<List<String>> commitTimesBetweenPrevAndNew,
String initCommitTime,
int numRecordsInThisCommit,
JavaRDD<WriteStatus> writeBatch(HoodieWriteClient client, String newCommitTime, String prevCommitTime,
Option<List<String>> commitTimesBetweenPrevAndNew, String initCommitTime, int numRecordsInThisCommit,
Function2<List<HoodieRecord>, String, Integer> recordGenFunction,
Function3<JavaRDD<WriteStatus>, HoodieWriteClient, JavaRDD<HoodieRecord>, String> writeFn,
boolean assertForCommit,
int expRecordsInThisCommit,
int expTotalRecords,
int expTotalCommits)
throws Exception {
boolean assertForCommit, int expRecordsInThisCommit, int expTotalRecords, int expTotalCommits) throws Exception {
//Write 1 (only inserts)
// Write 1 (only inserts)
client.startCommitWithTime(newCommitTime);
List<HoodieRecord> records = recordGenFunction.apply(newCommitTime, numRecordsInThisCommit);
@@ -373,8 +341,7 @@ public class TestHoodieClientBase extends HoodieClientTestHarness {
HoodieClientTestUtils.read(jsc, basePath, sqlContext, fs, fullPartitionPaths).count());
// Check that the incremental consumption from prevCommitTime
assertEquals("Incremental consumption from " + prevCommitTime
+ " should give all records in latest commit",
assertEquals("Incremental consumption from " + prevCommitTime + " should give all records in latest commit",
HoodieClientTestUtils.readCommit(basePath, sqlContext, timeline, newCommitTime).count(),
HoodieClientTestUtils.readSince(basePath, sqlContext, timeline, prevCommitTime).count());
if (commitTimesBetweenPrevAndNew.isPresent()) {
@@ -407,8 +374,7 @@ public class TestHoodieClientBase extends HoodieClientTestHarness {
* @param commitTime Commit Timestamp
* @throws IOException in case of error
*/
void updateAllFilesInPartition(List<String> files, String partitionPath, String commitTime)
throws IOException {
void updateAllFilesInPartition(List<String> files, String partitionPath, String commitTime) throws IOException {
for (String fileId : files) {
HoodieTestUtils.createDataFile(basePath, partitionPath, commitTime, fileId);
}
@@ -423,8 +389,7 @@ public class TestHoodieClientBase extends HoodieClientTestHarness {
* @return Created files
* @throws IOException in case of error
*/
List<String> createFilesInPartition(String partitionPath, String commitTime, int numFiles)
throws IOException {
List<String> createFilesInPartition(String partitionPath, String commitTime, int numFiles) throws IOException {
List<String> files = new ArrayList<>();
for (int i = 0; i < numFiles; i++) {
files.add(HoodieTestUtils.createNewDataFile(basePath, partitionPath, commitTime));

View File

@@ -116,8 +116,8 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
*/
@Test
public void testAutoCommitOnBulkInsertPrepped() throws Exception {
// Drives testAutoCommit with a lambda that writes through bulkInsertPreppedRecords (passing Option.empty() as
// its third argument) and with isPrepped = true.
// NOTE(review): the call is listed twice because this diff view shows the pre-format and the post-format
// wrapping of the same statement back to back; the two differ only in line breaks.
testAutoCommit((writeClient, recordRDD, commitTime)
-> writeClient.bulkInsertPreppedRecords(recordRDD, commitTime, Option.empty()), true);
testAutoCommit((writeClient, recordRDD, commitTime) -> writeClient.bulkInsertPreppedRecords(recordRDD, commitTime,
Option.empty()), true);
}
/**
@@ -126,8 +126,7 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
* @param writeFn One of HoodieWriteClient Write API
* @throws Exception in case of failure
*/
private void testAutoCommit(
Function3<JavaRDD<WriteStatus>, HoodieWriteClient, JavaRDD<HoodieRecord>, String> writeFn,
private void testAutoCommit(Function3<JavaRDD<WriteStatus>, HoodieWriteClient, JavaRDD<HoodieRecord>, String> writeFn,
boolean isPrepped) throws Exception {
// Set autoCommit false
HoodieWriteConfig cfg = getConfigBuilder().withAutoCommit(false).build();
@@ -136,9 +135,8 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
String prevCommitTime = "000";
String newCommitTime = "001";
int numRecords = 200;
JavaRDD<WriteStatus> result =
insertFirstBatch(cfg, client, newCommitTime, prevCommitTime, numRecords, writeFn, isPrepped, false,
numRecords);
JavaRDD<WriteStatus> result = insertFirstBatch(cfg, client, newCommitTime, prevCommitTime, numRecords, writeFn,
isPrepped, false, numRecords);
assertFalse("If Autocommit is false, then commit should not be made automatically",
HoodieTestUtils.doesCommitExist(basePath, newCommitTime));
@@ -184,16 +182,16 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
String recordKey = UUID.randomUUID().toString();
HoodieKey keyOne = new HoodieKey(recordKey, "2018-01-01");
HoodieRecord recordOne = new HoodieRecord(keyOne,
HoodieTestDataGenerator.generateRandomValue(keyOne, newCommitTime));
HoodieRecord recordOne =
new HoodieRecord(keyOne, HoodieTestDataGenerator.generateRandomValue(keyOne, newCommitTime));
HoodieKey keyTwo = new HoodieKey(recordKey, "2018-02-01");
HoodieRecord recordTwo = new HoodieRecord(keyTwo,
HoodieTestDataGenerator.generateRandomValue(keyTwo, newCommitTime));
HoodieRecord recordTwo =
new HoodieRecord(keyTwo, HoodieTestDataGenerator.generateRandomValue(keyTwo, newCommitTime));
// Same key and partition as keyTwo
HoodieRecord recordThree = new HoodieRecord(keyTwo,
HoodieTestDataGenerator.generateRandomValue(keyTwo, newCommitTime));
HoodieRecord recordThree =
new HoodieRecord(keyTwo, HoodieTestDataGenerator.generateRandomValue(keyTwo, newCommitTime));
JavaRDD<HoodieRecord> records = jsc.parallelize(Arrays.asList(recordOne, recordTwo, recordThree), 1);
@@ -205,21 +203,18 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
// dedup should be done based on both recordKey and partitionPath
HoodieWriteClient clientWithDummyNonGlobalIndex = getWriteClientWithDummyIndex(false);
dedupedRecs =
clientWithDummyNonGlobalIndex.deduplicateRecords(records, 1).collect();
dedupedRecs = clientWithDummyNonGlobalIndex.deduplicateRecords(records, 1).collect();
assertEquals(2, dedupedRecs.size());
assertNodupesWithinPartition(dedupedRecs);
// Perform write-action and check
try (HoodieWriteClient client = getHoodieWriteClient(
getConfigBuilder().combineInput(true, true).build(), false);) {
try (HoodieWriteClient client = getHoodieWriteClient(getConfigBuilder().combineInput(true, true).build(), false);) {
client.startCommitWithTime(newCommitTime);
List<WriteStatus> statuses = writeFn.apply(client, records, newCommitTime).collect();
assertNoWriteErrors(statuses);
assertEquals(2, statuses.size());
assertNodupesWithinPartition(
statuses.stream().map(WriteStatus::getWrittenRecords)
.flatMap(Collection::stream).collect(Collectors.toList()));
assertNodupesWithinPartition(statuses.stream().map(WriteStatus::getWrittenRecords).flatMap(Collection::stream)
.collect(Collectors.toList()));
}
}
@@ -241,8 +236,7 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
*/
@Test
public void testUpserts() throws Exception {
// Runs the shared upsert scenario with the config from getConfig(), HoodieWriteClient::upsert as the write
// function, and isPrepped = false.
// NOTE(review): the call is listed twice because this diff view shows the old two-line wrapping followed by
// the reformatted single-line version of the same statement.
testUpsertsInternal(getConfig(),
HoodieWriteClient::upsert, false);
testUpsertsInternal(getConfig(), HoodieWriteClient::upsert, false);
}
/**
@@ -250,8 +244,7 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
*/
@Test
public void testUpsertsPrepped() throws Exception {
// Runs the shared upsert scenario with the config from getConfig(), HoodieWriteClient::upsertPreppedRecords as
// the write function, and isPrepped = true.
// NOTE(review): the call is listed twice because this diff view shows the old two-line wrapping followed by
// the reformatted single-line version of the same statement.
testUpsertsInternal(getConfig(),
HoodieWriteClient::upsertPreppedRecords, true);
testUpsertsInternal(getConfig(), HoodieWriteClient::upsertPreppedRecords, true);
}
/**
@@ -262,16 +255,16 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
* @throws Exception in case of error
*/
private void testUpsertsInternal(HoodieWriteConfig hoodieWriteConfig,
Function3<JavaRDD<WriteStatus>, HoodieWriteClient, JavaRDD<HoodieRecord>, String> writeFn,
boolean isPrepped) throws Exception {
Function3<JavaRDD<WriteStatus>, HoodieWriteClient, JavaRDD<HoodieRecord>, String> writeFn, boolean isPrepped)
throws Exception {
HoodieWriteClient client = getHoodieWriteClient(hoodieWriteConfig, false);
//Write 1 (only inserts)
// Write 1 (only inserts)
String newCommitTime = "001";
String initCommitTime = "000";
int numRecords = 200;
insertFirstBatch(hoodieWriteConfig,
client, newCommitTime, initCommitTime, numRecords, HoodieWriteClient::insert, isPrepped, true, numRecords);
insertFirstBatch(hoodieWriteConfig, client, newCommitTime, initCommitTime, numRecords, HoodieWriteClient::insert,
isPrepped, true, numRecords);
// Write 2 (updates)
String prevCommitTime = newCommitTime;
@@ -279,8 +272,8 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
numRecords = 100;
String commitTimeBetweenPrevAndNew = "002";
updateBatch(hoodieWriteConfig, client, newCommitTime, prevCommitTime,
Option.of(Arrays.asList(commitTimeBetweenPrevAndNew)),
initCommitTime, numRecords, writeFn, isPrepped, true, numRecords, 200, 2);
Option.of(Arrays.asList(commitTimeBetweenPrevAndNew)), initCommitTime, numRecords, writeFn, isPrepped, true,
numRecords, 200, 2);
}
/**
@@ -291,8 +284,7 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
HoodieWriteClient client = getHoodieWriteClient(getConfig(), false);
/**
* Write 1 (inserts and deletes)
* Write actual 200 insert records and ignore 100 delete records
* Write 1 (inserts and deletes) Write actual 200 insert records and ignore 100 delete records
*/
String initCommitTime = "000";
String newCommitTime = "001";
@@ -308,10 +300,8 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
return recordsInFirstBatch;
};
writeBatch(client, newCommitTime, initCommitTime, Option.empty(), initCommitTime,
//unused as genFn uses hard-coded number of inserts/updates/deletes
-1,
recordGenFunction, HoodieWriteClient::upsert, true,
200, 200, 1);
// unused as genFn uses hard-coded number of inserts/updates/deletes
-1, recordGenFunction, HoodieWriteClient::upsert, true, 200, 200, 1);
/**
* Write 2 (deletes+writes)
@@ -320,17 +310,15 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
newCommitTime = "004";
final List<HoodieRecord> recordsInSecondBatch = new ArrayList<>();
recordGenFunction =
(String commitTime, Integer numRecordsInThisCommit) -> {
List<HoodieRecord> fewRecordsForDelete = recordsInFirstBatch.subList(0, 50);
List<HoodieRecord> fewRecordsForUpdate = recordsInFirstBatch.subList(50, 100);
recordsInSecondBatch.addAll(dataGen.generateDeletesFromExistingRecords(fewRecordsForDelete));
recordsInSecondBatch.addAll(fewRecordsForUpdate);
return recordsInSecondBatch;
};
writeBatch(client, newCommitTime, prevCommitTime, Option.empty(), initCommitTime,
100, recordGenFunction, HoodieWriteClient::upsert, true,
50, 150, 2);
recordGenFunction = (String commitTime, Integer numRecordsInThisCommit) -> {
List<HoodieRecord> fewRecordsForDelete = recordsInFirstBatch.subList(0, 50);
List<HoodieRecord> fewRecordsForUpdate = recordsInFirstBatch.subList(50, 100);
recordsInSecondBatch.addAll(dataGen.generateDeletesFromExistingRecords(fewRecordsForDelete));
recordsInSecondBatch.addAll(fewRecordsForUpdate);
return recordsInSecondBatch;
};
writeBatch(client, newCommitTime, prevCommitTime, Option.empty(), initCommitTime, 100, recordGenFunction,
HoodieWriteClient::upsert, true, 50, 150, 2);
}
/**
@@ -342,7 +330,7 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
final int insertSplitLimit = 100;
// setup the small file handling params
HoodieWriteConfig config = getSmallInsertWriteConfig(insertSplitLimit); // hold upto 200 records max
dataGen = new HoodieTestDataGenerator(new String[]{testPartitionPath});
dataGen = new HoodieTestDataGenerator(new String[] {testPartitionPath});
HoodieWriteClient client = getHoodieWriteClient(config, false);
@@ -359,8 +347,10 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
assertEquals("Just 1 file needs to be added.", 1, statuses.size());
String file1 = statuses.get(0).getFileId();
Assert.assertEquals("file should contain 100 records", readRowKeysFromParquet(jsc.hadoopConfiguration(),
new Path(basePath, statuses.get(0).getStat().getPath())).size(), 100);
Assert.assertEquals("file should contain 100 records",
readRowKeysFromParquet(jsc.hadoopConfiguration(), new Path(basePath, statuses.get(0).getStat().getPath()))
.size(),
100);
// Update + Inserts such that they just expand file1
String commitTime2 = "002";
@@ -379,8 +369,8 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
assertEquals("Existing file should be expanded", file1, statuses.get(0).getFileId());
assertEquals("Existing file should be expanded", commitTime1, statuses.get(0).getStat().getPrevCommit());
Path newFile = new Path(basePath, statuses.get(0).getStat().getPath());
assertEquals("file should contain 140 records",
readRowKeysFromParquet(jsc.hadoopConfiguration(), newFile).size(), 140);
assertEquals("file should contain 140 records", readRowKeysFromParquet(jsc.hadoopConfiguration(), newFile).size(),
140);
List<GenericRecord> records = ParquetUtils.readAvroRecords(jsc.hadoopConfiguration(), newFile);
for (GenericRecord record : records) {
@@ -406,8 +396,8 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
HoodieTable table = getHoodieTable(metadata, config);
ReadOptimizedView fileSystemView = table.getROFileSystemView();
List<HoodieDataFile> files = fileSystemView.getLatestDataFilesBeforeOrOn(testPartitionPath, commitTime3)
.collect(Collectors.toList());
List<HoodieDataFile> files =
fileSystemView.getLatestDataFilesBeforeOrOn(testPartitionPath, commitTime3).collect(Collectors.toList());
int numTotalInsertsInCommit3 = 0;
int numTotalUpdatesInCommit3 = 0;
for (HoodieDataFile file : files) {
@@ -453,7 +443,7 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
final int insertSplitLimit = 100;
// setup the small file handling params
HoodieWriteConfig config = getSmallInsertWriteConfig(insertSplitLimit); // hold upto 200 records max
dataGen = new HoodieTestDataGenerator(new String[]{testPartitionPath});
dataGen = new HoodieTestDataGenerator(new String[] {testPartitionPath});
HoodieWriteClient client = getHoodieWriteClient(config, false);
// Inserts => will write file1
@@ -465,12 +455,14 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
List<WriteStatus> statuses = client.insert(insertRecordsRDD1, commitTime1).collect();
assertNoWriteErrors(statuses);
assertPartitionMetadata(new String[]{testPartitionPath}, fs);
assertPartitionMetadata(new String[] {testPartitionPath}, fs);
assertEquals("Just 1 file needs to be added.", 1, statuses.size());
String file1 = statuses.get(0).getFileId();
assertEquals("file should contain 100 records", readRowKeysFromParquet(jsc.hadoopConfiguration(),
new Path(basePath, statuses.get(0).getStat().getPath())).size(), 100);
assertEquals("file should contain 100 records",
readRowKeysFromParquet(jsc.hadoopConfiguration(), new Path(basePath, statuses.get(0).getStat().getPath()))
.size(),
100);
// Second, set of Inserts should just expand file1
String commitTime2 = "002";
@@ -485,8 +477,8 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
assertEquals("Existing file should be expanded", file1, statuses.get(0).getFileId());
assertEquals("Existing file should be expanded", commitTime1, statuses.get(0).getStat().getPrevCommit());
Path newFile = new Path(basePath, statuses.get(0).getStat().getPath());
assertEquals("file should contain 140 records",
readRowKeysFromParquet(jsc.hadoopConfiguration(), newFile).size(), 140);
assertEquals("file should contain 140 records", readRowKeysFromParquet(jsc.hadoopConfiguration(), newFile).size(),
140);
List<GenericRecord> records = ParquetUtils.readAvroRecords(jsc.hadoopConfiguration(), newFile);
for (GenericRecord record : records) {
@@ -510,8 +502,7 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
HoodieTableMetaClient metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), basePath);
HoodieTable table = getHoodieTable(metaClient, config);
List<HoodieDataFile> files = table.getROFileSystemView()
.getLatestDataFilesBeforeOrOn(testPartitionPath, commitTime3)
.collect(Collectors.toList());
.getLatestDataFilesBeforeOrOn(testPartitionPath, commitTime3).collect(Collectors.toList());
assertEquals("Total of 2 valid data files", 2, files.size());
int totalInserts = 0;
@@ -598,10 +589,11 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
String filename = HoodieTestUtils.getCommitFilePath(basePath, commitTime);
FileInputStream inputStream = new FileInputStream(filename);
String everything = FileIOUtils.readAsUTFString(inputStream);
HoodieCommitMetadata metadata = HoodieCommitMetadata.fromJsonString(everything.toString(),
HoodieCommitMetadata.class);
HoodieRollingStatMetadata rollingStatMetadata = HoodieCommitMetadata.fromJsonString(metadata.getExtraMetadata()
.get(HoodieRollingStatMetadata.ROLLING_STAT_METADATA_KEY), HoodieRollingStatMetadata.class);
HoodieCommitMetadata metadata =
HoodieCommitMetadata.fromJsonString(everything.toString(), HoodieCommitMetadata.class);
HoodieRollingStatMetadata rollingStatMetadata = HoodieCommitMetadata.fromJsonString(
metadata.getExtraMetadata().get(HoodieRollingStatMetadata.ROLLING_STAT_METADATA_KEY),
HoodieRollingStatMetadata.class);
int inserts = 0;
for (Map.Entry<String, Map<String, HoodieRollingStat>> pstat : rollingStatMetadata.getPartitionToRollingStats()
.entrySet()) {
@@ -628,8 +620,9 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
inputStream = new FileInputStream(filename);
everything = FileIOUtils.readAsUTFString(inputStream);
metadata = HoodieCommitMetadata.fromJsonString(everything.toString(), HoodieCommitMetadata.class);
rollingStatMetadata = HoodieCommitMetadata.fromJsonString(metadata.getExtraMetadata()
.get(HoodieRollingStatMetadata.ROLLING_STAT_METADATA_KEY), HoodieRollingStatMetadata.class);
rollingStatMetadata = HoodieCommitMetadata.fromJsonString(
metadata.getExtraMetadata().get(HoodieRollingStatMetadata.ROLLING_STAT_METADATA_KEY),
HoodieRollingStatMetadata.class);
inserts = 0;
int upserts = 0;
for (Map.Entry<String, Map<String, HoodieRollingStat>> pstat : rollingStatMetadata.getPartitionToRollingStats()
@@ -649,8 +642,7 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
*/
@Test
public void testConsistencyCheckDuringFinalize() throws Exception {
HoodieTableMetaClient metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(),
basePath);
HoodieTableMetaClient metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), basePath);
String commitTime = "000";
HoodieWriteConfig cfg = getConfigBuilder().withAutoCommit(false).build();
HoodieWriteClient client = getHoodieWriteClient(cfg);
@@ -684,11 +676,8 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
private Pair<Path, JavaRDD<WriteStatus>> testConsistencyCheck(HoodieTableMetaClient metaClient, String commitTime)
throws Exception {
HoodieWriteConfig cfg = getConfigBuilder().withAutoCommit(false)
.withConsistencyGuardConfig(ConsistencyGuardConfig.newBuilder()
.withConsistencyCheckEnabled(true)
.withMaxConsistencyCheckIntervalMs(1)
.withInitialConsistencyCheckIntervalMs(1)
.build())
.withConsistencyGuardConfig(ConsistencyGuardConfig.newBuilder().withConsistencyCheckEnabled(true)
.withMaxConsistencyCheckIntervalMs(1).withInitialConsistencyCheckIntervalMs(1).build())
.build();
HoodieWriteClient client = getHoodieWriteClient(cfg);
@@ -699,10 +688,10 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
// Create a dummy marker file to simulate the case that a marker file was created without data file.
// This should fail the commit
String partitionPath = Arrays.stream(fs.globStatus(new Path(String.format("%s/*/*/*/*",
metaClient.getMarkerFolderPath(commitTime))),
path -> path.toString().endsWith(HoodieTableMetaClient.MARKER_EXTN))).limit(1)
.map(status -> status.getPath().getParent().toString()).collect(Collectors.toList()).get(0);
String partitionPath = Arrays
.stream(fs.globStatus(new Path(String.format("%s/*/*/*/*", metaClient.getMarkerFolderPath(commitTime))),
path -> path.toString().endsWith(HoodieTableMetaClient.MARKER_EXTN)))
.limit(1).map(status -> status.getPath().getParent().toString()).collect(Collectors.toList()).get(0);
Path markerFilePath = new Path(String.format("%s/%s", partitionPath,
FSUtils.makeMarkerFile(commitTime, "1-0-1", UUID.randomUUID().toString())));
metaClient.getFs().create(markerFilePath);
@@ -722,9 +711,10 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
*/
private HoodieWriteConfig getSmallInsertWriteConfig(int insertSplitSize) {
HoodieWriteConfig.Builder builder = getConfigBuilder();
return builder.withCompactionConfig(
HoodieCompactionConfig.newBuilder().compactionSmallFileSize(HoodieTestDataGenerator.SIZE_PER_RECORD * 15)
.insertSplitSize(insertSplitSize).build()) // tolerate upto 15 records
return builder
.withCompactionConfig(
HoodieCompactionConfig.newBuilder().compactionSmallFileSize(HoodieTestDataGenerator.SIZE_PER_RECORD * 15)
.insertSplitSize(insertSplitSize).build()) // tolerate upto 15 records
.withStorageConfig(
HoodieStorageConfig.newBuilder().limitFileSize(HoodieTestDataGenerator.SIZE_PER_RECORD * 20).build())
.build();

View File

@@ -113,8 +113,7 @@ public class TestHoodieReadClient extends TestHoodieClientBase {
*/
@Test
public void testTagLocationAfterInsert() throws Exception {
// Runs the shared tag-location scenario with the config from getConfig(), HoodieWriteClient::insert as the
// insert function, HoodieWriteClient::upsert as the update function, and isPrepped = false.
// NOTE(review): the call is listed twice because this diff view shows the old two-line wrapping followed by
// the reformatted single-line version of the same statement.
testTagLocation(getConfig(), HoodieWriteClient::insert,
HoodieWriteClient::upsert, false);
testTagLocation(getConfig(), HoodieWriteClient::insert, HoodieWriteClient::upsert, false);
}
/**
@@ -122,8 +121,8 @@ public class TestHoodieReadClient extends TestHoodieClientBase {
*/
@Test
public void testTagLocationAfterInsertPrepped() throws Exception {
// Runs the shared tag-location scenario with the config from getConfig(), using the prepped-record variants
// (insertPreppedRecords / upsertPreppedRecords) as insert and update functions, and isPrepped = true.
// NOTE(review): the call is listed twice because this diff view shows the old wrapping followed by the
// reformatted wrapping of the same statement; only the line-break position differs.
testTagLocation(getConfig(), HoodieWriteClient::insertPreppedRecords,
HoodieWriteClient::upsertPreppedRecords, true);
testTagLocation(getConfig(), HoodieWriteClient::insertPreppedRecords, HoodieWriteClient::upsertPreppedRecords,
true);
}
/**
@@ -140,9 +139,9 @@ public class TestHoodieReadClient extends TestHoodieClientBase {
*/
@Test
public void testTagLocationAfterBulkInsertPrepped() throws Exception {
// Runs the shared tag-location scenario with a config whose bulk-insert parallelism is set to 1, a lambda that
// inserts through bulkInsertPreppedRecords (Option.empty() as its third argument), upsertPreppedRecords as the
// update function, and isPrepped = true.
// NOTE(review): this diff view lists the old opening lines of the call (next three lines) followed by the
// reformatted opening lines; the final argument line is shared by both versions.
testTagLocation(getConfigBuilder().withBulkInsertParallelism(1).build(),
(writeClient, recordRDD, commitTime)
-> writeClient.bulkInsertPreppedRecords(recordRDD, commitTime, Option.empty()),
testTagLocation(
getConfigBuilder().withBulkInsertParallelism(1).build(), (writeClient, recordRDD, commitTime) -> writeClient
.bulkInsertPreppedRecords(recordRDD, commitTime, Option.empty()),
HoodieWriteClient::upsertPreppedRecords, true);
}
@@ -155,27 +154,22 @@ public class TestHoodieReadClient extends TestHoodieClientBase {
* @param isPrepped isPrepped flag.
* @throws Exception in case of error
*/
private void testTagLocation(
HoodieWriteConfig hoodieWriteConfig,
private void testTagLocation(HoodieWriteConfig hoodieWriteConfig,
Function3<JavaRDD<WriteStatus>, HoodieWriteClient, JavaRDD<HoodieRecord>, String> insertFn,
Function3<JavaRDD<WriteStatus>, HoodieWriteClient, JavaRDD<HoodieRecord>, String> updateFn,
boolean isPrepped)
Function3<JavaRDD<WriteStatus>, HoodieWriteClient, JavaRDD<HoodieRecord>, String> updateFn, boolean isPrepped)
throws Exception {
try (HoodieWriteClient client = getHoodieWriteClient(hoodieWriteConfig);) {
//Write 1 (only inserts)
// Write 1 (only inserts)
String newCommitTime = "001";
String initCommitTime = "000";
int numRecords = 200;
JavaRDD<WriteStatus> result =
insertFirstBatch(hoodieWriteConfig, client, newCommitTime, initCommitTime, numRecords, insertFn, isPrepped,
true, numRecords);
JavaRDD<WriteStatus> result = insertFirstBatch(hoodieWriteConfig, client, newCommitTime, initCommitTime,
numRecords, insertFn, isPrepped, true, numRecords);
// Construct HoodieRecord from the WriteStatus but set HoodieKey, Data and HoodieRecordLocation accordingly
// since they have been modified in the DAG
JavaRDD<HoodieRecord> recordRDD =
jsc.parallelize(
result.collect().stream().map(WriteStatus::getWrittenRecords).flatMap(Collection::stream)
.map(record -> new HoodieRecord(record.getKey(), null))
.collect(Collectors.toList()));
jsc.parallelize(result.collect().stream().map(WriteStatus::getWrittenRecords).flatMap(Collection::stream)
.map(record -> new HoodieRecord(record.getKey(), null)).collect(Collectors.toList()));
// Should have 100 records in table (check using Index), all in locations marked at commit
HoodieReadClient readClient = getHoodieReadClient(hoodieWriteConfig.getBasePath());
List<HoodieRecord> taggedRecords = readClient.tagLocation(recordRDD).collect();
@@ -187,14 +181,11 @@ public class TestHoodieReadClient extends TestHoodieClientBase {
numRecords = 100;
String commitTimeBetweenPrevAndNew = "002";
result = updateBatch(hoodieWriteConfig, client, newCommitTime, prevCommitTime,
Option.of(Arrays.asList(commitTimeBetweenPrevAndNew)),
initCommitTime, numRecords, updateFn, isPrepped,
true, numRecords, 200, 2);
Option.of(Arrays.asList(commitTimeBetweenPrevAndNew)), initCommitTime, numRecords, updateFn, isPrepped, true,
numRecords, 200, 2);
recordRDD =
jsc.parallelize(
result.collect().stream().map(WriteStatus::getWrittenRecords).flatMap(Collection::stream)
.map(record -> new HoodieRecord(record.getKey(), null))
.collect(Collectors.toList()));
jsc.parallelize(result.collect().stream().map(WriteStatus::getWrittenRecords).flatMap(Collection::stream)
.map(record -> new HoodieRecord(record.getKey(), null)).collect(Collectors.toList()));
// Index should be able to locate all updates in correct locations.
readClient = getHoodieReadClient(hoodieWriteConfig.getBasePath());
taggedRecords = readClient.tagLocation(recordRDD).collect();

View File

@@ -71,19 +71,17 @@ public class TestMultiFS extends HoodieClientTestHarness {
// Builds the write config used by this test for the given base path: embedded timeline server enabled,
// TRIP_EXAMPLE_SCHEMA as schema, parallelism (2, 2), table name taken from the tableName field, and a BLOOM index.
// NOTE(review): in this diff view the builder chain after withEmbeddedTimelineServerEnabled(true) is listed
// twice — the old three-line wrapping first, then the reformatted two-line wrapping of the same calls.
protected HoodieWriteConfig getHoodieWriteConfig(String basePath) {
return HoodieWriteConfig.newBuilder().withPath(basePath).withEmbeddedTimelineServerEnabled(true)
.withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA).withParallelism(2, 2)
.forTable(tableName).withIndexConfig(
HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build()).build();
.withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA).withParallelism(2, 2).forTable(tableName)
.withIndexConfig(HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build()).build();
}
@Test
public void readLocalWriteHDFS() throws Exception {
// Initialize table and filesystem
HoodieTableMetaClient
.initTableType(jsc.hadoopConfiguration(), dfsBasePath, HoodieTableType.valueOf(tableType), tableName,
HoodieAvroPayload.class.getName());
HoodieTableMetaClient.initTableType(jsc.hadoopConfiguration(), dfsBasePath, HoodieTableType.valueOf(tableType),
tableName, HoodieAvroPayload.class.getName());
//Create write client to write some records in
// Create write client to write some records in
HoodieWriteConfig cfg = getHoodieWriteConfig(dfsBasePath);
HoodieWriteConfig localConfig = getHoodieWriteConfig(tablePath);
@@ -105,9 +103,8 @@ public class TestMultiFS extends HoodieClientTestHarness {
assertEquals("Should contain 100 records", readRecords.count(), records.size());
// Write to local
HoodieTableMetaClient
.initTableType(jsc.hadoopConfiguration(), tablePath, HoodieTableType.valueOf(tableType), tableName,
HoodieAvroPayload.class.getName());
HoodieTableMetaClient.initTableType(jsc.hadoopConfiguration(), tablePath, HoodieTableType.valueOf(tableType),
tableName, HoodieAvroPayload.class.getName());
String writeCommitTime = localWriteClient.startCommit();
logger.info("Starting write commit " + writeCommitTime);
@@ -120,8 +117,8 @@ public class TestMultiFS extends HoodieClientTestHarness {
fs = FSUtils.getFs(tablePath, HoodieTestUtils.getDefaultHadoopConf());
metaClient = new HoodieTableMetaClient(fs.getConf(), tablePath);
timeline = new HoodieActiveTimeline(metaClient).getCommitTimeline();
Dataset<Row> localReadRecords = HoodieClientTestUtils
.readCommit(tablePath, sqlContext, timeline, writeCommitTime);
Dataset<Row> localReadRecords =
HoodieClientTestUtils.readCommit(tablePath, sqlContext, timeline, writeCommitTime);
assertEquals("Should contain 100 records", localReadRecords.count(), localRecords.size());
}
}

View File

@@ -28,14 +28,14 @@ import org.mockito.Mockito;
public class TestWriteStatus {
@Test
// NOTE(review): the method header is listed twice because this diff view shows the old and the reformatted
// header line back to back; they differ only in whitespace.
public void testFailureFraction() throws IOException {
public void testFailureFraction() throws IOException {
// Marks 1000 failures (mock records, one shared Throwable) on a WriteStatus built with (true, 0.1) and checks
// that only part of them is retained: more than 0 and fewer than 150 failed records, with hasErrors() true.
// NOTE(review): presumably 0.1 is the fraction of failed records WriteStatus keeps — confirm in WriteStatus.
WriteStatus status = new WriteStatus(true, 0.1);
Throwable t = new Exception("some error in writing");
for (int i = 0; i < 1000; i++) {
status.markFailure(Mockito.mock(HoodieRecord.class), t, null);
}
assertTrue(status.getFailedRecords().size() > 0);
assertTrue(status.getFailedRecords().size() < 150); //150 instead of 100, to prevent flaky test
assertTrue(status.getFailedRecords().size() < 150); // 150 instead of 100, to prevent flaky test
assertTrue(status.hasErrors());
}

View File

@@ -117,8 +117,7 @@ public class HoodieClientTestUtils {
// Builds the SparkConf used by tests: app name set to appName, Kryo as spark.serializer, master local[8];
// the conf is then passed through HoodieReadClient.addHoodieSupport and that result is returned.
// NOTE(review): the .set(...)/.setMaster(...) part of the chain is listed twice because this diff view shows
// the old two-line wrapping followed by the reformatted single-line version.
public static SparkConf getSparkConfForTest(String appName) {
SparkConf sparkConf = new SparkConf().setAppName(appName)
.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
.setMaster("local[8]");
.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer").setMaster("local[8]");
return HoodieReadClient.addHoodieSupport(sparkConf);
}
@@ -126,8 +125,8 @@ public class HoodieClientTestUtils {
List<HoodieInstant> commitsToReturn) throws IOException {
HashMap<String, String> fileIdToFullPath = new HashMap<>();
for (HoodieInstant commit : commitsToReturn) {
HoodieCommitMetadata metadata = HoodieCommitMetadata.fromBytes(commitTimeline.getInstantDetails(commit).get(),
HoodieCommitMetadata.class);
HoodieCommitMetadata metadata =
HoodieCommitMetadata.fromBytes(commitTimeline.getInstantDetails(commit).get(), HoodieCommitMetadata.class);
fileIdToFullPath.putAll(metadata.getFileIdAndFullPaths(basePath));
}
return fileIdToFullPath;
@@ -140,8 +139,8 @@ public class HoodieClientTestUtils {
new HoodieException("No commit exists at " + commitTime);
}
try {
HashMap<String, String> paths = getLatestFileIDsToFullPath(basePath, commitTimeline,
Arrays.asList(commitInstant));
HashMap<String, String> paths =
getLatestFileIDsToFullPath(basePath, commitTimeline, Arrays.asList(commitInstant));
log.info("Path :" + paths.values());
return sqlContext.read().parquet(paths.values().toArray(new String[paths.size()]))
.filter(String.format("%s ='%s'", HoodieRecord.COMMIT_TIME_METADATA_FIELD, commitTime));
@@ -155,8 +154,8 @@ public class HoodieClientTestUtils {
*/
public static Dataset<Row> readSince(String basePath, SQLContext sqlContext, HoodieTimeline commitTimeline,
String lastCommitTime) {
List<HoodieInstant> commitsToReturn = commitTimeline.findInstantsAfter(lastCommitTime, Integer.MAX_VALUE)
.getInstants().collect(Collectors.toList());
List<HoodieInstant> commitsToReturn =
commitTimeline.findInstantsAfter(lastCommitTime, Integer.MAX_VALUE).getInstants().collect(Collectors.toList());
try {
// Go over the commit metadata, and obtain the new files that need to be read.
HashMap<String, String> fileIdToFullPath = getLatestFileIDsToFullPath(basePath, commitTimeline, commitsToReturn);
@@ -170,17 +169,14 @@ public class HoodieClientTestUtils {
/**
* Reads the paths under a hoodie dataset out as a DataFrame
*/
public static Dataset<Row> read(JavaSparkContext jsc, String basePath, SQLContext
sqlContext,
FileSystem
fs, String...
paths) {
public static Dataset<Row> read(JavaSparkContext jsc, String basePath, SQLContext sqlContext, FileSystem fs,
String... paths) {
List<String> filteredPaths = new ArrayList<>();
try {
HoodieTableMetaClient metaClient = new HoodieTableMetaClient(fs.getConf(), basePath, true);
for (String path : paths) {
ReadOptimizedView fileSystemView = new HoodieTableFileSystemView(
metaClient, metaClient.getCommitsTimeline().filterCompletedInstants(), fs.globStatus(new Path(path)));
ReadOptimizedView fileSystemView = new HoodieTableFileSystemView(metaClient,
metaClient.getCommitsTimeline().filterCompletedInstants(), fs.globStatus(new Path(path)));
List<HoodieDataFile> latestFiles = fileSystemView.getLatestDataFiles().collect(Collectors.toList());
for (HoodieDataFile file : latestFiles) {
filteredPaths.add(file.getPath());
@@ -192,29 +188,20 @@ public class HoodieClientTestUtils {
}
}
public static String writeParquetFile(String basePath,
String partitionPath,
String filename,
List<HoodieRecord> records,
Schema schema,
BloomFilter filter,
boolean createCommitTime) throws IOException {
public static String writeParquetFile(String basePath, String partitionPath, String filename,
List<HoodieRecord> records, Schema schema, BloomFilter filter, boolean createCommitTime) throws IOException {
if (filter == null) {
filter = new BloomFilter(10000, 0.0000001);
}
HoodieAvroWriteSupport writeSupport = new HoodieAvroWriteSupport(new AvroSchemaConverter().convert(schema), schema,
filter);
HoodieAvroWriteSupport writeSupport =
new HoodieAvroWriteSupport(new AvroSchemaConverter().convert(schema), schema, filter);
String commitTime = FSUtils.getCommitTime(filename);
HoodieParquetConfig config = new HoodieParquetConfig(writeSupport, CompressionCodecName.GZIP,
ParquetWriter.DEFAULT_BLOCK_SIZE, ParquetWriter.DEFAULT_PAGE_SIZE, 120 * 1024 * 1024,
HoodieTestUtils.getDefaultHadoopConf(),
Double.valueOf(HoodieStorageConfig.DEFAULT_STREAM_COMPRESSION_RATIO));
HoodieParquetWriter writer = new HoodieParquetWriter(
commitTime,
new Path(basePath + "/" + partitionPath + "/" + filename),
config,
schema);
HoodieTestUtils.getDefaultHadoopConf(), Double.valueOf(HoodieStorageConfig.DEFAULT_STREAM_COMPRESSION_RATIO));
HoodieParquetWriter writer =
new HoodieParquetWriter(commitTime, new Path(basePath + "/" + partitionPath + "/" + filename), config, schema);
int seqId = 1;
for (HoodieRecord record : records) {
GenericRecord avroRecord = (GenericRecord) record.getData().getInsertValue(schema).get();
@@ -232,18 +219,14 @@ public class HoodieClientTestUtils {
return filename;
}
public static String writeParquetFile(String basePath,
String partitionPath,
List<HoodieRecord> records,
Schema schema,
BloomFilter filter,
boolean createCommitTime) throws IOException, InterruptedException {
public static String writeParquetFile(String basePath, String partitionPath, List<HoodieRecord> records,
Schema schema, BloomFilter filter, boolean createCommitTime) throws IOException, InterruptedException {
Thread.sleep(1000);
String commitTime = HoodieTestUtils.makeNewCommitTime();
String fileId = UUID.randomUUID().toString();
String filename = FSUtils.makeDataFileName(commitTime, "1-0-1", fileId);
HoodieTestUtils.createCommitFiles(basePath, commitTime);
return HoodieClientTestUtils
.writeParquetFile(basePath, partitionPath, filename, records, schema, filter, createCommitTime);
return HoodieClientTestUtils.writeParquetFile(basePath, partitionPath, filename, records, schema, filter,
createCommitTime);
}
}

View File

@@ -69,21 +69,14 @@ public class HoodieTestDataGenerator {
public static final String DEFAULT_SECOND_PARTITION_PATH = "2015/03/16";
public static final String DEFAULT_THIRD_PARTITION_PATH = "2015/03/17";
public static final String[] DEFAULT_PARTITION_PATHS = {
DEFAULT_FIRST_PARTITION_PATH,
DEFAULT_SECOND_PARTITION_PATH,
DEFAULT_THIRD_PARTITION_PATH
};
public static final String[] DEFAULT_PARTITION_PATHS =
{DEFAULT_FIRST_PARTITION_PATH, DEFAULT_SECOND_PARTITION_PATH, DEFAULT_THIRD_PARTITION_PATH};
public static final int DEFAULT_PARTITION_DEPTH = 3;
public static String TRIP_EXAMPLE_SCHEMA = "{\"type\": \"record\"," + "\"name\": \"triprec\"," + "\"fields\": [ "
+ "{\"name\": \"timestamp\",\"type\": \"double\"},"
+ "{\"name\": \"_row_key\", \"type\": \"string\"},"
+ "{\"name\": \"rider\", \"type\": \"string\"},"
+ "{\"name\": \"driver\", \"type\": \"string\"},"
+ "{\"name\": \"begin_lat\", \"type\": \"double\"},"
+ "{\"name\": \"begin_lon\", \"type\": \"double\"},"
+ "{\"name\": \"end_lat\", \"type\": \"double\"},"
+ "{\"name\": \"end_lon\", \"type\": \"double\"},"
+ "{\"name\": \"timestamp\",\"type\": \"double\"}," + "{\"name\": \"_row_key\", \"type\": \"string\"},"
+ "{\"name\": \"rider\", \"type\": \"string\"}," + "{\"name\": \"driver\", \"type\": \"string\"},"
+ "{\"name\": \"begin_lat\", \"type\": \"double\"}," + "{\"name\": \"begin_lon\", \"type\": \"double\"},"
+ "{\"name\": \"end_lat\", \"type\": \"double\"}," + "{\"name\": \"end_lon\", \"type\": \"double\"},"
+ "{\"name\":\"fare\",\"type\": \"double\"}]}";
public static Schema avroSchema = new Schema.Parser().parse(TRIP_EXAMPLE_SCHEMA);
public static Schema avroSchemaWithMetadataFields = HoodieAvroUtils.addMetadataFields(avroSchema);
@@ -174,8 +167,8 @@ public class HoodieTestDataGenerator {
public static void createCompactionAuxiliaryMetadata(String basePath, HoodieInstant instant,
Configuration configuration) throws IOException {
Path commitFile = new Path(
basePath + "/" + HoodieTableMetaClient.AUXILIARYFOLDER_NAME + "/" + instant.getFileName());
Path commitFile =
new Path(basePath + "/" + HoodieTableMetaClient.AUXILIARYFOLDER_NAME + "/" + instant.getFileName());
FileSystem fs = FSUtils.getFs(basePath, configuration);
FSDataOutputStream os = fs.create(commitFile, true);
HoodieCompactionPlan workload = new HoodieCompactionPlan();
@@ -189,8 +182,8 @@ public class HoodieTestDataGenerator {
public static void createSavepointFile(String basePath, String commitTime, Configuration configuration)
throws IOException {
Path commitFile = new Path(basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME
+ "/" + HoodieTimeline.makeSavePointFileName(commitTime));
Path commitFile = new Path(basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/"
+ HoodieTimeline.makeSavePointFileName(commitTime));
FileSystem fs = FSUtils.getFs(basePath, configuration);
FSDataOutputStream os = fs.create(commitFile, true);
HoodieCommitMetadata commitMetadata = new HoodieCommitMetadata();
@@ -212,7 +205,7 @@ public class HoodieTestDataGenerator {
/**
* Generates new inserts, uniformly across the partition paths above. It also updates the list of existing keys.
*/
public Stream<HoodieRecord> generateInsertsStream(String commitTime, Integer n) {
public Stream<HoodieRecord> generateInsertsStream(String commitTime, Integer n) {
int currSize = getNumExistingKeys();
return IntStream.range(0, n).boxed().map(i -> {
@@ -233,16 +226,15 @@ public class HoodieTestDataGenerator {
public List<HoodieRecord> generateSameKeyInserts(String commitTime, List<HoodieRecord> origin) throws IOException {
List<HoodieRecord> copy = new ArrayList<>();
for (HoodieRecord r: origin) {
for (HoodieRecord r : origin) {
HoodieKey key = r.getKey();
HoodieRecord record = new HoodieRecord(key, generateRandomValue(key, commitTime));
HoodieRecord record = new HoodieRecord(key, generateRandomValue(key, commitTime));
copy.add(record);
}
return copy;
}
public List<HoodieRecord> generateInsertsWithHoodieAvroPayload(String commitTime, int limit) throws
IOException {
public List<HoodieRecord> generateInsertsWithHoodieAvroPayload(String commitTime, int limit) throws IOException {
List<HoodieRecord> inserts = new ArrayList<>();
int currSize = getNumExistingKeys();
for (int i = 0; i < limit; i++) {
@@ -290,8 +282,8 @@ public class HoodieTestDataGenerator {
}
public HoodieRecord generateDeleteRecord(HoodieKey key) throws IOException {
TestRawTripPayload payload = new TestRawTripPayload(Option.empty(), key.getRecordKey(), key.getPartitionPath(),
null, true);
TestRawTripPayload payload =
new TestRawTripPayload(Option.empty(), key.getRecordKey(), key.getPartitionPath(), null, true);
return new HoodieRecord(key, payload);
}

View File

@@ -70,8 +70,7 @@ public class TestBoundedInMemoryExecutor extends HoodieClientTestHarness {
}
@Override
protected void finish() {
}
protected void finish() {}
@Override
protected Integer getResult() {
@@ -79,11 +78,10 @@ public class TestBoundedInMemoryExecutor extends HoodieClientTestHarness {
}
};
SparkBoundedInMemoryExecutor<HoodieRecord,
Tuple2<HoodieRecord, Option<IndexedRecord>>, Integer> executor = null;
SparkBoundedInMemoryExecutor<HoodieRecord, Tuple2<HoodieRecord, Option<IndexedRecord>>, Integer> executor = null;
try {
executor = new SparkBoundedInMemoryExecutor(hoodieWriteConfig,
hoodieRecords.iterator(), consumer, getTransformFunction(HoodieTestDataGenerator.avroSchema));
executor = new SparkBoundedInMemoryExecutor(hoodieWriteConfig, hoodieRecords.iterator(), consumer,
getTransformFunction(HoodieTestDataGenerator.avroSchema));
int result = executor.execute();
// It should buffer and write 100 records
Assert.assertEquals(result, 100);

View File

@@ -80,18 +80,17 @@ public class TestBoundedInMemoryQueue extends HoodieClientTestHarness {
final BoundedInMemoryQueue<HoodieRecord, HoodieInsertValueGenResult<HoodieRecord>> queue =
new BoundedInMemoryQueue(FileIOUtils.KB, getTransformFunction(HoodieTestDataGenerator.avroSchema));
// Produce
Future<Boolean> resFuture =
executorService.submit(() -> {
new IteratorBasedQueueProducer<>(hoodieRecords.iterator()).produce(queue);
queue.close();
return true;
});
Future<Boolean> resFuture = executorService.submit(() -> {
new IteratorBasedQueueProducer<>(hoodieRecords.iterator()).produce(queue);
queue.close();
return true;
});
final Iterator<HoodieRecord> originalRecordIterator = hoodieRecords.iterator();
int recordsRead = 0;
while (queue.iterator().hasNext()) {
final HoodieRecord originalRecord = originalRecordIterator.next();
final Option<IndexedRecord> originalInsertValue = originalRecord.getData()
.getInsertValue(HoodieTestDataGenerator.avroSchema);
final Option<IndexedRecord> originalInsertValue =
originalRecord.getData().getInsertValue(HoodieTestDataGenerator.avroSchema);
final HoodieInsertValueGenResult<HoodieRecord> payload = queue.iterator().next();
// Ensure that record ordering is guaranteed.
Assert.assertEquals(originalRecord, payload.record);
@@ -176,10 +175,10 @@ public class TestBoundedInMemoryQueue extends HoodieClientTestHarness {
});
// Used to ensure that consumer sees the records generated by a single producer in FIFO order
Map<Integer, Integer> lastSeenMap = IntStream.range(0, numProducers).boxed()
.collect(Collectors.toMap(Function.identity(), x -> -1));
Map<Integer, Integer> countMap = IntStream.range(0, numProducers).boxed()
.collect(Collectors.toMap(Function.identity(), x -> 0));
Map<Integer, Integer> lastSeenMap =
IntStream.range(0, numProducers).boxed().collect(Collectors.toMap(Function.identity(), x -> -1));
Map<Integer, Integer> countMap =
IntStream.range(0, numProducers).boxed().collect(Collectors.toMap(Function.identity(), x -> 0));
// Read recs and ensure we have covered all producer recs.
while (queue.iterator().hasNext()) {
@@ -198,7 +197,7 @@ public class TestBoundedInMemoryQueue extends HoodieClientTestHarness {
Assert.assertEquals(Integer.valueOf(numRecords), countMap.get(i));
}
//Ensure Close future is done
// Ensure Close future is done
closeFuture.get();
}
@@ -210,15 +209,13 @@ public class TestBoundedInMemoryQueue extends HoodieClientTestHarness {
final List<HoodieRecord> hoodieRecords = dataGen.generateInserts(commitTime, numRecords);
// maximum number of records to keep in memory.
final int recordLimit = 5;
final SizeEstimator<HoodieInsertValueGenResult<HoodieRecord>> sizeEstimator =
new DefaultSizeEstimator<>();
HoodieInsertValueGenResult<HoodieRecord> payload = getTransformFunction(HoodieTestDataGenerator.avroSchema)
.apply(hoodieRecords.get(0));
final SizeEstimator<HoodieInsertValueGenResult<HoodieRecord>> sizeEstimator = new DefaultSizeEstimator<>();
HoodieInsertValueGenResult<HoodieRecord> payload =
getTransformFunction(HoodieTestDataGenerator.avroSchema).apply(hoodieRecords.get(0));
final long objSize = sizeEstimator.sizeEstimate(payload);
final long memoryLimitInBytes = recordLimit * objSize;
final BoundedInMemoryQueue<HoodieRecord, HoodieInsertValueGenResult<HoodieRecord>> queue =
new BoundedInMemoryQueue(memoryLimitInBytes,
getTransformFunction(HoodieTestDataGenerator.avroSchema));
new BoundedInMemoryQueue(memoryLimitInBytes, getTransformFunction(HoodieTestDataGenerator.avroSchema));
// Produce
Future<Boolean> resFuture = executorService.submit(() -> {
@@ -259,11 +256,10 @@ public class TestBoundedInMemoryQueue extends HoodieClientTestHarness {
public void testException() throws Exception {
final int numRecords = 256;
final List<HoodieRecord> hoodieRecords = dataGen.generateInserts(commitTime, numRecords);
final SizeEstimator<Tuple2<HoodieRecord, Option<IndexedRecord>>> sizeEstimator =
new DefaultSizeEstimator<>();
final SizeEstimator<Tuple2<HoodieRecord, Option<IndexedRecord>>> sizeEstimator = new DefaultSizeEstimator<>();
// queue memory limit
HoodieInsertValueGenResult<HoodieRecord> payload = getTransformFunction(HoodieTestDataGenerator.avroSchema)
.apply(hoodieRecords.get(0));
HoodieInsertValueGenResult<HoodieRecord> payload =
getTransformFunction(HoodieTestDataGenerator.avroSchema).apply(hoodieRecords.get(0));
final long objSize = sizeEstimator.sizeEstimate(new Tuple2<>(payload.record, payload.insertValue));
final long memoryLimitInBytes = 4 * objSize;

View File

@@ -87,8 +87,8 @@ public class TestUpdateMapFunction extends HoodieClientTestHarness {
insertRecords
.add(new HoodieRecord(new HoodieKey(rowChange3.getRowKey(), rowChange3.getPartitionPath()), rowChange3));
HoodieCreateHandle createHandle = new HoodieCreateHandle(config, "100", table, rowChange1.getPartitionPath(),
"f1-0", insertRecords.iterator());
HoodieCreateHandle createHandle =
new HoodieCreateHandle(config, "100", table, rowChange1.getPartitionPath(), "f1-0", insertRecords.iterator());
createHandle.write();
WriteStatus insertResult = createHandle.close();
return insertResult;
@@ -111,8 +111,8 @@ public class TestUpdateMapFunction extends HoodieClientTestHarness {
+ "\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12,\"added_field\":1}";
List<HoodieRecord> updateRecords = new ArrayList<>();
TestRawTripPayload rowChange1 = new TestRawTripPayload(recordStr1);
HoodieRecord record1 = new HoodieRecord(new HoodieKey(rowChange1.getRowKey(), rowChange1.getPartitionPath()),
rowChange1);
HoodieRecord record1 =
new HoodieRecord(new HoodieKey(rowChange1.getRowKey(), rowChange1.getPartitionPath()), rowChange1);
record1.unseal();
record1.setCurrentLocation(new HoodieRecordLocation("100", fileId));
record1.seal();

View File

@@ -105,22 +105,18 @@ public class TestHBaseQPSResourceAllocator extends HoodieClientTestHarness {
private HoodieWriteConfig.Builder getConfigBuilder(HoodieHBaseIndexConfig hoodieHBaseIndexConfig) {
return HoodieWriteConfig.newBuilder().withPath(basePath).withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA)
.withParallelism(1, 1).withCompactionConfig(
HoodieCompactionConfig.newBuilder().compactionSmallFileSize(1024 * 1024).withInlineCompaction(false)
.build()).withAutoCommit(false)
.withStorageConfig(HoodieStorageConfig.newBuilder().limitFileSize(1024 * 1024).build())
.forTable("test-trip-table").withIndexConfig(
HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.HBASE)
.withHBaseIndexConfig(hoodieHBaseIndexConfig)
.build());
.withParallelism(1, 1)
.withCompactionConfig(HoodieCompactionConfig.newBuilder().compactionSmallFileSize(1024 * 1024)
.withInlineCompaction(false).build())
.withAutoCommit(false).withStorageConfig(HoodieStorageConfig.newBuilder().limitFileSize(1024 * 1024).build())
.forTable("test-trip-table").withIndexConfig(HoodieIndexConfig.newBuilder()
.withIndexType(HoodieIndex.IndexType.HBASE).withHBaseIndexConfig(hoodieHBaseIndexConfig).build());
}
private HoodieHBaseIndexConfig getConfigWithResourceAllocator(Option<String> resourceAllocatorClass) {
HoodieHBaseIndexConfig.Builder builder =
new HoodieHBaseIndexConfig.Builder()
.hbaseZkPort(Integer.valueOf(hbaseConfig.get("hbase.zookeeper.property.clientPort")))
.hbaseZkQuorum(hbaseConfig.get("hbase.zookeeper.quorum")).hbaseTableName(tableName)
.hbaseIndexGetBatchSize(100);
HoodieHBaseIndexConfig.Builder builder = new HoodieHBaseIndexConfig.Builder()
.hbaseZkPort(Integer.valueOf(hbaseConfig.get("hbase.zookeeper.property.clientPort")))
.hbaseZkQuorum(hbaseConfig.get("hbase.zookeeper.quorum")).hbaseTableName(tableName).hbaseIndexGetBatchSize(100);
if (resourceAllocatorClass.isPresent()) {
builder.withQPSResourceAllocatorType(resourceAllocatorClass.get());
}

View File

@@ -76,8 +76,7 @@ public class TestHbaseIndex extends HoodieClientTestHarness {
private static Configuration hbaseConfig;
private static String tableName = "test_table";
public TestHbaseIndex() throws Exception {
}
public TestHbaseIndex() throws Exception {}
@AfterClass
public static void clean() throws Exception {
@@ -154,9 +153,8 @@ public class TestHbaseIndex extends HoodieClientTestHarness {
javaRDD = index.tagLocation(writeRecords, jsc, hoodieTable);
assertTrue(javaRDD.filter(record -> record.isCurrentLocationKnown()).collect().size() == 200);
assertTrue(javaRDD.map(record -> record.getKey().getRecordKey()).distinct().count() == 200);
assertTrue(javaRDD.filter(
record -> (record.getCurrentLocation() != null && record.getCurrentLocation().getInstantTime()
.equals(newCommitTime))).distinct().count() == 200);
assertTrue(javaRDD.filter(record -> (record.getCurrentLocation() != null
&& record.getCurrentLocation().getInstantTime().equals(newCommitTime))).distinct().count() == 200);
}
}
@@ -188,9 +186,8 @@ public class TestHbaseIndex extends HoodieClientTestHarness {
JavaRDD<HoodieRecord> javaRDD = index.tagLocation(writeRecords, jsc, hoodieTable);
assertTrue(javaRDD.filter(record -> record.isCurrentLocationKnown()).collect().size() == 10);
assertTrue(javaRDD.map(record -> record.getKey().getRecordKey()).distinct().count() == 10);
assertTrue(javaRDD.filter(
record -> (record.getCurrentLocation() != null && record.getCurrentLocation().getInstantTime()
.equals(newCommitTime))).distinct().count() == 10);
assertTrue(javaRDD.filter(record -> (record.getCurrentLocation() != null
&& record.getCurrentLocation().getInstantTime().equals(newCommitTime))).distinct().count() == 10);
}
@Test
@@ -348,11 +345,7 @@ public class TestHbaseIndex extends HoodieClientTestHarness {
HoodieWriteConfig config = getConfig();
HBaseIndex index = new HBaseIndex(config);
final JavaRDD<WriteStatus> writeStatusRDD = jsc.parallelize(
Arrays.asList(
getSampleWriteStatus(1, 2),
getSampleWriteStatus(0, 3),
getSampleWriteStatus(10, 0)),
10);
Arrays.asList(getSampleWriteStatus(1, 2), getSampleWriteStatus(0, 3), getSampleWriteStatus(10, 0)), 10);
final Tuple2<Long, Integer> tuple = index.getHBasePutAccessParallelism(writeStatusRDD);
final int hbasePutAccessParallelism = Integer.parseInt(tuple._2.toString());
final int hbaseNumPuts = Integer.parseInt(tuple._1.toString());
@@ -365,11 +358,8 @@ public class TestHbaseIndex extends HoodieClientTestHarness {
public void testsHBasePutAccessParallelismWithNoInserts() {
HoodieWriteConfig config = getConfig();
HBaseIndex index = new HBaseIndex(config);
final JavaRDD<WriteStatus> writeStatusRDD = jsc.parallelize(
Arrays.asList(
getSampleWriteStatus(0, 2),
getSampleWriteStatus(0, 1)),
10);
final JavaRDD<WriteStatus> writeStatusRDD =
jsc.parallelize(Arrays.asList(getSampleWriteStatus(0, 2), getSampleWriteStatus(0, 1)), 10);
final Tuple2<Long, Integer> tuple = index.getHBasePutAccessParallelism(writeStatusRDD);
final int hbasePutAccessParallelism = Integer.parseInt(tuple._2.toString());
final int hbaseNumPuts = Integer.parseInt(tuple._1.toString());
@@ -411,17 +401,16 @@ public class TestHbaseIndex extends HoodieClientTestHarness {
private HoodieWriteConfig.Builder getConfigBuilder() {
return HoodieWriteConfig.newBuilder().withPath(basePath).withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA)
.withParallelism(1, 1).withCompactionConfig(
HoodieCompactionConfig.newBuilder().compactionSmallFileSize(1024 * 1024).withInlineCompaction(false)
.build()).withAutoCommit(false)
.withStorageConfig(HoodieStorageConfig.newBuilder().limitFileSize(1024 * 1024).build())
.forTable("test-trip-table").withIndexConfig(
HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.HBASE)
.withHBaseIndexConfig(
new HoodieHBaseIndexConfig.Builder()
.hbaseZkPort(Integer.valueOf(hbaseConfig.get("hbase.zookeeper.property.clientPort")))
.hbaseZkQuorum(hbaseConfig.get("hbase.zookeeper.quorum")).hbaseTableName(tableName)
.hbaseIndexGetBatchSize(100).build())
.build());
.withParallelism(1, 1)
.withCompactionConfig(HoodieCompactionConfig.newBuilder().compactionSmallFileSize(1024 * 1024)
.withInlineCompaction(false).build())
.withAutoCommit(false).withStorageConfig(HoodieStorageConfig.newBuilder().limitFileSize(1024 * 1024).build())
.forTable("test-trip-table")
.withIndexConfig(HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.HBASE)
.withHBaseIndexConfig(new HoodieHBaseIndexConfig.Builder()
.hbaseZkPort(Integer.valueOf(hbaseConfig.get("hbase.zookeeper.property.clientPort")))
.hbaseZkQuorum(hbaseConfig.get("hbase.zookeeper.quorum")).hbaseTableName(tableName)
.hbaseIndexGetBatchSize(100).build())
.build());
}
}

View File

@@ -50,9 +50,10 @@ public class TestHoodieIndex extends HoodieClientTestHarness {
HoodieWriteConfig.Builder clientConfigBuilder = HoodieWriteConfig.newBuilder();
HoodieIndexConfig.Builder indexConfigBuilder = HoodieIndexConfig.newBuilder();
// Different types
HoodieWriteConfig config = clientConfigBuilder.withPath(basePath).withIndexConfig(
indexConfigBuilder.withIndexType(HoodieIndex.IndexType.HBASE)
.withHBaseIndexConfig(new HoodieHBaseIndexConfig.Builder().build()).build()).build();
HoodieWriteConfig config = clientConfigBuilder.withPath(basePath)
.withIndexConfig(indexConfigBuilder.withIndexType(HoodieIndex.IndexType.HBASE)
.withHBaseIndexConfig(new HoodieHBaseIndexConfig.Builder().build()).build())
.build();
assertTrue(HoodieIndex.createIndex(config, jsc) instanceof HBaseIndex);
config = clientConfigBuilder.withPath(basePath)
.withIndexConfig(indexConfigBuilder.withIndexType(HoodieIndex.IndexType.INMEMORY).build()).build();

View File

@@ -45,12 +45,9 @@ public class TestBucketizedBloomCheckPartitioner {
assertEquals("f1 should have 4 buckets", 4, assignments.get("f1").size());
assertEquals("f2 should have 4 buckets", 4, assignments.get("f2").size());
assertEquals("f3 should have 2 buckets", 2, assignments.get("f3").size());
assertArrayEquals("f1 spread across 3 partitions", new Integer[]{0, 0, 1, 3},
assignments.get("f1").toArray());
assertArrayEquals("f2 spread across 3 partitions", new Integer[]{1, 2, 2, 0},
assignments.get("f2").toArray());
assertArrayEquals("f3 spread across 2 partitions", new Integer[]{3, 1},
assignments.get("f3").toArray());
assertArrayEquals("f1 spread across 3 partitions", new Integer[] {0, 0, 1, 3}, assignments.get("f1").toArray());
assertArrayEquals("f2 spread across 3 partitions", new Integer[] {1, 2, 2, 0}, assignments.get("f2").toArray());
assertArrayEquals("f3 spread across 2 partitions", new Integer[] {3, 1}, assignments.get("f3").toArray());
}
@Test
@@ -64,9 +61,9 @@ public class TestBucketizedBloomCheckPartitioner {
BucketizedBloomCheckPartitioner partitioner = new BucketizedBloomCheckPartitioner(100, comparisons1, 10);
Map<String, List<Integer>> assignments = partitioner.getFileGroupToPartitions();
assignments.entrySet().stream().forEach(e -> assertEquals(10, e.getValue().size()));
Map<Integer, Long> partitionToNumBuckets = assignments.entrySet().stream()
.flatMap(e -> e.getValue().stream().map(p -> Pair.of(p, e.getKey())))
.collect(Collectors.groupingBy(Pair::getLeft, Collectors.counting()));
Map<Integer, Long> partitionToNumBuckets =
assignments.entrySet().stream().flatMap(e -> e.getValue().stream().map(p -> Pair.of(p, e.getKey())))
.collect(Collectors.groupingBy(Pair::getLeft, Collectors.counting()));
partitionToNumBuckets.entrySet().stream().forEach(e -> assertEquals(1L, e.getValue().longValue()));
}

View File

@@ -75,8 +75,8 @@ public class TestHoodieBloomIndex extends HoodieClientTestHarness {
@Parameterized.Parameters(name = "{index}: Test with rangePruning={0}, treeFiltering ={1}, bucketizedChecking is:{2}")
public static Collection<Object[]> data() {
Object[][] data = new Object[][]{{true, true, true}, {false, true, true}, {true, true, false},
{true, false, true}};
Object[][] data =
new Object[][] {{true, true, true}, {false, true, true}, {true, true, false}, {true, false, true}};
return Arrays.asList(data);
}
@@ -106,12 +106,9 @@ public class TestHoodieBloomIndex extends HoodieClientTestHarness {
private HoodieWriteConfig makeConfig() {
HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath)
.withIndexConfig(HoodieIndexConfig.newBuilder()
.bloomIndexPruneByRanges(rangePruning)
.bloomIndexTreebasedFilter(treeFiltering)
.bloomIndexBucketizedChecking(bucketizedChecking)
.bloomIndexKeysPerBucket(2)
.build())
.withIndexConfig(HoodieIndexConfig.newBuilder().bloomIndexPruneByRanges(rangePruning)
.bloomIndexTreebasedFilter(treeFiltering).bloomIndexBucketizedChecking(bucketizedChecking)
.bloomIndexKeysPerBucket(2).build())
.build();
return config;
}
@@ -130,36 +127,31 @@ public class TestHoodieBloomIndex extends HoodieClientTestHarness {
new File(basePath + "/2016/04/01").mkdirs();
new File(basePath + "/2015/03/12").mkdirs();
TestRawTripPayload rowChange1 = new TestRawTripPayload(
"{\"_row_key\":\"000\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}");
HoodieRecord record1 = new HoodieRecord(new HoodieKey(rowChange1.getRowKey(), rowChange1.getPartitionPath()),
rowChange1);
TestRawTripPayload rowChange2 = new TestRawTripPayload(
"{\"_row_key\":\"001\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}");
HoodieRecord record2 = new HoodieRecord(new HoodieKey(rowChange2.getRowKey(), rowChange2.getPartitionPath()),
rowChange2);
TestRawTripPayload rowChange3 = new TestRawTripPayload(
"{\"_row_key\":\"002\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}");
HoodieRecord record3 = new HoodieRecord(new HoodieKey(rowChange3.getRowKey(), rowChange3.getPartitionPath()),
rowChange3);
TestRawTripPayload rowChange4 = new TestRawTripPayload(
"{\"_row_key\":\"003\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}");
HoodieRecord record4 = new HoodieRecord(new HoodieKey(rowChange4.getRowKey(), rowChange4.getPartitionPath()),
rowChange4);
TestRawTripPayload rowChange1 =
new TestRawTripPayload("{\"_row_key\":\"000\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}");
HoodieRecord record1 =
new HoodieRecord(new HoodieKey(rowChange1.getRowKey(), rowChange1.getPartitionPath()), rowChange1);
TestRawTripPayload rowChange2 =
new TestRawTripPayload("{\"_row_key\":\"001\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}");
HoodieRecord record2 =
new HoodieRecord(new HoodieKey(rowChange2.getRowKey(), rowChange2.getPartitionPath()), rowChange2);
TestRawTripPayload rowChange3 =
new TestRawTripPayload("{\"_row_key\":\"002\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}");
HoodieRecord record3 =
new HoodieRecord(new HoodieKey(rowChange3.getRowKey(), rowChange3.getPartitionPath()), rowChange3);
TestRawTripPayload rowChange4 =
new TestRawTripPayload("{\"_row_key\":\"003\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}");
HoodieRecord record4 =
new HoodieRecord(new HoodieKey(rowChange4.getRowKey(), rowChange4.getPartitionPath()), rowChange4);
HoodieClientTestUtils
.writeParquetFile(basePath, "2016/04/01", "2_0_20160401010101.parquet",
Lists.newArrayList(), schema, null, false);
HoodieClientTestUtils
.writeParquetFile(basePath, "2015/03/12", "1_0_20150312101010.parquet",
Lists.newArrayList(), schema, null, false);
HoodieClientTestUtils
.writeParquetFile(basePath, "2015/03/12", "3_0_20150312101010.parquet",
Arrays.asList(record1), schema, null, false);
HoodieClientTestUtils
.writeParquetFile(basePath, "2015/03/12", "4_0_20150312101010.parquet",
Arrays.asList(record2, record3, record4), schema, null,
false);
HoodieClientTestUtils.writeParquetFile(basePath, "2016/04/01", "2_0_20160401010101.parquet", Lists.newArrayList(),
schema, null, false);
HoodieClientTestUtils.writeParquetFile(basePath, "2015/03/12", "1_0_20150312101010.parquet", Lists.newArrayList(),
schema, null, false);
HoodieClientTestUtils.writeParquetFile(basePath, "2015/03/12", "3_0_20150312101010.parquet", Arrays.asList(record1),
schema, null, false);
HoodieClientTestUtils.writeParquetFile(basePath, "2015/03/12", "4_0_20150312101010.parquet",
Arrays.asList(record2, record3, record4), schema, null, false);
List<String> partitions = Arrays.asList("2016/01/21", "2016/04/01", "2015/03/12");
metaClient = HoodieTableMetaClient.reload(metaClient);
@@ -188,11 +180,11 @@ public class TestHoodieBloomIndex extends HoodieClientTestHarness {
// no longer sorted, but should have same files.
List<Tuple2<String, BloomIndexFileInfo>> expected = Arrays.asList(
new Tuple2<>("2016/04/01", new BloomIndexFileInfo("2")),
new Tuple2<>("2015/03/12", new BloomIndexFileInfo("1")),
new Tuple2<>("2015/03/12", new BloomIndexFileInfo("3", "000", "000")),
new Tuple2<>("2015/03/12", new BloomIndexFileInfo("4", "001", "003")));
List<Tuple2<String, BloomIndexFileInfo>> expected =
Arrays.asList(new Tuple2<>("2016/04/01", new BloomIndexFileInfo("2")),
new Tuple2<>("2015/03/12", new BloomIndexFileInfo("1")),
new Tuple2<>("2015/03/12", new BloomIndexFileInfo("3", "000", "000")),
new Tuple2<>("2015/03/12", new BloomIndexFileInfo("4", "001", "003")));
assertEquals(expected, filesList);
}
}
@@ -203,20 +195,21 @@ public class TestHoodieBloomIndex extends HoodieClientTestHarness {
HoodieBloomIndex index = new HoodieBloomIndex(config);
final Map<String, List<BloomIndexFileInfo>> partitionToFileIndexInfo = new HashMap<>();
partitionToFileIndexInfo.put("2017/10/22", Arrays.asList(new BloomIndexFileInfo("f1"),
new BloomIndexFileInfo("f2", "000", "000"), new BloomIndexFileInfo("f3", "001", "003"),
new BloomIndexFileInfo("f4", "002", "007"), new BloomIndexFileInfo("f5", "009", "010")));
partitionToFileIndexInfo.put("2017/10/22",
Arrays.asList(new BloomIndexFileInfo("f1"), new BloomIndexFileInfo("f2", "000", "000"),
new BloomIndexFileInfo("f3", "001", "003"), new BloomIndexFileInfo("f4", "002", "007"),
new BloomIndexFileInfo("f5", "009", "010")));
JavaPairRDD<String, String> partitionRecordKeyPairRDD = jsc.parallelize(Arrays.asList(
new Tuple2<>("2017/10/22", "003"), new Tuple2<>("2017/10/22", "002"), new Tuple2<>("2017/10/22", "005"),
new Tuple2<>("2017/10/22", "004"))).mapToPair(t -> t);
JavaPairRDD<String, String> partitionRecordKeyPairRDD =
jsc.parallelize(Arrays.asList(new Tuple2<>("2017/10/22", "003"), new Tuple2<>("2017/10/22", "002"),
new Tuple2<>("2017/10/22", "005"), new Tuple2<>("2017/10/22", "004"))).mapToPair(t -> t);
List<Tuple2<String, HoodieKey>> comparisonKeyList = index.explodeRecordRDDWithFileComparisons(
partitionToFileIndexInfo, partitionRecordKeyPairRDD).collect();
List<Tuple2<String, HoodieKey>> comparisonKeyList =
index.explodeRecordRDDWithFileComparisons(partitionToFileIndexInfo, partitionRecordKeyPairRDD).collect();
assertEquals(10, comparisonKeyList.size());
Map<String, List<String>> recordKeyToFileComps = comparisonKeyList.stream().collect(Collectors.groupingBy(
t -> t._2.getRecordKey(), Collectors.mapping(t -> t._1, Collectors.toList())));
Map<String, List<String>> recordKeyToFileComps = comparisonKeyList.stream()
.collect(Collectors.groupingBy(t -> t._2.getRecordKey(), Collectors.mapping(t -> t._1, Collectors.toList())));
assertEquals(4, recordKeyToFileComps.size());
assertEquals(new HashSet<>(Arrays.asList("f1", "f3", "f4")), new HashSet<>(recordKeyToFileComps.get("002")));
@@ -238,25 +231,24 @@ public class TestHoodieBloomIndex extends HoodieClientTestHarness {
String recordStr4 = "{\"_row_key\":\"4eb5b87c-1fej-4edd-87b4-6ec96dc405a0\","
+ "\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":32}";
TestRawTripPayload rowChange1 = new TestRawTripPayload(recordStr1);
HoodieRecord record1 = new HoodieRecord(new HoodieKey(rowChange1.getRowKey(), rowChange1.getPartitionPath()),
rowChange1);
HoodieRecord record1 =
new HoodieRecord(new HoodieKey(rowChange1.getRowKey(), rowChange1.getPartitionPath()), rowChange1);
TestRawTripPayload rowChange2 = new TestRawTripPayload(recordStr2);
HoodieRecord record2 = new HoodieRecord(new HoodieKey(rowChange2.getRowKey(), rowChange2.getPartitionPath()),
rowChange2);
HoodieRecord record2 =
new HoodieRecord(new HoodieKey(rowChange2.getRowKey(), rowChange2.getPartitionPath()), rowChange2);
TestRawTripPayload rowChange3 = new TestRawTripPayload(recordStr3);
HoodieRecord record3 = new HoodieRecord(new HoodieKey(rowChange3.getRowKey(), rowChange3.getPartitionPath()),
rowChange3);
HoodieRecord record3 =
new HoodieRecord(new HoodieKey(rowChange3.getRowKey(), rowChange3.getPartitionPath()), rowChange3);
TestRawTripPayload rowChange4 = new TestRawTripPayload(recordStr4);
HoodieRecord record4 = new HoodieRecord(new HoodieKey(rowChange4.getRowKey(), rowChange4.getPartitionPath()),
rowChange4);
HoodieRecord record4 =
new HoodieRecord(new HoodieKey(rowChange4.getRowKey(), rowChange4.getPartitionPath()), rowChange4);
// We write record1, record2 to a parquet file, but the bloom filter contains (record1,
// record2, record3).
BloomFilter filter = new BloomFilter(10000, 0.0000001);
filter.add(record3.getRecordKey());
String filename = HoodieClientTestUtils
.writeParquetFile(basePath, "2016/01/31",
Arrays.asList(record1, record2), schema, filter, true);
String filename = HoodieClientTestUtils.writeParquetFile(basePath, "2016/01/31", Arrays.asList(record1, record2),
schema, filter, true);
// The bloom filter contains 3 records
assertTrue(filter.mightContain(record1.getRecordKey()));
@@ -265,19 +257,19 @@ public class TestHoodieBloomIndex extends HoodieClientTestHarness {
assertFalse(filter.mightContain(record4.getRecordKey()));
// Compare with file
List<String> uuids = Arrays.asList(record1.getRecordKey(), record2.getRecordKey(), record3.getRecordKey(),
record4.getRecordKey());
List<String> uuids =
Arrays.asList(record1.getRecordKey(), record2.getRecordKey(), record3.getRecordKey(), record4.getRecordKey());
List<String> results = HoodieKeyLookupHandle.checkCandidatesAgainstFile(jsc.hadoopConfiguration(), uuids,
new Path(basePath + "/2016/01/31/" + filename));
assertEquals(results.size(), 2);
assertTrue(results.get(0).equals("1eb5b87a-1feh-4edd-87b4-6ec96dc405a0") || results.get(1).equals(
"1eb5b87a-1feh-4edd-87b4-6ec96dc405a0"));
assertTrue(results.get(0).equals("2eb5b87b-1feu-4edd-87b4-6ec96dc405a0") || results.get(1).equals(
"2eb5b87b-1feu-4edd-87b4-6ec96dc405a0"));
assertTrue(results.get(0).equals("1eb5b87a-1feh-4edd-87b4-6ec96dc405a0")
|| results.get(1).equals("1eb5b87a-1feh-4edd-87b4-6ec96dc405a0"));
assertTrue(results.get(0).equals("2eb5b87b-1feu-4edd-87b4-6ec96dc405a0")
|| results.get(1).equals("2eb5b87b-1feu-4edd-87b4-6ec96dc405a0"));
// TODO(vc): Need more coverage on actual filenames
//assertTrue(results.get(0)._2().equals(filename));
//assertTrue(results.get(1)._2().equals(filename));
// assertTrue(results.get(0)._2().equals(filename));
// assertTrue(results.get(1)._2().equals(filename));
}
@Test
@@ -306,27 +298,23 @@ public class TestHoodieBloomIndex extends HoodieClientTestHarness {
String rowKey1 = UUID.randomUUID().toString();
String rowKey2 = UUID.randomUUID().toString();
String rowKey3 = UUID.randomUUID().toString();
String recordStr1 = "{\"_row_key\":\"" + rowKey1 + "\","
+ "\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}";
String recordStr2 = "{\"_row_key\":\"" + rowKey2 + "\","
+ "\"time\":\"2016-01-31T03:20:41.415Z\",\"number\":100}";
String recordStr3 = "{\"_row_key\":\"" + rowKey3 + "\","
+ "\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":15}";
String recordStr1 = "{\"_row_key\":\"" + rowKey1 + "\"," + "\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}";
String recordStr2 = "{\"_row_key\":\"" + rowKey2 + "\"," + "\"time\":\"2016-01-31T03:20:41.415Z\",\"number\":100}";
String recordStr3 = "{\"_row_key\":\"" + rowKey3 + "\"," + "\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":15}";
// place same row key under a different partition.
String recordStr4 = "{\"_row_key\":\"" + rowKey1 + "\","
+ "\"time\":\"2015-01-31T03:16:41.415Z\",\"number\":32}";
String recordStr4 = "{\"_row_key\":\"" + rowKey1 + "\"," + "\"time\":\"2015-01-31T03:16:41.415Z\",\"number\":32}";
TestRawTripPayload rowChange1 = new TestRawTripPayload(recordStr1);
HoodieRecord record1 = new HoodieRecord(new HoodieKey(rowChange1.getRowKey(), rowChange1.getPartitionPath()),
rowChange1);
HoodieRecord record1 =
new HoodieRecord(new HoodieKey(rowChange1.getRowKey(), rowChange1.getPartitionPath()), rowChange1);
TestRawTripPayload rowChange2 = new TestRawTripPayload(recordStr2);
HoodieRecord record2 = new HoodieRecord(new HoodieKey(rowChange2.getRowKey(), rowChange2.getPartitionPath()),
rowChange2);
HoodieRecord record2 =
new HoodieRecord(new HoodieKey(rowChange2.getRowKey(), rowChange2.getPartitionPath()), rowChange2);
TestRawTripPayload rowChange3 = new TestRawTripPayload(recordStr3);
HoodieRecord record3 = new HoodieRecord(new HoodieKey(rowChange3.getRowKey(), rowChange3.getPartitionPath()),
rowChange3);
HoodieRecord record3 =
new HoodieRecord(new HoodieKey(rowChange3.getRowKey(), rowChange3.getPartitionPath()), rowChange3);
TestRawTripPayload rowChange4 = new TestRawTripPayload(recordStr4);
HoodieRecord record4 = new HoodieRecord(new HoodieKey(rowChange4.getRowKey(), rowChange4.getPartitionPath()),
rowChange4);
HoodieRecord record4 =
new HoodieRecord(new HoodieKey(rowChange4.getRowKey(), rowChange4.getPartitionPath()), rowChange4);
JavaRDD<HoodieRecord> recordRDD = jsc.parallelize(Arrays.asList(record1, record2, record3, record4));
// Also create the metadata and config
@@ -406,8 +394,8 @@ public class TestHoodieBloomIndex extends HoodieClientTestHarness {
// Let's tag
HoodieBloomIndex bloomIndex = new HoodieBloomIndex(config);
JavaPairRDD<HoodieKey, Option<Pair<String, String>>> taggedRecordRDD = bloomIndex
.fetchRecordLocation(keysRDD, jsc, table);
JavaPairRDD<HoodieKey, Option<Pair<String, String>>> taggedRecordRDD =
bloomIndex.fetchRecordLocation(keysRDD, jsc, table);
// Should not find any files
for (Tuple2<HoodieKey, Option<Pair<String, String>>> record : taggedRecordRDD.collect()) {
@@ -456,17 +444,16 @@ public class TestHoodieBloomIndex extends HoodieClientTestHarness {
// We write record1 to a parquet file, using a bloom filter having both records
TestRawTripPayload rowChange1 = new TestRawTripPayload(recordStr1);
HoodieRecord record1 = new HoodieRecord(new HoodieKey(rowChange1.getRowKey(), rowChange1.getPartitionPath()),
rowChange1);
HoodieRecord record1 =
new HoodieRecord(new HoodieKey(rowChange1.getRowKey(), rowChange1.getPartitionPath()), rowChange1);
TestRawTripPayload rowChange2 = new TestRawTripPayload(recordStr2);
HoodieRecord record2 = new HoodieRecord(new HoodieKey(rowChange2.getRowKey(), rowChange2.getPartitionPath()),
rowChange2);
HoodieRecord record2 =
new HoodieRecord(new HoodieKey(rowChange2.getRowKey(), rowChange2.getPartitionPath()), rowChange2);
BloomFilter filter = new BloomFilter(10000, 0.0000001);
filter.add(record2.getRecordKey());
String filename = HoodieClientTestUtils
.writeParquetFile(basePath, "2016/01/31",
Arrays.asList(record1), schema, filter, true);
String filename =
HoodieClientTestUtils.writeParquetFile(basePath, "2016/01/31", Arrays.asList(record1), schema, filter, true);
assertTrue(filter.mightContain(record1.getRecordKey()));
assertTrue(filter.mightContain(record2.getRecordKey()));

View File

@@ -58,8 +58,7 @@ public class TestHoodieGlobalBloomIndex extends HoodieClientTestHarness {
private String schemaStr;
private Schema schema;
public TestHoodieGlobalBloomIndex() throws Exception {
}
public TestHoodieGlobalBloomIndex() throws Exception {}
@Before
public void setUp() throws Exception {
@@ -94,35 +93,31 @@ public class TestHoodieGlobalBloomIndex extends HoodieClientTestHarness {
new File(basePath + "/2015/03/12").mkdirs();
new File(basePath + "/2015/03/12/" + HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE).createNewFile();
TestRawTripPayload rowChange1 = new TestRawTripPayload(
"{\"_row_key\":\"000\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}");
HoodieRecord record1 = new HoodieRecord(new HoodieKey(rowChange1.getRowKey(), rowChange1.getPartitionPath()),
rowChange1);
TestRawTripPayload rowChange2 = new TestRawTripPayload(
"{\"_row_key\":\"001\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}");
HoodieRecord record2 = new HoodieRecord(new HoodieKey(rowChange2.getRowKey(), rowChange2.getPartitionPath()),
rowChange2);
TestRawTripPayload rowChange3 = new TestRawTripPayload(
"{\"_row_key\":\"002\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}");
HoodieRecord record3 = new HoodieRecord(new HoodieKey(rowChange3.getRowKey(), rowChange3.getPartitionPath()),
rowChange3);
TestRawTripPayload rowChange4 = new TestRawTripPayload(
"{\"_row_key\":\"003\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}");
HoodieRecord record4 = new HoodieRecord(new HoodieKey(rowChange4.getRowKey(), rowChange4.getPartitionPath()),
rowChange4);
TestRawTripPayload rowChange1 =
new TestRawTripPayload("{\"_row_key\":\"000\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}");
HoodieRecord record1 =
new HoodieRecord(new HoodieKey(rowChange1.getRowKey(), rowChange1.getPartitionPath()), rowChange1);
TestRawTripPayload rowChange2 =
new TestRawTripPayload("{\"_row_key\":\"001\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}");
HoodieRecord record2 =
new HoodieRecord(new HoodieKey(rowChange2.getRowKey(), rowChange2.getPartitionPath()), rowChange2);
TestRawTripPayload rowChange3 =
new TestRawTripPayload("{\"_row_key\":\"002\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}");
HoodieRecord record3 =
new HoodieRecord(new HoodieKey(rowChange3.getRowKey(), rowChange3.getPartitionPath()), rowChange3);
TestRawTripPayload rowChange4 =
new TestRawTripPayload("{\"_row_key\":\"003\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}");
HoodieRecord record4 =
new HoodieRecord(new HoodieKey(rowChange4.getRowKey(), rowChange4.getPartitionPath()), rowChange4);
HoodieClientTestUtils
.writeParquetFile(basePath, "2016/04/01", "2_0_20160401010101.parquet",
Lists.newArrayList(), schema, null, false);
HoodieClientTestUtils
.writeParquetFile(basePath, "2015/03/12", "1_0_20150312101010.parquet",
Lists.newArrayList(), schema, null, false);
HoodieClientTestUtils
.writeParquetFile(basePath, "2015/03/12", "3_0_20150312101010.parquet",
Arrays.asList(record1), schema, null, false);
HoodieClientTestUtils
.writeParquetFile(basePath, "2015/03/12", "4_0_20150312101010.parquet",
Arrays.asList(record2, record3, record4), schema, null, false);
HoodieClientTestUtils.writeParquetFile(basePath, "2016/04/01", "2_0_20160401010101.parquet", Lists.newArrayList(),
schema, null, false);
HoodieClientTestUtils.writeParquetFile(basePath, "2015/03/12", "1_0_20150312101010.parquet", Lists.newArrayList(),
schema, null, false);
HoodieClientTestUtils.writeParquetFile(basePath, "2015/03/12", "3_0_20150312101010.parquet", Arrays.asList(record1),
schema, null, false);
HoodieClientTestUtils.writeParquetFile(basePath, "2015/03/12", "4_0_20150312101010.parquet",
Arrays.asList(record2, record3, record4), schema, null, false);
// intentionally missed the partition "2015/03/12" to see if the GlobalBloomIndex can pick it up
List<String> partitions = Arrays.asList("2016/01/21", "2016/04/01");
@@ -154,10 +149,8 @@ public class TestHoodieGlobalBloomIndex extends HoodieClientTestHarness {
Map<String, BloomIndexFileInfo> expected = new HashMap<>();
expected.put("2016/04/01/2", new BloomIndexFileInfo("2"));
expected.put("2015/03/12/1", new BloomIndexFileInfo("1"));
expected.put("2015/03/12/3",
new BloomIndexFileInfo("3", "000", "000"));
expected.put("2015/03/12/4",
new BloomIndexFileInfo("4", "001", "003"));
expected.put("2015/03/12/3", new BloomIndexFileInfo("3", "000", "000"));
expected.put("2015/03/12/4", new BloomIndexFileInfo("4", "001", "003"));
assertEquals(expected, filesMap);
}
@@ -172,28 +165,24 @@ public class TestHoodieGlobalBloomIndex extends HoodieClientTestHarness {
partitionToFileIndexInfo.put("2017/10/22", Arrays.asList(new BloomIndexFileInfo("f1"),
new BloomIndexFileInfo("f2", "000", "000"), new BloomIndexFileInfo("f3", "001", "003")));
partitionToFileIndexInfo.put("2017/10/23", Arrays.asList(
new BloomIndexFileInfo("f4", "002", "007"), new BloomIndexFileInfo("f5", "009", "010")));
partitionToFileIndexInfo.put("2017/10/23",
Arrays.asList(new BloomIndexFileInfo("f4", "002", "007"), new BloomIndexFileInfo("f5", "009", "010")));
// the partition of the key of the incoming records will be ignored
JavaPairRDD<String, String> partitionRecordKeyPairRDD = jsc.parallelize(Arrays.asList(
new Tuple2<>("2017/10/21", "003"), new Tuple2<>("2017/10/22", "002"), new Tuple2<>("2017/10/22", "005"),
new Tuple2<>("2017/10/23", "004"))).mapToPair(t -> t);
JavaPairRDD<String, String> partitionRecordKeyPairRDD =
jsc.parallelize(Arrays.asList(new Tuple2<>("2017/10/21", "003"), new Tuple2<>("2017/10/22", "002"),
new Tuple2<>("2017/10/22", "005"), new Tuple2<>("2017/10/23", "004"))).mapToPair(t -> t);
List<Tuple2<String, HoodieKey>> comparisonKeyList =
index.explodeRecordRDDWithFileComparisons(partitionToFileIndexInfo, partitionRecordKeyPairRDD).collect();
/* expecting:
f4, HoodieKey { recordKey=003 partitionPath=2017/10/23}
f1, HoodieKey { recordKey=003 partitionPath=2017/10/22}
f3, HoodieKey { recordKey=003 partitionPath=2017/10/22}
f4, HoodieKey { recordKey=002 partitionPath=2017/10/23}
f1, HoodieKey { recordKey=002 partitionPath=2017/10/22}
f3, HoodieKey { recordKey=002 partitionPath=2017/10/22}
f4, HoodieKey { recordKey=005 partitionPath=2017/10/23}
f1, HoodieKey { recordKey=005 partitionPath=2017/10/22}
f4, HoodieKey { recordKey=004 partitionPath=2017/10/23}
f1, HoodieKey { recordKey=004 partitionPath=2017/10/22}
/*
* expecting: f4, HoodieKey { recordKey=003 partitionPath=2017/10/23} f1, HoodieKey { recordKey=003
* partitionPath=2017/10/22} f3, HoodieKey { recordKey=003 partitionPath=2017/10/22} f4, HoodieKey { recordKey=002
* partitionPath=2017/10/23} f1, HoodieKey { recordKey=002 partitionPath=2017/10/22} f3, HoodieKey { recordKey=002
* partitionPath=2017/10/22} f4, HoodieKey { recordKey=005 partitionPath=2017/10/23} f1, HoodieKey { recordKey=005
* partitionPath=2017/10/22} f4, HoodieKey { recordKey=004 partitionPath=2017/10/23} f1, HoodieKey { recordKey=004
* partitionPath=2017/10/22}
*/
assertEquals(10, comparisonKeyList.size());
@@ -225,31 +214,31 @@ public class TestHoodieGlobalBloomIndex extends HoodieClientTestHarness {
new File(basePath + "/2015/03/12").mkdirs();
new File(basePath + "/2015/03/12/" + HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE).createNewFile();
TestRawTripPayload rowChange1 = new TestRawTripPayload(
"{\"_row_key\":\"000\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}");
HoodieRecord record1 = new HoodieRecord(new HoodieKey(rowChange1.getRowKey(), rowChange1.getPartitionPath()),
rowChange1);
TestRawTripPayload rowChange2 = new TestRawTripPayload(
"{\"_row_key\":\"001\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}");
HoodieRecord record2 = new HoodieRecord(new HoodieKey(rowChange2.getRowKey(), rowChange2.getPartitionPath()),
rowChange2);
TestRawTripPayload rowChange3 = new TestRawTripPayload(
"{\"_row_key\":\"002\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}");
HoodieRecord record3 = new HoodieRecord(new HoodieKey(rowChange3.getRowKey(), rowChange3.getPartitionPath()),
rowChange3);
TestRawTripPayload rowChange1 =
new TestRawTripPayload("{\"_row_key\":\"000\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}");
HoodieRecord record1 =
new HoodieRecord(new HoodieKey(rowChange1.getRowKey(), rowChange1.getPartitionPath()), rowChange1);
TestRawTripPayload rowChange2 =
new TestRawTripPayload("{\"_row_key\":\"001\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}");
HoodieRecord record2 =
new HoodieRecord(new HoodieKey(rowChange2.getRowKey(), rowChange2.getPartitionPath()), rowChange2);
TestRawTripPayload rowChange3 =
new TestRawTripPayload("{\"_row_key\":\"002\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}");
HoodieRecord record3 =
new HoodieRecord(new HoodieKey(rowChange3.getRowKey(), rowChange3.getPartitionPath()), rowChange3);
// this record will be saved in table and will be tagged to the incoming record5
TestRawTripPayload rowChange4 = new TestRawTripPayload(
"{\"_row_key\":\"003\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}");
HoodieRecord record4 = new HoodieRecord(new HoodieKey(rowChange4.getRowKey(), rowChange4.getPartitionPath()),
rowChange4);
TestRawTripPayload rowChange4 =
new TestRawTripPayload("{\"_row_key\":\"003\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}");
HoodieRecord record4 =
new HoodieRecord(new HoodieKey(rowChange4.getRowKey(), rowChange4.getPartitionPath()), rowChange4);
// this has the same record key as record4 but different time so different partition, but globalbloomIndex should
// tag the original partition of the saved record4
TestRawTripPayload rowChange5 = new TestRawTripPayload(
"{\"_row_key\":\"003\",\"time\":\"2016-02-31T03:16:41.415Z\",\"number\":12}");
HoodieRecord record5 = new HoodieRecord(new HoodieKey(rowChange5.getRowKey(), rowChange5.getPartitionPath()),
rowChange4);
TestRawTripPayload rowChange5 =
new TestRawTripPayload("{\"_row_key\":\"003\",\"time\":\"2016-02-31T03:16:41.415Z\",\"number\":12}");
HoodieRecord record5 =
new HoodieRecord(new HoodieKey(rowChange5.getRowKey(), rowChange5.getPartitionPath()), rowChange4);
JavaRDD<HoodieRecord> recordRDD = jsc.parallelize(Arrays.asList(record1, record2, record3, record5));

View File

@@ -75,9 +75,9 @@ public class TestHoodieCommitArchiveLog extends HoodieClientTestHarness {
@Test
public void testArchiveEmptyDataset() throws IOException {
HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder().withPath(basePath)
.withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA).withParallelism(2, 2)
.forTable("test-trip-table").build();
HoodieWriteConfig cfg =
HoodieWriteConfig.newBuilder().withPath(basePath).withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA)
.withParallelism(2, 2).forTable("test-trip-table").build();
metaClient = HoodieTableMetaClient.reload(metaClient);
HoodieCommitArchiveLog archiveLog = new HoodieCommitArchiveLog(cfg, metaClient);
boolean result = archiveLog.archiveIfRequired(jsc);
@@ -88,8 +88,7 @@ public class TestHoodieCommitArchiveLog extends HoodieClientTestHarness {
public void testArchiveDatasetWithArchival() throws IOException {
HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder().withPath(basePath)
.withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA).withParallelism(2, 2)
.withCompactionConfig(
HoodieCompactionConfig.newBuilder().retainCommits(1).archiveCommitsWith(2, 4).build())
.withCompactionConfig(HoodieCompactionConfig.newBuilder().retainCommits(1).archiveCommitsWith(2, 4).build())
.forTable("test-trip-table").build();
HoodieTestUtils.init(hadoopConf, basePath);
// Requested Compaction
@@ -149,7 +148,7 @@ public class TestHoodieCommitArchiveLog extends HoodieClientTestHarness {
HoodieTestUtils.createCleanFiles(basePath, "105", dfs.getConf());
HoodieTestUtils.createInflightCleanFiles(basePath, dfs.getConf(), "106", "107");
//reload the timeline and get all the commmits before archive
// reload the timeline and get all the commits before archive
timeline = metaClient.getActiveTimeline().reload().getAllCommitsTimeline().filterCompletedInstants();
List<HoodieInstant> originalCommits = timeline.getInstants().collect(Collectors.toList());
@@ -163,49 +162,47 @@ public class TestHoodieCommitArchiveLog extends HoodieClientTestHarness {
assertTrue(archiveLog.archiveIfRequired(jsc));
//reload the timeline and remove the remaining commits
// reload the timeline and remove the remaining commits
timeline = metaClient.getActiveTimeline().reload().getAllCommitsTimeline().filterCompletedInstants();
originalCommits.removeAll(timeline.getInstants().collect(Collectors.toList()));
// Check compaction instants
List<HoodieInstant> instants =
HoodieTableMetaClient.scanHoodieInstantsFromFileSystem(metaClient.getFs(),
new Path(metaClient.getMetaAuxiliaryPath()),
HoodieActiveTimeline.VALID_EXTENSIONS_IN_ACTIVE_TIMELINE);
List<HoodieInstant> instants = HoodieTableMetaClient.scanHoodieInstantsFromFileSystem(metaClient.getFs(),
new Path(metaClient.getMetaAuxiliaryPath()), HoodieActiveTimeline.VALID_EXTENSIONS_IN_ACTIVE_TIMELINE);
assertEquals("Should delete all compaction instants < 104", 4, instants.size());
assertFalse("Requested Compaction must be absent for 100", instants.contains(
new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "100")));
assertFalse("Inflight Compaction must be absent for 100", instants.contains(
new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, "100")));
assertFalse("Requested Compaction must be absent for 101", instants.contains(
new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "101")));
assertFalse("Inflight Compaction must be absent for 101", instants.contains(
new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, "101")));
assertFalse("Requested Compaction must be absent for 102", instants.contains(
new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "102")));
assertFalse("Inflight Compaction must be absent for 102", instants.contains(
new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, "102")));
assertFalse("Requested Compaction must be absent for 103", instants.contains(
new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "103")));
assertFalse("Inflight Compaction must be absent for 103", instants.contains(
new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, "103")));
assertTrue("Requested Compaction must be present for 104", instants.contains(
new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "104")));
assertTrue("Inflight Compaction must be present for 104", instants.contains(
new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, "104")));
assertTrue("Requested Compaction must be present for 105", instants.contains(
new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "105")));
assertTrue("Inflight Compaction must be present for 105", instants.contains(
new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, "105")));
assertFalse("Requested Compaction must be absent for 100",
instants.contains(new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "100")));
assertFalse("Inflight Compaction must be absent for 100",
instants.contains(new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, "100")));
assertFalse("Requested Compaction must be absent for 101",
instants.contains(new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "101")));
assertFalse("Inflight Compaction must be absent for 101",
instants.contains(new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, "101")));
assertFalse("Requested Compaction must be absent for 102",
instants.contains(new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "102")));
assertFalse("Inflight Compaction must be absent for 102",
instants.contains(new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, "102")));
assertFalse("Requested Compaction must be absent for 103",
instants.contains(new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "103")));
assertFalse("Inflight Compaction must be absent for 103",
instants.contains(new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, "103")));
assertTrue("Requested Compaction must be present for 104",
instants.contains(new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "104")));
assertTrue("Inflight Compaction must be present for 104",
instants.contains(new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, "104")));
assertTrue("Requested Compaction must be present for 105",
instants.contains(new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "105")));
assertTrue("Inflight Compaction must be present for 105",
instants.contains(new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, "105")));
//read the file
Reader reader = HoodieLogFormat.newReader(dfs,
new HoodieLogFile(new Path(basePath + "/.hoodie/.commits_.archive.1_1-0-1")),
HoodieArchivedMetaEntry.getClassSchema());
// read the file
Reader reader =
HoodieLogFormat.newReader(dfs, new HoodieLogFile(new Path(basePath + "/.hoodie/.commits_.archive.1_1-0-1")),
HoodieArchivedMetaEntry.getClassSchema());
int archivedRecordsCount = 0;
List<IndexedRecord> readRecords = new ArrayList<>();
//read the avro blocks and validate the number of records written in each avro block
// read the avro blocks and validate the number of records written in each avro block
while (reader.hasNext()) {
HoodieAvroDataBlock blk = (HoodieAvroDataBlock) reader.next();
List<IndexedRecord> records = blk.getRecords();
@@ -215,7 +212,7 @@ public class TestHoodieCommitArchiveLog extends HoodieClientTestHarness {
}
assertEquals("Total archived records and total read records are the same count", 8, archivedRecordsCount);
//make sure the archived commits are the same as the (originalcommits - commitsleft)
// make sure the archived commits are the same as the (originalcommits - commitsleft)
List<String> readCommits = readRecords.stream().map(r -> (GenericRecord) r).map(r -> {
return r.get("commitTime").toString();
}).collect(Collectors.toList());
@@ -232,9 +229,9 @@ public class TestHoodieCommitArchiveLog extends HoodieClientTestHarness {
@Test
public void testArchiveDatasetWithNoArchival() throws IOException {
HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder().withPath(basePath)
.withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA).withParallelism(2, 2)
.forTable("test-trip-table").withCompactionConfig(
HoodieCompactionConfig.newBuilder().retainCommits(1).archiveCommitsWith(2, 5).build()).build();
.withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA).withParallelism(2, 2).forTable("test-trip-table")
.withCompactionConfig(HoodieCompactionConfig.newBuilder().retainCommits(1).archiveCommitsWith(2, 5).build())
.build();
metaClient = HoodieTableMetaClient.reload(metaClient);
HoodieCommitArchiveLog archiveLog = new HoodieCommitArchiveLog(cfg, metaClient);
// Requested Compaction
@@ -273,35 +270,33 @@ public class TestHoodieCommitArchiveLog extends HoodieClientTestHarness {
timeline = metaClient.getActiveTimeline().reload().getCommitsTimeline().filterCompletedInstants();
assertEquals("Should not archive commits when maxCommitsToKeep is 5", 4, timeline.countInstants());
List<HoodieInstant> instants =
HoodieTableMetaClient.scanHoodieInstantsFromFileSystem(metaClient.getFs(),
new Path(metaClient.getMetaAuxiliaryPath()),
HoodieActiveTimeline.VALID_EXTENSIONS_IN_ACTIVE_TIMELINE);
List<HoodieInstant> instants = HoodieTableMetaClient.scanHoodieInstantsFromFileSystem(metaClient.getFs(),
new Path(metaClient.getMetaAuxiliaryPath()), HoodieActiveTimeline.VALID_EXTENSIONS_IN_ACTIVE_TIMELINE);
assertEquals("Should not delete any aux compaction files when maxCommitsToKeep is 5", 8, instants.size());
assertTrue("Requested Compaction must be present for 100", instants.contains(
new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "100")));
assertTrue("Inflight Compaction must be present for 100", instants.contains(
new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, "100")));
assertTrue("Requested Compaction must be present for 101", instants.contains(
new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "101")));
assertTrue("Inflight Compaction must be present for 101", instants.contains(
new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, "101")));
assertTrue("Requested Compaction must be present for 102", instants.contains(
new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "102")));
assertTrue("Inflight Compaction must be present for 102", instants.contains(
new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, "102")));
assertTrue("Requested Compaction must be present for 103", instants.contains(
new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "103")));
assertTrue("Inflight Compaction must be present for 103", instants.contains(
new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, "103")));
assertTrue("Requested Compaction must be present for 100",
instants.contains(new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "100")));
assertTrue("Inflight Compaction must be present for 100",
instants.contains(new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, "100")));
assertTrue("Requested Compaction must be present for 101",
instants.contains(new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "101")));
assertTrue("Inflight Compaction must be present for 101",
instants.contains(new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, "101")));
assertTrue("Requested Compaction must be present for 102",
instants.contains(new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "102")));
assertTrue("Inflight Compaction must be present for 102",
instants.contains(new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, "102")));
assertTrue("Requested Compaction must be present for 103",
instants.contains(new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "103")));
assertTrue("Inflight Compaction must be present for 103",
instants.contains(new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, "103")));
}
@Test
public void testArchiveCommitSafety() throws IOException {
HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder().withPath(basePath)
.withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA).withParallelism(2, 2)
.forTable("test-trip-table").withCompactionConfig(
HoodieCompactionConfig.newBuilder().retainCommits(1).archiveCommitsWith(2, 5).build()).build();
.withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA).withParallelism(2, 2).forTable("test-trip-table")
.withCompactionConfig(HoodieCompactionConfig.newBuilder().retainCommits(1).archiveCommitsWith(2, 5).build())
.build();
metaClient = HoodieTableMetaClient.reload(metaClient);
HoodieCommitArchiveLog archiveLog = new HoodieCommitArchiveLog(cfg, metaClient);
HoodieTestDataGenerator.createCommitFile(basePath, "100", dfs.getConf());
@@ -325,9 +320,9 @@ public class TestHoodieCommitArchiveLog extends HoodieClientTestHarness {
@Test
public void testArchiveCommitSavepointNoHole() throws IOException {
HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder().withPath(basePath)
.withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA).withParallelism(2, 2)
.forTable("test-trip-table").withCompactionConfig(
HoodieCompactionConfig.newBuilder().retainCommits(1).archiveCommitsWith(2, 5).build()).build();
.withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA).withParallelism(2, 2).forTable("test-trip-table")
.withCompactionConfig(HoodieCompactionConfig.newBuilder().retainCommits(1).archiveCommitsWith(2, 5).build())
.build();
metaClient = HoodieTableMetaClient.reload(metaClient);
HoodieCommitArchiveLog archiveLog = new HoodieCommitArchiveLog(cfg, metaClient);
HoodieTestDataGenerator.createCommitFile(basePath, "100", dfs.getConf());
@@ -357,9 +352,9 @@ public class TestHoodieCommitArchiveLog extends HoodieClientTestHarness {
@Test
public void testArchiveCommitCompactionNoHole() throws IOException {
HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder().withPath(basePath)
.withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA).withParallelism(2, 2)
.forTable("test-trip-table").withCompactionConfig(
HoodieCompactionConfig.newBuilder().retainCommits(1).archiveCommitsWith(2, 5).build()).build();
.withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA).withParallelism(2, 2).forTable("test-trip-table")
.withCompactionConfig(HoodieCompactionConfig.newBuilder().retainCommits(1).archiveCommitsWith(2, 5).build())
.build();
metaClient = HoodieTableMetaClient.reload(metaClient);
HoodieCommitArchiveLog archiveLog = new HoodieCommitArchiveLog(cfg, metaClient);
HoodieTestDataGenerator.createCommitFile(basePath, "100", dfs.getConf());
@@ -382,9 +377,8 @@ public class TestHoodieCommitArchiveLog extends HoodieClientTestHarness {
timeline = metaClient.getActiveTimeline().reload().getCommitsAndCompactionTimeline();
assertFalse("Instants before oldest pending compaction can be removed",
timeline.containsInstant(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "100")));
assertEquals(
"Since we have a pending compaction at 101, we should never archive any commit "
+ "after 101 (we only " + "archive 100)", 7, timeline.countInstants());
assertEquals("Since we have a pending compaction at 101, we should never archive any commit "
+ "after 101 (we only " + "archive 100)", 7, timeline.countInstants());
assertTrue("Requested Compaction must still be present",
timeline.containsInstant(new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "101")));
assertTrue("Instants greater than oldest pending compaction must be present",

View File

@@ -86,9 +86,10 @@ public class TestHoodieCompactor extends HoodieClientTestHarness {
private HoodieWriteConfig.Builder getConfigBuilder() {
return HoodieWriteConfig.newBuilder().withPath(basePath).withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA)
.withParallelism(2, 2).withCompactionConfig(
HoodieCompactionConfig.newBuilder().compactionSmallFileSize(1024 * 1024).withInlineCompaction(false)
.build()).withStorageConfig(HoodieStorageConfig.newBuilder().limitFileSize(1024 * 1024).build())
.withParallelism(2, 2)
.withCompactionConfig(HoodieCompactionConfig.newBuilder().compactionSmallFileSize(1024 * 1024)
.withInlineCompaction(false).build())
.withStorageConfig(HoodieStorageConfig.newBuilder().limitFileSize(1024 * 1024).build())
.withMemoryConfig(HoodieMemoryConfig.newBuilder().withMaxDFSStreamBufferSize(1 * 1024 * 1024).build())
.forTable("test-trip-table")
.withIndexConfig(HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build());
@@ -146,16 +147,15 @@ public class TestHoodieCompactor extends HoodieClientTestHarness {
updatedRecords = index.tagLocation(updatedRecordsRDD, jsc, table).collect();
// Write them to corresponding avro logfiles
HoodieTestUtils
.writeRecordsToLogFiles(fs, metaClient.getBasePath(), HoodieTestDataGenerator.avroSchemaWithMetadataFields,
updatedRecords);
HoodieTestUtils.writeRecordsToLogFiles(fs, metaClient.getBasePath(),
HoodieTestDataGenerator.avroSchemaWithMetadataFields, updatedRecords);
// Verify that every data file has exactly one log file
metaClient = HoodieTableMetaClient.reload(metaClient);
table = HoodieTable.getHoodieTable(metaClient, config, jsc);
for (String partitionPath : dataGen.getPartitionPaths()) {
List<FileSlice> groupedLogFiles = table.getRTFileSystemView().getLatestFileSlices(partitionPath)
.collect(Collectors.toList());
List<FileSlice> groupedLogFiles =
table.getRTFileSystemView().getLatestFileSlices(partitionPath).collect(Collectors.toList());
for (FileSlice fileSlice : groupedLogFiles) {
assertEquals("There should be 1 log file written for every data file", 1, fileSlice.getLogFiles().count());
}
@@ -173,8 +173,7 @@ public class TestHoodieCompactor extends HoodieClientTestHarness {
for (String partitionPath : dataGen.getPartitionPaths()) {
List<WriteStatus> writeStatuses = result.collect();
assertTrue(writeStatuses.stream()
.filter(writeStatus -> writeStatus.getStat().getPartitionPath().contentEquals(partitionPath))
.count() > 0);
.filter(writeStatus -> writeStatus.getStat().getPartitionPath().contentEquals(partitionPath)).count() > 0);
}
}
}

View File

@@ -78,7 +78,7 @@ public class TestHoodieMergeHandle extends HoodieClientTestHarness {
public void testUpsertsForMultipleRecordsInSameFile() throws Exception {
// Create records in a single partition
String partitionPath = HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[0];
dataGen = new HoodieTestDataGenerator(new String[]{partitionPath});
dataGen = new HoodieTestDataGenerator(new String[] {partitionPath});
// Build a write config with bulkinsertparallelism set
HoodieWriteConfig cfg = getConfigBuilder().build();
@@ -86,9 +86,8 @@ public class TestHoodieMergeHandle extends HoodieClientTestHarness {
FileSystem fs = FSUtils.getFs(basePath, jsc.hadoopConfiguration());
/**
* Write 1 (only inserts)
* This will do a bulk insert of 44 records of which there are 2 records repeated 21 times each.
* id1 (21 records), id2 (21 records), id3, id4
* Write 1 (only inserts) This will do a bulk insert of 44 records of which there are 2 records repeated 21 times
* each. id1 (21 records), id2 (21 records), id3, id4
*/
String newCommitTime = "001";
client.startCommitWithTime(newCommitTime);
@@ -113,16 +112,13 @@ public class TestHoodieMergeHandle extends HoodieClientTestHarness {
assertEquals("Expecting a single commit.", 1,
timeline.findInstantsAfter("000", Integer.MAX_VALUE).countInstants());
Assert.assertEquals("Latest commit should be 001", newCommitTime, timeline.lastInstant().get().getTimestamp());
assertEquals("Must contain 44 records",
records.size(),
assertEquals("Must contain 44 records", records.size(),
HoodieClientTestUtils.readCommit(basePath, sqlContext, timeline, newCommitTime).count());
/**
* Write 2 (insert)
* This will do a bulk insert of 1 record with the same row_key as record1 in the previous insert - id1.
* At this point, we will have 2 files with the row_keys as shown here -
* File 1 - id1 (21 records), id2 (21 records), id3, id4
* File 2 - id1
* Write 2 (insert) This will do a bulk insert of 1 record with the same row_key as record1 in the previous insert
* - id1. At this point, we will have 2 files with the row_keys as shown here - File 1 - id1 (21 records), id2 (21
* records), id3, id4 File 2 - id1
*/
newCommitTime = "002";
client.startCommitWithTime(newCommitTime);
@@ -138,19 +134,15 @@ public class TestHoodieMergeHandle extends HoodieClientTestHarness {
// verify that there are 2 commits
metaClient = HoodieTableMetaClient.reload(metaClient);
timeline = new HoodieActiveTimeline(metaClient).getCommitTimeline();
assertEquals("Expecting two commits.", 2, timeline.findInstantsAfter("000", Integer.MAX_VALUE)
.countInstants());
assertEquals("Expecting two commits.", 2, timeline.findInstantsAfter("000", Integer.MAX_VALUE).countInstants());
Assert.assertEquals("Latest commit should be 002", newCommitTime, timeline.lastInstant().get().getTimestamp());
Dataset<Row> dataSet = getRecords();
assertEquals("Must contain 45 records", 45, dataSet.count());
/**
* Write 3 (insert)
* This will bulk insert 2 new completely new records.
* At this point, we will have 2 files with the row_keys as shown here -
* File 1 - id1 (21 records), id2 (21 records), id3, id4
* File 2 - id1
* File 3 - id5, id6
* Write 3 (insert) This will bulk insert 2 new completely new records. At this point, we will have 2 files with
* the row_keys as shown here - File 1 - id1 (21 records), id2 (21 records), id3, id4 File 2 - id1 File 3 - id5,
* id6
*/
newCommitTime = "003";
client.startCommitWithTime(newCommitTime);
@@ -162,19 +154,16 @@ public class TestHoodieMergeHandle extends HoodieClientTestHarness {
// verify that there are now 3 commits
metaClient = HoodieTableMetaClient.reload(metaClient);
timeline = new HoodieActiveTimeline(metaClient).getCommitTimeline();
assertEquals("Expecting three commits.", 3, timeline.findInstantsAfter("000", Integer.MAX_VALUE)
.countInstants());
assertEquals("Expecting three commits.", 3, timeline.findInstantsAfter("000", Integer.MAX_VALUE).countInstants());
Assert.assertEquals("Latest commit should be 003", newCommitTime, timeline.lastInstant().get().getTimestamp());
dataSet = getRecords();
assertEquals("Must contain 47 records", 47, dataSet.count());
/**
* Write 4 (updates)
* This will generate 2 upsert records with id1 and id2. The rider and driver names in the update records
* will be rider-004 and driver-004.
* After the upsert is complete, all the records with id1 in File 1 and File 2 must be updated, all the records
* with id2 in File 2 must also be updated.
* Also, none of the other records in File 1, File 2 and File 3 must be updated.
* Write 4 (updates) This will generate 2 upsert records with id1 and id2. The rider and driver names in the
* update records will be rider-004 and driver-004. After the upsert is complete, all the records with id1 in File
* 1 and File 2 must be updated, all the records with id2 in File 2 must also be updated. Also, none of the other
* records in File 1, File 2 and File 3 must be updated.
*/
newCommitTime = "004";
client.startCommitWithTime(newCommitTime);
@@ -195,8 +184,7 @@ public class TestHoodieMergeHandle extends HoodieClientTestHarness {
// verify there are now 4 commits
timeline = new HoodieActiveTimeline(metaClient).getCommitTimeline();
assertEquals("Expecting four commits.", 4, timeline.findInstantsAfter("000", Integer.MAX_VALUE)
.countInstants());
assertEquals("Expecting four commits.", 4, timeline.findInstantsAfter("000", Integer.MAX_VALUE).countInstants());
Assert.assertEquals("Latest commit should be 004", timeline.lastInstant().get().getTimestamp(), newCommitTime);
// Check the entire dataset has 47 records still
@@ -247,15 +235,16 @@ public class TestHoodieMergeHandle extends HoodieClientTestHarness {
Assert.assertTrue(statuses.stream()
.filter(status -> status.getStat().getPrevCommit() != HoodieWriteStat.NULL_COMMIT).count() > 0);
// Num writes should be equal to the number of records inserted
Assert.assertEquals((long) statuses.stream()
.map(status -> status.getStat().getNumWrites()).reduce((a, b) -> a + b).get(), 100);
Assert.assertEquals(
(long) statuses.stream().map(status -> status.getStat().getNumWrites()).reduce((a, b) -> a + b).get(), 100);
// Num update writes should be equal to the number of records updated
Assert.assertEquals((long) statuses.stream()
.map(status -> status.getStat().getNumUpdateWrites()).reduce((a, b) -> a + b).get(), 0);
Assert.assertEquals(
(long) statuses.stream().map(status -> status.getStat().getNumUpdateWrites()).reduce((a, b) -> a + b).get(),
0);
// Num update writes should be equal to the number of insert records converted to updates as part of small file
// handling
Assert.assertEquals((long) statuses.stream()
.map(status -> status.getStat().getNumInserts()).reduce((a, b) -> a + b).get(), 100);
Assert.assertEquals(
(long) statuses.stream().map(status -> status.getStat().getNumInserts()).reduce((a, b) -> a + b).get(), 100);
// Update all the 100 records
metaClient = HoodieTableMetaClient.reload(metaClient);
@@ -269,18 +258,20 @@ public class TestHoodieMergeHandle extends HoodieClientTestHarness {
statuses = writeClient.upsert(updatedRecordsRDD, newCommitTime).collect();
// All records should be upserts into existing parquet
Assert.assertEquals(statuses.stream()
.filter(status -> status.getStat().getPrevCommit() == HoodieWriteStat.NULL_COMMIT).count(), 0);
Assert.assertEquals(
statuses.stream().filter(status -> status.getStat().getPrevCommit() == HoodieWriteStat.NULL_COMMIT).count(),
0);
// Num writes should be equal to the number of records inserted
Assert.assertEquals((long) statuses.stream()
.map(status -> status.getStat().getNumWrites()).reduce((a, b) -> a + b).get(), 100);
Assert.assertEquals(
(long) statuses.stream().map(status -> status.getStat().getNumWrites()).reduce((a, b) -> a + b).get(), 100);
// Num update writes should be equal to the number of records updated
Assert.assertEquals((long) statuses.stream()
.map(status -> status.getStat().getNumUpdateWrites()).reduce((a, b) -> a + b).get(), 100);
Assert.assertEquals(
(long) statuses.stream().map(status -> status.getStat().getNumUpdateWrites()).reduce((a, b) -> a + b).get(),
100);
// Num update writes should be equal to the number of insert records converted to updates as part of small file
// handling
Assert.assertEquals((long) statuses.stream()
.map(status -> status.getStat().getNumInserts()).reduce((a, b) -> a + b).get(), 0);
Assert.assertEquals(
(long) statuses.stream().map(status -> status.getStat().getNumInserts()).reduce((a, b) -> a + b).get(), 0);
newCommitTime = "102";
writeClient.startCommitWithTime(newCommitTime);
@@ -294,15 +285,16 @@ public class TestHoodieMergeHandle extends HoodieClientTestHarness {
Assert.assertEquals((long) statuses.stream()
.filter(status -> status.getStat().getPrevCommit() == HoodieWriteStat.NULL_COMMIT).count(), 0);
// Num writes should be equal to the total number of records written
Assert.assertEquals((long) statuses.stream()
.map(status -> status.getStat().getNumWrites()).reduce((a, b) -> a + b).get(), 200);
Assert.assertEquals(
(long) statuses.stream().map(status -> status.getStat().getNumWrites()).reduce((a, b) -> a + b).get(), 200);
// Num update writes should be equal to the number of records updated (including inserts converted as updates)
Assert.assertEquals((long) statuses.stream()
.map(status -> status.getStat().getNumUpdateWrites()).reduce((a, b) -> a + b).get(), 100);
Assert.assertEquals(
(long) statuses.stream().map(status -> status.getStat().getNumUpdateWrites()).reduce((a, b) -> a + b).get(),
100);
// Num update writes should be equal to the number of insert records converted to updates as part of small file
// handling
Assert.assertEquals((long) statuses.stream()
.map(status -> status.getStat().getNumInserts()).reduce((a, b) -> a + b).get(), 100);
Assert.assertEquals(
(long) statuses.stream().map(status -> status.getStat().getNumInserts()).reduce((a, b) -> a + b).get(), 100);
// Verify all records have location set
statuses.forEach(writeStatus -> {
writeStatus.getWrittenRecords().forEach(r -> {
@@ -319,8 +311,7 @@ public class TestHoodieMergeHandle extends HoodieClientTestHarness {
for (int i = 0; i < fullPartitionPaths.length; i++) {
fullPartitionPaths[i] = String.format("%s/%s/*", basePath, dataGen.getPartitionPaths()[i]);
}
Dataset<Row> dataSet = HoodieClientTestUtils.read(jsc, basePath, sqlContext, fs,
fullPartitionPaths);
Dataset<Row> dataSet = HoodieClientTestUtils.read(jsc, basePath, sqlContext, fs, fullPartitionPaths);
return dataSet;
}
@@ -343,8 +334,7 @@ public class TestHoodieMergeHandle extends HoodieClientTestHarness {
.withStorageConfig(HoodieStorageConfig.newBuilder().limitFileSize(1024 * 1024).build())
.forTable("test-trip-table")
.withIndexConfig(HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build())
.withBulkInsertParallelism(2)
.withWriteStatusClass(TestWriteStatus.class);
.withBulkInsertParallelism(2).withWriteStatusClass(TestWriteStatus.class);
}
/**

View File

@@ -1,62 +1,60 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hudi.io.storage;
import static org.junit.Assert.fail;
import java.io.IOException;
import org.apache.avro.generic.IndexedRecord;
import org.apache.hadoop.fs.Path;
import org.apache.hudi.TestHoodieClientBase;
import org.apache.hudi.common.HoodieTestDataGenerator;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.table.HoodieTable;
import org.junit.Assert;
import org.junit.Test;
/**
* Tests for {@link HoodieStorageWriterFactory}.
*
* <p>Exercises {@code getStorageWriter} with two target paths: a {@code .parquet} data file, for which a
* {@link HoodieParquetWriter} is expected, and a log-file style name, for which the call is expected to be
* rejected with an {@link UnsupportedOperationException}.
*
* <p>NOTE(review): {@code basePath}, {@code metaClient}, {@code jsc} and {@code getConfig()} are not declared
* in this class; presumably they come from the {@link TestHoodieClientBase} hierarchy - confirm.
*/
public class TestHoodieStorageWriterFactory extends TestHoodieClientBase {
/**
* Checks the writer type returned for a parquet path and the failure raised for a log-file path.
*
* @throws IOException declared for the {@code getStorageWriter} calls made by this test
*/
@Test
public void testGetStorageWriter() throws IOException {
// parquet file format: the path ends in ".parquet", so a parquet writer is expected.
final String commitTime = "100";
final Path parquetPath = new Path(basePath + "/partition/path/f1_1-0-1_000.parquet");
final HoodieWriteConfig cfg = getConfig();
HoodieTable table = HoodieTable.getHoodieTable(metaClient, cfg, jsc);
HoodieStorageWriter<IndexedRecord> parquetWriter =
HoodieStorageWriterFactory.getStorageWriter(
commitTime, parquetPath, table, cfg, HoodieTestDataGenerator.avroSchema);
Assert.assertTrue(parquetWriter instanceof HoodieParquetWriter);
// other file format exception: a log-file style name must be rejected by the factory.
final Path logPath = new Path(basePath + "/partition/path/f.b51192a8-574b-4a85-b246-bcfec03ac8bf_100.log.2_1-0-1");
try {
// logWriter is never read; the assignment only holds the result of the call under test.
HoodieStorageWriter<IndexedRecord> logWriter =
HoodieStorageWriterFactory.getStorageWriter(
commitTime, logPath, table, cfg, HoodieTestDataGenerator.avroSchema);
// Reached only if no exception was thrown. fail() raises an AssertionError, which is not an
// Exception, so it is not swallowed by the catch block below.
fail("should fail since log storage writer is not supported yet.");
} catch (Exception e) {
// Any Exception lands here; the assertions then require the expected type and message fragment.
Assert.assertTrue(e instanceof UnsupportedOperationException);
Assert.assertTrue(e.getMessage().contains("format not supported yet."));
}
}
}
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hudi.io.storage;
import static org.junit.Assert.fail;
import java.io.IOException;
import org.apache.avro.generic.IndexedRecord;
import org.apache.hadoop.fs.Path;
import org.apache.hudi.TestHoodieClientBase;
import org.apache.hudi.common.HoodieTestDataGenerator;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.table.HoodieTable;
import org.junit.Assert;
import org.junit.Test;
/**
 * Tests for {@link HoodieStorageWriterFactory}.
 *
 * <p>Exercises {@code getStorageWriter} with two target paths: a {@code .parquet} data file, for which a
 * {@link HoodieParquetWriter} is expected, and a log-file style name, for which the call is expected to be
 * rejected with an {@link UnsupportedOperationException}.
 */
public class TestHoodieStorageWriterFactory extends TestHoodieClientBase {

  /**
   * Checks the writer type returned for a parquet path and the failure raised for a log-file path.
   *
   * @throws IOException declared for the {@code getStorageWriter} calls made by this test
   */
  @Test
  public void testGetStorageWriter() throws IOException {
    // parquet file format.
    final String commitTime = "100";
    final Path parquetPath = new Path(basePath + "/partition/path/f1_1-0-1_000.parquet");
    final HoodieWriteConfig cfg = getConfig();
    HoodieTable table = HoodieTable.getHoodieTable(metaClient, cfg, jsc);
    HoodieStorageWriter<IndexedRecord> parquetWriter = HoodieStorageWriterFactory.getStorageWriter(commitTime,
        parquetPath, table, cfg, HoodieTestDataGenerator.avroSchema);
    Assert.assertTrue("A .parquet path should be served by a HoodieParquetWriter",
        parquetWriter instanceof HoodieParquetWriter);
    // NOTE(review): parquetWriter is never closed here; confirm whether HoodieStorageWriter needs an
    // explicit close to release its underlying file handle.

    // other file format exception.
    final Path logPath = new Path(basePath + "/partition/path/f.b51192a8-574b-4a85-b246-bcfec03ac8bf_100.log.2_1-0-1");
    try {
      // The result is intentionally discarded: only the thrown exception matters.
      HoodieStorageWriterFactory.getStorageWriter(commitTime, logPath, table, cfg,
          HoodieTestDataGenerator.avroSchema);
      fail("should fail since log storage writer is not supported yet.");
    } catch (UnsupportedOperationException e) {
      // Catch only the expected type so any other failure surfaces with its own stack trace
      // instead of being reduced to a bare "instanceof" assertion failure.
      final String message = e.getMessage();
      Assert.assertTrue("Unexpected exception message: " + message,
          message != null && message.contains("format not supported yet."));
    }
  }
}

View File

@@ -60,8 +60,8 @@ public class TestHoodieCompactionStrategy {
sizesMap.put(100 * MB, Lists.newArrayList(MB));
sizesMap.put(90 * MB, Lists.newArrayList(1024 * MB));
UnBoundedCompactionStrategy strategy = new UnBoundedCompactionStrategy();
HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder().withPath("/tmp").withCompactionConfig(
HoodieCompactionConfig.newBuilder().withCompactionStrategy(strategy).build()).build();
HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder().withPath("/tmp")
.withCompactionConfig(HoodieCompactionConfig.newBuilder().withCompactionStrategy(strategy).build()).build();
List<HoodieCompactionOperation> operations = createCompactionOperations(writeConfig, sizesMap);
List<HoodieCompactionOperation> returned = strategy.orderAndFilter(writeConfig, operations, new ArrayList<>());
assertEquals("UnBounded should not re-order or filter", operations, returned);
@@ -123,26 +123,21 @@ public class TestHoodieCompactionStrategy {
sizesMap.put(90 * MB, Lists.newArrayList(1024 * MB));
Map<Long, String> keyToPartitionMap = new ImmutableMap.Builder().put(120 * MB, partitionPaths[2])
.put(110 * MB, partitionPaths[2])
.put(100 * MB, partitionPaths[1])
.put(90 * MB, partitionPaths[0])
.build();
.put(110 * MB, partitionPaths[2]).put(100 * MB, partitionPaths[1]).put(90 * MB, partitionPaths[0]).build();
DayBasedCompactionStrategy strategy = new DayBasedCompactionStrategy();
HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder().withPath("/tmp").withCompactionConfig(
HoodieCompactionConfig.newBuilder().withCompactionStrategy(strategy)
.withTargetPartitionsPerDayBasedCompaction(1)
.build()).build();
HoodieWriteConfig writeConfig =
HoodieWriteConfig.newBuilder().withPath("/tmp").withCompactionConfig(HoodieCompactionConfig.newBuilder()
.withCompactionStrategy(strategy).withTargetPartitionsPerDayBasedCompaction(1).build()).build();
List<HoodieCompactionOperation> operations = createCompactionOperations(writeConfig, sizesMap, keyToPartitionMap);
List<HoodieCompactionOperation> returned = strategy.orderAndFilter(writeConfig, operations, new ArrayList<>());
assertTrue("DayBasedCompactionStrategy should have resulted in fewer compactions",
returned.size() < operations.size());
Assert.assertEquals("DayBasedCompactionStrategy should have resulted in fewer compactions",
returned.size(), 2);
Assert.assertEquals("DayBasedCompactionStrategy should have resulted in fewer compactions", returned.size(), 2);
int comparision = strategy.getComparator().compare(returned.get(returned.size() - 1).getPartitionPath(), returned
.get(0).getPartitionPath());
int comparision = strategy.getComparator().compare(returned.get(returned.size() - 1).getPartitionPath(),
returned.get(0).getPartitionPath());
// Either the partition paths are sorted in descending order or they are equal
assertTrue("DayBasedCompactionStrategy should sort partitions in descending order", comparision >= 0);
}
@@ -167,20 +162,14 @@ public class TestHoodieCompactionStrategy {
String currentDayPlus1 = format.format(BoundedPartitionAwareCompactionStrategy.getDateAtOffsetFromToday(1));
String currentDayPlus5 = format.format(BoundedPartitionAwareCompactionStrategy.getDateAtOffsetFromToday(5));
Map<Long, String> keyToPartitionMap = new ImmutableMap.Builder()
.put(120 * MB, currentDay)
.put(110 * MB, currentDayMinus1)
.put(100 * MB, currentDayMinus2)
.put(80 * MB, currentDayMinus3)
.put(90 * MB, currentDayPlus1)
.put(70 * MB, currentDayPlus5)
.build();
Map<Long, String> keyToPartitionMap = new ImmutableMap.Builder().put(120 * MB, currentDay)
.put(110 * MB, currentDayMinus1).put(100 * MB, currentDayMinus2).put(80 * MB, currentDayMinus3)
.put(90 * MB, currentDayPlus1).put(70 * MB, currentDayPlus5).build();
BoundedPartitionAwareCompactionStrategy strategy = new BoundedPartitionAwareCompactionStrategy();
HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder().withPath("/tmp").withCompactionConfig(
HoodieCompactionConfig.newBuilder().withCompactionStrategy(strategy)
.withTargetPartitionsPerDayBasedCompaction(2)
.build()).build();
HoodieWriteConfig writeConfig =
HoodieWriteConfig.newBuilder().withPath("/tmp").withCompactionConfig(HoodieCompactionConfig.newBuilder()
.withCompactionStrategy(strategy).withTargetPartitionsPerDayBasedCompaction(2).build()).build();
List<HoodieCompactionOperation> operations = createCompactionOperations(writeConfig, sizesMap, keyToPartitionMap);
List<HoodieCompactionOperation> returned = strategy.orderAndFilter(writeConfig, operations, new ArrayList<>());
@@ -189,8 +178,8 @@ public class TestHoodieCompactionStrategy {
Assert.assertEquals("BoundedPartitionAwareCompactionStrategy should have resulted in fewer compactions",
returned.size(), 5);
int comparision = strategy.getComparator().compare(returned.get(returned.size() - 1).getPartitionPath(), returned
.get(0).getPartitionPath());
int comparision = strategy.getComparator().compare(returned.get(returned.size() - 1).getPartitionPath(),
returned.get(0).getPartitionPath());
// Either the partition paths are sorted in descending order or they are equal
assertTrue("BoundedPartitionAwareCompactionStrategy should sort partitions in descending order", comparision >= 0);
}
@@ -215,34 +204,29 @@ public class TestHoodieCompactionStrategy {
String currentDayPlus1 = format.format(BoundedPartitionAwareCompactionStrategy.getDateAtOffsetFromToday(1));
String currentDayPlus5 = format.format(BoundedPartitionAwareCompactionStrategy.getDateAtOffsetFromToday(5));
Map<Long, String> keyToPartitionMap = new ImmutableMap.Builder()
.put(120 * MB, currentDay)
.put(110 * MB, currentDayMinus1)
.put(100 * MB, currentDayMinus2)
.put(80 * MB, currentDayMinus3)
.put(90 * MB, currentDayPlus1)
.put(70 * MB, currentDayPlus5)
.build();
Map<Long, String> keyToPartitionMap = new ImmutableMap.Builder().put(120 * MB, currentDay)
.put(110 * MB, currentDayMinus1).put(100 * MB, currentDayMinus2).put(80 * MB, currentDayMinus3)
.put(90 * MB, currentDayPlus1).put(70 * MB, currentDayPlus5).build();
UnBoundedPartitionAwareCompactionStrategy strategy = new UnBoundedPartitionAwareCompactionStrategy();
HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder().withPath("/tmp").withCompactionConfig(
HoodieCompactionConfig.newBuilder().withCompactionStrategy(strategy)
.withTargetPartitionsPerDayBasedCompaction(2)
.build()).build();
HoodieWriteConfig writeConfig =
HoodieWriteConfig.newBuilder().withPath("/tmp").withCompactionConfig(HoodieCompactionConfig.newBuilder()
.withCompactionStrategy(strategy).withTargetPartitionsPerDayBasedCompaction(2).build()).build();
List<HoodieCompactionOperation> operations = createCompactionOperations(writeConfig, sizesMap, keyToPartitionMap);
List<HoodieCompactionOperation> returned = strategy.orderAndFilter(writeConfig, operations, new ArrayList<>());
assertTrue("UnBoundedPartitionAwareCompactionStrategy should not include last " + writeConfig
.getTargetPartitionsPerDayBasedCompaction() + " partitions or later partitions from today",
assertTrue(
"UnBoundedPartitionAwareCompactionStrategy should not include last "
+ writeConfig.getTargetPartitionsPerDayBasedCompaction() + " partitions or later partitions from today",
returned.size() < operations.size());
Assert.assertEquals("BoundedPartitionAwareCompactionStrategy should have resulted in 1 compaction",
returned.size(), 1);
Assert.assertEquals("BoundedPartitionAwareCompactionStrategy should have resulted in 1 compaction", returned.size(),
1);
}
private List<HoodieCompactionOperation> createCompactionOperations(HoodieWriteConfig config,
Map<Long, List<Long>> sizesMap) {
Map<Long, String> keyToPartitionMap = sizesMap.entrySet().stream().map(e ->
Pair.of(e.getKey(), partitionPaths[new Random().nextInt(partitionPaths.length - 1)]))
Map<Long, String> keyToPartitionMap = sizesMap.entrySet().stream()
.map(e -> Pair.of(e.getKey(), partitionPaths[new Random().nextInt(partitionPaths.length - 1)]))
.collect(Collectors.toMap(Pair::getKey, Pair::getValue));
return createCompactionOperations(config, sizesMap, keyToPartitionMap);
}
@@ -256,9 +240,7 @@ public class TestHoodieCompactionStrategy {
String partitionPath = keyToPartitionMap.get(k);
List<HoodieLogFile> logFiles = v.stream().map(TestHoodieLogFile::newLogFile).collect(Collectors.toList());
operations.add(new HoodieCompactionOperation(df.getCommitTime(),
logFiles.stream().map(s -> s.getPath().toString()).collect(Collectors.toList()),
df.getPath(),
df.getFileId(),
logFiles.stream().map(s -> s.getPath().toString()).collect(Collectors.toList()), df.getPath(), df.getFileId(),
partitionPath,
config.getCompactionStrategy().captureMetrics(config, Option.of(df), partitionPath, logFiles)));
});

View File

@@ -105,9 +105,8 @@ public class TestCopyOnWriteTable extends HoodieClientTestHarness {
return Pair.of(io.makeNewPath(record.getPartitionPath()), writeToken);
}).collect().get(0);
Assert.assertEquals(newPathWithWriteToken.getKey().toString(),
this.basePath + "/" + partitionPath + "/"
+ FSUtils.makeDataFileName(commitTime, newPathWithWriteToken.getRight(), fileName));
Assert.assertEquals(newPathWithWriteToken.getKey().toString(), this.basePath + "/" + partitionPath + "/"
+ FSUtils.makeDataFileName(commitTime, newPathWithWriteToken.getRight(), fileName));
}
private HoodieWriteConfig makeHoodieClientConfig() throws Exception {
@@ -173,9 +172,8 @@ public class TestCopyOnWriteTable extends HoodieClientTestHarness {
assertTrue(filter.mightContain(record.getRecordKey()));
}
// Create a commit file
new File(
this.basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" + FSUtils.getCommitTime(parquetFile.getName())
+ ".commit").createNewFile();
new File(this.basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/"
+ FSUtils.getCommitTime(parquetFile.getName()) + ".commit").createNewFile();
// Read the parquet file, check the record content
List<GenericRecord> fileRecords = ParquetUtils.readAvroRecords(jsc.hadoopConfiguration(), parquetFilePath);
@@ -197,8 +195,8 @@ public class TestCopyOnWriteTable extends HoodieClientTestHarness {
updatedRecord1.seal();
TestRawTripPayload rowChange4 = new TestRawTripPayload(recordStr4);
HoodieRecord insertedRecord1 = new HoodieRecord(
new HoodieKey(rowChange4.getRowKey(), rowChange4.getPartitionPath()), rowChange4);
HoodieRecord insertedRecord1 =
new HoodieRecord(new HoodieKey(rowChange4.getRowKey(), rowChange4.getPartitionPath()), rowChange4);
List<HoodieRecord> updatedRecords = Arrays.asList(updatedRecord1, insertedRecord1);
@@ -206,11 +204,10 @@ public class TestCopyOnWriteTable extends HoodieClientTestHarness {
String newCommitTime = HoodieTestUtils.makeNewCommitTime();
metaClient = HoodieTableMetaClient.reload(metaClient);
final HoodieCopyOnWriteTable newTable = new HoodieCopyOnWriteTable(config, jsc);
List<WriteStatus> statuses =
jsc.parallelize(Arrays.asList(1)).map(x -> {
return newTable.handleUpdate(newCommitTime, updatedRecord1.getCurrentLocation().getFileId(),
updatedRecords.iterator());
}).flatMap(x -> HoodieClientTestUtils.collectStatuses(x).iterator()).collect();
List<WriteStatus> statuses = jsc.parallelize(Arrays.asList(1)).map(x -> {
return newTable.handleUpdate(newCommitTime, updatedRecord1.getCurrentLocation().getFileId(),
updatedRecords.iterator());
}).flatMap(x -> HoodieClientTestUtils.collectStatuses(x).iterator()).collect();
// Check the updated file
File updatedParquetFile = null;
@@ -218,7 +215,7 @@ public class TestCopyOnWriteTable extends HoodieClientTestHarness {
if (file.getName().endsWith(".parquet")) {
if (FSUtils.getFileId(file.getName()).equals(FSUtils.getFileId(parquetFile.getName()))
&& HoodieTimeline.compareTimestamps(FSUtils.getCommitTime(file.getName()),
FSUtils.getCommitTime(parquetFile.getName()), HoodieTimeline.GREATER)) {
FSUtils.getCommitTime(parquetFile.getName()), HoodieTimeline.GREATER)) {
updatedParquetFile = file;
break;
}
@@ -227,8 +224,8 @@ public class TestCopyOnWriteTable extends HoodieClientTestHarness {
assertTrue(updatedParquetFile != null);
// Check whether the record has been updated
Path updatedParquetFilePath = new Path(updatedParquetFile.getAbsolutePath());
BloomFilter updatedFilter = ParquetUtils.readBloomFilterFromParquetMetadata(jsc.hadoopConfiguration(),
updatedParquetFilePath);
BloomFilter updatedFilter =
ParquetUtils.readBloomFilterFromParquetMetadata(jsc.hadoopConfiguration(), updatedParquetFilePath);
for (HoodieRecord record : records) {
// No change to the _row_key
assertTrue(updatedFilter.mightContain(record.getRecordKey()));
@@ -250,15 +247,15 @@ public class TestCopyOnWriteTable extends HoodieClientTestHarness {
// Also check the numRecordsWritten
WriteStatus writeStatus = statuses.get(0);
assertTrue("Should be only one file generated", statuses.size() == 1);
assertEquals(4, writeStatus.getStat().getNumWrites());//3 rewritten records + 1 new record
assertEquals(4, writeStatus.getStat().getNumWrites());// 3 rewritten records + 1 new record
}
private List<HoodieRecord> newHoodieRecords(int n, String time) throws Exception {
List<HoodieRecord> records = new ArrayList<>();
for (int i = 0; i < n; i++) {
String recordStr = String.format("{\"_row_key\":\"%s\",\"time\":\"%s\",\"number\":%d}",
UUID.randomUUID().toString(), time, i);
String recordStr =
String.format("{\"_row_key\":\"%s\",\"time\":\"%s\",\"number\":%d}", UUID.randomUUID().toString(), time, i);
TestRawTripPayload rowChange = new TestRawTripPayload(recordStr);
records.add(new HoodieRecord(new HoodieKey(rowChange.getRowKey(), rowChange.getPartitionPath()), rowChange));
}
@@ -269,8 +266,8 @@ public class TestCopyOnWriteTable extends HoodieClientTestHarness {
@Test
public void testMetadataAggregateFromWriteStatus() throws Exception {
// Prepare the AvroParquetIO
HoodieWriteConfig config = makeHoodieClientConfigBuilder().withWriteStatusClass(MetadataMergeWriteStatus.class)
.build();
HoodieWriteConfig config =
makeHoodieClientConfigBuilder().withWriteStatusClass(MetadataMergeWriteStatus.class).build();
String firstCommitTime = HoodieTestUtils.makeNewCommitTime();
metaClient = HoodieTableMetaClient.reload(metaClient);
@@ -297,8 +294,8 @@ public class TestCopyOnWriteTable extends HoodieClientTestHarness {
return table.handleInsert(firstCommitTime, FSUtils.createNewFileIdPfx(), records.iterator());
}).flatMap(x -> HoodieClientTestUtils.collectStatuses(x).iterator()).collect();
Map<String, String> allWriteStatusMergedMetadataMap = MetadataMergeWriteStatus
.mergeMetadataForWriteStatuses(writeStatuses);
Map<String, String> allWriteStatusMergedMetadataMap =
MetadataMergeWriteStatus.mergeMetadataForWriteStatuses(writeStatuses);
assertTrue(allWriteStatusMergedMetadataMap.containsKey("InputRecordCount_1506582000"));
// For metadata key InputRecordCount_1506582000, value is 2 for each record. So sum of this
// should be 2 * 3
@@ -359,9 +356,8 @@ public class TestCopyOnWriteTable extends HoodieClientTestHarness {
@Test
public void testFileSizeUpsertRecords() throws Exception {
HoodieWriteConfig config = makeHoodieClientConfigBuilder().withStorageConfig(
HoodieStorageConfig.newBuilder().limitFileSize(64 * 1024).parquetBlockSize(64 * 1024).parquetPageSize(64 * 1024)
.build()).build();
HoodieWriteConfig config = makeHoodieClientConfigBuilder().withStorageConfig(HoodieStorageConfig.newBuilder()
.limitFileSize(64 * 1024).parquetBlockSize(64 * 1024).parquetPageSize(64 * 1024).build()).build();
String commitTime = HoodieTestUtils.makeNewCommitTime();
metaClient = HoodieTableMetaClient.reload(metaClient);
HoodieCopyOnWriteTable table = new HoodieCopyOnWriteTable(config, jsc);
@@ -369,9 +365,8 @@ public class TestCopyOnWriteTable extends HoodieClientTestHarness {
List<HoodieRecord> records = new ArrayList<>();
// Approx 1150 records are written for block size of 64KB
for (int i = 0; i < 2000; i++) {
String recordStr =
"{\"_row_key\":\"" + UUID.randomUUID().toString() + "\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":" + i
+ "}";
String recordStr = "{\"_row_key\":\"" + UUID.randomUUID().toString()
+ "\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":" + i + "}";
TestRawTripPayload rowChange = new TestRawTripPayload(recordStr);
records.add(new HoodieRecord(new HoodieKey(rowChange.getRowKey(), rowChange.getPartitionPath()), rowChange));
}
@@ -393,19 +388,19 @@ public class TestCopyOnWriteTable extends HoodieClientTestHarness {
}
private UpsertPartitioner getUpsertPartitioner(int smallFileSize, int numInserts,
int numUpdates, int fileSize, String testPartitionPath, boolean autoSplitInserts) throws Exception {
HoodieWriteConfig config = makeHoodieClientConfigBuilder().withCompactionConfig(
HoodieCompactionConfig.newBuilder().compactionSmallFileSize(smallFileSize).insertSplitSize(100)
.autoTuneInsertSplits(autoSplitInserts).build()).withStorageConfig(
HoodieStorageConfig.newBuilder().limitFileSize(1000 * 1024).build()).build();
private UpsertPartitioner getUpsertPartitioner(int smallFileSize, int numInserts, int numUpdates, int fileSize,
String testPartitionPath, boolean autoSplitInserts) throws Exception {
HoodieWriteConfig config = makeHoodieClientConfigBuilder()
.withCompactionConfig(HoodieCompactionConfig.newBuilder().compactionSmallFileSize(smallFileSize)
.insertSplitSize(100).autoTuneInsertSplits(autoSplitInserts).build())
.withStorageConfig(HoodieStorageConfig.newBuilder().limitFileSize(1000 * 1024).build()).build();
HoodieClientTestUtils.fakeCommitFile(basePath, "001");
HoodieClientTestUtils.fakeDataFile(basePath, testPartitionPath, "001", "file1", fileSize);
metaClient = HoodieTableMetaClient.reload(metaClient);
HoodieCopyOnWriteTable table = new HoodieCopyOnWriteTable(config, jsc);
HoodieTestDataGenerator dataGenerator = new HoodieTestDataGenerator(new String[]{testPartitionPath});
HoodieTestDataGenerator dataGenerator = new HoodieTestDataGenerator(new String[] {testPartitionPath});
List<HoodieRecord> insertRecords = dataGenerator.generateInserts("001", numInserts);
List<HoodieRecord> updateRecords = dataGenerator.generateUpdates("001", numUpdates);
for (HoodieRecord updateRec : updateRecords) {
@@ -429,8 +424,7 @@ public class TestCopyOnWriteTable extends HoodieClientTestHarness {
public void testUpsertPartitioner() throws Exception {
  // Verifies that, for a single partition path, the partitioner built below reports exactly two insert buckets.
  final String testPartitionPath = "2016/09/26";
  // Inserts + Updates... Check all updates go together & inserts subsplit
  // Arguments, in order: smallFileSize=0, numInserts=200, numUpdates=100, fileSize=1024, partition path,
  // autoSplitInserts=false. NOTE(review): the expected count of 2 presumably follows from the helper's
  // insertSplitSize(100) applied to 200 inserts -- confirm against getUpsertPartitioner.
  UpsertPartitioner partitioner = getUpsertPartitioner(0, 200, 100, 1024, testPartitionPath, false);
  List<HoodieCopyOnWriteTable.InsertBucket> insertBuckets = partitioner.getInsertBuckets(testPartitionPath);
  assertEquals("Total of 2 insert buckets", 2, insertBuckets.size());
}
@@ -441,8 +435,7 @@ public class TestCopyOnWriteTable extends HoodieClientTestHarness {
final String testPartitionPath = "2016/09/26";
// Inserts + Updates .. Check updates go together & inserts subsplit, after expanding
// smallest file
UpsertPartitioner partitioner = getUpsertPartitioner(1000 * 1024, 400, 100, 800 * 1024, testPartitionPath,
false);
UpsertPartitioner partitioner = getUpsertPartitioner(1000 * 1024, 400, 100, 800 * 1024, testPartitionPath, false);
List<HoodieCopyOnWriteTable.InsertBucket> insertBuckets = partitioner.getInsertBuckets(testPartitionPath);
assertEquals("Should have 3 partitions", 3, partitioner.numPartitions());
@@ -476,8 +469,8 @@ public class TestCopyOnWriteTable extends HoodieClientTestHarness {
@Test
public void testInsertUpsertWithHoodieAvroPayload() throws Exception {
HoodieWriteConfig config = makeHoodieClientConfigBuilder().withStorageConfig(
HoodieStorageConfig.newBuilder().limitFileSize(1000 * 1024).build()).build();
HoodieWriteConfig config = makeHoodieClientConfigBuilder()
.withStorageConfig(HoodieStorageConfig.newBuilder().limitFileSize(1000 * 1024).build()).build();
metaClient = HoodieTableMetaClient.reload(metaClient);
final HoodieCopyOnWriteTable table = new HoodieCopyOnWriteTable(config, jsc);
String commitTime = "000";

View File

@@ -79,20 +79,15 @@ public class TestHoodieRecordSizing {
/**
 * Builds the commit-metadata fixtures used by this test class: five generated commit metadata objects, each
 * serialized to a JSON string, encoded as UTF-8 bytes and wrapped in an {@code Option}.
 *
 * <p>Entries are added with {@link LinkedList#push}, which inserts at the head, so the returned list is ordered
 * last-pushed first: the fifth commit is at index 0 and the first commit is at the tail.
 *
 * @return the five serialized commits, most recently pushed first
 * @throws IOException propagated from generating or serializing the commit metadata
 */
private static LinkedList<Option<byte[]>> generateCommitMetadataList() throws IOException {
  LinkedList<Option<byte[]>> commits = new LinkedList<>();
  // First commit with non zero records and bytes
  commits.push(Option.of(generateCommitMetadataWith(2000, 10000).toJsonString().getBytes(StandardCharsets.UTF_8)));
  // Second commit with non zero records and bytes
  commits.push(Option.of(generateCommitMetadataWith(1500, 7500).toJsonString().getBytes(StandardCharsets.UTF_8)));
  // Third commit with both zero records and zero bytes
  commits.push(Option.of(generateCommitMetadataWith(0, 0).toJsonString().getBytes(StandardCharsets.UTF_8)));
  // Fourth commit with zero records
  commits.push(Option.of(generateCommitMetadataWith(0, 1500).toJsonString().getBytes(StandardCharsets.UTF_8)));
  // Fifth commit with zero bytes
  commits.push(Option.of(generateCommitMetadataWith(2500, 0).toJsonString().getBytes(StandardCharsets.UTF_8)));
  return commits;
}

View File

@@ -128,8 +128,8 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
assertFalse(commit.isPresent());
FileStatus[] allFiles = HoodieTestUtils.listAllDataFilesInPath(metaClient.getFs(), cfg.getBasePath());
ReadOptimizedView roView = new HoodieTableFileSystemView(metaClient,
metaClient.getCommitTimeline().filterCompletedInstants(), allFiles);
ReadOptimizedView roView =
new HoodieTableFileSystemView(metaClient, metaClient.getCommitTimeline().filterCompletedInstants(), allFiles);
Stream<HoodieDataFile> dataFilesToRead = roView.getLatestDataFiles();
assertTrue(!dataFilesToRead.findAny().isPresent());
@@ -198,8 +198,8 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
List<WriteStatus> statuses = client.upsert(writeRecords, newCommitTime).collect();
assertNoWriteErrors(statuses);
Map<String, String> allWriteStatusMergedMetadataMap = MetadataMergeWriteStatus
.mergeMetadataForWriteStatuses(statuses);
Map<String, String> allWriteStatusMergedMetadataMap =
MetadataMergeWriteStatus.mergeMetadataForWriteStatuses(statuses);
assertTrue(allWriteStatusMergedMetadataMap.containsKey("InputRecordCount_1506582000"));
// For metadata key InputRecordCount_1506582000, value is 2 for each record. So sum of this
// should be 2 * records.size()
@@ -236,8 +236,8 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
assertFalse(commit.isPresent());
FileStatus[] allFiles = HoodieTestUtils.listAllDataFilesInPath(metaClient.getFs(), cfg.getBasePath());
ReadOptimizedView roView = new HoodieTableFileSystemView(metaClient,
metaClient.getCommitTimeline().filterCompletedInstants(), allFiles);
ReadOptimizedView roView =
new HoodieTableFileSystemView(metaClient, metaClient.getCommitTimeline().filterCompletedInstants(), allFiles);
Stream<HoodieDataFile> dataFilesToRead = roView.getLatestDataFiles();
assertTrue(!dataFilesToRead.findAny().isPresent());
@@ -284,7 +284,7 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
List<String> dataFiles = roView.getLatestDataFiles().map(hf -> hf.getPath()).collect(Collectors.toList());
List<GenericRecord> recordsRead = HoodieMergeOnReadTestUtils.getRecordsUsingInputFormat(dataFiles, basePath);
//Wrote 20 records and deleted 20 records, so remaining 20-20 = 0
// Wrote 20 records and deleted 20 records, so remaining 20-20 = 0
assertEquals("Must contain 0 records", 0, recordsRead.size());
}
}
@@ -292,7 +292,7 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
@Test
public void testCOWToMORConvertedDatasetRollback() throws Exception {
//Set TableType to COW
// Set TableType to COW
HoodieTestUtils.init(jsc.hadoopConfiguration(), basePath, HoodieTableType.COPY_ON_WRITE);
HoodieWriteConfig cfg = getConfig(true);
@@ -308,7 +308,7 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
JavaRDD<HoodieRecord> writeRecords = jsc.parallelize(records, 1);
List<WriteStatus> statuses = client.upsert(writeRecords, newCommitTime).collect();
//verify there are no errors
// verify there are no errors
assertNoWriteErrors(statuses);
HoodieTableMetaClient metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), cfg.getBasePath());
@@ -328,17 +328,17 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
// Verify there are no errors
assertNoWriteErrors(statuses);
//Set TableType to MOR
// Set TableType to MOR
HoodieTestUtils.init(jsc.hadoopConfiguration(), basePath, HoodieTableType.MERGE_ON_READ);
//rollback a COW commit when TableType is MOR
// rollback a COW commit when TableType is MOR
client.rollback(newCommitTime);
metaClient = HoodieTableMetaClient.reload(metaClient);
HoodieTable hoodieTable = HoodieTable.getHoodieTable(metaClient, cfg, jsc);
FileStatus[] allFiles = HoodieTestUtils.listAllDataFilesInPath(metaClient.getFs(), cfg.getBasePath());
HoodieTableFileSystemView roView = new HoodieTableFileSystemView(metaClient,
hoodieTable.getCompletedCommitsTimeline(), allFiles);
HoodieTableFileSystemView roView =
new HoodieTableFileSystemView(metaClient, hoodieTable.getCompletedCommitsTimeline(), allFiles);
final String absentCommit = newCommitTime;
assertFalse(roView.getLatestDataFiles().filter(file -> {
@@ -383,8 +383,8 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
assertFalse(commit.isPresent());
FileStatus[] allFiles = HoodieTestUtils.listAllDataFilesInPath(metaClient.getFs(), cfg.getBasePath());
ReadOptimizedView roView = new HoodieTableFileSystemView(metaClient,
metaClient.getCommitTimeline().filterCompletedInstants(), allFiles);
ReadOptimizedView roView =
new HoodieTableFileSystemView(metaClient, metaClient.getCommitTimeline().filterCompletedInstants(), allFiles);
Stream<HoodieDataFile> dataFilesToRead = roView.getLatestDataFiles();
assertTrue(!dataFilesToRead.findAny().isPresent());
@@ -417,8 +417,8 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
secondClient.rollback(commitTime1);
allFiles = HoodieTestUtils.listAllDataFilesInPath(metaClient.getFs(), cfg.getBasePath());
// After rollback, there should be no parquet file with the failed commit time
Assert.assertEquals(Arrays.asList(allFiles).stream().filter(file -> file.getPath().getName()
.contains(commitTime1)).collect(Collectors.toList()).size(), 0);
Assert.assertEquals(Arrays.asList(allFiles).stream()
.filter(file -> file.getPath().getName().contains(commitTime1)).collect(Collectors.toList()).size(), 0);
dataFiles = roView.getLatestDataFiles().map(hf -> hf.getPath()).collect(Collectors.toList());
recordsRead = HoodieMergeOnReadTestUtils.getRecordsUsingInputFormat(dataFiles, basePath);
assertEquals(recordsRead.size(), 200);
@@ -450,8 +450,8 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
thirdClient.rollback(commitTime2);
allFiles = HoodieTestUtils.listAllDataFilesInPath(metaClient.getFs(), cfg.getBasePath());
// After rollback, there should be no parquet file with the failed commit time
Assert.assertEquals(Arrays.asList(allFiles).stream().filter(file -> file.getPath().getName()
.contains(commitTime2)).collect(Collectors.toList()).size(), 0);
Assert.assertEquals(Arrays.asList(allFiles).stream()
.filter(file -> file.getPath().getName().contains(commitTime2)).collect(Collectors.toList()).size(), 0);
metaClient = HoodieTableMetaClient.reload(metaClient);
hoodieTable = HoodieTable.getHoodieTable(metaClient, cfg, jsc);
@@ -488,9 +488,8 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
roView = new HoodieTableFileSystemView(metaClient, metaClient.getCommitsTimeline(), allFiles);
List<HoodieDataFile> dataFiles2 = roView.getLatestDataFiles().collect(Collectors.toList());
final String compactedCommitTime = metaClient.getActiveTimeline().reload().getCommitsTimeline().lastInstant()
.get()
.getTimestamp();
final String compactedCommitTime =
metaClient.getActiveTimeline().reload().getCommitsTimeline().lastInstant().get().getTimestamp();
assertTrue(roView.getLatestDataFiles().filter(file -> {
if (compactedCommitTime.equals(file.getCommitTime())) {
@@ -550,8 +549,8 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
assertFalse(commit.isPresent());
FileStatus[] allFiles = HoodieTestUtils.listAllDataFilesInPath(metaClient.getFs(), cfg.getBasePath());
ReadOptimizedView roView = new HoodieTableFileSystemView(metaClient,
metaClient.getCommitTimeline().filterCompletedInstants(), allFiles);
ReadOptimizedView roView =
new HoodieTableFileSystemView(metaClient, metaClient.getCommitTimeline().filterCompletedInstants(), allFiles);
Stream<HoodieDataFile> dataFilesToRead = roView.getLatestDataFiles();
assertTrue(!dataFilesToRead.findAny().isPresent());
@@ -637,9 +636,8 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
metaClient = HoodieTableMetaClient.reload(metaClient);
roView = new HoodieTableFileSystemView(metaClient, metaClient.getCommitsTimeline(), allFiles);
final String compactedCommitTime = metaClient.getActiveTimeline().reload().getCommitsTimeline().lastInstant()
.get()
.getTimestamp();
final String compactedCommitTime =
metaClient.getActiveTimeline().reload().getCommitsTimeline().lastInstant().get().getTimestamp();
assertTrue(roView.getLatestDataFiles().filter(file -> {
if (compactedCommitTime.equals(file.getCommitTime())) {
@@ -670,33 +668,32 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
metaClient = HoodieTableMetaClient.reload(metaClient);
allFiles = HoodieTestUtils.listAllDataFilesInPath(metaClient.getFs(), cfg.getBasePath());
roView = new HoodieTableFileSystemView(metaClient,
metaClient.getCommitTimeline().filterCompletedInstants(), allFiles);
roView =
new HoodieTableFileSystemView(metaClient, metaClient.getCommitTimeline().filterCompletedInstants(), allFiles);
dataFilesToRead = roView.getLatestDataFiles();
assertTrue(!dataFilesToRead.findAny().isPresent());
RealtimeView rtView = new HoodieTableFileSystemView(metaClient,
metaClient.getCommitTimeline().filterCompletedInstants(), allFiles);
List<HoodieFileGroup> fileGroups = ((HoodieTableFileSystemView) rtView).getAllFileGroups().collect(Collectors
.toList());
RealtimeView rtView =
new HoodieTableFileSystemView(metaClient, metaClient.getCommitTimeline().filterCompletedInstants(), allFiles);
List<HoodieFileGroup> fileGroups =
((HoodieTableFileSystemView) rtView).getAllFileGroups().collect(Collectors.toList());
assertTrue(fileGroups.isEmpty());
// make sure there are no log files remaining
assertTrue(((HoodieTableFileSystemView) rtView).getAllFileGroups().filter(fileGroup -> fileGroup
.getAllRawFileSlices().filter(f -> f.getLogFiles().count() == 0).count() == 0).count() == 0L);
assertTrue(((HoodieTableFileSystemView) rtView).getAllFileGroups()
.filter(fileGroup -> fileGroup.getAllRawFileSlices().filter(f -> f.getLogFiles().count() == 0).count() == 0)
.count() == 0L);
}
}
protected HoodieWriteConfig getHoodieWriteConfigWithSmallFileHandlingOff() {
return HoodieWriteConfig.newBuilder().withPath(basePath)
.withSchema(TRIP_EXAMPLE_SCHEMA)
.withParallelism(2, 2)
.withAutoCommit(false).withAssumeDatePartitioning(true).withCompactionConfig(HoodieCompactionConfig.newBuilder()
.compactionSmallFileSize(1 * 1024).withInlineCompaction(false)
.withMaxNumDeltaCommitsBeforeCompaction(1).build())
return HoodieWriteConfig.newBuilder().withPath(basePath).withSchema(TRIP_EXAMPLE_SCHEMA).withParallelism(2, 2)
.withAutoCommit(false).withAssumeDatePartitioning(true)
.withCompactionConfig(HoodieCompactionConfig.newBuilder().compactionSmallFileSize(1 * 1024)
.withInlineCompaction(false).withMaxNumDeltaCommitsBeforeCompaction(1).build())
.withEmbeddedTimelineServerEnabled(true)
.withStorageConfig(HoodieStorageConfig.newBuilder().limitFileSize(1 * 1024).build())
.forTable("test-trip-table").build();
.withStorageConfig(HoodieStorageConfig.newBuilder().limitFileSize(1 * 1024).build()).forTable("test-trip-table")
.build();
}
@Test
@@ -730,8 +727,8 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
ReadOptimizedView roView = new HoodieTableFileSystemView(metaClient,
metaClient.getCommitsTimeline().filterCompletedInstants(), allFiles);
Stream<HoodieDataFile> dataFilesToRead = roView.getLatestDataFiles();
Map<String, Long> parquetFileIdToSize = dataFilesToRead.collect(
Collectors.toMap(HoodieDataFile::getFileId, HoodieDataFile::getFileSize));
Map<String, Long> parquetFileIdToSize =
dataFilesToRead.collect(Collectors.toMap(HoodieDataFile::getFileId, HoodieDataFile::getFileSize));
roView = new HoodieTableFileSystemView(metaClient, hoodieTable.getCompletedCommitsTimeline(), allFiles);
dataFilesToRead = roView.getLatestDataFiles();
@@ -740,8 +737,7 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
dataFilesList.size() > 0);
/**
* Write 2 (only updates + inserts, written to .log file + correction of existing parquet
* file size)
* Write 2 (only updates + inserts, written to .log file + correction of existing parquet file size)
*/
newCommitTime = "002";
client.startCommitWithTime(newCommitTime);
@@ -766,15 +762,15 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
hoodieTable.getActiveTimeline().reload().getCommitsTimeline().filterCompletedInstants(), allFiles);
dataFilesToRead = roView.getLatestDataFiles();
List<HoodieDataFile> newDataFilesList = dataFilesToRead.collect(Collectors.toList());
Map<String, Long> parquetFileIdToNewSize = newDataFilesList.stream().collect(
Collectors.toMap(HoodieDataFile::getFileId, HoodieDataFile::getFileSize));
Map<String, Long> parquetFileIdToNewSize =
newDataFilesList.stream().collect(Collectors.toMap(HoodieDataFile::getFileId, HoodieDataFile::getFileSize));
assertTrue(parquetFileIdToNewSize.entrySet().stream()
.filter(entry -> parquetFileIdToSize.get(entry.getKey()) < entry.getValue()).count() > 0);
List<String> dataFiles = roView.getLatestDataFiles().map(hf -> hf.getPath()).collect(Collectors.toList());
List<GenericRecord> recordsRead = HoodieMergeOnReadTestUtils.getRecordsUsingInputFormat(dataFiles, basePath);
//Wrote 20 records in 2 batches
// Wrote 20 records in 2 batches
assertEquals("Must contain 40 records", 40, recordsRead.size());
}
}
@@ -805,9 +801,8 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
updatedRecords = readClient.tagLocation(updatedRecordsRDD).collect();
// Write them to corresponding avro logfiles
HoodieTestUtils
.writeRecordsToLogFiles(metaClient.getFs(), metaClient.getBasePath(),
HoodieTestDataGenerator.avroSchemaWithMetadataFields, updatedRecords);
HoodieTestUtils.writeRecordsToLogFiles(metaClient.getFs(), metaClient.getBasePath(),
HoodieTestDataGenerator.avroSchemaWithMetadataFields, updatedRecords);
// Verify that all data file has one log file
metaClient = HoodieTableMetaClient.reload(metaClient);
@@ -816,8 +811,8 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
((SyncableFileSystemView) (table.getRTFileSystemView())).reset();
for (String partitionPath : dataGen.getPartitionPaths()) {
List<FileSlice> groupedLogFiles = table.getRTFileSystemView().getLatestFileSlices(partitionPath)
.collect(Collectors.toList());
List<FileSlice> groupedLogFiles =
table.getRTFileSystemView().getLatestFileSlices(partitionPath).collect(Collectors.toList());
for (FileSlice fileSlice : groupedLogFiles) {
assertEquals("There should be 1 log file written for every data file", 1, fileSlice.getLogFiles().count());
}
@@ -836,12 +831,12 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
table = HoodieTable.getHoodieTable(metaClient, config, jsc);
HoodieActiveTimeline timeline = metaClient.getActiveTimeline();
assertTrue("Compaction commit should be > than last insert", HoodieTimeline.compareTimestamps(
timeline.lastInstant().get().getTimestamp(), newCommitTime, HoodieTimeline.GREATER));
assertTrue("Compaction commit should be > than last insert", HoodieTimeline
.compareTimestamps(timeline.lastInstant().get().getTimestamp(), newCommitTime, HoodieTimeline.GREATER));
for (String partitionPath : dataGen.getPartitionPaths()) {
List<FileSlice> groupedLogFiles = table.getRTFileSystemView().getLatestFileSlices(partitionPath)
.collect(Collectors.toList());
List<FileSlice> groupedLogFiles =
table.getRTFileSystemView().getLatestFileSlices(partitionPath).collect(Collectors.toList());
for (FileSlice slice : groupedLogFiles) {
assertTrue("After compaction there should be no log files visiable on a Realtime view",
slice.getLogFiles().collect(Collectors.toList()).isEmpty());
@@ -869,19 +864,18 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
JavaRDD<WriteStatus> statuses = writeClient.insert(recordsRDD, newCommitTime);
writeClient.commit(newCommitTime, statuses);
HoodieTable table = HoodieTable
.getHoodieTable(new HoodieTableMetaClient(jsc.hadoopConfiguration(), basePath), config,
jsc);
HoodieTable table =
HoodieTable.getHoodieTable(new HoodieTableMetaClient(jsc.hadoopConfiguration(), basePath), config, jsc);
RealtimeView tableRTFileSystemView = table.getRTFileSystemView();
long numLogFiles = 0;
for (String partitionPath : dataGen.getPartitionPaths()) {
Assert.assertTrue(tableRTFileSystemView.getLatestFileSlices(partitionPath).filter(fileSlice ->
fileSlice.getDataFile().isPresent()).count() == 0);
Assert.assertTrue(tableRTFileSystemView.getLatestFileSlices(partitionPath).filter(fileSlice ->
fileSlice.getLogFiles().count() > 0).count() > 0);
numLogFiles += tableRTFileSystemView.getLatestFileSlices(partitionPath).filter(fileSlice ->
fileSlice.getLogFiles().count() > 0).count();
Assert.assertTrue(tableRTFileSystemView.getLatestFileSlices(partitionPath)
.filter(fileSlice -> fileSlice.getDataFile().isPresent()).count() == 0);
Assert.assertTrue(tableRTFileSystemView.getLatestFileSlices(partitionPath)
.filter(fileSlice -> fileSlice.getLogFiles().count() > 0).count() > 0);
numLogFiles += tableRTFileSystemView.getLatestFileSlices(partitionPath)
.filter(fileSlice -> fileSlice.getLogFiles().count() > 0).count();
}
Assert.assertTrue(numLogFiles > 0);
@@ -910,10 +904,10 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
List<WriteStatus> writeStatuses = statuses.collect();
// Ensure that inserts are written to only log files
Assert.assertEquals(writeStatuses.stream().filter(writeStatus -> !writeStatus.getStat().getPath().contains("log")
).count(), 0);
Assert.assertTrue(writeStatuses.stream().filter(writeStatus -> writeStatus.getStat().getPath().contains("log")
).count() > 0);
Assert.assertEquals(
writeStatuses.stream().filter(writeStatus -> !writeStatus.getStat().getPath().contains("log")).count(), 0);
Assert.assertTrue(
writeStatuses.stream().filter(writeStatus -> writeStatus.getStat().getPath().contains("log")).count() > 0);
// rollback a failed commit
boolean rollback = writeClient.rollback(newCommitTime);
@@ -934,9 +928,8 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
// and calling rollback twice
final String lastCommitTime = newCommitTime;
HoodieTableMetaClient metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), basePath);
HoodieInstant last =
metaClient.getCommitsTimeline().getInstants().filter(instant -> instant.getTimestamp().equals(lastCommitTime))
.findFirst().get();
HoodieInstant last = metaClient.getCommitsTimeline().getInstants()
.filter(instant -> instant.getTimestamp().equals(lastCommitTime)).findFirst().get();
String fileName = last.getFileName();
// Save the .commit file to local directory.
// Rollback will be called twice to test the case where rollback failed first time and retried.
@@ -944,8 +937,8 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
TemporaryFolder folder = new TemporaryFolder();
folder.create();
File file = folder.newFile();
metaClient.getFs()
.copyToLocalFile(new Path(metaClient.getMetaPath(), fileName), new Path(file.getAbsolutePath()));
metaClient.getFs().copyToLocalFile(new Path(metaClient.getMetaPath(), fileName),
new Path(file.getAbsolutePath()));
writeClient.rollback(newCommitTime);
metaClient = HoodieTableMetaClient.reload(metaClient);
@@ -954,12 +947,12 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
long numLogFiles = 0;
for (String partitionPath : dataGen.getPartitionPaths()) {
Assert.assertTrue(tableRTFileSystemView.getLatestFileSlices(partitionPath).filter(fileSlice ->
fileSlice.getDataFile().isPresent()).count() == 0);
Assert.assertTrue(tableRTFileSystemView.getLatestFileSlices(partitionPath).filter(fileSlice ->
fileSlice.getLogFiles().count() > 0).count() == 0);
numLogFiles += tableRTFileSystemView.getLatestFileSlices(partitionPath).filter(fileSlice ->
fileSlice.getLogFiles().count() > 0).count();
Assert.assertTrue(tableRTFileSystemView.getLatestFileSlices(partitionPath)
.filter(fileSlice -> fileSlice.getDataFile().isPresent()).count() == 0);
Assert.assertTrue(tableRTFileSystemView.getLatestFileSlices(partitionPath)
.filter(fileSlice -> fileSlice.getLogFiles().count() > 0).count() == 0);
numLogFiles += tableRTFileSystemView.getLatestFileSlices(partitionPath)
.filter(fileSlice -> fileSlice.getLogFiles().count() > 0).count();
}
Assert.assertTrue(numLogFiles == 0);
metaClient.getFs().copyFromLocalFile(new Path(file.getAbsolutePath()),
@@ -987,19 +980,18 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
// trigger an action
statuses.collect();
HoodieTable table = HoodieTable
.getHoodieTable(new HoodieTableMetaClient(jsc.hadoopConfiguration(), basePath), config,
jsc);
HoodieTable table =
HoodieTable.getHoodieTable(new HoodieTableMetaClient(jsc.hadoopConfiguration(), basePath), config, jsc);
RealtimeView tableRTFileSystemView = table.getRTFileSystemView();
long numLogFiles = 0;
for (String partitionPath : dataGen.getPartitionPaths()) {
Assert.assertTrue(tableRTFileSystemView.getLatestFileSlices(partitionPath).filter(fileSlice ->
fileSlice.getDataFile().isPresent()).count() == 0);
Assert.assertTrue(tableRTFileSystemView.getLatestFileSlices(partitionPath).filter(fileSlice ->
fileSlice.getLogFiles().count() > 0).count() > 0);
numLogFiles += tableRTFileSystemView.getLatestFileSlices(partitionPath).filter(fileSlice ->
fileSlice.getLogFiles().count() > 0).count();
Assert.assertTrue(tableRTFileSystemView.getLatestFileSlices(partitionPath)
.filter(fileSlice -> fileSlice.getDataFile().isPresent()).count() == 0);
Assert.assertTrue(tableRTFileSystemView.getLatestFileSlices(partitionPath)
.filter(fileSlice -> fileSlice.getLogFiles().count() > 0).count() > 0);
numLogFiles += tableRTFileSystemView.getLatestFileSlices(partitionPath)
.filter(fileSlice -> fileSlice.getLogFiles().count() > 0).count();
}
Assert.assertTrue(numLogFiles > 0);
@@ -1016,10 +1008,10 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
tableRTFileSystemView = table.getRTFileSystemView();
((SyncableFileSystemView) tableRTFileSystemView).reset();
for (String partitionPath : dataGen.getPartitionPaths()) {
Assert.assertTrue(tableRTFileSystemView.getLatestFileSlices(partitionPath).filter(fileSlice ->
fileSlice.getDataFile().isPresent()).count() == 0);
Assert.assertTrue(tableRTFileSystemView.getLatestFileSlices(partitionPath).filter(fileSlice ->
fileSlice.getLogFiles().count() > 0).count() > 0);
Assert.assertTrue(tableRTFileSystemView.getLatestFileSlices(partitionPath)
.filter(fileSlice -> fileSlice.getDataFile().isPresent()).count() == 0);
Assert.assertTrue(tableRTFileSystemView.getLatestFileSlices(partitionPath)
.filter(fileSlice -> fileSlice.getLogFiles().count() > 0).count() > 0);
}
}
}
@@ -1053,13 +1045,16 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
// Read from commit file
table = HoodieTable.getHoodieTable(metaClient, cfg, jsc);
HoodieCommitMetadata metadata = HoodieCommitMetadata.fromBytes(table.getActiveTimeline().getInstantDetails(table
.getActiveTimeline().getDeltaCommitTimeline().lastInstant().get()).get(), HoodieCommitMetadata.class);
HoodieRollingStatMetadata rollingStatMetadata = HoodieCommitMetadata.fromBytes(metadata.getExtraMetadata()
.get(HoodieRollingStatMetadata.ROLLING_STAT_METADATA_KEY).getBytes(), HoodieRollingStatMetadata.class);
HoodieCommitMetadata metadata = HoodieCommitMetadata.fromBytes(
table.getActiveTimeline()
.getInstantDetails(table.getActiveTimeline().getDeltaCommitTimeline().lastInstant().get()).get(),
HoodieCommitMetadata.class);
HoodieRollingStatMetadata rollingStatMetadata = HoodieCommitMetadata.fromBytes(
metadata.getExtraMetadata().get(HoodieRollingStatMetadata.ROLLING_STAT_METADATA_KEY).getBytes(),
HoodieRollingStatMetadata.class);
int inserts = 0;
for (Map.Entry<String, Map<String, HoodieRollingStat>> pstat :
rollingStatMetadata.getPartitionToRollingStats().entrySet()) {
for (Map.Entry<String, Map<String, HoodieRollingStat>> pstat : rollingStatMetadata.getPartitionToRollingStats()
.entrySet()) {
for (Map.Entry<String, HoodieRollingStat> stat : pstat.getValue().entrySet()) {
inserts += stat.getValue().getInserts();
}
@@ -1075,10 +1070,13 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
// Read from commit file
table = HoodieTable.getHoodieTable(metaClient, cfg, jsc);
metadata = HoodieCommitMetadata.fromBytes(table.getActiveTimeline().getInstantDetails(table
.getActiveTimeline().getDeltaCommitTimeline().lastInstant().get()).get(), HoodieCommitMetadata.class);
rollingStatMetadata = HoodieCommitMetadata.fromBytes(metadata.getExtraMetadata()
.get(HoodieRollingStatMetadata.ROLLING_STAT_METADATA_KEY).getBytes(), HoodieRollingStatMetadata.class);
metadata = HoodieCommitMetadata.fromBytes(
table.getActiveTimeline()
.getInstantDetails(table.getActiveTimeline().getDeltaCommitTimeline().lastInstant().get()).get(),
HoodieCommitMetadata.class);
rollingStatMetadata = HoodieCommitMetadata.fromBytes(
metadata.getExtraMetadata().get(HoodieRollingStatMetadata.ROLLING_STAT_METADATA_KEY).getBytes(),
HoodieRollingStatMetadata.class);
inserts = 0;
int upserts = 0;
for (Map.Entry<String, Map<String, HoodieRollingStat>> pstat : rollingStatMetadata.getPartitionToRollingStats()
@@ -1096,10 +1094,13 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
// Read from commit file
table = HoodieTable.getHoodieTable(metaClient, cfg, jsc);
metadata = HoodieCommitMetadata.fromBytes(table.getActiveTimeline().getInstantDetails(table
.getActiveTimeline().getDeltaCommitTimeline().lastInstant().get()).get(), HoodieCommitMetadata.class);
rollingStatMetadata = HoodieCommitMetadata.fromBytes(metadata.getExtraMetadata()
.get(HoodieRollingStatMetadata.ROLLING_STAT_METADATA_KEY).getBytes(), HoodieRollingStatMetadata.class);
metadata = HoodieCommitMetadata.fromBytes(
table.getActiveTimeline()
.getInstantDetails(table.getActiveTimeline().getDeltaCommitTimeline().lastInstant().get()).get(),
HoodieCommitMetadata.class);
rollingStatMetadata = HoodieCommitMetadata.fromBytes(
metadata.getExtraMetadata().get(HoodieRollingStatMetadata.ROLLING_STAT_METADATA_KEY).getBytes(),
HoodieRollingStatMetadata.class);
inserts = 0;
upserts = 0;
for (Map.Entry<String, Map<String, HoodieRollingStat>> pstat : rollingStatMetadata.getPartitionToRollingStats()
@@ -1138,13 +1139,16 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
// Read from commit file
table = HoodieTable.getHoodieTable(metaClient, cfg, jsc);
HoodieCommitMetadata metadata = HoodieCommitMetadata.fromBytes(table.getActiveTimeline().getInstantDetails(table
.getActiveTimeline().getDeltaCommitTimeline().lastInstant().get()).get(), HoodieCommitMetadata.class);
HoodieRollingStatMetadata rollingStatMetadata = HoodieCommitMetadata.fromBytes(metadata.getExtraMetadata()
.get(HoodieRollingStatMetadata.ROLLING_STAT_METADATA_KEY).getBytes(), HoodieRollingStatMetadata.class);
HoodieCommitMetadata metadata = HoodieCommitMetadata.fromBytes(
table.getActiveTimeline()
.getInstantDetails(table.getActiveTimeline().getDeltaCommitTimeline().lastInstant().get()).get(),
HoodieCommitMetadata.class);
HoodieRollingStatMetadata rollingStatMetadata = HoodieCommitMetadata.fromBytes(
metadata.getExtraMetadata().get(HoodieRollingStatMetadata.ROLLING_STAT_METADATA_KEY).getBytes(),
HoodieRollingStatMetadata.class);
int inserts = 0;
for (Map.Entry<String, Map<String, HoodieRollingStat>> pstat :
rollingStatMetadata.getPartitionToRollingStats().entrySet()) {
for (Map.Entry<String, Map<String, HoodieRollingStat>> pstat : rollingStatMetadata.getPartitionToRollingStats()
.entrySet()) {
for (Map.Entry<String, HoodieRollingStat> stat : pstat.getValue().entrySet()) {
inserts += stat.getValue().getInserts();
fileIdToInsertsMap.put(stat.getKey(), stat.getValue().getInserts());
@@ -1164,10 +1168,13 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
// Read from commit file
table = HoodieTable.getHoodieTable(metaClient, cfg, jsc);
metadata = HoodieCommitMetadata.fromBytes(table.getActiveTimeline().getInstantDetails(table
.getActiveTimeline().getDeltaCommitTimeline().lastInstant().get()).get(), HoodieCommitMetadata.class);
rollingStatMetadata = HoodieCommitMetadata.fromBytes(metadata.getExtraMetadata()
.get(HoodieRollingStatMetadata.ROLLING_STAT_METADATA_KEY).getBytes(), HoodieRollingStatMetadata.class);
metadata = HoodieCommitMetadata.fromBytes(
table.getActiveTimeline()
.getInstantDetails(table.getActiveTimeline().getDeltaCommitTimeline().lastInstant().get()).get(),
HoodieCommitMetadata.class);
rollingStatMetadata = HoodieCommitMetadata.fromBytes(
metadata.getExtraMetadata().get(HoodieRollingStatMetadata.ROLLING_STAT_METADATA_KEY).getBytes(),
HoodieRollingStatMetadata.class);
inserts = 0;
int upserts = 0;
for (Map.Entry<String, Map<String, HoodieRollingStat>> pstat : rollingStatMetadata.getPartitionToRollingStats()
@@ -1192,17 +1199,20 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
// Read from commit file
table = HoodieTable.getHoodieTable(metaClient, cfg, jsc);
metadata = HoodieCommitMetadata.fromBytes(table.getActiveTimeline().getInstantDetails(table
.getActiveTimeline().getCommitsTimeline().lastInstant().get()).get(), HoodieCommitMetadata.class);
HoodieRollingStatMetadata rollingStatMetadata1 = HoodieCommitMetadata.fromBytes(metadata.getExtraMetadata()
.get(HoodieRollingStatMetadata.ROLLING_STAT_METADATA_KEY).getBytes(), HoodieRollingStatMetadata.class);
metadata = HoodieCommitMetadata.fromBytes(
table.getActiveTimeline()
.getInstantDetails(table.getActiveTimeline().getCommitsTimeline().lastInstant().get()).get(),
HoodieCommitMetadata.class);
HoodieRollingStatMetadata rollingStatMetadata1 = HoodieCommitMetadata.fromBytes(
metadata.getExtraMetadata().get(HoodieRollingStatMetadata.ROLLING_STAT_METADATA_KEY).getBytes(),
HoodieRollingStatMetadata.class);
// Ensure that the rolling stats from the extra metadata of delta commits is copied over to the compaction commit
for (Map.Entry<String, Map<String, HoodieRollingStat>> entry : rollingStatMetadata.getPartitionToRollingStats()
.entrySet()) {
Assert.assertTrue(rollingStatMetadata1.getPartitionToRollingStats().containsKey(entry.getKey()));
Assert.assertEquals(rollingStatMetadata1.getPartitionToRollingStats().get(entry.getKey()).size(), entry
.getValue().size());
Assert.assertEquals(rollingStatMetadata1.getPartitionToRollingStats().get(entry.getKey()).size(),
entry.getValue().size());
}
// Write inserts + updates
@@ -1217,10 +1227,13 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
// Read from commit file
table = HoodieTable.getHoodieTable(metaClient, cfg, jsc);
metadata = HoodieCommitMetadata.fromBytes(table.getActiveTimeline().getInstantDetails(table
.getActiveTimeline().getDeltaCommitTimeline().lastInstant().get()).get(), HoodieCommitMetadata.class);
rollingStatMetadata = HoodieCommitMetadata.fromBytes(metadata.getExtraMetadata()
.get(HoodieRollingStatMetadata.ROLLING_STAT_METADATA_KEY).getBytes(), HoodieRollingStatMetadata.class);
metadata = HoodieCommitMetadata.fromBytes(
table.getActiveTimeline()
.getInstantDetails(table.getActiveTimeline().getDeltaCommitTimeline().lastInstant().get()).get(),
HoodieCommitMetadata.class);
rollingStatMetadata = HoodieCommitMetadata.fromBytes(
metadata.getExtraMetadata().get(HoodieRollingStatMetadata.ROLLING_STAT_METADATA_KEY).getBytes(),
HoodieRollingStatMetadata.class);
inserts = 0;
upserts = 0;
for (Map.Entry<String, Map<String, HoodieRollingStat>> pstat : rollingStatMetadata.getPartitionToRollingStats()
@@ -1249,12 +1262,10 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
/**
 * Assembles the {@link HoodieWriteConfig.Builder} with the settings visible below: path {@code basePath},
 * schema {@code TRIP_EXAMPLE_SCHEMA}, parallelism {@code (2, 2)}, date partitioning assumed, a compaction
 * config (small-file size {@code 1024 * 1024 * 1024}, inline compaction off, max one delta commit before
 * compaction), a storage config limiting file size to {@code 1024 * 1024 * 1024}, the embedded timeline
 * server enabled, and table name {@code "test-trip-table"}.
 *
 * <p>The builder is returned without calling {@code build()}.
 *
 * <p>NOTE(review): this text comes from a rendered diff, so the compaction-config call and the
 * timeline-server / table-name calls each appear twice (old and new line layout of the same calls).
 * Confirm against the real file before relying on the exact call sequence.
 *
 * @param autoCommit value forwarded to {@code withAutoCommit}
 * @param indexType index type set on the {@code HoodieIndexConfig} passed to {@code withIndexConfig}
 * @return the configured, not-yet-built {@code HoodieWriteConfig.Builder}
 */
protected HoodieWriteConfig.Builder getConfigBuilder(Boolean autoCommit, HoodieIndex.IndexType indexType) {
return HoodieWriteConfig.newBuilder().withPath(basePath).withSchema(TRIP_EXAMPLE_SCHEMA).withParallelism(2, 2)
.withAutoCommit(autoCommit).withAssumeDatePartitioning(true)
.withCompactionConfig(
HoodieCompactionConfig.newBuilder().compactionSmallFileSize(1024 * 1024 * 1024).withInlineCompaction(false)
.withMaxNumDeltaCommitsBeforeCompaction(1).build())
.withCompactionConfig(HoodieCompactionConfig.newBuilder().compactionSmallFileSize(1024 * 1024 * 1024)
.withInlineCompaction(false).withMaxNumDeltaCommitsBeforeCompaction(1).build())
.withStorageConfig(HoodieStorageConfig.newBuilder().limitFileSize(1024 * 1024 * 1024).build())
.withEmbeddedTimelineServerEnabled(true)
.forTable("test-trip-table")
.withEmbeddedTimelineServerEnabled(true).forTable("test-trip-table")
.withIndexConfig(HoodieIndexConfig.newBuilder().withIndexType(indexType).build());
}
@@ -1264,4 +1275,4 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
assertFalse("Errors found in write of " + status.getFileId(), status.hasErrors());
}
}
}
}