1
0

Fix missing HoodieRecordLocation in HoodieRecord when a record is read back from disk after being spilled

This commit is contained in:
Nishith Agarwal
2018-07-12 17:45:10 -07:00
committed by vinoth chandar
parent f62890ca1f
commit 44caf0d40c
5 changed files with 44 additions and 13 deletions

View File

@@ -165,9 +165,12 @@ public class TestHoodieReadClient extends TestHoodieClientBase {
JavaRDD<WriteStatus> result =
insertFirstBatch(hoodieWriteConfig, client, newCommitTime, initCommitTime, numRecords, insertFn, isPrepped,
true, numRecords);
// Rebuild each HoodieRecord from the WriteStatus using only its HoodieKey (data is passed as null),
// since the HoodieKey, data and HoodieRecordLocation of the written records have been modified in the DAG
JavaRDD<HoodieRecord> recordRDD =
jsc.parallelize(
result.collect().stream().map(WriteStatus::getWrittenRecords).flatMap(Collection::stream)
.map(record -> new HoodieRecord(record.getKey(), null))
.collect(Collectors.toList()));
// Should have 100 records in table (check using Index), all in locations marked at commit
HoodieReadClient readClient = new HoodieReadClient(jsc, hoodieWriteConfig.getBasePath());
@@ -186,6 +189,7 @@ public class TestHoodieReadClient extends TestHoodieClientBase {
recordRDD =
jsc.parallelize(
result.collect().stream().map(WriteStatus::getWrittenRecords).flatMap(Collection::stream)
.map(record -> new HoodieRecord(record.getKey(), null))
.collect(Collectors.toList()));
// Index should be able to locate all updates in correct locations.
readClient = new HoodieReadClient(jsc, hoodieWriteConfig.getBasePath());