1
0

[HUDI-65] commitTime rename to instantTime (#1431)

This commit is contained in:
Zhiyuan Zhao
2020-03-23 09:06:00 +08:00
committed by GitHub
parent 38c3ccc51a
commit 0241b21f77
68 changed files with 673 additions and 673 deletions

View File

@@ -80,7 +80,7 @@ public abstract class AbstractBaseTestSource extends AvroSource {
super(props, sparkContext, sparkSession, schemaProvider);
}
protected static Stream<GenericRecord> fetchNextBatch(TypedProperties props, int sourceLimit, String commitTime,
protected static Stream<GenericRecord> fetchNextBatch(TypedProperties props, int sourceLimit, String instantTime,
int partition) {
int maxUniqueKeys =
props.getInteger(TestSourceConfig.MAX_UNIQUE_RECORDS_PROP, TestSourceConfig.DEFAULT_MAX_UNIQUE_RECORDS);
@@ -116,14 +116,14 @@ public abstract class AbstractBaseTestSource extends AvroSource {
LOG.info("After adjustments => NumInserts=" + numInserts + ", NumUpdates=" + (numUpdates - 50) + ", NumDeletes=50, maxUniqueRecords="
+ maxUniqueKeys);
// if we generate update followed by deletes -> some keys in update batch might be picked up for deletes. Hence generating delete batch followed by updates
deleteStream = dataGenerator.generateUniqueDeleteRecordStream(commitTime, 50).map(hr -> AbstractBaseTestSource.toGenericRecord(hr, dataGenerator));
updateStream = dataGenerator.generateUniqueUpdatesStream(commitTime, numUpdates - 50).map(hr -> AbstractBaseTestSource.toGenericRecord(hr, dataGenerator));
deleteStream = dataGenerator.generateUniqueDeleteRecordStream(instantTime, 50).map(hr -> AbstractBaseTestSource.toGenericRecord(hr, dataGenerator));
updateStream = dataGenerator.generateUniqueUpdatesStream(instantTime, numUpdates - 50).map(hr -> AbstractBaseTestSource.toGenericRecord(hr, dataGenerator));
} else {
LOG.info("After adjustments => NumInserts=" + numInserts + ", NumUpdates=" + numUpdates + ", maxUniqueRecords=" + maxUniqueKeys);
updateStream = dataGenerator.generateUniqueUpdatesStream(commitTime, numUpdates)
updateStream = dataGenerator.generateUniqueUpdatesStream(instantTime, numUpdates)
.map(hr -> AbstractBaseTestSource.toGenericRecord(hr, dataGenerator));
}
Stream<GenericRecord> insertStream = dataGenerator.generateInsertsStream(commitTime, numInserts, false)
Stream<GenericRecord> insertStream = dataGenerator.generateInsertsStream(instantTime, numInserts, false)
.map(hr -> AbstractBaseTestSource.toGenericRecord(hr, dataGenerator));
return Stream.concat(deleteStream, Stream.concat(updateStream, insertStream));
}

View File

@@ -99,14 +99,14 @@ public abstract class AbstractDFSSourceTestBase extends UtilitiesTestBase {
* Generates a batch of test data and writes the data to a file.
*
* @param filename The name of the file.
* @param commitTime The commit time.
* @param instantTime The instant time.
* @param n The number of records to generate.
* @return The file path.
* @throws IOException
*/
Path generateOneFile(String filename, String commitTime, int n) throws IOException {
Path generateOneFile(String filename, String instantTime, int n) throws IOException {
Path path = new Path(dfsRoot, filename + fileSuffix);
writeNewDataToFile(dataGenerator.generateInserts(commitTime, n, useFlattenedSchema), path);
writeNewDataToFile(dataGenerator.generateInserts(instantTime, n, useFlattenedSchema), path);
return path;
}

View File

@@ -52,12 +52,12 @@ public class DistributedTestDataSource extends AbstractBaseTestSource {
@Override
protected InputBatch<JavaRDD<GenericRecord>> fetchNewData(Option<String> lastCkptStr, long sourceLimit) {
int nextCommitNum = lastCkptStr.map(s -> Integer.parseInt(s) + 1).orElse(0);
String commitTime = String.format("%05d", nextCommitNum);
String instantTime = String.format("%05d", nextCommitNum);
LOG.info("Source Limit is set to " + sourceLimit);
// No new data.
if (sourceLimit <= 0) {
return new InputBatch<>(Option.empty(), commitTime);
return new InputBatch<>(Option.empty(), instantTime);
}
TypedProperties newProps = new TypedProperties();
@@ -76,8 +76,8 @@ public class DistributedTestDataSource extends AbstractBaseTestSource {
if (!dataGeneratorMap.containsKey(p)) {
initDataGen(newProps, p);
}
return fetchNextBatch(newProps, perPartitionSourceLimit, commitTime, p).iterator();
return fetchNextBatch(newProps, perPartitionSourceLimit, instantTime, p).iterator();
}, true);
return new InputBatch<>(Option.of(avroRDD), commitTime);
return new InputBatch<>(Option.of(avroRDD), instantTime);
}
}

View File

@@ -49,7 +49,7 @@ public class TestDataSource extends AbstractBaseTestSource {
protected InputBatch<JavaRDD<GenericRecord>> fetchNewData(Option<String> lastCheckpointStr, long sourceLimit) {
int nextCommitNum = lastCheckpointStr.map(s -> Integer.parseInt(s) + 1).orElse(0);
String commitTime = String.format("%05d", nextCommitNum);
String instantTime = String.format("%05d", nextCommitNum);
LOG.info("Source Limit is set to " + sourceLimit);
// No new data.
@@ -58,8 +58,8 @@ public class TestDataSource extends AbstractBaseTestSource {
}
List<GenericRecord> records =
fetchNextBatch(props, (int) sourceLimit, commitTime, DEFAULT_PARTITION_NUM).collect(Collectors.toList());
fetchNextBatch(props, (int) sourceLimit, instantTime, DEFAULT_PARTITION_NUM).collect(Collectors.toList());
JavaRDD<GenericRecord> avroRDD = sparkContext.<GenericRecord>parallelize(records, 4);
return new InputBatch<>(Option.of(avroRDD), commitTime);
return new InputBatch<>(Option.of(avroRDD), instantTime);
}
}