[HUDI-65] commitTime rename to instantTime (#1431)
@@ -80,7 +80,7 @@ public abstract class AbstractBaseTestSource extends AvroSource {
     super(props, sparkContext, sparkSession, schemaProvider);
   }

-  protected static Stream<GenericRecord> fetchNextBatch(TypedProperties props, int sourceLimit, String commitTime,
+  protected static Stream<GenericRecord> fetchNextBatch(TypedProperties props, int sourceLimit, String instantTime,
       int partition) {
     int maxUniqueKeys =
         props.getInteger(TestSourceConfig.MAX_UNIQUE_RECORDS_PROP, TestSourceConfig.DEFAULT_MAX_UNIQUE_RECORDS);

@@ -116,14 +116,14 @@ public abstract class AbstractBaseTestSource extends AvroSource {
       LOG.info("After adjustments => NumInserts=" + numInserts + ", NumUpdates=" + (numUpdates - 50) + ", NumDeletes=50, maxUniqueRecords="
           + maxUniqueKeys);
       // if we generate update followed by deletes -> some keys in update batch might be picked up for deletes. Hence generating delete batch followed by updates
-      deleteStream = dataGenerator.generateUniqueDeleteRecordStream(commitTime, 50).map(hr -> AbstractBaseTestSource.toGenericRecord(hr, dataGenerator));
-      updateStream = dataGenerator.generateUniqueUpdatesStream(commitTime, numUpdates - 50).map(hr -> AbstractBaseTestSource.toGenericRecord(hr, dataGenerator));
+      deleteStream = dataGenerator.generateUniqueDeleteRecordStream(instantTime, 50).map(hr -> AbstractBaseTestSource.toGenericRecord(hr, dataGenerator));
+      updateStream = dataGenerator.generateUniqueUpdatesStream(instantTime, numUpdates - 50).map(hr -> AbstractBaseTestSource.toGenericRecord(hr, dataGenerator));
     } else {
       LOG.info("After adjustments => NumInserts=" + numInserts + ", NumUpdates=" + numUpdates + ", maxUniqueRecords=" + maxUniqueKeys);
-      updateStream = dataGenerator.generateUniqueUpdatesStream(commitTime, numUpdates)
+      updateStream = dataGenerator.generateUniqueUpdatesStream(instantTime, numUpdates)
           .map(hr -> AbstractBaseTestSource.toGenericRecord(hr, dataGenerator));
     }
-    Stream<GenericRecord> insertStream = dataGenerator.generateInsertsStream(commitTime, numInserts, false)
+    Stream<GenericRecord> insertStream = dataGenerator.generateInsertsStream(instantTime, numInserts, false)
         .map(hr -> AbstractBaseTestSource.toGenericRecord(hr, dataGenerator));
     return Stream.concat(deleteStream, Stream.concat(updateStream, insertStream));
   }

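The comment in the hunk above explains why the delete batch is drawn before the update batch: drawing updates first could hand some of the same keys to the later delete draw, producing a record that is both updated and deleted at the same instant. The following is a minimal standalone sketch of that ordering concern; the key pool, batch sizes, and class name are hypothetical and are not Hudi's actual generator internals.

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

// Sketch (not Hudi code): drawing deletes first and excluding those keys
// from the update draw keeps the two batches disjoint. Reversing the order
// without the exclusion check is what the source comment warns against.
public class BatchOrderingSketch {

  // Hypothetical stand-in for the generator's unique-key pool.
  static List<String> keyPool(int n) {
    List<String> keys = new ArrayList<>();
    for (int i = 0; i < n; i++) {
      keys.add("key-" + i);
    }
    return keys;
  }

  public static void main(String[] args) {
    List<String> pool = keyPool(100);

    // Delete batch first: reserve its keys.
    Set<String> deleteKeys = new HashSet<>(pool.subList(0, 50));

    // Update batch second: draw only keys not reserved for deletion.
    List<String> updateKeys = new ArrayList<>();
    for (String key : pool) {
      if (!deleteKeys.contains(key) && updateKeys.size() < 30) {
        updateKeys.add(key);
      }
    }

    // The two batches are guaranteed disjoint.
    for (String key : updateKeys) {
      assert !deleteKeys.contains(key);
    }
    System.out.println("deletes=" + deleteKeys.size() + ", updates=" + updateKeys.size());
  }
}
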
@@ -99,14 +99,14 @@ public abstract class AbstractDFSSourceTestBase extends UtilitiesTestBase {
    * Generates a batch of test data and writes the data to a file.
    *
    * @param filename The name of the file.
-   * @param commitTime The commit time.
+   * @param instantTime The commit time.
    * @param n The number of records to generate.
    * @return The file path.
    * @throws IOException
    */
-  Path generateOneFile(String filename, String commitTime, int n) throws IOException {
+  Path generateOneFile(String filename, String instantTime, int n) throws IOException {
     Path path = new Path(dfsRoot, filename + fileSuffix);
-    writeNewDataToFile(dataGenerator.generateInserts(commitTime, n, useFlattenedSchema), path);
+    writeNewDataToFile(dataGenerator.generateInserts(instantTime, n, useFlattenedSchema), path);
     return path;
   }

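For context, generateOneFile builds the target path from the test root, file name, and suffix, writes one batch of generated inserts tagged with the instant time, and returns the path. Below is a runnable sketch of that shape using java.nio instead of Hudi's Hadoop FileSystem helpers so it is self-contained; the suffix, record shape, and class name are assumptions, not the actual test utilities.

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;

// Sketch (not Hudi code): write n generated records, each tagged with the
// instant time, to root/filename+suffix and return the path.
public class GenerateOneFileSketch {

  static Path generateOneFile(Path dfsRoot, String filename, String instantTime, int n) throws IOException {
    Path path = dfsRoot.resolve(filename + ".json"); // suffix is hypothetical
    List<String> records = new ArrayList<>();
    for (int i = 0; i < n; i++) {
      records.add("{\"_row_key\": \"key-" + i + "\", \"instant\": \"" + instantTime + "\"}");
    }
    Files.createDirectories(dfsRoot);
    return Files.write(path, records);
  }

  public static void main(String[] args) throws IOException {
    Path out = generateOneFile(Path.of("/tmp/test-dfs"), "batch-1", "00000", 3);
    System.out.println("wrote " + Files.readAllLines(out).size() + " records to " + out);
  }
}
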
@@ -52,12 +52,12 @@ public class DistributedTestDataSource extends AbstractBaseTestSource {
   @Override
   protected InputBatch<JavaRDD<GenericRecord>> fetchNewData(Option<String> lastCkptStr, long sourceLimit) {
     int nextCommitNum = lastCkptStr.map(s -> Integer.parseInt(s) + 1).orElse(0);
-    String commitTime = String.format("%05d", nextCommitNum);
+    String instantTime = String.format("%05d", nextCommitNum);
     LOG.info("Source Limit is set to " + sourceLimit);

     // No new data.
     if (sourceLimit <= 0) {
-      return new InputBatch<>(Option.empty(), commitTime);
+      return new InputBatch<>(Option.empty(), instantTime);
     }

     TypedProperties newProps = new TypedProperties();

@@ -76,8 +76,8 @@ public class DistributedTestDataSource extends AbstractBaseTestSource {
       if (!dataGeneratorMap.containsKey(p)) {
         initDataGen(newProps, p);
       }
-      return fetchNextBatch(newProps, perPartitionSourceLimit, commitTime, p).iterator();
+      return fetchNextBatch(newProps, perPartitionSourceLimit, instantTime, p).iterator();
     }, true);
-    return new InputBatch<>(Option.of(avroRDD), commitTime);
+    return new InputBatch<>(Option.of(avroRDD), instantTime);
   }
 }

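Both test sources derive the renamed instant time from the last checkpoint the same way: the checkpoint string is the previous instant, so the next one is the parsed value plus one, zero-padded to five digits. A minimal sketch of that derivation, using java.util.Optional in place of Hudi's Option wrapper:

import java.util.Optional;

// Sketch of the checkpoint-to-instant derivation seen in both hunks.
public class InstantTimeSketch {

  static String nextInstantTime(Optional<String> lastCheckpoint) {
    int nextCommitNum = lastCheckpoint.map(s -> Integer.parseInt(s) + 1).orElse(0);
    return String.format("%05d", nextCommitNum);
  }

  public static void main(String[] args) {
    System.out.println(nextInstantTime(Optional.empty()));      // 00000 (first batch)
    System.out.println(nextInstantTime(Optional.of("00007")));  // 00008
  }
}
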
@@ -49,7 +49,7 @@ public class TestDataSource extends AbstractBaseTestSource {
   protected InputBatch<JavaRDD<GenericRecord>> fetchNewData(Option<String> lastCheckpointStr, long sourceLimit) {

     int nextCommitNum = lastCheckpointStr.map(s -> Integer.parseInt(s) + 1).orElse(0);
-    String commitTime = String.format("%05d", nextCommitNum);
+    String instantTime = String.format("%05d", nextCommitNum);
     LOG.info("Source Limit is set to " + sourceLimit);

     // No new data.

@@ -58,8 +58,8 @@ public class TestDataSource extends AbstractBaseTestSource {
     }

     List<GenericRecord> records =
-        fetchNextBatch(props, (int) sourceLimit, commitTime, DEFAULT_PARTITION_NUM).collect(Collectors.toList());
+        fetchNextBatch(props, (int) sourceLimit, instantTime, DEFAULT_PARTITION_NUM).collect(Collectors.toList());
     JavaRDD<GenericRecord> avroRDD = sparkContext.<GenericRecord>parallelize(records, 4);
-    return new InputBatch<>(Option.of(avroRDD), commitTime);
+    return new InputBatch<>(Option.of(avroRDD), instantTime);
   }
 }

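In every return site touched by this rename, the fetched RDD (or Option.empty() when there is no data) travels together with the instant time, which serves as the checkpoint for the next fetch. Below is a stripped-down stand-in for that pairing, not Hudi's actual InputBatch class; the field and method names are assumptions.

import java.util.Optional;

// Sketch (not Hudi's InputBatch): a payload, if any, is carried alongside
// the checkpoint string -- here the instant time -- so the next fetch can
// resume from it even when the current batch is empty.
public class InputBatchSketch<T> {

  private final Optional<T> batch;
  private final String checkpointForNextBatch;

  public InputBatchSketch(Optional<T> batch, String checkpointForNextBatch) {
    this.batch = batch;
    this.checkpointForNextBatch = checkpointForNextBatch;
  }

  public Optional<T> getBatch() {
    return batch;
  }

  public String getCheckpointForNextBatch() {
    return checkpointForNextBatch;
  }

  public static void main(String[] args) {
    // Even an empty batch carries the instant time forward as the checkpoint.
    InputBatchSketch<String> empty = new InputBatchSketch<>(Optional.empty(), "00008");
    System.out.println(empty.getBatch().isPresent() + " " + empty.getCheckpointForNextBatch());
  }
}

Carrying the checkpoint even on an empty batch is what lets a source resume at the right instant after idle rounds.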