1
0

[HUDI-2394] Implement Kafka Sink Protocol for Hudi for Ingesting Immutable Data (#3592)

- Fixing packaging, naming of classes
 - Use of log4j over slf4j for uniformity
- More follow-on fixes
 - Added a version to control/coordinator events.
 - Eliminated the config added to write config
 - Fixed fetching of checkpoints based on table type
 - Clean up of naming, code placement

Co-authored-by: Rajesh Mahindra <rmahindra@Rajeshs-MacBook-Pro.local>
Co-authored-by: Vinoth Chandar <vinoth@apache.org>
This commit is contained in:
rmahindra123
2021-09-10 18:20:26 -07:00
committed by GitHub
parent bd1d2d4952
commit e528dd798a
51 changed files with 4710 additions and 22 deletions

View File

@@ -71,17 +71,20 @@ public final class SchemaTestUtil {
return toRecords(getSimpleSchema(), getSimpleSchema(), from, limit);
}
public static List<String> generateTestJsonRecords(int from, int limit) throws IOException, URISyntaxException {
Path dataPath = initializeSampleDataPath();
try (Stream<String> stream = Files.lines(dataPath)) {
return stream.skip(from).limit(limit).collect(Collectors.toList());
} catch (IOException e) {
throw new HoodieIOException("Could not read data from " + RESOURCE_SAMPLE_DATA, e);
}
}
private static List<IndexedRecord> toRecords(Schema writerSchema, Schema readerSchema, int from, int limit)
throws IOException, URISyntaxException {
GenericDatumReader<IndexedRecord> reader = new GenericDatumReader<>(writerSchema, readerSchema);
// Required to register the necessary JAR:// file system
URI resource = SchemaTestUtil.class.getResource(RESOURCE_SAMPLE_DATA).toURI();
Path dataPath;
if (resource.toString().contains("!")) {
dataPath = uriToPath(resource);
} else {
dataPath = Paths.get(SchemaTestUtil.class.getResource(RESOURCE_SAMPLE_DATA).toURI());
}
Path dataPath = initializeSampleDataPath();
try (Stream<String> stream = Files.lines(dataPath)) {
return stream.skip(from).limit(limit).map(s -> {
@@ -96,6 +99,21 @@ public final class SchemaTestUtil {
}
}
/**
* Required to register the necessary JAR:// file system.
* @return Path to the sample data in the resource file.
* @throws IOException
* @throws URISyntaxException
*/
private static Path initializeSampleDataPath() throws IOException, URISyntaxException {
URI resource = SchemaTestUtil.class.getResource(RESOURCE_SAMPLE_DATA).toURI();
if (resource.toString().contains("!")) {
return uriToPath(resource);
} else {
return Paths.get(SchemaTestUtil.class.getResource(RESOURCE_SAMPLE_DATA).toURI());
}
}
public static Path uriToPath(URI uri) throws IOException {
final Map<String, String> env = new HashMap<>();
final String[] array = uri.toString().split("!");