1
0

[HUDI-2394] Implement Kafka Sink Protocol for Hudi for Ingesting Immutable Data (#3592)

- Fixing packaging, naming of classes
- Use of log4j over slf4j for uniformity
- More follow-on fixes
- Added a version to control/coordinator events
- Eliminated the config added to write config
- Fixed fetching of checkpoints based on table type
- Clean up of naming, code placement

Co-authored-by: Rajesh Mahindra <rmahindra@Rajeshs-MacBook-Pro.local>
Co-authored-by: Vinoth Chandar <vinoth@apache.org>
This commit is contained in:
rmahindra123
2021-09-10 18:20:26 -07:00
committed by GitHub
parent bd1d2d4952
commit e528dd798a
51 changed files with 4710 additions and 22 deletions

View File

@@ -30,7 +30,8 @@ public class ConfigGroups {
FLINK_SQL("Flink Sql Configs"),
WRITE_CLIENT("Write Client Configs"),
METRICS("Metrics Configs"),
RECORD_PAYLOAD("Record Payload Config");
RECORD_PAYLOAD("Record Payload Config"),
KAFKA_CONNECT("Kafka Connect Configs");
public final String name;
@@ -72,6 +73,9 @@ public class ConfigGroups {
description = "These set of configs are used to enable monitoring and reporting of key"
+ "Hudi stats and metrics.";
break;
case KAFKA_CONNECT:
description = "These set of configs are used for Kakfa Connect Sink Connector for writing Hudi Tables";
break;
default:
description = "Please fill in the description for Config Group Name: " + names.name;
break;

View File

@@ -26,6 +26,7 @@ import org.apache.hudi.common.model.WriteOperationType;
import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
import org.apache.hudi.exception.HoodieException;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;

View File

@@ -71,17 +71,20 @@ public final class SchemaTestUtil {
return toRecords(getSimpleSchema(), getSimpleSchema(), from, limit);
}
/**
 * Returns a window of raw JSON records read from the bundled sample-data resource.
 *
 * @param from  number of leading records to skip.
 * @param limit maximum number of records to return.
 * @return the selected records, one JSON document per list entry.
 * @throws IOException        if the sample-data resource cannot be resolved.
 * @throws URISyntaxException if the resource URL is not a valid URI.
 */
public static List<String> generateTestJsonRecords(int from, int limit) throws IOException, URISyntaxException {
  final Path sampleData = initializeSampleDataPath();
  // Files.lines must be closed; try-with-resources releases the underlying file handle.
  try (Stream<String> lines = Files.lines(sampleData)) {
    return lines.skip(from).limit(limit).collect(Collectors.toList());
  } catch (IOException e) {
    throw new HoodieIOException("Could not read data from " + RESOURCE_SAMPLE_DATA, e);
  }
}
private static List<IndexedRecord> toRecords(Schema writerSchema, Schema readerSchema, int from, int limit)
throws IOException, URISyntaxException {
GenericDatumReader<IndexedRecord> reader = new GenericDatumReader<>(writerSchema, readerSchema);
// Required to register the necessary JAR:// file system
URI resource = SchemaTestUtil.class.getResource(RESOURCE_SAMPLE_DATA).toURI();
Path dataPath;
if (resource.toString().contains("!")) {
dataPath = uriToPath(resource);
} else {
dataPath = Paths.get(SchemaTestUtil.class.getResource(RESOURCE_SAMPLE_DATA).toURI());
}
Path dataPath = initializeSampleDataPath();
try (Stream<String> stream = Files.lines(dataPath)) {
return stream.skip(from).limit(limit).map(s -> {
@@ -96,6 +99,21 @@ public final class SchemaTestUtil {
}
}
/**
 * Resolves the sample-data resource to a filesystem {@link Path}. Required to register the
 * JAR:// file system when the resource is packaged inside a jar (its URI contains "!").
 *
 * @return Path to the sample data in the resource file.
 * @throws IOException        if the jar file system cannot be mounted.
 * @throws URISyntaxException if the resource URL cannot be converted to a URI.
 */
private static Path initializeSampleDataPath() throws IOException, URISyntaxException {
  URI resource = SchemaTestUtil.class.getResource(RESOURCE_SAMPLE_DATA).toURI();
  if (resource.toString().contains("!")) {
    // Resource lives inside a jar: mount it through the JAR file system first.
    return uriToPath(resource);
  }
  // Plain file-system resource: reuse the URI already resolved above instead of
  // performing a second, redundant getResource() lookup.
  return Paths.get(resource);
}
public static Path uriToPath(URI uri) throws IOException {
final Map<String, String> env = new HashMap<>();
final String[] array = uri.toString().split("!");