[HUDI-426] Bootstrap datasource integration (#1702)
This commit is contained in:
@@ -69,6 +69,11 @@ public class HoodieAvroUtils {
|
||||
|
||||
private static ThreadLocal<BinaryDecoder> reuseDecoder = ThreadLocal.withInitial(() -> null);
|
||||
|
||||
// As per https://avro.apache.org/docs/current/spec.html#names
|
||||
private static String INVALID_AVRO_CHARS_IN_NAMES = "[^A-Za-z0-9_]";
|
||||
private static String INVALID_AVRO_FIRST_CHAR_IN_NAMES = "[^A-Za-z_]";
|
||||
private static String MASK_FOR_INVALID_CHARS_IN_NAMES = "__";
|
||||
|
||||
// All metadata fields are optional strings.
|
||||
public static final Schema METADATA_FIELD_SCHEMA =
|
||||
Schema.createUnion(Arrays.asList(Schema.create(Schema.Type.NULL), Schema.create(Schema.Type.STRING)));
|
||||
@@ -444,4 +449,21 @@ public class HoodieAvroUtils {
|
||||
}
|
||||
return fieldSchema.getLogicalType() == LogicalTypes.date();
|
||||
}
|
||||
|
||||
public static Schema getNullSchema() {
|
||||
return Schema.create(Schema.Type.NULL);
|
||||
}
|
||||
|
||||
/**
|
||||
* Sanitizes Name according to Avro rule for names.
|
||||
* Removes characters other than the ones mentioned in https://avro.apache.org/docs/current/spec.html#names .
|
||||
* @param name input name
|
||||
* @return sanitized name
|
||||
*/
|
||||
public static String sanitizeName(String name) {
|
||||
if (name.substring(0,1).matches(INVALID_AVRO_FIRST_CHAR_IN_NAMES)) {
|
||||
name = name.replaceFirst(INVALID_AVRO_FIRST_CHAR_IN_NAMES, MASK_FOR_INVALID_CHARS_IN_NAMES);
|
||||
}
|
||||
return name.replaceAll(INVALID_AVRO_CHARS_IN_NAMES, MASK_FOR_INVALID_CHARS_IN_NAMES);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -18,6 +18,7 @@
|
||||
|
||||
package org.apache.hudi.common.model;
|
||||
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hudi.common.fs.FSUtils;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonAutoDetect;
|
||||
@@ -126,6 +127,18 @@ public class HoodieCommitMetadata implements Serializable {
|
||||
return fullPaths;
|
||||
}
|
||||
|
||||
public Map<HoodieFileGroupId, String> getFileGroupIdAndFullPaths(String basePath) {
|
||||
Map<HoodieFileGroupId, String> fileGroupIdToFullPaths = new HashMap<>();
|
||||
for (Map.Entry<String, List<HoodieWriteStat>> entry : getPartitionToWriteStats().entrySet()) {
|
||||
for (HoodieWriteStat stat : entry.getValue()) {
|
||||
HoodieFileGroupId fileGroupId = new HoodieFileGroupId(stat.getPartitionPath(), stat.getFileId());
|
||||
Path fullPath = new Path(basePath, stat.getPath());
|
||||
fileGroupIdToFullPaths.put(fileGroupId, fullPath.toString());
|
||||
}
|
||||
}
|
||||
return fileGroupIdToFullPaths;
|
||||
}
|
||||
|
||||
public String toJsonString() throws IOException {
|
||||
if (partitionToWriteStats.containsKey(null)) {
|
||||
LOG.info("partition path is null for " + partitionToWriteStats.get(null));
|
||||
|
||||
Reference in New Issue
Block a user