[HUDI-1276] [HUDI-1459] Make Clustering/ReplaceCommit and Metadata table be compatible (#2422)
* [HUDI-1276] [HUDI-1459] Make Clustering/ReplaceCommit and Metadata table be compatible
* Use filesystemview and json format from metadata. Add tests

Co-authored-by: Satish Kotha <satishkotha@uber.com>
This commit is contained in:
@@ -33,6 +33,7 @@ import org.apache.hudi.avro.model.HoodieCleanMetadata;
|
||||
import org.apache.hudi.avro.model.HoodieCleanerPlan;
|
||||
import org.apache.hudi.avro.model.HoodieCompactionPlan;
|
||||
import org.apache.hudi.avro.model.HoodieInstantInfo;
|
||||
import org.apache.hudi.avro.model.HoodieReplaceCommitMetadata;
|
||||
import org.apache.hudi.avro.model.HoodieRequestedReplaceMetadata;
|
||||
import org.apache.hudi.avro.model.HoodieRestoreMetadata;
|
||||
import org.apache.hudi.avro.model.HoodieRollbackMetadata;
|
||||
@@ -158,10 +159,14 @@ public class TimelineMetadataUtils {
|
||||
return deserializeAvroMetadata(bytes, HoodieSavepointMetadata.class);
|
||||
}
|
||||
|
||||
public static HoodieRequestedReplaceMetadata deserializeRequestedReplaceMetadta(byte[] bytes) throws IOException {
|
||||
public static HoodieRequestedReplaceMetadata deserializeRequestedReplaceMetadata(byte[] bytes) throws IOException {
|
||||
return deserializeAvroMetadata(bytes, HoodieRequestedReplaceMetadata.class);
|
||||
}
|
||||
|
||||
public static HoodieReplaceCommitMetadata deserializeHoodieReplaceMetadata(byte[] bytes) throws IOException {
|
||||
return deserializeAvroMetadata(bytes, HoodieReplaceCommitMetadata.class);
|
||||
}
|
||||
|
||||
public static <T extends SpecificRecordBase> T deserializeAvroMetadata(byte[] bytes, Class<T> clazz)
|
||||
throws IOException {
|
||||
DatumReader<T> reader = new SpecificDatumReader<>(clazz);
|
||||
|
||||
@@ -62,6 +62,7 @@ import java.util.function.Predicate;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import static org.apache.hudi.common.table.timeline.HoodieTimeline.GREATER_THAN;
|
||||
import static org.apache.hudi.common.table.timeline.HoodieTimeline.GREATER_THAN_OR_EQUALS;
|
||||
import static org.apache.hudi.common.table.timeline.HoodieTimeline.METADATA_BOOTSTRAP_INSTANT_TS;
|
||||
|
||||
@@ -690,6 +691,16 @@ public abstract class AbstractTableFileSystemView implements SyncableFileSystemV
|
||||
return getAllFileGroupsIncludingReplaced(partitionPath).filter(fg -> isFileGroupReplacedBeforeOrOn(fg.getFileGroupId(), maxCommitTime));
|
||||
}
|
||||
|
||||
@Override
|
||||
public Stream<HoodieFileGroup> getReplacedFileGroupsBefore(String maxCommitTime, String partitionPath) {
|
||||
return getAllFileGroupsIncludingReplaced(partitionPath).filter(fg -> isFileGroupReplacedBefore(fg.getFileGroupId(), maxCommitTime));
|
||||
}
|
||||
|
||||
@Override
|
||||
public Stream<HoodieFileGroup> getAllReplacedFileGroups(String partitionPath) {
|
||||
return getAllFileGroupsIncludingReplaced(partitionPath).filter(fg -> isFileGroupReplaced(fg.getFileGroupId()));
|
||||
}
|
||||
|
||||
@Override
|
||||
public final Stream<Pair<HoodieFileGroupId, HoodieInstant>> getFileGroupsInPendingClustering() {
|
||||
try {
|
||||
@@ -1041,6 +1052,15 @@ public abstract class AbstractTableFileSystemView implements SyncableFileSystemV
|
||||
return isFileGroupReplacedBeforeOrOn(fileGroupId, instants.stream().max(Comparator.naturalOrder()).get());
|
||||
}
|
||||
|
||||
private boolean isFileGroupReplacedBefore(HoodieFileGroupId fileGroupId, String instant) {
|
||||
Option<HoodieInstant> hoodieInstantOption = getReplaceInstant(fileGroupId);
|
||||
if (!hoodieInstantOption.isPresent()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return HoodieTimeline.compareTimestamps(instant, GREATER_THAN, hoodieInstantOption.get().getTimestamp());
|
||||
}
|
||||
|
||||
private boolean isFileGroupReplacedBeforeOrOn(HoodieFileGroupId fileGroupId, String instant) {
|
||||
Option<HoodieInstant> hoodieInstantOption = getReplaceInstant(fileGroupId);
|
||||
if (!hoodieInstantOption.isPresent()) {
|
||||
|
||||
@@ -199,6 +199,16 @@ public class PriorityBasedFileSystemView implements SyncableFileSystemView, Seri
|
||||
return execute(maxCommitTime, partitionPath, preferredView::getReplacedFileGroupsBeforeOrOn, secondaryView::getReplacedFileGroupsBeforeOrOn);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Stream<HoodieFileGroup> getReplacedFileGroupsBefore(String maxCommitTime, String partitionPath) {
|
||||
return execute(maxCommitTime, partitionPath, preferredView::getReplacedFileGroupsBefore, secondaryView::getReplacedFileGroupsBefore);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Stream<HoodieFileGroup> getAllReplacedFileGroups(String partitionPath) {
|
||||
return execute(partitionPath, preferredView::getAllReplacedFileGroups, secondaryView::getAllReplacedFileGroups);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Stream<Pair<String, CompactionOperation>> getPendingCompactionOperations() {
|
||||
return execute(preferredView::getPendingCompactionOperations, secondaryView::getPendingCompactionOperations);
|
||||
|
||||
@@ -91,6 +91,12 @@ public class RemoteHoodieTableFileSystemView implements SyncableFileSystemView,
|
||||
public static final String ALL_REPLACED_FILEGROUPS_BEFORE_OR_ON =
|
||||
String.format("%s/%s", BASE_URL, "filegroups/replaced/beforeoron/");
|
||||
|
||||
public static final String ALL_REPLACED_FILEGROUPS_BEFORE =
|
||||
String.format("%s/%s", BASE_URL, "filegroups/replaced/before/");
|
||||
|
||||
public static final String ALL_REPLACED_FILEGROUPS_PARTITION =
|
||||
String.format("%s/%s", BASE_URL, "filegroups/replaced/partition/");
|
||||
|
||||
public static final String PENDING_CLUSTERING_FILEGROUPS = String.format("%s/%s", BASE_URL, "clustering/pending/");
|
||||
|
||||
|
||||
@@ -380,6 +386,30 @@ public class RemoteHoodieTableFileSystemView implements SyncableFileSystemView,
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Stream<HoodieFileGroup> getReplacedFileGroupsBefore(String maxCommitTime, String partitionPath) {
|
||||
Map<String, String> paramsMap = getParamsWithAdditionalParam(partitionPath, MAX_INSTANT_PARAM, maxCommitTime);
|
||||
try {
|
||||
List<FileGroupDTO> fileGroups = executeRequest(ALL_REPLACED_FILEGROUPS_BEFORE, paramsMap,
|
||||
new TypeReference<List<FileGroupDTO>>() {}, RequestMethod.GET);
|
||||
return fileGroups.stream().map(dto -> FileGroupDTO.toFileGroup(dto, metaClient));
|
||||
} catch (IOException e) {
|
||||
throw new HoodieRemoteException(e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Stream<HoodieFileGroup> getAllReplacedFileGroups(String partitionPath) {
|
||||
Map<String, String> paramsMap = getParamsWithPartitionPath(partitionPath);
|
||||
try {
|
||||
List<FileGroupDTO> fileGroups = executeRequest(ALL_REPLACED_FILEGROUPS_PARTITION, paramsMap,
|
||||
new TypeReference<List<FileGroupDTO>>() {}, RequestMethod.GET);
|
||||
return fileGroups.stream().map(dto -> FileGroupDTO.toFileGroup(dto, metaClient));
|
||||
} catch (IOException e) {
|
||||
throw new HoodieRemoteException(e);
|
||||
}
|
||||
}
|
||||
|
||||
public boolean refresh() {
|
||||
Map<String, String> paramsMap = getParams();
|
||||
try {
|
||||
|
||||
@@ -167,10 +167,20 @@ public interface TableFileSystemView {
|
||||
HoodieTimeline getTimeline();
|
||||
|
||||
/**
|
||||
* Stream all the replaced file groups before maxCommitTime.
|
||||
* Stream all the replaced file groups before or on maxCommitTime for given partition.
|
||||
*/
|
||||
Stream<HoodieFileGroup> getReplacedFileGroupsBeforeOrOn(String maxCommitTime, String partitionPath);
|
||||
|
||||
/**
|
||||
* Stream all the replaced file groups before maxCommitTime for given partition.
|
||||
*/
|
||||
Stream<HoodieFileGroup> getReplacedFileGroupsBefore(String maxCommitTime, String partitionPath);
|
||||
|
||||
/**
|
||||
* Stream all the replaced file groups for given partition.
|
||||
*/
|
||||
Stream<HoodieFileGroup> getAllReplacedFileGroups(String partitionPath);
|
||||
|
||||
/**
|
||||
* Filegroups that are in pending clustering.
|
||||
*/
|
||||
|
||||
@@ -86,7 +86,7 @@ public class ClusteringUtils {
|
||||
LOG.warn("No content found in requested file for instant " + pendingReplaceInstant);
|
||||
return Option.empty();
|
||||
}
|
||||
HoodieRequestedReplaceMetadata requestedReplaceMetadata = TimelineMetadataUtils.deserializeRequestedReplaceMetadta(content.get());
|
||||
HoodieRequestedReplaceMetadata requestedReplaceMetadata = TimelineMetadataUtils.deserializeRequestedReplaceMetadata(content.get());
|
||||
if (WriteOperationType.CLUSTER.name().equals(requestedReplaceMetadata.getOperationType())) {
|
||||
return Option.of(Pair.of(pendingReplaceInstant, requestedReplaceMetadata.getClusteringPlan()));
|
||||
}
|
||||
|
||||
@@ -25,6 +25,7 @@ import org.apache.hudi.avro.model.HoodieRestoreMetadata;
|
||||
import org.apache.hudi.avro.model.HoodieRollbackMetadata;
|
||||
import org.apache.hudi.common.model.HoodieCommitMetadata;
|
||||
import org.apache.hudi.common.model.HoodieRecord;
|
||||
import org.apache.hudi.common.model.HoodieReplaceCommitMetadata;
|
||||
import org.apache.hudi.common.table.HoodieTableMetaClient;
|
||||
import org.apache.hudi.common.table.timeline.HoodieInstant;
|
||||
import org.apache.hudi.common.table.timeline.HoodieTimeline;
|
||||
@@ -92,6 +93,12 @@ public class HoodieTableMetadataUtil {
|
||||
case HoodieTimeline.SAVEPOINT_ACTION:
|
||||
// Nothing to be done here
|
||||
break;
|
||||
case HoodieTimeline.REPLACE_COMMIT_ACTION:
|
||||
HoodieReplaceCommitMetadata replaceMetadata = HoodieReplaceCommitMetadata.fromBytes(
|
||||
timeline.getInstantDetails(instant).get(), HoodieReplaceCommitMetadata.class);
|
||||
// Note: we only add new files created here. Replaced files are removed from metadata later by cleaner.
|
||||
records = Option.of(convertMetadataToRecords(replaceMetadata, instant.getTimestamp()));
|
||||
break;
|
||||
default:
|
||||
throw new HoodieException("Unknown type of action " + instant.getAction());
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user