1
0

[HUDI-1276] [HUDI-1459] Make Clustering/ReplaceCommit and Metadata table be compatible (#2422)

* [HUDI-1276] [HUDI-1459] Make Clustering/ReplaceCommit and Metadata table be compatible

* Use filesystemview and json format from metadata. Add tests

Co-authored-by: Satish Kotha <satishkotha@uber.com>
This commit is contained in:
vinoth chandar
2021-01-09 16:53:34 -08:00
committed by GitHub
parent 79ec7b4894
commit 65866c45ec
17 changed files with 301 additions and 33 deletions

View File

@@ -33,6 +33,7 @@ import org.apache.hudi.avro.model.HoodieCleanMetadata;
import org.apache.hudi.avro.model.HoodieCleanerPlan;
import org.apache.hudi.avro.model.HoodieCompactionPlan;
import org.apache.hudi.avro.model.HoodieInstantInfo;
import org.apache.hudi.avro.model.HoodieReplaceCommitMetadata;
import org.apache.hudi.avro.model.HoodieRequestedReplaceMetadata;
import org.apache.hudi.avro.model.HoodieRestoreMetadata;
import org.apache.hudi.avro.model.HoodieRollbackMetadata;
@@ -158,10 +159,14 @@ public class TimelineMetadataUtils {
return deserializeAvroMetadata(bytes, HoodieSavepointMetadata.class);
}
public static HoodieRequestedReplaceMetadata deserializeRequestedReplaceMetadta(byte[] bytes) throws IOException {
public static HoodieRequestedReplaceMetadata deserializeRequestedReplaceMetadata(byte[] bytes) throws IOException {
return deserializeAvroMetadata(bytes, HoodieRequestedReplaceMetadata.class);
}
public static HoodieReplaceCommitMetadata deserializeHoodieReplaceMetadata(byte[] bytes) throws IOException {
return deserializeAvroMetadata(bytes, HoodieReplaceCommitMetadata.class);
}
public static <T extends SpecificRecordBase> T deserializeAvroMetadata(byte[] bytes, Class<T> clazz)
throws IOException {
DatumReader<T> reader = new SpecificDatumReader<>(clazz);

View File

@@ -62,6 +62,7 @@ import java.util.function.Predicate;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import static org.apache.hudi.common.table.timeline.HoodieTimeline.GREATER_THAN;
import static org.apache.hudi.common.table.timeline.HoodieTimeline.GREATER_THAN_OR_EQUALS;
import static org.apache.hudi.common.table.timeline.HoodieTimeline.METADATA_BOOTSTRAP_INSTANT_TS;
@@ -690,6 +691,16 @@ public abstract class AbstractTableFileSystemView implements SyncableFileSystemV
return getAllFileGroupsIncludingReplaced(partitionPath).filter(fg -> isFileGroupReplacedBeforeOrOn(fg.getFileGroupId(), maxCommitTime));
}
@Override
public Stream<HoodieFileGroup> getReplacedFileGroupsBefore(String maxCommitTime, String partitionPath) {
return getAllFileGroupsIncludingReplaced(partitionPath).filter(fg -> isFileGroupReplacedBefore(fg.getFileGroupId(), maxCommitTime));
}
@Override
public Stream<HoodieFileGroup> getAllReplacedFileGroups(String partitionPath) {
return getAllFileGroupsIncludingReplaced(partitionPath).filter(fg -> isFileGroupReplaced(fg.getFileGroupId()));
}
@Override
public final Stream<Pair<HoodieFileGroupId, HoodieInstant>> getFileGroupsInPendingClustering() {
try {
@@ -1041,6 +1052,15 @@ public abstract class AbstractTableFileSystemView implements SyncableFileSystemV
return isFileGroupReplacedBeforeOrOn(fileGroupId, instants.stream().max(Comparator.naturalOrder()).get());
}
private boolean isFileGroupReplacedBefore(HoodieFileGroupId fileGroupId, String instant) {
Option<HoodieInstant> hoodieInstantOption = getReplaceInstant(fileGroupId);
if (!hoodieInstantOption.isPresent()) {
return false;
}
return HoodieTimeline.compareTimestamps(instant, GREATER_THAN, hoodieInstantOption.get().getTimestamp());
}
private boolean isFileGroupReplacedBeforeOrOn(HoodieFileGroupId fileGroupId, String instant) {
Option<HoodieInstant> hoodieInstantOption = getReplaceInstant(fileGroupId);
if (!hoodieInstantOption.isPresent()) {

View File

@@ -199,6 +199,16 @@ public class PriorityBasedFileSystemView implements SyncableFileSystemView, Seri
return execute(maxCommitTime, partitionPath, preferredView::getReplacedFileGroupsBeforeOrOn, secondaryView::getReplacedFileGroupsBeforeOrOn);
}
@Override
public Stream<HoodieFileGroup> getReplacedFileGroupsBefore(String maxCommitTime, String partitionPath) {
return execute(maxCommitTime, partitionPath, preferredView::getReplacedFileGroupsBefore, secondaryView::getReplacedFileGroupsBefore);
}
@Override
public Stream<HoodieFileGroup> getAllReplacedFileGroups(String partitionPath) {
return execute(partitionPath, preferredView::getAllReplacedFileGroups, secondaryView::getAllReplacedFileGroups);
}
@Override
public Stream<Pair<String, CompactionOperation>> getPendingCompactionOperations() {
return execute(preferredView::getPendingCompactionOperations, secondaryView::getPendingCompactionOperations);

View File

@@ -91,6 +91,12 @@ public class RemoteHoodieTableFileSystemView implements SyncableFileSystemView,
public static final String ALL_REPLACED_FILEGROUPS_BEFORE_OR_ON =
String.format("%s/%s", BASE_URL, "filegroups/replaced/beforeoron/");
public static final String ALL_REPLACED_FILEGROUPS_BEFORE =
String.format("%s/%s", BASE_URL, "filegroups/replaced/before/");
public static final String ALL_REPLACED_FILEGROUPS_PARTITION =
String.format("%s/%s", BASE_URL, "filegroups/replaced/partition/");
public static final String PENDING_CLUSTERING_FILEGROUPS = String.format("%s/%s", BASE_URL, "clustering/pending/");
@@ -380,6 +386,30 @@ public class RemoteHoodieTableFileSystemView implements SyncableFileSystemView,
}
}
@Override
public Stream<HoodieFileGroup> getReplacedFileGroupsBefore(String maxCommitTime, String partitionPath) {
Map<String, String> paramsMap = getParamsWithAdditionalParam(partitionPath, MAX_INSTANT_PARAM, maxCommitTime);
try {
List<FileGroupDTO> fileGroups = executeRequest(ALL_REPLACED_FILEGROUPS_BEFORE, paramsMap,
new TypeReference<List<FileGroupDTO>>() {}, RequestMethod.GET);
return fileGroups.stream().map(dto -> FileGroupDTO.toFileGroup(dto, metaClient));
} catch (IOException e) {
throw new HoodieRemoteException(e);
}
}
@Override
public Stream<HoodieFileGroup> getAllReplacedFileGroups(String partitionPath) {
Map<String, String> paramsMap = getParamsWithPartitionPath(partitionPath);
try {
List<FileGroupDTO> fileGroups = executeRequest(ALL_REPLACED_FILEGROUPS_PARTITION, paramsMap,
new TypeReference<List<FileGroupDTO>>() {}, RequestMethod.GET);
return fileGroups.stream().map(dto -> FileGroupDTO.toFileGroup(dto, metaClient));
} catch (IOException e) {
throw new HoodieRemoteException(e);
}
}
public boolean refresh() {
Map<String, String> paramsMap = getParams();
try {

View File

@@ -167,10 +167,20 @@ public interface TableFileSystemView {
HoodieTimeline getTimeline();
/**
* Stream all the replaced file groups before maxCommitTime.
* Stream all the replaced file groups before or on maxCommitTime for given partition.
*/
Stream<HoodieFileGroup> getReplacedFileGroupsBeforeOrOn(String maxCommitTime, String partitionPath);
/**
* Stream all the replaced file groups before maxCommitTime for given partition.
*/
Stream<HoodieFileGroup> getReplacedFileGroupsBefore(String maxCommitTime, String partitionPath);
/**
* Stream all the replaced file groups for given partition.
*/
Stream<HoodieFileGroup> getAllReplacedFileGroups(String partitionPath);
/**
* Filegroups that are in pending clustering.
*/

View File

@@ -86,7 +86,7 @@ public class ClusteringUtils {
LOG.warn("No content found in requested file for instant " + pendingReplaceInstant);
return Option.empty();
}
HoodieRequestedReplaceMetadata requestedReplaceMetadata = TimelineMetadataUtils.deserializeRequestedReplaceMetadta(content.get());
HoodieRequestedReplaceMetadata requestedReplaceMetadata = TimelineMetadataUtils.deserializeRequestedReplaceMetadata(content.get());
if (WriteOperationType.CLUSTER.name().equals(requestedReplaceMetadata.getOperationType())) {
return Option.of(Pair.of(pendingReplaceInstant, requestedReplaceMetadata.getClusteringPlan()));
}

View File

@@ -25,6 +25,7 @@ import org.apache.hudi.avro.model.HoodieRestoreMetadata;
import org.apache.hudi.avro.model.HoodieRollbackMetadata;
import org.apache.hudi.common.model.HoodieCommitMetadata;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.model.HoodieReplaceCommitMetadata;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
@@ -92,6 +93,12 @@ public class HoodieTableMetadataUtil {
case HoodieTimeline.SAVEPOINT_ACTION:
// Nothing to be done here
break;
case HoodieTimeline.REPLACE_COMMIT_ACTION:
HoodieReplaceCommitMetadata replaceMetadata = HoodieReplaceCommitMetadata.fromBytes(
timeline.getInstantDetails(instant).get(), HoodieReplaceCommitMetadata.class);
// Note: we only add new files created here. Replaced files are removed from metadata later by cleaner.
records = Option.of(convertMetadataToRecords(replaceMetadata, instant.getTimestamp()));
break;
default:
throw new HoodieException("Unknown type of action " + instant.getAction());
}