[HUDI-1352] Add FileSystemView APIs to query pending clustering operations (#2202)
This commit is contained in:
@@ -84,6 +84,11 @@
|
|||||||
<import>${basedir}/src/main/avro/HoodieBootstrapSourceFilePartitionInfo.avsc</import>
|
<import>${basedir}/src/main/avro/HoodieBootstrapSourceFilePartitionInfo.avsc</import>
|
||||||
<import>${basedir}/src/main/avro/HoodieBootstrapIndexInfo.avsc</import>
|
<import>${basedir}/src/main/avro/HoodieBootstrapIndexInfo.avsc</import>
|
||||||
<import>${basedir}/src/main/avro/HoodieBootstrapMetadata.avsc</import>
|
<import>${basedir}/src/main/avro/HoodieBootstrapMetadata.avsc</import>
|
||||||
|
<import>${basedir}/src/main/avro/HoodieSliceInfo.avsc</import>
|
||||||
|
<import>${basedir}/src/main/avro/HoodieClusteringGroup.avsc</import>
|
||||||
|
<import>${basedir}/src/main/avro/HoodieClusteringStrategy.avsc</import>
|
||||||
|
<import>${basedir}/src/main/avro/HoodieClusteringPlan.avsc</import>
|
||||||
|
<import>${basedir}/src/main/avro/HoodieRequestedReplaceMetadata.avsc</import>
|
||||||
</imports>
|
</imports>
|
||||||
</configuration>
|
</configuration>
|
||||||
</plugin>
|
</plugin>
|
||||||
|
|||||||
49
hudi-common/src/main/avro/HoodieClusteringGroup.avsc
Normal file
49
hudi-common/src/main/avro/HoodieClusteringGroup.avsc
Normal file
@@ -0,0 +1,49 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
{
|
||||||
|
"namespace":"org.apache.hudi.avro.model",
|
||||||
|
"type":"record",
|
||||||
|
"name":"HoodieClusteringGroup",
|
||||||
|
"type":"record",
|
||||||
|
"fields":[
|
||||||
|
{
|
||||||
|
/* Group of files that needs to merged. All the slices in a group will belong to same partition initially.
|
||||||
|
* Files of different partitions may be grouped later when we have better on disk layout with indexing support.
|
||||||
|
*/
|
||||||
|
"name":"slices",
|
||||||
|
"type":["null", {
|
||||||
|
"type":"array",
|
||||||
|
"items": "HoodieSliceInfo"
|
||||||
|
}],
|
||||||
|
"default": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name":"metrics",
|
||||||
|
"type":["null", {
|
||||||
|
"type":"map",
|
||||||
|
"values":"double"
|
||||||
|
}],
|
||||||
|
"default": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name":"version",
|
||||||
|
"type":["int", "null"],
|
||||||
|
"default": 1
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
50
hudi-common/src/main/avro/HoodieClusteringPlan.avsc
Normal file
50
hudi-common/src/main/avro/HoodieClusteringPlan.avsc
Normal file
@@ -0,0 +1,50 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
{
|
||||||
|
"namespace":"org.apache.hudi.avro.model",
|
||||||
|
"type":"record",
|
||||||
|
"name":"HoodieClusteringPlan",
|
||||||
|
"fields":[
|
||||||
|
{
|
||||||
|
"name":"inputGroups",
|
||||||
|
"type":["null", {
|
||||||
|
"type":"array",
|
||||||
|
"items": "HoodieClusteringGroup"
|
||||||
|
}],
|
||||||
|
"default": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name":"strategy",
|
||||||
|
"type":["HoodieClusteringStrategy", "null"],
|
||||||
|
"default": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name":"extraMetadata",
|
||||||
|
"type":["null", {
|
||||||
|
"type":"map",
|
||||||
|
"values":"string"
|
||||||
|
}],
|
||||||
|
"default": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name":"version",
|
||||||
|
"type":["int", "null"],
|
||||||
|
"default": 1
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
42
hudi-common/src/main/avro/HoodieClusteringStrategy.avsc
Normal file
42
hudi-common/src/main/avro/HoodieClusteringStrategy.avsc
Normal file
@@ -0,0 +1,42 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
{
|
||||||
|
"namespace":"org.apache.hudi.avro.model",
|
||||||
|
"name":"HoodieClusteringStrategy",
|
||||||
|
"type":"record",
|
||||||
|
"fields":[
|
||||||
|
{
|
||||||
|
"name":"strategyClassName", /* have to be subclass of ClusteringStrategy interface defined in hudi. ClusteringStrategy class include methods like getPartitioner */
|
||||||
|
"type":["null","string"],
|
||||||
|
"default": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name":"strategyParams", /* Parameters could be different for different strategies. example, if sorting is needed for the strategy, parameters can contain sortColumns */
|
||||||
|
"type":["null", {
|
||||||
|
"type":"map",
|
||||||
|
"values":"string"
|
||||||
|
}],
|
||||||
|
"default": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name":"version",
|
||||||
|
"type":["int", "null"],
|
||||||
|
"default": 1
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
@@ -0,0 +1,47 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
{
|
||||||
|
"namespace":"org.apache.hudi.avro.model",
|
||||||
|
"type":"record",
|
||||||
|
"name":"HoodieRequestedReplaceMetadata",
|
||||||
|
"fields":[
|
||||||
|
{
|
||||||
|
"name":"operationType",
|
||||||
|
"type":["null", "string"],
|
||||||
|
"default": ""
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name":"clusteringPlan", /* only set if operationType == clustering" */
|
||||||
|
"type":["HoodieClusteringPlan", "null"],
|
||||||
|
"default": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name":"extraMetadata",
|
||||||
|
"type":["null", {
|
||||||
|
"type":"map",
|
||||||
|
"values":"string"
|
||||||
|
}],
|
||||||
|
"default": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name":"version",
|
||||||
|
"type":["int", "null"],
|
||||||
|
"default": 1
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
56
hudi-common/src/main/avro/HoodieSliceInfo.avsc
Normal file
56
hudi-common/src/main/avro/HoodieSliceInfo.avsc
Normal file
@@ -0,0 +1,56 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
{
|
||||||
|
"namespace":"org.apache.hudi.avro.model",
|
||||||
|
"type":"record",
|
||||||
|
"name":"HoodieSliceInfo",
|
||||||
|
"fields":[
|
||||||
|
{
|
||||||
|
"name":"dataFilePath",
|
||||||
|
"type":["null","string"],
|
||||||
|
"default": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name":"deltaFilePaths",
|
||||||
|
"type":["null", {
|
||||||
|
"type":"array",
|
||||||
|
"items":"string"
|
||||||
|
}],
|
||||||
|
"default": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name":"fileId",
|
||||||
|
"type":["null","string"]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name":"partitionPath",
|
||||||
|
"type":["null","string"],
|
||||||
|
"default": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name":"bootstrapFilePath",
|
||||||
|
"type":["null", "string"],
|
||||||
|
"default": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name":"version",
|
||||||
|
"type":["int", "null"],
|
||||||
|
"default": 1
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
@@ -40,6 +40,8 @@ public enum WriteOperationType {
|
|||||||
BOOTSTRAP("bootstrap"),
|
BOOTSTRAP("bootstrap"),
|
||||||
// insert overwrite
|
// insert overwrite
|
||||||
INSERT_OVERWRITE("insert_overwrite"),
|
INSERT_OVERWRITE("insert_overwrite"),
|
||||||
|
// cluster
|
||||||
|
CLUSTER("cluster"),
|
||||||
// used for old version
|
// used for old version
|
||||||
UNKNOWN("unknown");
|
UNKNOWN("unknown");
|
||||||
|
|
||||||
|
|||||||
@@ -412,6 +412,14 @@ public class HoodieActiveTimeline extends HoodieDefaultTimeline {
|
|||||||
createFileInMetaPath(instant.getFileName(), content, overwrite);
|
createFileInMetaPath(instant.getFileName(), content, overwrite);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Saves content for inflight/requested REPLACE instant.
|
||||||
|
*/
|
||||||
|
public void saveToPendingReplaceCommit(HoodieInstant instant, Option<byte[]> content) {
|
||||||
|
ValidationUtils.checkArgument(instant.getAction().equals(HoodieTimeline.REPLACE_COMMIT_ACTION));
|
||||||
|
createFileInMetaPath(instant.getFileName(), content, false);
|
||||||
|
}
|
||||||
|
|
||||||
public void saveToCleanRequested(HoodieInstant instant, Option<byte[]> content) {
|
public void saveToCleanRequested(HoodieInstant instant, Option<byte[]> content) {
|
||||||
ValidationUtils.checkArgument(instant.getAction().equals(HoodieTimeline.CLEAN_ACTION));
|
ValidationUtils.checkArgument(instant.getAction().equals(HoodieTimeline.CLEAN_ACTION));
|
||||||
ValidationUtils.checkArgument(instant.getState().equals(State.REQUESTED));
|
ValidationUtils.checkArgument(instant.getState().equals(State.REQUESTED));
|
||||||
|
|||||||
@@ -118,6 +118,12 @@ public class HoodieDefaultTimeline implements HoodieTimeline {
|
|||||||
instants.stream().filter(s -> s.getAction().equals(REPLACE_COMMIT_ACTION)).filter(s -> s.isCompleted()), details);
|
instants.stream().filter(s -> s.getAction().equals(REPLACE_COMMIT_ACTION)).filter(s -> s.isCompleted()), details);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public HoodieTimeline filterPendingReplaceTimeline() {
|
||||||
|
return new HoodieDefaultTimeline(instants.stream().filter(
|
||||||
|
s -> s.getAction().equals(HoodieTimeline.REPLACE_COMMIT_ACTION) && !s.isCompleted()), details);
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public HoodieTimeline filterPendingCompactionTimeline() {
|
public HoodieTimeline filterPendingCompactionTimeline() {
|
||||||
return new HoodieDefaultTimeline(
|
return new HoodieDefaultTimeline(
|
||||||
|
|||||||
@@ -151,6 +151,12 @@ public interface HoodieTimeline extends Serializable {
|
|||||||
*/
|
*/
|
||||||
HoodieTimeline filterPendingCompactionTimeline();
|
HoodieTimeline filterPendingCompactionTimeline();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Filter this timeline to just include requested and inflight replacecommit instants.
|
||||||
|
*/
|
||||||
|
HoodieTimeline filterPendingReplaceTimeline();
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a new Timeline with all the instants after startTs.
|
* Create a new Timeline with all the instants after startTs.
|
||||||
*/
|
*/
|
||||||
|
|||||||
@@ -18,19 +18,6 @@
|
|||||||
|
|
||||||
package org.apache.hudi.common.table.timeline;
|
package org.apache.hudi.common.table.timeline;
|
||||||
|
|
||||||
import org.apache.hudi.avro.model.HoodieCleanMetadata;
|
|
||||||
import org.apache.hudi.avro.model.HoodieCleanerPlan;
|
|
||||||
import org.apache.hudi.avro.model.HoodieCompactionPlan;
|
|
||||||
import org.apache.hudi.avro.model.HoodieInstantInfo;
|
|
||||||
import org.apache.hudi.avro.model.HoodieRestoreMetadata;
|
|
||||||
import org.apache.hudi.avro.model.HoodieRollbackMetadata;
|
|
||||||
import org.apache.hudi.avro.model.HoodieRollbackPartitionMetadata;
|
|
||||||
import org.apache.hudi.avro.model.HoodieSavepointMetadata;
|
|
||||||
import org.apache.hudi.avro.model.HoodieSavepointPartitionMetadata;
|
|
||||||
import org.apache.hudi.common.HoodieRollbackStat;
|
|
||||||
import org.apache.hudi.common.util.Option;
|
|
||||||
import org.apache.hudi.common.util.ValidationUtils;
|
|
||||||
|
|
||||||
import org.apache.avro.file.DataFileReader;
|
import org.apache.avro.file.DataFileReader;
|
||||||
import org.apache.avro.file.DataFileWriter;
|
import org.apache.avro.file.DataFileWriter;
|
||||||
import org.apache.avro.file.FileReader;
|
import org.apache.avro.file.FileReader;
|
||||||
@@ -40,6 +27,19 @@ import org.apache.avro.io.DatumWriter;
|
|||||||
import org.apache.avro.specific.SpecificDatumReader;
|
import org.apache.avro.specific.SpecificDatumReader;
|
||||||
import org.apache.avro.specific.SpecificDatumWriter;
|
import org.apache.avro.specific.SpecificDatumWriter;
|
||||||
import org.apache.avro.specific.SpecificRecordBase;
|
import org.apache.avro.specific.SpecificRecordBase;
|
||||||
|
import org.apache.hudi.avro.model.HoodieCleanMetadata;
|
||||||
|
import org.apache.hudi.avro.model.HoodieCleanerPlan;
|
||||||
|
import org.apache.hudi.avro.model.HoodieCompactionPlan;
|
||||||
|
import org.apache.hudi.avro.model.HoodieInstantInfo;
|
||||||
|
import org.apache.hudi.avro.model.HoodieRequestedReplaceMetadata;
|
||||||
|
import org.apache.hudi.avro.model.HoodieRestoreMetadata;
|
||||||
|
import org.apache.hudi.avro.model.HoodieRollbackMetadata;
|
||||||
|
import org.apache.hudi.avro.model.HoodieRollbackPartitionMetadata;
|
||||||
|
import org.apache.hudi.avro.model.HoodieSavepointMetadata;
|
||||||
|
import org.apache.hudi.avro.model.HoodieSavepointPartitionMetadata;
|
||||||
|
import org.apache.hudi.common.HoodieRollbackStat;
|
||||||
|
import org.apache.hudi.common.util.Option;
|
||||||
|
import org.apache.hudi.common.util.ValidationUtils;
|
||||||
|
|
||||||
import java.io.ByteArrayOutputStream;
|
import java.io.ByteArrayOutputStream;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
@@ -115,6 +115,10 @@ public class TimelineMetadataUtils {
|
|||||||
return serializeAvroMetadata(restoreMetadata, HoodieRestoreMetadata.class);
|
return serializeAvroMetadata(restoreMetadata, HoodieRestoreMetadata.class);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static Option<byte[]> serializeRequestedReplaceMetadata(HoodieRequestedReplaceMetadata clusteringPlan) throws IOException {
|
||||||
|
return serializeAvroMetadata(clusteringPlan, HoodieRequestedReplaceMetadata.class);
|
||||||
|
}
|
||||||
|
|
||||||
public static <T extends SpecificRecordBase> Option<byte[]> serializeAvroMetadata(T metadata, Class<T> clazz)
|
public static <T extends SpecificRecordBase> Option<byte[]> serializeAvroMetadata(T metadata, Class<T> clazz)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
DatumWriter<T> datumWriter = new SpecificDatumWriter<>(clazz);
|
DatumWriter<T> datumWriter = new SpecificDatumWriter<>(clazz);
|
||||||
@@ -146,6 +150,10 @@ public class TimelineMetadataUtils {
|
|||||||
return deserializeAvroMetadata(bytes, HoodieSavepointMetadata.class);
|
return deserializeAvroMetadata(bytes, HoodieSavepointMetadata.class);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static HoodieRequestedReplaceMetadata deserializeRequestedReplaceMetadta(byte[] bytes) throws IOException {
|
||||||
|
return deserializeAvroMetadata(bytes, HoodieRequestedReplaceMetadata.class);
|
||||||
|
}
|
||||||
|
|
||||||
public static <T extends SpecificRecordBase> T deserializeAvroMetadata(byte[] bytes, Class<T> clazz)
|
public static <T extends SpecificRecordBase> T deserializeAvroMetadata(byte[] bytes, Class<T> clazz)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
DatumReader<T> reader = new SpecificDatumReader<>(clazz);
|
DatumReader<T> reader = new SpecificDatumReader<>(clazz);
|
||||||
|
|||||||
@@ -0,0 +1,62 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hudi.common.table.timeline.dto;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
|
||||||
|
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||||
|
import org.apache.hudi.common.model.HoodieFileGroupId;
|
||||||
|
import org.apache.hudi.common.table.timeline.HoodieInstant;
|
||||||
|
import org.apache.hudi.common.util.collection.Pair;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The data transfer object of clustering.
|
||||||
|
*/
|
||||||
|
@JsonIgnoreProperties(ignoreUnknown = true)
|
||||||
|
public class ClusteringOpDTO {
|
||||||
|
|
||||||
|
@JsonProperty("id")
|
||||||
|
private String fileId;
|
||||||
|
|
||||||
|
@JsonProperty("partition")
|
||||||
|
private String partitionPath;
|
||||||
|
|
||||||
|
@JsonProperty("instantTime")
|
||||||
|
private String instantTime;
|
||||||
|
|
||||||
|
@JsonProperty("instantState")
|
||||||
|
private String instantState;
|
||||||
|
|
||||||
|
@JsonProperty("instantAction")
|
||||||
|
private String instantAction;
|
||||||
|
|
||||||
|
public static ClusteringOpDTO fromClusteringOp(HoodieFileGroupId fileGroupId, HoodieInstant instant) {
|
||||||
|
ClusteringOpDTO dto = new ClusteringOpDTO();
|
||||||
|
dto.fileId = fileGroupId.getFileId();
|
||||||
|
dto.partitionPath = fileGroupId.getPartitionPath();
|
||||||
|
dto.instantAction = instant.getAction();
|
||||||
|
dto.instantState = instant.getState().name();
|
||||||
|
dto.instantTime = instant.getTimestamp();
|
||||||
|
return dto;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static Pair<HoodieFileGroupId, HoodieInstant> toClusteringOperation(ClusteringOpDTO dto) {
|
||||||
|
return Pair.of(new HoodieFileGroupId(dto.partitionPath, dto.fileId),
|
||||||
|
new HoodieInstant(HoodieInstant.State.valueOf(dto.instantState), dto.instantAction, dto.instantTime));
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -34,6 +34,7 @@ import org.apache.hudi.common.model.HoodieReplaceCommitMetadata;
|
|||||||
import org.apache.hudi.common.table.HoodieTableMetaClient;
|
import org.apache.hudi.common.table.HoodieTableMetaClient;
|
||||||
import org.apache.hudi.common.table.timeline.HoodieInstant;
|
import org.apache.hudi.common.table.timeline.HoodieInstant;
|
||||||
import org.apache.hudi.common.table.timeline.HoodieTimeline;
|
import org.apache.hudi.common.table.timeline.HoodieTimeline;
|
||||||
|
import org.apache.hudi.common.util.ClusteringUtils;
|
||||||
import org.apache.hudi.common.util.CompactionUtils;
|
import org.apache.hudi.common.util.CompactionUtils;
|
||||||
import org.apache.hudi.common.util.HoodieTimer;
|
import org.apache.hudi.common.util.HoodieTimer;
|
||||||
import org.apache.hudi.common.util.Option;
|
import org.apache.hudi.common.util.Option;
|
||||||
@@ -107,6 +108,7 @@ public abstract class AbstractTableFileSystemView implements SyncableFileSystemV
|
|||||||
resetPendingCompactionOperations(CompactionUtils.getAllPendingCompactionOperations(metaClient).values().stream()
|
resetPendingCompactionOperations(CompactionUtils.getAllPendingCompactionOperations(metaClient).values().stream()
|
||||||
.map(e -> Pair.of(e.getKey(), CompactionOperation.convertFromAvroRecordInstance(e.getValue()))));
|
.map(e -> Pair.of(e.getKey(), CompactionOperation.convertFromAvroRecordInstance(e.getValue()))));
|
||||||
resetBootstrapBaseFileMapping(Stream.empty());
|
resetBootstrapBaseFileMapping(Stream.empty());
|
||||||
|
resetFileGroupsInPendingClustering(ClusteringUtils.getAllFileGroupsInPendingClusteringPlans(metaClient));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -678,6 +680,15 @@ public abstract class AbstractTableFileSystemView implements SyncableFileSystemV
|
|||||||
return getAllFileGroupsIncludingReplaced(partitionPath).filter(fg -> isFileGroupReplacedBeforeOrOn(fg.getFileGroupId(), maxCommitTime));
|
return getAllFileGroupsIncludingReplaced(partitionPath).filter(fg -> isFileGroupReplacedBeforeOrOn(fg.getFileGroupId(), maxCommitTime));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public final Stream<Pair<HoodieFileGroupId, HoodieInstant>> getFileGroupsInPendingClustering() {
|
||||||
|
try {
|
||||||
|
readLock.lock();
|
||||||
|
return fetchFileGroupsInPendingClustering();
|
||||||
|
} finally {
|
||||||
|
readLock.unlock();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Fetch APIs to be implemented by concrete sub-classes
|
// Fetch APIs to be implemented by concrete sub-classes
|
||||||
|
|
||||||
@@ -710,6 +721,40 @@ public abstract class AbstractTableFileSystemView implements SyncableFileSystemV
|
|||||||
*/
|
*/
|
||||||
abstract void removePendingCompactionOperations(Stream<Pair<String, CompactionOperation>> operations);
|
abstract void removePendingCompactionOperations(Stream<Pair<String, CompactionOperation>> operations);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Check if there is an outstanding clustering operation (requested/inflight) scheduled for this file.
|
||||||
|
*
|
||||||
|
* @param fgId File-Group Id
|
||||||
|
* @return true if there is a pending clustering, false otherwise
|
||||||
|
*/
|
||||||
|
protected abstract boolean isPendingClusteringScheduledForFileId(HoodieFileGroupId fgId);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get pending clustering instant time for specified file group. Return None if file group is not in pending
|
||||||
|
* clustering operation.
|
||||||
|
*/
|
||||||
|
protected abstract Option<HoodieInstant> getPendingClusteringInstant(final HoodieFileGroupId fileGroupId);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Fetch all file groups in pending clustering.
|
||||||
|
*/
|
||||||
|
protected abstract Stream<Pair<HoodieFileGroupId, HoodieInstant>> fetchFileGroupsInPendingClustering();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* resets the pending clustering operation and overwrite with the new list.
|
||||||
|
*/
|
||||||
|
abstract void resetFileGroupsInPendingClustering(Map<HoodieFileGroupId, HoodieInstant> fgIdToInstantMap);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Add metadata for file groups in pending clustering operations to the view.
|
||||||
|
*/
|
||||||
|
abstract void addFileGroupsInPendingClustering(Stream<Pair<HoodieFileGroupId, HoodieInstant>> fileGroups);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Remove metadata for file groups in pending clustering operations from the view.
|
||||||
|
*/
|
||||||
|
abstract void removeFileGroupsInPendingClustering(Stream<Pair<HoodieFileGroupId, HoodieInstant>> fileGroups);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Return pending compaction operation for a file-group.
|
* Return pending compaction operation for a file-group.
|
||||||
*
|
*
|
||||||
|
|||||||
@@ -45,6 +45,8 @@ public class FileSystemViewStorageConfig extends DefaultHoodieConfig {
|
|||||||
"hoodie.filesystem.view.spillable.bootstrap.base.file.mem.fraction";
|
"hoodie.filesystem.view.spillable.bootstrap.base.file.mem.fraction";
|
||||||
public static final String FILESYSTEM_VIEW_REPLACED_MEM_FRACTION =
|
public static final String FILESYSTEM_VIEW_REPLACED_MEM_FRACTION =
|
||||||
"hoodie.filesystem.view.spillable.replaced.mem.fraction";
|
"hoodie.filesystem.view.spillable.replaced.mem.fraction";
|
||||||
|
public static final String FILESYSTEM_VIEW_PENDING_CLUSTERING_MEM_FRACTION =
|
||||||
|
"hoodie.filesystem.view.spillable.clustering.mem.fraction";
|
||||||
private static final String ROCKSDB_BASE_PATH_PROP = "hoodie.filesystem.view.rocksdb.base.path";
|
private static final String ROCKSDB_BASE_PATH_PROP = "hoodie.filesystem.view.rocksdb.base.path";
|
||||||
public static final String FILESTYSTEM_REMOTE_TIMELINE_CLIENT_TIMEOUT_SECS =
|
public static final String FILESTYSTEM_REMOTE_TIMELINE_CLIENT_TIMEOUT_SECS =
|
||||||
"hoodie.filesystem.view.remote.timeout.secs";
|
"hoodie.filesystem.view.remote.timeout.secs";
|
||||||
@@ -62,6 +64,7 @@ public class FileSystemViewStorageConfig extends DefaultHoodieConfig {
|
|||||||
private static final Double DEFAULT_MEM_FRACTION_FOR_PENDING_COMPACTION = 0.01;
|
private static final Double DEFAULT_MEM_FRACTION_FOR_PENDING_COMPACTION = 0.01;
|
||||||
private static final Double DEFAULT_MEM_FRACTION_FOR_EXTERNAL_DATA_FILE = 0.05;
|
private static final Double DEFAULT_MEM_FRACTION_FOR_EXTERNAL_DATA_FILE = 0.05;
|
||||||
private static final Double DEFAULT_MEM_FRACTION_FOR_REPLACED_FILEGROUPS = 0.01;
|
private static final Double DEFAULT_MEM_FRACTION_FOR_REPLACED_FILEGROUPS = 0.01;
|
||||||
|
private static final Double DEFAULT_MEM_FRACTION_FOR_PENDING_CLUSTERING_FILEGROUPS = 0.01;
|
||||||
private static final Long DEFAULT_MAX_MEMORY_FOR_VIEW = 100 * 1024 * 1024L; // 100 MB
|
private static final Long DEFAULT_MAX_MEMORY_FOR_VIEW = 100 * 1024 * 1024L; // 100 MB
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -126,6 +129,12 @@ public class FileSystemViewStorageConfig extends DefaultHoodieConfig {
|
|||||||
.longValue();
|
.longValue();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public long getMaxMemoryForPendingClusteringFileGroups() {
|
||||||
|
long totalMemory = Long.parseLong(props.getProperty(FILESYSTEM_VIEW_SPILLABLE_MEM));
|
||||||
|
return new Double(totalMemory * Double.parseDouble(props.getProperty(FILESYSTEM_VIEW_PENDING_CLUSTERING_MEM_FRACTION)))
|
||||||
|
.longValue();
|
||||||
|
}
|
||||||
|
|
||||||
public String getBaseStoreDir() {
|
public String getBaseStoreDir() {
|
||||||
return props.getProperty(FILESYSTEM_VIEW_SPILLABLE_DIR);
|
return props.getProperty(FILESYSTEM_VIEW_SPILLABLE_DIR);
|
||||||
}
|
}
|
||||||
@@ -245,6 +254,8 @@ public class FileSystemViewStorageConfig extends DefaultHoodieConfig {
|
|||||||
FILESYSTEM_VIEW_BOOTSTRAP_BASE_FILE_FRACTION, DEFAULT_MEM_FRACTION_FOR_EXTERNAL_DATA_FILE.toString());
|
FILESYSTEM_VIEW_BOOTSTRAP_BASE_FILE_FRACTION, DEFAULT_MEM_FRACTION_FOR_EXTERNAL_DATA_FILE.toString());
|
||||||
setDefaultOnCondition(props, !props.containsKey(FILESYSTEM_VIEW_REPLACED_MEM_FRACTION),
|
setDefaultOnCondition(props, !props.containsKey(FILESYSTEM_VIEW_REPLACED_MEM_FRACTION),
|
||||||
FILESYSTEM_VIEW_REPLACED_MEM_FRACTION, DEFAULT_MEM_FRACTION_FOR_REPLACED_FILEGROUPS.toString());
|
FILESYSTEM_VIEW_REPLACED_MEM_FRACTION, DEFAULT_MEM_FRACTION_FOR_REPLACED_FILEGROUPS.toString());
|
||||||
|
setDefaultOnCondition(props, !props.containsKey(FILESYSTEM_VIEW_PENDING_CLUSTERING_MEM_FRACTION),
|
||||||
|
FILESYSTEM_VIEW_PENDING_CLUSTERING_MEM_FRACTION, DEFAULT_MEM_FRACTION_FOR_PENDING_CLUSTERING_FILEGROUPS.toString());
|
||||||
|
|
||||||
setDefaultOnCondition(props, !props.containsKey(ROCKSDB_BASE_PATH_PROP), ROCKSDB_BASE_PATH_PROP,
|
setDefaultOnCondition(props, !props.containsKey(ROCKSDB_BASE_PATH_PROP), ROCKSDB_BASE_PATH_PROP,
|
||||||
DEFAULT_ROCKSDB_BASE_PATH);
|
DEFAULT_ROCKSDB_BASE_PATH);
|
||||||
|
|||||||
@@ -70,6 +70,11 @@ public class HoodieTableFileSystemView extends IncrementalTimelineSyncFileSystem
|
|||||||
*/
|
*/
|
||||||
protected Map<HoodieFileGroupId, HoodieInstant> fgIdToReplaceInstants;
|
protected Map<HoodieFileGroupId, HoodieInstant> fgIdToReplaceInstants;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Track file groups in pending clustering.
|
||||||
|
*/
|
||||||
|
protected Map<HoodieFileGroupId, HoodieInstant> fgIdToPendingClustering;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Flag to determine if closed.
|
* Flag to determine if closed.
|
||||||
*/
|
*/
|
||||||
@@ -113,6 +118,7 @@ public class HoodieTableFileSystemView extends IncrementalTimelineSyncFileSystem
|
|||||||
this.partitionToFileGroupsMap = null;
|
this.partitionToFileGroupsMap = null;
|
||||||
this.fgIdToBootstrapBaseFile = null;
|
this.fgIdToBootstrapBaseFile = null;
|
||||||
this.fgIdToReplaceInstants = null;
|
this.fgIdToReplaceInstants = null;
|
||||||
|
this.fgIdToPendingClustering = null;
|
||||||
}
|
}
|
||||||
|
|
||||||
protected Map<String, List<HoodieFileGroup>> createPartitionToFileGroups() {
|
protected Map<String, List<HoodieFileGroup>> createPartitionToFileGroups() {
|
||||||
@@ -134,6 +140,11 @@ public class HoodieTableFileSystemView extends IncrementalTimelineSyncFileSystem
|
|||||||
return replacedFileGroupsMap;
|
return replacedFileGroupsMap;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
protected Map<HoodieFileGroupId, HoodieInstant> createFileIdToPendingClusteringMap(final Map<HoodieFileGroupId, HoodieInstant> fileGroupsInClustering) {
|
||||||
|
Map<HoodieFileGroupId, HoodieInstant> fgInpendingClustering = new ConcurrentHashMap<>(fileGroupsInClustering);
|
||||||
|
return fgInpendingClustering;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a file system view, as of the given timeline, with the provided file statuses.
|
* Create a file system view, as of the given timeline, with the provided file statuses.
|
||||||
*/
|
*/
|
||||||
@@ -189,6 +200,49 @@ public class HoodieTableFileSystemView extends IncrementalTimelineSyncFileSystem
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected boolean isPendingClusteringScheduledForFileId(HoodieFileGroupId fgId) {
|
||||||
|
return fgIdToPendingClustering.containsKey(fgId);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected Option<HoodieInstant> getPendingClusteringInstant(HoodieFileGroupId fgId) {
|
||||||
|
return Option.ofNullable(fgIdToPendingClustering.get(fgId));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected Stream<Pair<HoodieFileGroupId, HoodieInstant>> fetchFileGroupsInPendingClustering() {
|
||||||
|
return fgIdToPendingClustering.entrySet().stream().map(entry -> Pair.of(entry.getKey(), entry.getValue()));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
void resetFileGroupsInPendingClustering(Map<HoodieFileGroupId, HoodieInstant> fgIdToInstantMap) {
|
||||||
|
fgIdToPendingClustering = createFileIdToPendingClusteringMap(fgIdToInstantMap);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
void addFileGroupsInPendingClustering(Stream<Pair<HoodieFileGroupId, HoodieInstant>> fileGroups) {
|
||||||
|
fileGroups.forEach(fileGroupInstantPair -> {
|
||||||
|
ValidationUtils.checkArgument(fgIdToPendingClustering.containsKey(fileGroupInstantPair.getLeft()),
|
||||||
|
"Trying to add a FileGroupId which is already in pending clustering operation. FgId :"
|
||||||
|
+ fileGroupInstantPair.getLeft() + ", new instant: " + fileGroupInstantPair.getRight() + ", existing instant "
|
||||||
|
+ fgIdToPendingClustering.get(fileGroupInstantPair.getLeft()));
|
||||||
|
|
||||||
|
fgIdToPendingClustering.put(fileGroupInstantPair.getLeft(), fileGroupInstantPair.getRight());
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
void removeFileGroupsInPendingClustering(Stream<Pair<HoodieFileGroupId, HoodieInstant>> fileGroups) {
|
||||||
|
fileGroups.forEach(fileGroupInstantPair -> {
|
||||||
|
ValidationUtils.checkArgument(fgIdToPendingClustering.containsKey(fileGroupInstantPair.getLeft()),
|
||||||
|
"Trying to remove a FileGroupId which is not found in pending clustering operation. FgId :"
|
||||||
|
+ fileGroupInstantPair.getLeft() + ", new instant: " + fileGroupInstantPair.getRight());
|
||||||
|
|
||||||
|
fgIdToPendingClustering.remove(fileGroupInstantPair.getLeft());
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Given a partition path, obtain all filegroups within that. All methods, that work at the partition level go through
|
* Given a partition path, obtain all filegroups within that. All methods, that work at the partition level go through
|
||||||
* this.
|
* this.
|
||||||
|
|||||||
@@ -22,6 +22,7 @@ import org.apache.hudi.common.model.CompactionOperation;
|
|||||||
import org.apache.hudi.common.model.FileSlice;
|
import org.apache.hudi.common.model.FileSlice;
|
||||||
import org.apache.hudi.common.model.HoodieBaseFile;
|
import org.apache.hudi.common.model.HoodieBaseFile;
|
||||||
import org.apache.hudi.common.model.HoodieFileGroup;
|
import org.apache.hudi.common.model.HoodieFileGroup;
|
||||||
|
import org.apache.hudi.common.model.HoodieFileGroupId;
|
||||||
import org.apache.hudi.common.table.timeline.HoodieInstant;
|
import org.apache.hudi.common.table.timeline.HoodieInstant;
|
||||||
import org.apache.hudi.common.table.timeline.HoodieTimeline;
|
import org.apache.hudi.common.table.timeline.HoodieTimeline;
|
||||||
import org.apache.hudi.common.util.Functions.Function0;
|
import org.apache.hudi.common.util.Functions.Function0;
|
||||||
@@ -203,6 +204,11 @@ public class PriorityBasedFileSystemView implements SyncableFileSystemView, Seri
|
|||||||
return execute(preferredView::getPendingCompactionOperations, secondaryView::getPendingCompactionOperations);
|
return execute(preferredView::getPendingCompactionOperations, secondaryView::getPendingCompactionOperations);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Stream<Pair<HoodieFileGroupId, HoodieInstant>> getFileGroupsInPendingClustering() {
|
||||||
|
return execute(preferredView::getFileGroupsInPendingClustering, secondaryView::getFileGroupsInPendingClustering);
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void close() {
|
public void close() {
|
||||||
preferredView.close();
|
preferredView.close();
|
||||||
|
|||||||
@@ -27,10 +27,12 @@ import org.apache.hudi.common.model.CompactionOperation;
|
|||||||
import org.apache.hudi.common.model.FileSlice;
|
import org.apache.hudi.common.model.FileSlice;
|
||||||
import org.apache.hudi.common.model.HoodieBaseFile;
|
import org.apache.hudi.common.model.HoodieBaseFile;
|
||||||
import org.apache.hudi.common.model.HoodieFileGroup;
|
import org.apache.hudi.common.model.HoodieFileGroup;
|
||||||
|
import org.apache.hudi.common.model.HoodieFileGroupId;
|
||||||
import org.apache.hudi.common.table.HoodieTableMetaClient;
|
import org.apache.hudi.common.table.HoodieTableMetaClient;
|
||||||
import org.apache.hudi.common.table.timeline.HoodieInstant;
|
import org.apache.hudi.common.table.timeline.HoodieInstant;
|
||||||
import org.apache.hudi.common.table.timeline.HoodieTimeline;
|
import org.apache.hudi.common.table.timeline.HoodieTimeline;
|
||||||
import org.apache.hudi.common.table.timeline.dto.BaseFileDTO;
|
import org.apache.hudi.common.table.timeline.dto.BaseFileDTO;
|
||||||
|
import org.apache.hudi.common.table.timeline.dto.ClusteringOpDTO;
|
||||||
import org.apache.hudi.common.table.timeline.dto.CompactionOpDTO;
|
import org.apache.hudi.common.table.timeline.dto.CompactionOpDTO;
|
||||||
import org.apache.hudi.common.table.timeline.dto.FileGroupDTO;
|
import org.apache.hudi.common.table.timeline.dto.FileGroupDTO;
|
||||||
import org.apache.hudi.common.table.timeline.dto.FileSliceDTO;
|
import org.apache.hudi.common.table.timeline.dto.FileSliceDTO;
|
||||||
@@ -89,6 +91,9 @@ public class RemoteHoodieTableFileSystemView implements SyncableFileSystemView,
|
|||||||
public static final String ALL_REPLACED_FILEGROUPS_BEFORE_OR_ON =
|
public static final String ALL_REPLACED_FILEGROUPS_BEFORE_OR_ON =
|
||||||
String.format("%s/%s", BASE_URL, "filegroups/replaced/beforeoron/");
|
String.format("%s/%s", BASE_URL, "filegroups/replaced/beforeoron/");
|
||||||
|
|
||||||
|
public static final String PENDING_CLUSTERING_FILEGROUPS = String.format("%s/%s", BASE_URL, "clustering/pending/");
|
||||||
|
|
||||||
|
|
||||||
public static final String LAST_INSTANT = String.format("%s/%s", BASE_URL, "timeline/instant/last");
|
public static final String LAST_INSTANT = String.format("%s/%s", BASE_URL, "timeline/instant/last");
|
||||||
public static final String LAST_INSTANTS = String.format("%s/%s", BASE_URL, "timeline/instants/last");
|
public static final String LAST_INSTANTS = String.format("%s/%s", BASE_URL, "timeline/instants/last");
|
||||||
|
|
||||||
@@ -396,6 +401,18 @@ public class RemoteHoodieTableFileSystemView implements SyncableFileSystemView,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Stream<Pair<HoodieFileGroupId, HoodieInstant>> getFileGroupsInPendingClustering() {
|
||||||
|
Map<String, String> paramsMap = getParams();
|
||||||
|
try {
|
||||||
|
List<ClusteringOpDTO> dtos = executeRequest(PENDING_CLUSTERING_FILEGROUPS, paramsMap,
|
||||||
|
new TypeReference<List<ClusteringOpDTO>>() {}, RequestMethod.GET);
|
||||||
|
return dtos.stream().map(ClusteringOpDTO::toClusteringOperation);
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new HoodieRemoteException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void close() {
|
public void close() {
|
||||||
closed = true;
|
closed = true;
|
||||||
|
|||||||
@@ -135,6 +135,65 @@ public class RocksDbBasedFileSystemView extends IncrementalTimelineSyncFileSyste
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected boolean isPendingClusteringScheduledForFileId(HoodieFileGroupId fgId) {
|
||||||
|
return getPendingClusteringInstant(fgId).isPresent();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected Option<HoodieInstant> getPendingClusteringInstant(HoodieFileGroupId fgId) {
|
||||||
|
String lookupKey = schemaHelper.getKeyForFileGroupsInPendingClustering(fgId);
|
||||||
|
HoodieInstant pendingClusteringInstant =
|
||||||
|
rocksDB.get(schemaHelper.getColFamilyForFileGroupsInPendingClustering(), lookupKey);
|
||||||
|
return Option.ofNullable(pendingClusteringInstant);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Stream<Pair<HoodieFileGroupId, HoodieInstant>> fetchFileGroupsInPendingClustering() {
|
||||||
|
return rocksDB.<Pair<HoodieFileGroupId, HoodieInstant>>prefixSearch(schemaHelper.getColFamilyForFileGroupsInPendingClustering(), "")
|
||||||
|
.map(Pair::getValue);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
void resetFileGroupsInPendingClustering(Map<HoodieFileGroupId, HoodieInstant> fgIdToInstantMap) {
|
||||||
|
LOG.info("Resetting file groups in pending clustering to ROCKSDB based file-system view at "
|
||||||
|
+ config.getRocksdbBasePath() + ", Total file-groups=" + fgIdToInstantMap.size());
|
||||||
|
|
||||||
|
// Delete all replaced file groups
|
||||||
|
rocksDB.prefixDelete(schemaHelper.getColFamilyForFileGroupsInPendingClustering(), "part=");
|
||||||
|
// Now add new entries
|
||||||
|
addFileGroupsInPendingClustering(fgIdToInstantMap.entrySet().stream().map(entry -> Pair.of(entry.getKey(), entry.getValue())));
|
||||||
|
LOG.info("Resetting replacedFileGroups to ROCKSDB based file-system view complete");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
void addFileGroupsInPendingClustering(Stream<Pair<HoodieFileGroupId, HoodieInstant>> fileGroups) {
|
||||||
|
rocksDB.writeBatch(batch ->
|
||||||
|
fileGroups.forEach(fgIdToClusterInstant -> {
|
||||||
|
ValidationUtils.checkArgument(!isPendingClusteringScheduledForFileId(fgIdToClusterInstant.getLeft()),
|
||||||
|
"Duplicate FileGroupId found in pending compaction operations. FgId :"
|
||||||
|
+ fgIdToClusterInstant.getLeft());
|
||||||
|
|
||||||
|
rocksDB.putInBatch(batch, schemaHelper.getColFamilyForFileGroupsInPendingClustering(),
|
||||||
|
schemaHelper.getKeyForFileGroupsInPendingClustering(fgIdToClusterInstant.getKey()), fgIdToClusterInstant);
|
||||||
|
})
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
void removeFileGroupsInPendingClustering(Stream<Pair<HoodieFileGroupId, HoodieInstant>> fileGroups) {
|
||||||
|
rocksDB.writeBatch(batch ->
|
||||||
|
fileGroups.forEach(fgToPendingClusteringInstant -> {
|
||||||
|
ValidationUtils.checkArgument(
|
||||||
|
!isPendingClusteringScheduledForFileId(fgToPendingClusteringInstant.getLeft()),
|
||||||
|
"Trying to remove a FileGroupId which is not found in pending compaction operations. FgId :"
|
||||||
|
+ fgToPendingClusteringInstant.getLeft());
|
||||||
|
rocksDB.deleteInBatch(batch, schemaHelper.getColFamilyForFileGroupsInPendingClustering(),
|
||||||
|
schemaHelper.getKeyForFileGroupsInPendingClustering(fgToPendingClusteringInstant.getLeft()));
|
||||||
|
})
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected void resetViewState() {
|
protected void resetViewState() {
|
||||||
LOG.info("Deleting all rocksdb data associated with table filesystem view");
|
LOG.info("Deleting all rocksdb data associated with table filesystem view");
|
||||||
|
|||||||
@@ -51,6 +51,7 @@ public class SpillableMapBasedFileSystemView extends HoodieTableFileSystemView {
|
|||||||
private final long maxMemoryForPendingCompaction;
|
private final long maxMemoryForPendingCompaction;
|
||||||
private final long maxMemoryForBootstrapBaseFile;
|
private final long maxMemoryForBootstrapBaseFile;
|
||||||
private final long maxMemoryForReplaceFileGroups;
|
private final long maxMemoryForReplaceFileGroups;
|
||||||
|
private final long maxMemoryForClusteringFileGroups;
|
||||||
private final String baseStoreDir;
|
private final String baseStoreDir;
|
||||||
|
|
||||||
public SpillableMapBasedFileSystemView(HoodieTableMetaClient metaClient, HoodieTimeline visibleActiveTimeline,
|
public SpillableMapBasedFileSystemView(HoodieTableMetaClient metaClient, HoodieTimeline visibleActiveTimeline,
|
||||||
@@ -60,6 +61,7 @@ public class SpillableMapBasedFileSystemView extends HoodieTableFileSystemView {
|
|||||||
this.maxMemoryForPendingCompaction = config.getMaxMemoryForPendingCompaction();
|
this.maxMemoryForPendingCompaction = config.getMaxMemoryForPendingCompaction();
|
||||||
this.maxMemoryForBootstrapBaseFile = config.getMaxMemoryForBootstrapBaseFile();
|
this.maxMemoryForBootstrapBaseFile = config.getMaxMemoryForBootstrapBaseFile();
|
||||||
this.maxMemoryForReplaceFileGroups = config.getMaxMemoryForReplacedFileGroups();
|
this.maxMemoryForReplaceFileGroups = config.getMaxMemoryForReplacedFileGroups();
|
||||||
|
this.maxMemoryForClusteringFileGroups = config.getMaxMemoryForPendingClusteringFileGroups();
|
||||||
this.baseStoreDir = config.getBaseStoreDir();
|
this.baseStoreDir = config.getBaseStoreDir();
|
||||||
init(metaClient, visibleActiveTimeline);
|
init(metaClient, visibleActiveTimeline);
|
||||||
}
|
}
|
||||||
@@ -130,6 +132,21 @@ public class SpillableMapBasedFileSystemView extends HoodieTableFileSystemView {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected Map<HoodieFileGroupId, HoodieInstant> createFileIdToPendingClusteringMap(final Map<HoodieFileGroupId, HoodieInstant> fileGroupsInClustering) {
|
||||||
|
try {
|
||||||
|
LOG.info("Creating file group id to clustering instant map using external spillable Map. Max Mem=" + maxMemoryForClusteringFileGroups
|
||||||
|
+ ", BaseDir=" + baseStoreDir);
|
||||||
|
new File(baseStoreDir).mkdirs();
|
||||||
|
Map<HoodieFileGroupId, HoodieInstant> pendingMap = new ExternalSpillableMap<>(
|
||||||
|
maxMemoryForClusteringFileGroups, baseStoreDir, new DefaultSizeEstimator(), new DefaultSizeEstimator<>());
|
||||||
|
pendingMap.putAll(fileGroupsInClustering);
|
||||||
|
return pendingMap;
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Stream<HoodieFileGroup> getAllFileGroups() {
|
public Stream<HoodieFileGroup> getAllFileGroups() {
|
||||||
return ((ExternalSpillableMap) partitionToFileGroupsMap).valueStream()
|
return ((ExternalSpillableMap) partitionToFileGroupsMap).valueStream()
|
||||||
|
|||||||
@@ -22,6 +22,7 @@ import org.apache.hudi.common.model.CompactionOperation;
|
|||||||
import org.apache.hudi.common.model.FileSlice;
|
import org.apache.hudi.common.model.FileSlice;
|
||||||
import org.apache.hudi.common.model.HoodieBaseFile;
|
import org.apache.hudi.common.model.HoodieBaseFile;
|
||||||
import org.apache.hudi.common.model.HoodieFileGroup;
|
import org.apache.hudi.common.model.HoodieFileGroup;
|
||||||
|
import org.apache.hudi.common.model.HoodieFileGroupId;
|
||||||
import org.apache.hudi.common.table.timeline.HoodieInstant;
|
import org.apache.hudi.common.table.timeline.HoodieInstant;
|
||||||
import org.apache.hudi.common.table.timeline.HoodieTimeline;
|
import org.apache.hudi.common.table.timeline.HoodieTimeline;
|
||||||
import org.apache.hudi.common.util.Option;
|
import org.apache.hudi.common.util.Option;
|
||||||
@@ -169,4 +170,9 @@ public interface TableFileSystemView {
|
|||||||
* Stream all the replaced file groups before maxCommitTime.
|
* Stream all the replaced file groups before maxCommitTime.
|
||||||
*/
|
*/
|
||||||
Stream<HoodieFileGroup> getReplacedFileGroupsBeforeOrOn(String maxCommitTime, String partitionPath);
|
Stream<HoodieFileGroup> getReplacedFileGroupsBeforeOrOn(String maxCommitTime, String partitionPath);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Filegroups that are in pending clustering.
|
||||||
|
*/
|
||||||
|
Stream<Pair<HoodieFileGroupId, HoodieInstant>> getFileGroupsInPendingClustering();
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -0,0 +1,178 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hudi.common.util;
|
||||||
|
|
||||||
|
import org.apache.hudi.avro.model.HoodieClusteringGroup;
|
||||||
|
import org.apache.hudi.avro.model.HoodieClusteringPlan;
|
||||||
|
import org.apache.hudi.avro.model.HoodieClusteringStrategy;
|
||||||
|
import org.apache.hudi.avro.model.HoodieRequestedReplaceMetadata;
|
||||||
|
import org.apache.hudi.avro.model.HoodieSliceInfo;
|
||||||
|
import org.apache.hudi.common.fs.FSUtils;
|
||||||
|
import org.apache.hudi.common.model.BaseFile;
|
||||||
|
import org.apache.hudi.common.model.FileSlice;
|
||||||
|
import org.apache.hudi.common.model.HoodieFileGroupId;
|
||||||
|
import org.apache.hudi.common.model.HoodieLogFile;
|
||||||
|
import org.apache.hudi.common.model.WriteOperationType;
|
||||||
|
import org.apache.hudi.common.table.HoodieTableMetaClient;
|
||||||
|
import org.apache.hudi.common.table.timeline.HoodieInstant;
|
||||||
|
import org.apache.hudi.common.table.timeline.TimelineMetadataUtils;
|
||||||
|
import org.apache.hudi.common.util.collection.Pair;
|
||||||
|
import org.apache.hudi.exception.HoodieIOException;
|
||||||
|
import org.apache.log4j.LogManager;
|
||||||
|
import org.apache.log4j.Logger;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.AbstractMap;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
import java.util.stream.Stream;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Helper class to generate clustering plan from metadata.
|
||||||
|
*/
|
||||||
|
public class ClusteringUtils {
|
||||||
|
|
||||||
|
private static final Logger LOG = LogManager.getLogger(ClusteringUtils.class);
|
||||||
|
|
||||||
|
public static final String TOTAL_IO_READ_MB = "TOTAL_IO_READ_MB";
|
||||||
|
public static final String TOTAL_LOG_FILE_SIZE = "TOTAL_LOG_FILES_SIZE";
|
||||||
|
public static final String TOTAL_LOG_FILES = "TOTAL_LOG_FILES";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get all pending clustering plans along with their instants.
|
||||||
|
*/
|
||||||
|
public static Stream<Pair<HoodieInstant, HoodieClusteringPlan>> getAllPendingClusteringPlans(
|
||||||
|
HoodieTableMetaClient metaClient) {
|
||||||
|
List<HoodieInstant> pendingReplaceInstants =
|
||||||
|
metaClient.getActiveTimeline().filterPendingReplaceTimeline().getInstants().collect(Collectors.toList());
|
||||||
|
return pendingReplaceInstants.stream().map(instant -> getClusteringPlan(metaClient, instant))
|
||||||
|
.filter(Option::isPresent).map(Option::get);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static Option<Pair<HoodieInstant, HoodieClusteringPlan>> getClusteringPlan(HoodieTableMetaClient metaClient, HoodieInstant requestedReplaceInstant) {
|
||||||
|
try {
|
||||||
|
Option<byte[]> content = metaClient.getActiveTimeline().getInstantDetails(requestedReplaceInstant);
|
||||||
|
if (!content.isPresent() || content.get().length == 0) {
|
||||||
|
// few operations create requested file without any content. Assume these are not clustering
|
||||||
|
LOG.warn("No content found in requested file for instant " + requestedReplaceInstant);
|
||||||
|
return Option.empty();
|
||||||
|
}
|
||||||
|
HoodieRequestedReplaceMetadata requestedReplaceMetadata = TimelineMetadataUtils.deserializeRequestedReplaceMetadta(content.get());
|
||||||
|
if (WriteOperationType.CLUSTER.name().equals(requestedReplaceMetadata.getOperationType())) {
|
||||||
|
return Option.of(Pair.of(requestedReplaceInstant, requestedReplaceMetadata.getClusteringPlan()));
|
||||||
|
}
|
||||||
|
return Option.empty();
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new HoodieIOException("Error reading clustering plan " + requestedReplaceInstant.getTimestamp(), e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get filegroups to pending clustering instant mapping for all pending clustering plans.
|
||||||
|
* This includes all clustering operattions in 'requested' and 'inflight' states.
|
||||||
|
*/
|
||||||
|
public static Map<HoodieFileGroupId, HoodieInstant> getAllFileGroupsInPendingClusteringPlans(
|
||||||
|
HoodieTableMetaClient metaClient) {
|
||||||
|
Stream<Pair<HoodieInstant, HoodieClusteringPlan>> pendingClusteringPlans = getAllPendingClusteringPlans(metaClient);
|
||||||
|
Stream<Map.Entry<HoodieFileGroupId, HoodieInstant>> resultStream = pendingClusteringPlans.flatMap(clusteringPlan ->
|
||||||
|
// get all filegroups in the plan
|
||||||
|
getFileGroupEntriesInClusteringPlan(clusteringPlan.getLeft(), clusteringPlan.getRight()));
|
||||||
|
|
||||||
|
Map<HoodieFileGroupId, HoodieInstant> resultMap = resultStream.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
|
||||||
|
LOG.info("Found " + resultMap.size() + " files in pending clustering operations");
|
||||||
|
return resultMap;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static Stream<Pair<HoodieFileGroupId, HoodieInstant>> getFileGroupsInPendingClusteringInstant(
|
||||||
|
HoodieInstant instant, HoodieClusteringPlan clusteringPlan) {
|
||||||
|
Stream<HoodieFileGroupId> partitionToFileIdLists = clusteringPlan.getInputGroups().stream().flatMap(ClusteringUtils::getFileGroupsFromClusteringGroup);
|
||||||
|
return partitionToFileIdLists.map(e -> Pair.of(e, instant));
|
||||||
|
}
|
||||||
|
|
||||||
|
private static Stream<Map.Entry<HoodieFileGroupId, HoodieInstant>> getFileGroupEntriesInClusteringPlan(
|
||||||
|
HoodieInstant instant, HoodieClusteringPlan clusteringPlan) {
|
||||||
|
return getFileGroupsInPendingClusteringInstant(instant, clusteringPlan).map(entry ->
|
||||||
|
new AbstractMap.SimpleEntry<>(entry.getLeft(), entry.getRight()));
|
||||||
|
}
|
||||||
|
|
||||||
|
private static Stream<HoodieFileGroupId> getFileGroupsFromClusteringGroup(HoodieClusteringGroup group) {
|
||||||
|
return group.getSlices().stream().map(slice -> new HoodieFileGroupId(slice.getPartitionPath(), slice.getFileId()));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create clustering plan from input fileSliceGroups.
|
||||||
|
*/
|
||||||
|
public static HoodieClusteringPlan createClusteringPlan(String strategyClassName,
|
||||||
|
Map<String, String> strategyParams,
|
||||||
|
List<FileSlice>[] fileSliceGroups,
|
||||||
|
Map<String, String> extraMetadata) {
|
||||||
|
List<HoodieClusteringGroup> clusteringGroups = Arrays.stream(fileSliceGroups).map(fileSliceGroup -> {
|
||||||
|
Map<String, Double> groupMetrics = buildMetrics(fileSliceGroup);
|
||||||
|
List<HoodieSliceInfo> sliceInfos = getFileSliceInfo(fileSliceGroup);
|
||||||
|
return HoodieClusteringGroup.newBuilder().setSlices(sliceInfos).setMetrics(groupMetrics).build();
|
||||||
|
}).collect(Collectors.toList());
|
||||||
|
|
||||||
|
HoodieClusteringStrategy strategy = HoodieClusteringStrategy.newBuilder()
|
||||||
|
.setStrategyClassName(strategyClassName).setStrategyParams(strategyParams)
|
||||||
|
.build();
|
||||||
|
|
||||||
|
HoodieClusteringPlan plan = HoodieClusteringPlan.newBuilder()
|
||||||
|
.setInputGroups(clusteringGroups)
|
||||||
|
.setExtraMetadata(extraMetadata)
|
||||||
|
.setStrategy(strategy)
|
||||||
|
.build();
|
||||||
|
|
||||||
|
return plan;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static List<HoodieSliceInfo> getFileSliceInfo(List<FileSlice> slices) {
|
||||||
|
return slices.stream().map(slice -> new HoodieSliceInfo().newBuilder()
|
||||||
|
.setPartitionPath(slice.getPartitionPath())
|
||||||
|
.setFileId(slice.getFileId())
|
||||||
|
.setDataFilePath(slice.getBaseFile().map(BaseFile::getPath).orElse(null))
|
||||||
|
.setDeltaFilePaths(slice.getLogFiles().map(f -> f.getPath().getName()).collect(Collectors.toList()))
|
||||||
|
.setBootstrapFilePath(slice.getBaseFile().map(bf -> bf.getBootstrapBaseFile().map(bbf -> bbf.getPath()).orElse(null)).orElse(null))
|
||||||
|
.build()).collect(Collectors.toList());
|
||||||
|
}
|
||||||
|
|
||||||
|
private static Map<String, Double> buildMetrics(List<FileSlice> fileSlices) {
|
||||||
|
int numLogFiles = 0;
|
||||||
|
long totalLogFileSize = 0;
|
||||||
|
long totalIORead = 0;
|
||||||
|
|
||||||
|
for (FileSlice slice : fileSlices) {
|
||||||
|
numLogFiles += slice.getLogFiles().count();
|
||||||
|
// Total size of all the log files
|
||||||
|
totalLogFileSize += slice.getLogFiles().map(HoodieLogFile::getFileSize).filter(size -> size >= 0)
|
||||||
|
.reduce(Long::sum).orElse(0L);
|
||||||
|
// Total read will be the base file + all the log files
|
||||||
|
totalIORead =
|
||||||
|
FSUtils.getSizeInMB((slice.getBaseFile().isPresent() ? slice.getBaseFile().get().getFileSize() : 0L) + totalLogFileSize);
|
||||||
|
}
|
||||||
|
|
||||||
|
Map<String, Double> metrics = new HashMap<>();
|
||||||
|
metrics.put(TOTAL_IO_READ_MB, (double) totalIORead);
|
||||||
|
metrics.put(TOTAL_LOG_FILE_SIZE, (double) totalLogFileSize);
|
||||||
|
metrics.put(TOTAL_LOG_FILES, (double) numLogFiles);
|
||||||
|
return metrics;
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -48,6 +48,7 @@ public class RocksDBSchemaHelper {
|
|||||||
private final String colFamilyForBootstrapBaseFile;
|
private final String colFamilyForBootstrapBaseFile;
|
||||||
private final String colFamilyForStoredPartitions;
|
private final String colFamilyForStoredPartitions;
|
||||||
private final String colFamilyForReplacedFileGroups;
|
private final String colFamilyForReplacedFileGroups;
|
||||||
|
private final String colFamilyForPendingClusteringFileGroups;
|
||||||
|
|
||||||
public RocksDBSchemaHelper(HoodieTableMetaClient metaClient) {
|
public RocksDBSchemaHelper(HoodieTableMetaClient metaClient) {
|
||||||
this.colFamilyForBootstrapBaseFile = "hudi_bootstrap_basefile_" + metaClient.getBasePath().replace("/", "_");
|
this.colFamilyForBootstrapBaseFile = "hudi_bootstrap_basefile_" + metaClient.getBasePath().replace("/", "_");
|
||||||
@@ -55,11 +56,12 @@ public class RocksDBSchemaHelper {
|
|||||||
this.colFamilyForStoredPartitions = "hudi_partitions_" + metaClient.getBasePath().replace("/", "_");
|
this.colFamilyForStoredPartitions = "hudi_partitions_" + metaClient.getBasePath().replace("/", "_");
|
||||||
this.colFamilyForView = "hudi_view_" + metaClient.getBasePath().replace("/", "_");
|
this.colFamilyForView = "hudi_view_" + metaClient.getBasePath().replace("/", "_");
|
||||||
this.colFamilyForReplacedFileGroups = "hudi_replaced_fg" + metaClient.getBasePath().replace("/", "_");
|
this.colFamilyForReplacedFileGroups = "hudi_replaced_fg" + metaClient.getBasePath().replace("/", "_");
|
||||||
|
this.colFamilyForPendingClusteringFileGroups = "hudi_pending_clustering_fg" + metaClient.getBasePath().replace("/", "_");
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<String> getAllColumnFamilies() {
|
public List<String> getAllColumnFamilies() {
|
||||||
return Arrays.asList(getColFamilyForView(), getColFamilyForPendingCompaction(), getColFamilyForBootstrapBaseFile(),
|
return Arrays.asList(getColFamilyForView(), getColFamilyForPendingCompaction(), getColFamilyForBootstrapBaseFile(),
|
||||||
getColFamilyForStoredPartitions(), getColFamilyForReplacedFileGroups());
|
getColFamilyForStoredPartitions(), getColFamilyForReplacedFileGroups(), getColFamilyForFileGroupsInPendingClustering());
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getKeyForPartitionLookup(String partition) {
|
public String getKeyForPartitionLookup(String partition) {
|
||||||
@@ -78,6 +80,10 @@ public class RocksDBSchemaHelper {
|
|||||||
return getPartitionFileIdBasedLookup(fgId);
|
return getPartitionFileIdBasedLookup(fgId);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public String getKeyForFileGroupsInPendingClustering(HoodieFileGroupId fgId) {
|
||||||
|
return getPartitionFileIdBasedLookup(fgId);
|
||||||
|
}
|
||||||
|
|
||||||
public String getKeyForSliceView(HoodieFileGroup fileGroup, FileSlice slice) {
|
public String getKeyForSliceView(HoodieFileGroup fileGroup, FileSlice slice) {
|
||||||
return getKeyForSliceView(fileGroup.getPartitionPath(), fileGroup.getFileGroupId().getFileId(),
|
return getKeyForSliceView(fileGroup.getPartitionPath(), fileGroup.getFileGroupId().getFileId(),
|
||||||
slice.getBaseInstantTime());
|
slice.getBaseInstantTime());
|
||||||
@@ -135,4 +141,8 @@ public class RocksDBSchemaHelper {
|
|||||||
public String getColFamilyForReplacedFileGroups() {
|
public String getColFamilyForReplacedFileGroups() {
|
||||||
return colFamilyForReplacedFileGroups;
|
return colFamilyForReplacedFileGroups;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public String getColFamilyForFileGroupsInPendingClustering() {
|
||||||
|
return colFamilyForPendingClusteringFileGroups;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -21,10 +21,12 @@ package org.apache.hudi.common.table.view;
|
|||||||
import org.apache.hadoop.fs.FileStatus;
|
import org.apache.hadoop.fs.FileStatus;
|
||||||
import org.apache.hadoop.fs.Path;
|
import org.apache.hadoop.fs.Path;
|
||||||
import org.apache.hadoop.fs.permission.FsAction;
|
import org.apache.hadoop.fs.permission.FsAction;
|
||||||
|
import org.apache.hudi.avro.model.HoodieClusteringPlan;
|
||||||
import org.apache.hudi.avro.model.HoodieCompactionPlan;
|
import org.apache.hudi.avro.model.HoodieCompactionPlan;
|
||||||
import org.apache.hudi.avro.model.HoodieFSPermission;
|
import org.apache.hudi.avro.model.HoodieFSPermission;
|
||||||
import org.apache.hudi.avro.model.HoodieFileStatus;
|
import org.apache.hudi.avro.model.HoodieFileStatus;
|
||||||
import org.apache.hudi.avro.model.HoodiePath;
|
import org.apache.hudi.avro.model.HoodiePath;
|
||||||
|
import org.apache.hudi.avro.model.HoodieRequestedReplaceMetadata;
|
||||||
import org.apache.hudi.common.bootstrap.FileStatusUtils;
|
import org.apache.hudi.common.bootstrap.FileStatusUtils;
|
||||||
import org.apache.hudi.common.bootstrap.index.BootstrapIndex.IndexWriter;
|
import org.apache.hudi.common.bootstrap.index.BootstrapIndex.IndexWriter;
|
||||||
import org.apache.hudi.common.bootstrap.index.HFileBootstrapIndex;
|
import org.apache.hudi.common.bootstrap.index.HFileBootstrapIndex;
|
||||||
@@ -49,6 +51,7 @@ import org.apache.hudi.common.table.view.TableFileSystemView.BaseFileOnlyView;
|
|||||||
import org.apache.hudi.common.table.view.TableFileSystemView.SliceView;
|
import org.apache.hudi.common.table.view.TableFileSystemView.SliceView;
|
||||||
import org.apache.hudi.common.testutils.HoodieCommonTestHarness;
|
import org.apache.hudi.common.testutils.HoodieCommonTestHarness;
|
||||||
import org.apache.hudi.common.testutils.HoodieTestUtils;
|
import org.apache.hudi.common.testutils.HoodieTestUtils;
|
||||||
|
import org.apache.hudi.common.util.ClusteringUtils;
|
||||||
import org.apache.hudi.common.util.CommitUtils;
|
import org.apache.hudi.common.util.CommitUtils;
|
||||||
import org.apache.hudi.common.util.CompactionUtils;
|
import org.apache.hudi.common.util.CompactionUtils;
|
||||||
import org.apache.hudi.common.util.Option;
|
import org.apache.hudi.common.util.Option;
|
||||||
@@ -1424,6 +1427,63 @@ public class TestHoodieTableFileSystemView extends HoodieCommonTestHarness {
|
|||||||
assertEquals(actualReplacedFileIds, allReplacedFileIds);
|
assertEquals(actualReplacedFileIds, allReplacedFileIds);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testPendingClusteringOperations() throws IOException {
|
||||||
|
String partitionPath1 = "2020/06/27";
|
||||||
|
new File(basePath + "/" + partitionPath1).mkdirs();
|
||||||
|
|
||||||
|
// create 2 fileId in partition1 - fileId1 is replaced later on.
|
||||||
|
String fileId1 = UUID.randomUUID().toString();
|
||||||
|
String fileId2 = UUID.randomUUID().toString();
|
||||||
|
String fileId3 = UUID.randomUUID().toString();
|
||||||
|
|
||||||
|
assertFalse(roView.getLatestBaseFiles(partitionPath1)
|
||||||
|
.anyMatch(dfile -> dfile.getFileId().equals(fileId1) || dfile.getFileId().equals(fileId2) || dfile.getFileId().equals(fileId3)),
|
||||||
|
"No commit, should not find any data file");
|
||||||
|
// Only one commit
|
||||||
|
String commitTime1 = "1";
|
||||||
|
String fileName1 = FSUtils.makeDataFileName(commitTime1, TEST_WRITE_TOKEN, fileId1);
|
||||||
|
String fileName2 = FSUtils.makeDataFileName(commitTime1, TEST_WRITE_TOKEN, fileId2);
|
||||||
|
String fileName3 = FSUtils.makeDataFileName(commitTime1, TEST_WRITE_TOKEN, fileId3);
|
||||||
|
new File(basePath + "/" + partitionPath1 + "/" + fileName1).createNewFile();
|
||||||
|
new File(basePath + "/" + partitionPath1 + "/" + fileName2).createNewFile();
|
||||||
|
new File(basePath + "/" + partitionPath1 + "/" + fileName3).createNewFile();
|
||||||
|
|
||||||
|
HoodieActiveTimeline commitTimeline = metaClient.getActiveTimeline();
|
||||||
|
HoodieInstant instant1 = new HoodieInstant(true, HoodieTimeline.COMMIT_ACTION, commitTime1);
|
||||||
|
saveAsComplete(commitTimeline, instant1, Option.empty());
|
||||||
|
refreshFsView();
|
||||||
|
assertEquals(1, roView.getLatestBaseFiles(partitionPath1)
|
||||||
|
.filter(dfile -> dfile.getFileId().equals(fileId1)).count());
|
||||||
|
assertEquals(1, roView.getLatestBaseFiles(partitionPath1)
|
||||||
|
.filter(dfile -> dfile.getFileId().equals(fileId2)).count());
|
||||||
|
assertEquals(1, roView.getLatestBaseFiles(partitionPath1)
|
||||||
|
.filter(dfile -> dfile.getFileId().equals(fileId3)).count());
|
||||||
|
|
||||||
|
List<FileSlice>[] fileSliceGroups = new List[] {
|
||||||
|
Collections.singletonList(fsView.getLatestFileSlice(partitionPath1, fileId1).get()),
|
||||||
|
Collections.singletonList(fsView.getLatestFileSlice(partitionPath1, fileId2).get())
|
||||||
|
};
|
||||||
|
|
||||||
|
// create pending clustering operation - fileId1, fileId2 are being clustered in different groups
|
||||||
|
HoodieClusteringPlan plan = ClusteringUtils.createClusteringPlan("strategy", new HashMap<>(),
|
||||||
|
fileSliceGroups, Collections.emptyMap());
|
||||||
|
|
||||||
|
String clusterTime = "2";
|
||||||
|
HoodieInstant instant2 = new HoodieInstant(State.REQUESTED, HoodieTimeline.REPLACE_COMMIT_ACTION, clusterTime);
|
||||||
|
HoodieRequestedReplaceMetadata requestedReplaceMetadata = HoodieRequestedReplaceMetadata.newBuilder()
|
||||||
|
.setClusteringPlan(plan).setOperationType(WriteOperationType.CLUSTER.name()).build();
|
||||||
|
metaClient.getActiveTimeline().saveToPendingReplaceCommit(instant2, TimelineMetadataUtils.serializeRequestedReplaceMetadata(requestedReplaceMetadata));
|
||||||
|
|
||||||
|
//make sure view doesnt include fileId1
|
||||||
|
refreshFsView();
|
||||||
|
Set<String> fileIds =
|
||||||
|
fsView.getFileGroupsInPendingClustering().map(e -> e.getLeft().getFileId()).collect(Collectors.toSet());
|
||||||
|
assertTrue(fileIds.contains(fileId1));
|
||||||
|
assertTrue(fileIds.contains(fileId2));
|
||||||
|
assertFalse(fileIds.contains(fileId3));
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected HoodieTableType getTableType() {
|
protected HoodieTableType getTableType() {
|
||||||
return HoodieTableType.MERGE_ON_READ;
|
return HoodieTableType.MERGE_ON_READ;
|
||||||
|
|||||||
@@ -0,0 +1,142 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hudi.common.util;
|
||||||
|
|
||||||
|
import org.apache.hudi.avro.model.HoodieClusteringPlan;
|
||||||
|
import org.apache.hudi.avro.model.HoodieRequestedReplaceMetadata;
|
||||||
|
import org.apache.hudi.common.fs.FSUtils;
|
||||||
|
import org.apache.hudi.common.model.FileSlice;
|
||||||
|
import org.apache.hudi.common.model.HoodieBaseFile;
|
||||||
|
import org.apache.hudi.common.model.HoodieFileGroupId;
|
||||||
|
import org.apache.hudi.common.model.HoodieTableType;
|
||||||
|
import org.apache.hudi.common.model.WriteOperationType;
|
||||||
|
import org.apache.hudi.common.table.timeline.HoodieInstant;
|
||||||
|
import org.apache.hudi.common.table.timeline.HoodieTimeline;
|
||||||
|
import org.apache.hudi.common.table.timeline.TimelineMetadataUtils;
|
||||||
|
import org.apache.hudi.common.testutils.HoodieCommonTestHarness;
|
||||||
|
import org.junit.jupiter.api.BeforeEach;
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.UUID;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tests for {@link ClusteringUtils}.
|
||||||
|
*/
|
||||||
|
public class TestClusteringUtils extends HoodieCommonTestHarness {
|
||||||
|
|
||||||
|
private static final String CLUSTERING_STRATEGY_CLASS = "org.apache.hudi.DefaultClusteringStrategy";
|
||||||
|
private static final Map<String, String> STRATEGY_PARAMS = new HashMap<String, String>() {
|
||||||
|
{
|
||||||
|
put("sortColumn", "record_key");
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
@BeforeEach
|
||||||
|
public void init() throws IOException {
|
||||||
|
initMetaClient();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testClusteringPlanMultipleInstants() throws Exception {
|
||||||
|
String partitionPath1 = "partition1";
|
||||||
|
List<String> fileIds1 = new ArrayList<>();
|
||||||
|
fileIds1.add(UUID.randomUUID().toString());
|
||||||
|
fileIds1.add(UUID.randomUUID().toString());
|
||||||
|
String clusterTime1 = "1";
|
||||||
|
createRequestedReplaceInstant(partitionPath1, clusterTime1, fileIds1);
|
||||||
|
|
||||||
|
List<String> fileIds2 = new ArrayList<>();
|
||||||
|
fileIds2.add(UUID.randomUUID().toString());
|
||||||
|
fileIds2.add(UUID.randomUUID().toString());
|
||||||
|
fileIds2.add(UUID.randomUUID().toString());
|
||||||
|
|
||||||
|
List<String> fileIds3 = new ArrayList<>();
|
||||||
|
fileIds3.add(UUID.randomUUID().toString());
|
||||||
|
|
||||||
|
String clusterTime = "2";
|
||||||
|
createRequestedReplaceInstant(partitionPath1, clusterTime, fileIds2, fileIds3);
|
||||||
|
|
||||||
|
// create replace.requested without clustering plan. this instant should be ignored by ClusteringUtils
|
||||||
|
createRequestedReplaceInstantNotClustering("3");
|
||||||
|
|
||||||
|
// create replace.requested without any metadata content. This instant should be ignored by ClusteringUtils
|
||||||
|
metaClient.getActiveTimeline().createNewInstant(new HoodieInstant(HoodieInstant.State.REQUESTED, HoodieTimeline.REPLACE_COMMIT_ACTION, "4"));
|
||||||
|
|
||||||
|
metaClient.reloadActiveTimeline();
|
||||||
|
assertEquals(4, metaClient.getActiveTimeline().filterPendingReplaceTimeline().countInstants());
|
||||||
|
|
||||||
|
Map<HoodieFileGroupId, HoodieInstant> fileGroupToInstantMap =
|
||||||
|
ClusteringUtils.getAllFileGroupsInPendingClusteringPlans(metaClient);
|
||||||
|
assertEquals(fileIds1.size() + fileIds2.size() + fileIds3.size(), fileGroupToInstantMap.size());
|
||||||
|
validateClusteringInstant(fileIds1, partitionPath1, clusterTime1, fileGroupToInstantMap);
|
||||||
|
validateClusteringInstant(fileIds2, partitionPath1, clusterTime, fileGroupToInstantMap);
|
||||||
|
validateClusteringInstant(fileIds3, partitionPath1, clusterTime, fileGroupToInstantMap);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void validateClusteringInstant(List<String> fileIds, String partitionPath,
|
||||||
|
String expectedInstantTime, Map<HoodieFileGroupId, HoodieInstant> fileGroupToInstantMap) {
|
||||||
|
for (String fileId : fileIds) {
|
||||||
|
assertEquals(expectedInstantTime, fileGroupToInstantMap.get(new HoodieFileGroupId(partitionPath, fileId)).getTimestamp());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private HoodieInstant createRequestedReplaceInstantNotClustering(String instantTime) throws IOException {
|
||||||
|
HoodieInstant newRequestedInstant = new HoodieInstant(HoodieInstant.State.REQUESTED, HoodieTimeline.REPLACE_COMMIT_ACTION, instantTime);
|
||||||
|
HoodieRequestedReplaceMetadata requestedReplaceMetadata = HoodieRequestedReplaceMetadata.newBuilder()
|
||||||
|
.setOperationType(WriteOperationType.UNKNOWN.name()).build();
|
||||||
|
metaClient.getActiveTimeline().saveToPendingReplaceCommit(newRequestedInstant, TimelineMetadataUtils.serializeRequestedReplaceMetadata(requestedReplaceMetadata));
|
||||||
|
return newRequestedInstant;
|
||||||
|
}
|
||||||
|
|
||||||
|
private HoodieInstant createRequestedReplaceInstant(String partitionPath1, String clusterTime, List<String>... fileIds) throws IOException {
|
||||||
|
List<FileSlice>[] fileSliceGroups = new List[fileIds.length];
|
||||||
|
for (int i = 0; i < fileIds.length; i++) {
|
||||||
|
fileSliceGroups[i] = fileIds[i].stream().map(fileId -> generateFileSlice(partitionPath1, fileId, "0")).collect(Collectors.toList());
|
||||||
|
}
|
||||||
|
|
||||||
|
HoodieClusteringPlan clusteringPlan =
|
||||||
|
ClusteringUtils.createClusteringPlan(CLUSTERING_STRATEGY_CLASS, STRATEGY_PARAMS, fileSliceGroups, Collections.emptyMap());
|
||||||
|
|
||||||
|
HoodieInstant clusteringInstant = new HoodieInstant(HoodieInstant.State.REQUESTED, HoodieTimeline.REPLACE_COMMIT_ACTION, clusterTime);
|
||||||
|
HoodieRequestedReplaceMetadata requestedReplaceMetadata = HoodieRequestedReplaceMetadata.newBuilder()
|
||||||
|
.setClusteringPlan(clusteringPlan).setOperationType(WriteOperationType.CLUSTER.name()).build();
|
||||||
|
metaClient.getActiveTimeline().saveToPendingReplaceCommit(clusteringInstant, TimelineMetadataUtils.serializeRequestedReplaceMetadata(requestedReplaceMetadata));
|
||||||
|
return clusteringInstant;
|
||||||
|
}
|
||||||
|
|
||||||
|
private FileSlice generateFileSlice(String partitionPath, String fileId, String baseInstant) {
|
||||||
|
FileSlice fs = new FileSlice(new HoodieFileGroupId(partitionPath, fileId), baseInstant);
|
||||||
|
fs.setBaseFile(new HoodieBaseFile(FSUtils.makeDataFileName(baseInstant, "1-0-1", fileId)));
|
||||||
|
return fs;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected HoodieTableType getTableType() {
|
||||||
|
return HoodieTableType.MERGE_ON_READ;
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -20,6 +20,7 @@ package org.apache.hudi.timeline.service;
|
|||||||
|
|
||||||
import org.apache.hudi.common.table.timeline.HoodieTimeline;
|
import org.apache.hudi.common.table.timeline.HoodieTimeline;
|
||||||
import org.apache.hudi.common.table.timeline.dto.BaseFileDTO;
|
import org.apache.hudi.common.table.timeline.dto.BaseFileDTO;
|
||||||
|
import org.apache.hudi.common.table.timeline.dto.ClusteringOpDTO;
|
||||||
import org.apache.hudi.common.table.timeline.dto.CompactionOpDTO;
|
import org.apache.hudi.common.table.timeline.dto.CompactionOpDTO;
|
||||||
import org.apache.hudi.common.table.timeline.dto.FileGroupDTO;
|
import org.apache.hudi.common.table.timeline.dto.FileGroupDTO;
|
||||||
import org.apache.hudi.common.table.timeline.dto.FileSliceDTO;
|
import org.apache.hudi.common.table.timeline.dto.FileSliceDTO;
|
||||||
@@ -297,6 +298,12 @@ public class FileSystemViewHandler {
|
|||||||
ctx.queryParam(RemoteHoodieTableFileSystemView.PARTITION_PARAM,""));
|
ctx.queryParam(RemoteHoodieTableFileSystemView.PARTITION_PARAM,""));
|
||||||
writeValueAsString(ctx, dtos);
|
writeValueAsString(ctx, dtos);
|
||||||
}, true));
|
}, true));
|
||||||
|
|
||||||
|
app.get(RemoteHoodieTableFileSystemView.PENDING_CLUSTERING_FILEGROUPS, new ViewHandler(ctx -> {
|
||||||
|
List<ClusteringOpDTO> dtos = sliceHandler.getFileGroupsInPendingClustering(
|
||||||
|
ctx.validatedQueryParam(RemoteHoodieTableFileSystemView.BASEPATH_PARAM).getOrThrow());
|
||||||
|
writeValueAsString(ctx, dtos);
|
||||||
|
}, true));
|
||||||
}
|
}
|
||||||
|
|
||||||
private static boolean isRefreshCheckDisabledInQuery(Context ctxt) {
|
private static boolean isRefreshCheckDisabledInQuery(Context ctxt) {
|
||||||
|
|||||||
@@ -19,6 +19,7 @@
|
|||||||
package org.apache.hudi.timeline.service.handlers;
|
package org.apache.hudi.timeline.service.handlers;
|
||||||
|
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hudi.common.table.timeline.dto.ClusteringOpDTO;
|
||||||
import org.apache.hudi.common.table.timeline.dto.CompactionOpDTO;
|
import org.apache.hudi.common.table.timeline.dto.CompactionOpDTO;
|
||||||
import org.apache.hudi.common.table.timeline.dto.FileGroupDTO;
|
import org.apache.hudi.common.table.timeline.dto.FileGroupDTO;
|
||||||
import org.apache.hudi.common.table.timeline.dto.FileSliceDTO;
|
import org.apache.hudi.common.table.timeline.dto.FileSliceDTO;
|
||||||
@@ -93,6 +94,12 @@ public class FileSliceHandler extends Handler {
|
|||||||
.collect(Collectors.toList());
|
.collect(Collectors.toList());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public List<ClusteringOpDTO> getFileGroupsInPendingClustering(String basePath) {
|
||||||
|
return viewManager.getFileSystemView(basePath).getFileGroupsInPendingClustering()
|
||||||
|
.map(fgInstant -> ClusteringOpDTO.fromClusteringOp(fgInstant.getLeft(), fgInstant.getRight()))
|
||||||
|
.collect(Collectors.toList());
|
||||||
|
}
|
||||||
|
|
||||||
public boolean refreshTable(String basePath) {
|
public boolean refreshTable(String basePath) {
|
||||||
viewManager.clearFileSystemView(basePath);
|
viewManager.clearFileSystemView(basePath);
|
||||||
return true;
|
return true;
|
||||||
|
|||||||
Reference in New Issue
Block a user