[HUDI-2766] Cluster update strategy should not be fenced by write config (#4093)

Fix pending clustering rollback test
2021-11-24 23:45:40 +05:30
parent 60b23b9797
commit ff94d92980
3 changed files with 63 additions and 57 deletions
--- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/BaseSparkCommitActionExecutor.java
+++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/BaseSparkCommitActionExecutor.java
@@ -115,35 +115,31 @@ public abstract class BaseSparkCommitActionExecutor<T extends HoodieRecordPayloa
  }

  private JavaRDD<HoodieRecord<T>> clusteringHandleUpdate(JavaRDD<HoodieRecord<T>> inputRecordsRDD) {
-    if (config.isClusteringEnabled()) {
-      Set<HoodieFileGroupId> fileGroupsInPendingClustering =
-          table.getFileSystemView().getFileGroupsInPendingClustering().map(entry -> entry.getKey()).collect(Collectors.toSet());
-      UpdateStrategy updateStrategy = (UpdateStrategy)ReflectionUtils
-          .loadClass(config.getClusteringUpdatesStrategyClass(), this.context, fileGroupsInPendingClustering);
-      Pair<JavaRDD<HoodieRecord<T>>, Set<HoodieFileGroupId>> recordsAndPendingClusteringFileGroups =
-          (Pair<JavaRDD<HoodieRecord<T>>, Set<HoodieFileGroupId>>)updateStrategy.handleUpdate(inputRecordsRDD);
-      Set<HoodieFileGroupId> fileGroupsWithUpdatesAndPendingClustering = recordsAndPendingClusteringFileGroups.getRight();
-      if (fileGroupsWithUpdatesAndPendingClustering.isEmpty()) {
-        return recordsAndPendingClusteringFileGroups.getLeft();
-      }
-      // there are filegroups pending clustering and receiving updates, so rollback the pending clustering instants
-      // there could be race condition, for example, if the clustering completes after instants are fetched but before rollback completed
-      if (config.isRollbackPendingClustering()) {
-        Set<HoodieInstant> pendingClusteringInstantsToRollback = getAllFileGroupsInPendingClusteringPlans(table.getMetaClient()).entrySet().stream()
-            .filter(e -> fileGroupsWithUpdatesAndPendingClustering.contains(e.getKey()))
-            .map(Map.Entry::getValue)
-            .collect(Collectors.toSet());
-        pendingClusteringInstantsToRollback.forEach(instant -> {
-          String commitTime = HoodieActiveTimeline.createNewInstantTime();
-          table.scheduleRollback(context, commitTime, instant, false, config.shouldRollbackUsingMarkers());
-          table.rollback(context, commitTime, instant, true, true);
-        });
-        table.getMetaClient().reloadActiveTimeline();
-      }
+    Set<HoodieFileGroupId> fileGroupsInPendingClustering =
+        table.getFileSystemView().getFileGroupsInPendingClustering().map(entry -> entry.getKey()).collect(Collectors.toSet());
+    UpdateStrategy updateStrategy = (UpdateStrategy) ReflectionUtils
+        .loadClass(config.getClusteringUpdatesStrategyClass(), this.context, fileGroupsInPendingClustering);
+    Pair<JavaRDD<HoodieRecord<T>>, Set<HoodieFileGroupId>> recordsAndPendingClusteringFileGroups =
+        (Pair<JavaRDD<HoodieRecord<T>>, Set<HoodieFileGroupId>>) updateStrategy.handleUpdate(inputRecordsRDD);
+    Set<HoodieFileGroupId> fileGroupsWithUpdatesAndPendingClustering = recordsAndPendingClusteringFileGroups.getRight();
+    if (fileGroupsWithUpdatesAndPendingClustering.isEmpty()) {
      return recordsAndPendingClusteringFileGroups.getLeft();
-    } else {
-      return inputRecordsRDD;
    }
+    // there are filegroups pending clustering and receiving updates, so rollback the pending clustering instants
+    // there could be race condition, for example, if the clustering completes after instants are fetched but before rollback completed
+    if (config.isRollbackPendingClustering()) {
+      Set<HoodieInstant> pendingClusteringInstantsToRollback = getAllFileGroupsInPendingClusteringPlans(table.getMetaClient()).entrySet().stream()
+          .filter(e -> fileGroupsWithUpdatesAndPendingClustering.contains(e.getKey()))
+          .map(Map.Entry::getValue)
+          .collect(Collectors.toSet());
+      pendingClusteringInstantsToRollback.forEach(instant -> {
+        String commitTime = HoodieActiveTimeline.createNewInstantTime();
+        table.scheduleRollback(context, commitTime, instant, false, config.shouldRollbackUsingMarkers());
+        table.rollback(context, commitTime, instant, true, true);
+      });
+      table.getMetaClient().reloadActiveTimeline();
+    }
+    return recordsAndPendingClusteringFileGroups.getLeft();
  }

  @Override