From 395806fc684d8f6c3fa1b6b0f29e601cb6f4bbd0 Mon Sep 17 00:00:00 2001 From: ambition119 <1269223860@qq.com> Date: Wed, 20 Mar 2019 12:25:51 +0800 Subject: [PATCH] [HUDI-63] Removed unused BucketedIndex code --- .../com/uber/hoodie/index/HoodieIndex.java | 5 +- .../hoodie/index/bucketed/BucketedIndex.java | 115 ------------------ 2 files changed, 1 insertion(+), 119 deletions(-) delete mode 100644 hoodie-client/src/main/java/com/uber/hoodie/index/bucketed/BucketedIndex.java diff --git a/hoodie-client/src/main/java/com/uber/hoodie/index/HoodieIndex.java b/hoodie-client/src/main/java/com/uber/hoodie/index/HoodieIndex.java index 17897846d..5eea91583 100644 --- a/hoodie-client/src/main/java/com/uber/hoodie/index/HoodieIndex.java +++ b/hoodie-client/src/main/java/com/uber/hoodie/index/HoodieIndex.java @@ -25,7 +25,6 @@ import com.uber.hoodie.config.HoodieWriteConfig; import com.uber.hoodie.exception.HoodieIndexException; import com.uber.hoodie.index.bloom.HoodieBloomIndex; import com.uber.hoodie.index.bloom.HoodieGlobalBloomIndex; -import com.uber.hoodie.index.bucketed.BucketedIndex; import com.uber.hoodie.index.hbase.HBaseIndex; import com.uber.hoodie.table.HoodieTable; import java.io.Serializable; @@ -56,8 +55,6 @@ public abstract class HoodieIndex<T extends HoodieRecordPayload> implements Seri return new HoodieBloomIndex<>(config); case GLOBAL_BLOOM: return new HoodieGlobalBloomIndex<>(config); - case BUCKETED: - return new BucketedIndex<>(config); default: throw new HoodieIndexException("Index type unspecified, set " + config.getIndexType()); } @@ -119,6 +116,6 @@ public abstract class HoodieIndex<T extends HoodieRecordPayload> implements Seri public enum IndexType { - HBASE, INMEMORY, BLOOM, GLOBAL_BLOOM, BUCKETED + HBASE, INMEMORY, BLOOM, GLOBAL_BLOOM } } diff --git a/hoodie-client/src/main/java/com/uber/hoodie/index/bucketed/BucketedIndex.java b/hoodie-client/src/main/java/com/uber/hoodie/index/bucketed/BucketedIndex.java deleted file mode 100644 index dcb84add4..000000000 --- 
a/hoodie-client/src/main/java/com/uber/hoodie/index/bucketed/BucketedIndex.java +++ /dev/null @@ -1,115 +0,0 @@ -/* - * Copyright (c) 2017 Uber Technologies, Inc. (hoodie-dev-group@uber.com) - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * - */ - -package com.uber.hoodie.index.bucketed; - -import com.google.common.base.Optional; -import com.uber.hoodie.WriteStatus; -import com.uber.hoodie.common.model.HoodieKey; -import com.uber.hoodie.common.model.HoodieRecord; -import com.uber.hoodie.common.model.HoodieRecordLocation; -import com.uber.hoodie.common.model.HoodieRecordPayload; -import com.uber.hoodie.config.HoodieWriteConfig; -import com.uber.hoodie.exception.HoodieIndexException; -import com.uber.hoodie.index.HoodieIndex; -import com.uber.hoodie.table.HoodieTable; -import org.apache.log4j.LogManager; -import org.apache.log4j.Logger; -import org.apache.spark.api.java.JavaPairRDD; -import org.apache.spark.api.java.JavaRDD; -import org.apache.spark.api.java.JavaSparkContext; -import scala.Tuple2; - -/** - * An `stateless` index implementation that will using a deterministic mapping function to determine - * the fileID for a given record. - *
<p>
- * Pros: - Fast - *
<p>
- * Cons : - Need to tune the number of buckets per partition path manually (FIXME: Need to autotune - * this) - Could increase write amplification on copy-on-write storage since inserts always rewrite - * files - Not global. - */ -public class BucketedIndex<T extends HoodieRecordPayload> extends HoodieIndex<T> { - - private static Logger logger = LogManager.getLogger(BucketedIndex.class); - - public BucketedIndex(HoodieWriteConfig config) { - super(config); - } - - private String getBucket(String recordKey) { - return String.valueOf(recordKey.hashCode() % config.getNumBucketsPerPartition()); - } - - @Override - public JavaPairRDD<HoodieKey, Optional<String>> fetchRecordLocation(JavaRDD<HoodieKey> hoodieKeys, - JavaSparkContext jsc, HoodieTable<T> hoodieTable) { - return hoodieKeys.mapToPair(hk -> new Tuple2<>(hk, Optional.of(getBucket(hk.getRecordKey())))); - } - - @Override - public JavaRDD<HoodieRecord<T>> tagLocation(JavaRDD<HoodieRecord<T>> recordRDD, JavaSparkContext jsc, - HoodieTable<T> hoodieTable) - throws HoodieIndexException { - return recordRDD.map(record -> { - String bucket = getBucket(record.getRecordKey()); - //HACK(vc) a non-existent commit is provided here. - record.setCurrentLocation(new HoodieRecordLocation("000", bucket)); - return record; - }); - } - - @Override - public JavaRDD<WriteStatus> updateLocation(JavaRDD<WriteStatus> writeStatusRDD, JavaSparkContext jsc, - HoodieTable<T> hoodieTable) - throws HoodieIndexException { - return writeStatusRDD; - } - - @Override - public boolean rollbackCommit(String commitTime) { - // nothing to rollback in the index. - return true; - } - - /** - * Bucketing is still done within each partition. - */ - @Override - public boolean isGlobal() { - return false; - } - - /** - * Since indexing is just a deterministic hash, we can identify file group correctly even without - * an index on the actual log file. - */ - @Override - public boolean canIndexLogFiles() { - return true; - } - - /** - * Indexing is just a hash function. - */ - @Override - public boolean isImplicitWithStorage() { - return true; - } -}