1
0

[HUDI-121] Fix licensing issues found during RC voting by general incubator group

This commit is contained in:
Balaji Varadarajan
2019-10-11 23:00:55 -07:00
committed by Balaji Varadarajan
parent 8c13340062
commit 77f4e73615
19 changed files with 830 additions and 1657 deletions

View File

@@ -17,6 +17,9 @@
package org.apache.hudi.cli
import java.util
import java.util.Map
import org.apache.avro.Schema
import org.apache.avro.generic.IndexedRecord
import org.apache.hadoop.conf.Configuration
@@ -54,11 +57,6 @@ object SparkHelpers {
}
writer.close
}
/**
 * Looks up the bloom filter entry stored in the footer metadata of a Parquet file.
 *
 * @param file path of the Parquet file whose footer is read
 * @param conf Hadoop configuration handed to the Parquet footer reader
 * @return the value stored in the footer's key/value metadata under
 *         `HoodieAvroWriteSupport.HOODIE_AVRO_BLOOM_FILTER_METADATA_KEY`
 *         (NOTE(review): presumably `null` when the key is absent, since the
 *         lookup is a plain map `get` -- confirm against callers)
 */
def getBloomFilter(file: String, conf: Configuration): String = {
  // Read the footer once and navigate down to the file-level key/value metadata.
  val keyValueMetadata = ParquetFileReader
    .readFooter(conf, new Path(file))
    .getFileMetaData()
    .getKeyValueMetaData()
  keyValueMetadata.get(HoodieAvroWriteSupport.HOODIE_AVRO_BLOOM_FILTER_METADATA_KEY)
}
}
@@ -124,8 +122,7 @@ class SparkHelper(sqlContext: SQLContext, fs: FileSystem) {
* @return
*/
def fileKeysAgainstBF(conf: Configuration, sqlContext: SQLContext, file: String): Boolean = {
val bfStr = SparkHelpers.getBloomFilter(file, conf)
val bf = new BloomFilter(bfStr)
val bf = ParquetUtils.readBloomFilterFromParquetMetadata(conf, new Path(file))
val foundCount = sqlContext.parquetFile(file)
.select(s"`${HoodieRecord.RECORD_KEY_METADATA_FIELD}`")
.collect().count(r => !bf.mightContain(r.getString(0)))