[HUDI-2778] Optimize statistics collection related code, add some docs for z-order, and fix some bugs (#4013)
* [HUDI-2778] Optimize statistics collection related codes and add more docs for z-order. * add test code for multi-thread parquet footer read
This commit is contained in:
@@ -100,7 +100,7 @@ object DataSkippingUtils {
|
||||
// query filter "colA >= b" convert it to "colA_maxValue >= b" for index table
|
||||
case GreaterThanOrEqual(attribute: AttributeReference, right: Literal) =>
|
||||
val colName = getTargetColNameParts(attribute)
|
||||
GreaterThanOrEqual(maxValue(colName), right)
|
||||
reWriteCondition(colName, GreaterThanOrEqual(maxValue(colName), right))
|
||||
// query filter "b >= colA" convert it to "colA_minValue <= b" for index table
|
||||
case GreaterThanOrEqual(value: Literal, attribute: AttributeReference) =>
|
||||
val colName = getTargetColNameParts(attribute)
|
||||
@@ -179,7 +179,7 @@ object DataSkippingUtils {
|
||||
def getIndexFiles(conf: Configuration, indexPath: String): Seq[FileStatus] = {
|
||||
val basePath = new Path(indexPath)
|
||||
basePath.getFileSystem(conf)
|
||||
.listStatus(basePath).filterNot(f => f.getPath.getName.endsWith(".parquet"))
|
||||
.listStatus(basePath).filter(f => f.getPath.getName.endsWith(".parquet"))
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
Reference in New Issue
Block a user