1
0

[HUDI-2778] Optimize statistics-collection code, add some docs for z-order, and fix some bugs (#4013)

* [HUDI-2778] Optimize statistics-collection code and add more docs for z-order.

* add test code for multi-thread parquet footer read
This commit is contained in:
xiarixiaoyao
2021-11-23 13:46:02 +08:00
committed by GitHub
parent c88c2af8bf
commit 9de9951348
6 changed files with 175 additions and 39 deletions

View File

@@ -100,7 +100,7 @@ object DataSkippingUtils {
// query filter "colA >= b" convert it to "colA_maxValue >= b" for index table
case GreaterThanOrEqual(attribute: AttributeReference, right: Literal) =>
val colName = getTargetColNameParts(attribute)
GreaterThanOrEqual(maxValue(colName), right)
reWriteCondition(colName, GreaterThanOrEqual(maxValue(colName), right))
// query filter "b >= colA" convert it to "colA_minValue <= b" for index table
case GreaterThanOrEqual(value: Literal, attribute: AttributeReference) =>
val colName = getTargetColNameParts(attribute)
@@ -179,7 +179,7 @@ object DataSkippingUtils {
/**
 * Lists the entries directly under `indexPath` and selects them by whether their
 * file name ends with ".parquet".
 *
 * NOTE(review): this span comes from a diff view whose +/- markers are missing, so
 * both a `filterNot` line and a `filter` line are shown. Presumably the `filterNot`
 * line is the removed code and the `filter` line (keep only ".parquet" files) is the
 * replacement -- confirm against the repository.
 *
 * @param conf      configuration handed to `getFileSystem` to resolve the file system of `indexPath`
 * @param indexPath location of the directory whose entries are listed
 * @return the `FileStatus` entries from `listStatus` that pass the name-based selection
 */
def getIndexFiles(conf: Configuration, indexPath: String): Seq[FileStatus] = {
val basePath = new Path(indexPath)
basePath.getFileSystem(conf)
// Variant that drops every entry whose name ends with ".parquet".
.listStatus(basePath).filterNot(f => f.getPath.getName.endsWith(".parquet"))
// Variant that keeps only the entries whose name ends with ".parquet".
.listStatus(basePath).filter(f => f.getPath.getName.endsWith(".parquet"))
}
/**