[MINOR] Fix dates as per UTC in TestDataSkippingUtils (#5166)
* Fix timezone in test
This commit is contained in:
@@ -22,6 +22,7 @@ import org.apache.hudi.testutils.HoodieClientTestBase
|
|||||||
import org.apache.spark.sql.catalyst.expressions.{Expression, Not}
|
import org.apache.spark.sql.catalyst.expressions.{Expression, Not}
|
||||||
import org.apache.spark.sql.functions.{col, lower}
|
import org.apache.spark.sql.functions.{col, lower}
|
||||||
import org.apache.spark.sql.hudi.DataSkippingUtils
|
import org.apache.spark.sql.hudi.DataSkippingUtils
|
||||||
|
import org.apache.spark.sql.internal.SQLConf.SESSION_LOCAL_TIMEZONE
|
||||||
import org.apache.spark.sql.types._
|
import org.apache.spark.sql.types._
|
||||||
import org.apache.spark.sql.{Column, HoodieCatalystExpressionUtils, Row, SparkSession}
|
import org.apache.spark.sql.{Column, HoodieCatalystExpressionUtils, Row, SparkSession}
|
||||||
import org.junit.jupiter.api.Assertions.assertEquals
|
import org.junit.jupiter.api.Assertions.assertEquals
|
||||||
@@ -93,6 +94,7 @@ class TestDataSkippingUtils extends HoodieClientTestBase with SparkAdapterSuppor
|
|||||||
"testCompositeFilterExpressionsSource"
|
"testCompositeFilterExpressionsSource"
|
||||||
))
|
))
|
||||||
def testLookupFilterExpressions(sourceExpr: String, input: Seq[IndexRow], output: Seq[String]): Unit = {
|
def testLookupFilterExpressions(sourceExpr: String, input: Seq[IndexRow], output: Seq[String]): Unit = {
|
||||||
|
spark.sqlContext.setConf(SESSION_LOCAL_TIMEZONE.key, "UTC")
|
||||||
val resolvedExpr: Expression = exprUtils.resolveExpr(spark, sourceExpr, sourceTableSchema)
|
val resolvedExpr: Expression = exprUtils.resolveExpr(spark, sourceExpr, sourceTableSchema)
|
||||||
val lookupFilter = DataSkippingUtils.translateIntoColumnStatsIndexFilterExpr(resolvedExpr, indexSchema)
|
val lookupFilter = DataSkippingUtils.translateIntoColumnStatsIndexFilterExpr(resolvedExpr, indexSchema)
|
||||||
|
|
||||||
@@ -384,186 +386,187 @@ object TestDataSkippingUtils {
|
|||||||
}
|
}
|
||||||
|
|
||||||
def testCompositeFilterExpressionsSource(): java.util.stream.Stream[Arguments] = {
|
def testCompositeFilterExpressionsSource(): java.util.stream.Stream[Arguments] = {
|
||||||
|
// NOTE: Dates in the trailing comments are the UTC calendar dates of these epoch-millisecond timestamps
|
||||||
java.util.stream.Stream.of(
|
java.util.stream.Stream.of(
|
||||||
arguments(
|
arguments(
|
||||||
"date_format(C, 'MM/dd/yyyy') = '03/06/2022'",
|
"date_format(C, 'MM/dd/yyyy') = '03/07/2022'",
|
||||||
Seq(
|
Seq(
|
||||||
IndexRow("file_1",
|
IndexRow("file_1",
|
||||||
C_minValue = new Timestamp(1646711448000L), // 03/07/2022
|
C_minValue = new Timestamp(1646711448000L), // 03/08/2022
|
||||||
C_maxValue = new Timestamp(1646797848000L), // 03/08/2022
|
C_maxValue = new Timestamp(1646797848000L), // 03/09/2022
|
||||||
C_num_nulls = 0),
|
C_num_nulls = 0),
|
||||||
IndexRow("file_2",
|
IndexRow("file_2",
|
||||||
C_minValue = new Timestamp(1646625048000L), // 03/06/2022
|
C_minValue = new Timestamp(1646625048000L), // 03/07/2022
|
||||||
C_maxValue = new Timestamp(1646711448000L), // 03/07/2022
|
C_maxValue = new Timestamp(1646711448000L), // 03/08/2022
|
||||||
C_num_nulls = 0)
|
C_num_nulls = 0)
|
||||||
),
|
),
|
||||||
Seq("file_2")),
|
Seq("file_2")),
|
||||||
arguments(
|
arguments(
|
||||||
"'03/06/2022' = date_format(C, 'MM/dd/yyyy')",
|
"'03/07/2022' = date_format(C, 'MM/dd/yyyy')",
|
||||||
Seq(
|
Seq(
|
||||||
IndexRow("file_1",
|
IndexRow("file_1",
|
||||||
C_minValue = new Timestamp(1646711448000L), // 03/07/2022
|
C_minValue = new Timestamp(1646711448000L), // 03/08/2022
|
||||||
C_maxValue = new Timestamp(1646797848000L), // 03/08/2022
|
C_maxValue = new Timestamp(1646797848000L), // 03/09/2022
|
||||||
C_num_nulls = 0),
|
C_num_nulls = 0),
|
||||||
IndexRow("file_2",
|
IndexRow("file_2",
|
||||||
C_minValue = new Timestamp(1646625048000L), // 03/06/2022
|
C_minValue = new Timestamp(1646625048000L), // 03/07/2022
|
||||||
C_maxValue = new Timestamp(1646711448000L), // 03/07/2022
|
C_maxValue = new Timestamp(1646711448000L), // 03/08/2022
|
||||||
C_num_nulls = 0)
|
C_num_nulls = 0)
|
||||||
),
|
),
|
||||||
Seq("file_2")),
|
Seq("file_2")),
|
||||||
arguments(
|
arguments(
|
||||||
"'03/06/2022' != date_format(C, 'MM/dd/yyyy')",
|
"'03/07/2022' != date_format(C, 'MM/dd/yyyy')",
|
||||||
Seq(
|
Seq(
|
||||||
IndexRow("file_1",
|
IndexRow("file_1",
|
||||||
C_minValue = new Timestamp(1646711448000L), // 03/07/2022
|
C_minValue = new Timestamp(1646711448000L), // 03/08/2022
|
||||||
C_maxValue = new Timestamp(1646797848000L), // 03/08/2022
|
C_maxValue = new Timestamp(1646797848000L), // 03/09/2022
|
||||||
C_num_nulls = 0),
|
C_num_nulls = 0),
|
||||||
IndexRow("file_2",
|
IndexRow("file_2",
|
||||||
C_minValue = new Timestamp(1646625048000L), // 03/06/2022
|
C_minValue = new Timestamp(1646625048000L), // 03/07/2022
|
||||||
C_maxValue = new Timestamp(1646625048000L), // 03/06/2022
|
C_maxValue = new Timestamp(1646625048000L), // 03/07/2022
|
||||||
C_num_nulls = 0)
|
C_num_nulls = 0)
|
||||||
),
|
),
|
||||||
Seq("file_1")),
|
Seq("file_1")),
|
||||||
arguments(
|
arguments(
|
||||||
"date_format(C, 'MM/dd/yyyy') != '03/06/2022'",
|
"date_format(C, 'MM/dd/yyyy') != '03/07/2022'",
|
||||||
Seq(
|
Seq(
|
||||||
IndexRow("file_1",
|
IndexRow("file_1",
|
||||||
C_minValue = new Timestamp(1646711448000L), // 03/07/2022
|
C_minValue = new Timestamp(1646711448000L), // 03/08/2022
|
||||||
C_maxValue = new Timestamp(1646797848000L), // 03/08/2022
|
C_maxValue = new Timestamp(1646797848000L), // 03/09/2022
|
||||||
C_num_nulls = 0),
|
C_num_nulls = 0),
|
||||||
IndexRow("file_2",
|
IndexRow("file_2",
|
||||||
C_minValue = new Timestamp(1646625048000L), // 03/06/2022
|
C_minValue = new Timestamp(1646625048000L), // 03/07/2022
|
||||||
C_maxValue = new Timestamp(1646625048000L), // 03/06/2022
|
C_maxValue = new Timestamp(1646625048000L), // 03/07/2022
|
||||||
C_num_nulls = 0)
|
C_num_nulls = 0)
|
||||||
),
|
),
|
||||||
Seq("file_1")),
|
Seq("file_1")),
|
||||||
arguments(
|
arguments(
|
||||||
"date_format(C, 'MM/dd/yyyy') < '03/07/2022'",
|
"date_format(C, 'MM/dd/yyyy') < '03/08/2022'",
|
||||||
Seq(
|
Seq(
|
||||||
IndexRow("file_1",
|
IndexRow("file_1",
|
||||||
C_minValue = new Timestamp(1646711448000L), // 03/07/2022
|
C_minValue = new Timestamp(1646711448000L), // 03/08/2022
|
||||||
C_maxValue = new Timestamp(1646797848000L), // 03/08/2022
|
C_maxValue = new Timestamp(1646797848000L), // 03/09/2022
|
||||||
C_num_nulls = 0),
|
C_num_nulls = 0),
|
||||||
IndexRow("file_2",
|
IndexRow("file_2",
|
||||||
C_minValue = new Timestamp(1646625048000L), // 03/06/2022
|
C_minValue = new Timestamp(1646625048000L), // 03/07/2022
|
||||||
C_maxValue = new Timestamp(1646711448000L), // 03/07/2022
|
C_maxValue = new Timestamp(1646711448000L), // 03/08/2022
|
||||||
C_num_nulls = 0)
|
C_num_nulls = 0)
|
||||||
),
|
),
|
||||||
Seq("file_2")),
|
Seq("file_2")),
|
||||||
arguments(
|
arguments(
|
||||||
"'03/07/2022' > date_format(C, 'MM/dd/yyyy')",
|
"'03/08/2022' > date_format(C, 'MM/dd/yyyy')",
|
||||||
Seq(
|
Seq(
|
||||||
IndexRow("file_1",
|
IndexRow("file_1",
|
||||||
C_minValue = new Timestamp(1646711448000L), // 03/07/2022
|
C_minValue = new Timestamp(1646711448000L), // 03/08/2022
|
||||||
C_maxValue = new Timestamp(1646797848000L), // 03/08/2022
|
C_maxValue = new Timestamp(1646797848000L), // 03/09/2022
|
||||||
C_num_nulls = 0),
|
C_num_nulls = 0),
|
||||||
IndexRow("file_2",
|
IndexRow("file_2",
|
||||||
C_minValue = new Timestamp(1646625048000L), // 03/06/2022
|
C_minValue = new Timestamp(1646625048000L), // 03/07/2022
|
||||||
C_maxValue = new Timestamp(1646711448000L), // 03/07/2022
|
C_maxValue = new Timestamp(1646711448000L), // 03/08/2022
|
||||||
C_num_nulls = 0)
|
C_num_nulls = 0)
|
||||||
),
|
),
|
||||||
Seq("file_2")),
|
Seq("file_2")),
|
||||||
arguments(
|
arguments(
|
||||||
"'03/07/2022' < date_format(C, 'MM/dd/yyyy')",
|
"'03/08/2022' < date_format(C, 'MM/dd/yyyy')",
|
||||||
Seq(
|
Seq(
|
||||||
IndexRow("file_1",
|
IndexRow("file_1",
|
||||||
C_minValue = new Timestamp(1646711448000L), // 03/07/2022
|
C_minValue = new Timestamp(1646711448000L), // 03/08/2022
|
||||||
C_maxValue = new Timestamp(1646797848000L), // 03/08/2022
|
C_maxValue = new Timestamp(1646797848000L), // 03/09/2022
|
||||||
C_num_nulls = 0),
|
C_num_nulls = 0),
|
||||||
IndexRow("file_2",
|
IndexRow("file_2",
|
||||||
C_minValue = new Timestamp(1646625048000L), // 03/06/2022
|
C_minValue = new Timestamp(1646625048000L), // 03/07/2022
|
||||||
C_maxValue = new Timestamp(1646711448000L), // 03/07/2022
|
C_maxValue = new Timestamp(1646711448000L), // 03/08/2022
|
||||||
C_num_nulls = 0)
|
C_num_nulls = 0)
|
||||||
),
|
),
|
||||||
Seq("file_1")),
|
Seq("file_1")),
|
||||||
arguments(
|
arguments(
|
||||||
"date_format(C, 'MM/dd/yyyy') > '03/07/2022'",
|
"date_format(C, 'MM/dd/yyyy') > '03/08/2022'",
|
||||||
Seq(
|
Seq(
|
||||||
IndexRow("file_1",
|
IndexRow("file_1",
|
||||||
C_minValue = new Timestamp(1646711448000L), // 03/07/2022
|
C_minValue = new Timestamp(1646711448000L), // 03/08/2022
|
||||||
C_maxValue = new Timestamp(1646797848000L), // 03/08/2022
|
C_maxValue = new Timestamp(1646797848000L), // 03/09/2022
|
||||||
C_num_nulls = 0),
|
C_num_nulls = 0),
|
||||||
IndexRow("file_2",
|
IndexRow("file_2",
|
||||||
C_minValue = new Timestamp(1646625048000L), // 03/06/2022
|
C_minValue = new Timestamp(1646625048000L), // 03/07/2022
|
||||||
C_maxValue = new Timestamp(1646711448000L), // 03/07/2022
|
C_maxValue = new Timestamp(1646711448000L), // 03/08/2022
|
||||||
C_num_nulls = 0)
|
C_num_nulls = 0)
|
||||||
),
|
),
|
||||||
Seq("file_1")),
|
Seq("file_1")),
|
||||||
arguments(
|
arguments(
|
||||||
"date_format(C, 'MM/dd/yyyy') <= '03/06/2022'",
|
"date_format(C, 'MM/dd/yyyy') <= '03/07/2022'",
|
||||||
Seq(
|
Seq(
|
||||||
IndexRow("file_1",
|
IndexRow("file_1",
|
||||||
C_minValue = new Timestamp(1646711448000L), // 03/07/2022
|
C_minValue = new Timestamp(1646711448000L), // 03/08/2022
|
||||||
C_maxValue = new Timestamp(1646797848000L), // 03/08/2022
|
C_maxValue = new Timestamp(1646797848000L), // 03/09/2022
|
||||||
C_num_nulls = 0),
|
C_num_nulls = 0),
|
||||||
IndexRow("file_2",
|
IndexRow("file_2",
|
||||||
C_minValue = new Timestamp(1646625048000L), // 03/06/2022
|
C_minValue = new Timestamp(1646625048000L), // 03/07/2022
|
||||||
C_maxValue = new Timestamp(1646711448000L), // 03/07/2022
|
C_maxValue = new Timestamp(1646711448000L), // 03/08/2022
|
||||||
C_num_nulls = 0)
|
C_num_nulls = 0)
|
||||||
),
|
),
|
||||||
Seq("file_2")),
|
Seq("file_2")),
|
||||||
arguments(
|
arguments(
|
||||||
"'03/06/2022' >= date_format(C, 'MM/dd/yyyy')",
|
"'03/07/2022' >= date_format(C, 'MM/dd/yyyy')",
|
||||||
Seq(
|
Seq(
|
||||||
IndexRow("file_1",
|
IndexRow("file_1",
|
||||||
C_minValue = new Timestamp(1646711448000L), // 03/07/2022
|
C_minValue = new Timestamp(1646711448000L), // 03/08/2022
|
||||||
C_maxValue = new Timestamp(1646797848000L), // 03/08/2022
|
C_maxValue = new Timestamp(1646797848000L), // 03/09/2022
|
||||||
C_num_nulls = 0),
|
C_num_nulls = 0),
|
||||||
IndexRow("file_2",
|
IndexRow("file_2",
|
||||||
C_minValue = new Timestamp(1646625048000L), // 03/06/2022
|
C_minValue = new Timestamp(1646625048000L), // 03/07/2022
|
||||||
C_maxValue = new Timestamp(1646711448000L), // 03/07/2022
|
C_maxValue = new Timestamp(1646711448000L), // 03/08/2022
|
||||||
C_num_nulls = 0)
|
C_num_nulls = 0)
|
||||||
),
|
),
|
||||||
Seq("file_2")),
|
Seq("file_2")),
|
||||||
arguments(
|
arguments(
|
||||||
"'03/08/2022' <= date_format(C, 'MM/dd/yyyy')",
|
"'03/09/2022' <= date_format(C, 'MM/dd/yyyy')",
|
||||||
Seq(
|
Seq(
|
||||||
IndexRow("file_1",
|
IndexRow("file_1",
|
||||||
C_minValue = new Timestamp(1646711448000L), // 03/07/2022
|
C_minValue = new Timestamp(1646711448000L), // 03/08/2022
|
||||||
C_maxValue = new Timestamp(1646797848000L), // 03/08/2022
|
C_maxValue = new Timestamp(1646797848000L), // 03/09/2022
|
||||||
C_num_nulls = 0),
|
C_num_nulls = 0),
|
||||||
IndexRow("file_2",
|
IndexRow("file_2",
|
||||||
C_minValue = new Timestamp(1646625048000L), // 03/06/2022
|
C_minValue = new Timestamp(1646625048000L), // 03/07/2022
|
||||||
C_maxValue = new Timestamp(1646711448000L), // 03/07/2022
|
C_maxValue = new Timestamp(1646711448000L), // 03/08/2022
|
||||||
C_num_nulls = 0)
|
C_num_nulls = 0)
|
||||||
),
|
),
|
||||||
Seq("file_1")),
|
Seq("file_1")),
|
||||||
arguments(
|
arguments(
|
||||||
"date_format(C, 'MM/dd/yyyy') >= '03/08/2022'",
|
"date_format(C, 'MM/dd/yyyy') >= '03/09/2022'",
|
||||||
Seq(
|
Seq(
|
||||||
IndexRow("file_1",
|
IndexRow("file_1",
|
||||||
C_minValue = new Timestamp(1646711448000L), // 03/07/2022
|
C_minValue = new Timestamp(1646711448000L), // 03/08/2022
|
||||||
C_maxValue = new Timestamp(1646797848000L), // 03/08/2022
|
C_maxValue = new Timestamp(1646797848000L), // 03/09/2022
|
||||||
C_num_nulls = 0),
|
C_num_nulls = 0),
|
||||||
IndexRow("file_2",
|
IndexRow("file_2",
|
||||||
C_minValue = new Timestamp(1646625048000L), // 03/06/2022
|
C_minValue = new Timestamp(1646625048000L), // 03/07/2022
|
||||||
C_maxValue = new Timestamp(1646711448000L), // 03/07/2022
|
C_maxValue = new Timestamp(1646711448000L), // 03/08/2022
|
||||||
C_num_nulls = 0)
|
C_num_nulls = 0)
|
||||||
),
|
),
|
||||||
Seq("file_1")),
|
Seq("file_1")),
|
||||||
arguments(
|
arguments(
|
||||||
"date_format(C, 'MM/dd/yyyy') IN ('03/08/2022')",
|
"date_format(C, 'MM/dd/yyyy') IN ('03/09/2022')",
|
||||||
Seq(
|
Seq(
|
||||||
IndexRow("file_1",
|
IndexRow("file_1",
|
||||||
C_minValue = new Timestamp(1646711448000L), // 03/07/2022
|
C_minValue = new Timestamp(1646711448000L), // 03/08/2022
|
||||||
C_maxValue = new Timestamp(1646797848000L), // 03/08/2022
|
C_maxValue = new Timestamp(1646797848000L), // 03/09/2022
|
||||||
C_num_nulls = 0),
|
C_num_nulls = 0),
|
||||||
IndexRow("file_2",
|
IndexRow("file_2",
|
||||||
C_minValue = new Timestamp(1646625048000L), // 03/06/2022
|
C_minValue = new Timestamp(1646625048000L), // 03/07/2022
|
||||||
C_maxValue = new Timestamp(1646711448000L), // 03/07/2022
|
C_maxValue = new Timestamp(1646711448000L), // 03/08/2022
|
||||||
C_num_nulls = 0)
|
C_num_nulls = 0)
|
||||||
),
|
),
|
||||||
Seq("file_1")),
|
Seq("file_1")),
|
||||||
arguments(
|
arguments(
|
||||||
"date_format(C, 'MM/dd/yyyy') NOT IN ('03/06/2022')",
|
"date_format(C, 'MM/dd/yyyy') NOT IN ('03/07/2022')",
|
||||||
Seq(
|
Seq(
|
||||||
IndexRow("file_1",
|
IndexRow("file_1",
|
||||||
C_minValue = new Timestamp(1646711448000L), // 03/07/2022
|
C_minValue = new Timestamp(1646711448000L), // 03/08/2022
|
||||||
C_maxValue = new Timestamp(1646797848000L), // 03/08/2022
|
C_maxValue = new Timestamp(1646797848000L), // 03/09/2022
|
||||||
C_num_nulls = 0),
|
C_num_nulls = 0),
|
||||||
IndexRow("file_2",
|
IndexRow("file_2",
|
||||||
C_minValue = new Timestamp(1646625048000L), // 03/06/2022
|
C_minValue = new Timestamp(1646625048000L), // 03/07/2022
|
||||||
C_maxValue = new Timestamp(1646625048000L), // 03/06/2022
|
C_maxValue = new Timestamp(1646625048000L), // 03/07/2022
|
||||||
C_num_nulls = 0)
|
C_num_nulls = 0)
|
||||||
),
|
),
|
||||||
Seq("file_1")),
|
Seq("file_1")),
|
||||||
|
|||||||
Reference in New Issue
Block a user