1
0

[MINOR] Fix expected dates to match UTC in TestDataSkippingUtils (#5166)

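* Fix timezone in test: set the Spark session-local timezone to UTC and update the expected dates to the UTC calendar dates of the test timestamps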
This commit is contained in:
Sagar Sumit
2022-03-30 20:03:14 +05:30
committed by GitHub
parent b9fbada2f2
commit 04478a45d9

View File

@@ -22,6 +22,7 @@ import org.apache.hudi.testutils.HoodieClientTestBase
import org.apache.spark.sql.catalyst.expressions.{Expression, Not}
import org.apache.spark.sql.functions.{col, lower}
import org.apache.spark.sql.hudi.DataSkippingUtils
import org.apache.spark.sql.internal.SQLConf.SESSION_LOCAL_TIMEZONE
import org.apache.spark.sql.types._
import org.apache.spark.sql.{Column, HoodieCatalystExpressionUtils, Row, SparkSession}
import org.junit.jupiter.api.Assertions.assertEquals
@@ -93,6 +94,7 @@ class TestDataSkippingUtils extends HoodieClientTestBase with SparkAdapterSuppor
"testCompositeFilterExpressionsSource"
))
def testLookupFilterExpressions(sourceExpr: String, input: Seq[IndexRow], output: Seq[String]): Unit = {
spark.sqlContext.setConf(SESSION_LOCAL_TIMEZONE.key, "UTC")
val resolvedExpr: Expression = exprUtils.resolveExpr(spark, sourceExpr, sourceTableSchema)
val lookupFilter = DataSkippingUtils.translateIntoColumnStatsIndexFilterExpr(resolvedExpr, indexSchema)
@@ -384,186 +386,187 @@ object TestDataSkippingUtils {
}
def testCompositeFilterExpressionsSource(): java.util.stream.Stream[Arguments] = {
// NOTE: all timestamps in UTC
java.util.stream.Stream.of(
arguments(
"date_format(C, 'MM/dd/yyyy') = '03/06/2022'",
"date_format(C, 'MM/dd/yyyy') = '03/07/2022'",
Seq(
IndexRow("file_1",
C_minValue = new Timestamp(1646711448000L), // 03/07/2022
C_maxValue = new Timestamp(1646797848000L), // 03/08/2022
C_minValue = new Timestamp(1646711448000L), // 03/08/2022
C_maxValue = new Timestamp(1646797848000L), // 03/09/2022
C_num_nulls = 0),
IndexRow("file_2",
C_minValue = new Timestamp(1646625048000L), // 03/06/2022
C_maxValue = new Timestamp(1646711448000L), // 03/07/2022
C_minValue = new Timestamp(1646625048000L), // 03/07/2022
C_maxValue = new Timestamp(1646711448000L), // 03/08/2022
C_num_nulls = 0)
),
Seq("file_2")),
arguments(
"'03/06/2022' = date_format(C, 'MM/dd/yyyy')",
"'03/07/2022' = date_format(C, 'MM/dd/yyyy')",
Seq(
IndexRow("file_1",
C_minValue = new Timestamp(1646711448000L), // 03/07/2022
C_maxValue = new Timestamp(1646797848000L), // 03/08/2022
C_minValue = new Timestamp(1646711448000L), // 03/08/2022
C_maxValue = new Timestamp(1646797848000L), // 03/09/2022
C_num_nulls = 0),
IndexRow("file_2",
C_minValue = new Timestamp(1646625048000L), // 03/06/2022
C_maxValue = new Timestamp(1646711448000L), // 03/07/2022
C_minValue = new Timestamp(1646625048000L), // 03/07/2022
C_maxValue = new Timestamp(1646711448000L), // 03/08/2022
C_num_nulls = 0)
),
Seq("file_2")),
arguments(
"'03/06/2022' != date_format(C, 'MM/dd/yyyy')",
"'03/07/2022' != date_format(C, 'MM/dd/yyyy')",
Seq(
IndexRow("file_1",
C_minValue = new Timestamp(1646711448000L), // 03/07/2022
C_maxValue = new Timestamp(1646797848000L), // 03/08/2022
C_minValue = new Timestamp(1646711448000L), // 03/08/2022
C_maxValue = new Timestamp(1646797848000L), // 03/09/2022
C_num_nulls = 0),
IndexRow("file_2",
C_minValue = new Timestamp(1646625048000L), // 03/06/2022
C_maxValue = new Timestamp(1646625048000L), // 03/06/2022
C_minValue = new Timestamp(1646625048000L), // 03/07/2022
C_maxValue = new Timestamp(1646625048000L), // 03/07/2022
C_num_nulls = 0)
),
Seq("file_1")),
arguments(
"date_format(C, 'MM/dd/yyyy') != '03/06/2022'",
"date_format(C, 'MM/dd/yyyy') != '03/07/2022'",
Seq(
IndexRow("file_1",
C_minValue = new Timestamp(1646711448000L), // 03/07/2022
C_maxValue = new Timestamp(1646797848000L), // 03/08/2022
C_minValue = new Timestamp(1646711448000L), // 03/08/2022
C_maxValue = new Timestamp(1646797848000L), // 03/09/2022
C_num_nulls = 0),
IndexRow("file_2",
C_minValue = new Timestamp(1646625048000L), // 03/06/2022
C_maxValue = new Timestamp(1646625048000L), // 03/06/2022
C_minValue = new Timestamp(1646625048000L), // 03/07/2022
C_maxValue = new Timestamp(1646625048000L), // 03/07/2022
C_num_nulls = 0)
),
Seq("file_1")),
arguments(
"date_format(C, 'MM/dd/yyyy') < '03/07/2022'",
"date_format(C, 'MM/dd/yyyy') < '03/08/2022'",
Seq(
IndexRow("file_1",
C_minValue = new Timestamp(1646711448000L), // 03/07/2022
C_maxValue = new Timestamp(1646797848000L), // 03/08/2022
C_minValue = new Timestamp(1646711448000L), // 03/08/2022
C_maxValue = new Timestamp(1646797848000L), // 03/09/2022
C_num_nulls = 0),
IndexRow("file_2",
C_minValue = new Timestamp(1646625048000L), // 03/06/2022
C_maxValue = new Timestamp(1646711448000L), // 03/07/2022
C_minValue = new Timestamp(1646625048000L), // 03/07/2022
C_maxValue = new Timestamp(1646711448000L), // 03/08/2022
C_num_nulls = 0)
),
Seq("file_2")),
arguments(
"'03/07/2022' > date_format(C, 'MM/dd/yyyy')",
"'03/08/2022' > date_format(C, 'MM/dd/yyyy')",
Seq(
IndexRow("file_1",
C_minValue = new Timestamp(1646711448000L), // 03/07/2022
C_maxValue = new Timestamp(1646797848000L), // 03/08/2022
C_minValue = new Timestamp(1646711448000L), // 03/08/2022
C_maxValue = new Timestamp(1646797848000L), // 03/09/2022
C_num_nulls = 0),
IndexRow("file_2",
C_minValue = new Timestamp(1646625048000L), // 03/06/2022
C_maxValue = new Timestamp(1646711448000L), // 03/07/2022
C_minValue = new Timestamp(1646625048000L), // 03/07/2022
C_maxValue = new Timestamp(1646711448000L), // 03/08/2022
C_num_nulls = 0)
),
Seq("file_2")),
arguments(
"'03/07/2022' < date_format(C, 'MM/dd/yyyy')",
"'03/08/2022' < date_format(C, 'MM/dd/yyyy')",
Seq(
IndexRow("file_1",
C_minValue = new Timestamp(1646711448000L), // 03/07/2022
C_maxValue = new Timestamp(1646797848000L), // 03/08/2022
C_minValue = new Timestamp(1646711448000L), // 03/08/2022
C_maxValue = new Timestamp(1646797848000L), // 03/09/2022
C_num_nulls = 0),
IndexRow("file_2",
C_minValue = new Timestamp(1646625048000L), // 03/06/2022
C_maxValue = new Timestamp(1646711448000L), // 03/07/2022
C_minValue = new Timestamp(1646625048000L), // 03/07/2022
C_maxValue = new Timestamp(1646711448000L), // 03/08/2022
C_num_nulls = 0)
),
Seq("file_1")),
arguments(
"date_format(C, 'MM/dd/yyyy') > '03/07/2022'",
"date_format(C, 'MM/dd/yyyy') > '03/08/2022'",
Seq(
IndexRow("file_1",
C_minValue = new Timestamp(1646711448000L), // 03/07/2022
C_maxValue = new Timestamp(1646797848000L), // 03/08/2022
C_minValue = new Timestamp(1646711448000L), // 03/08/2022
C_maxValue = new Timestamp(1646797848000L), // 03/09/2022
C_num_nulls = 0),
IndexRow("file_2",
C_minValue = new Timestamp(1646625048000L), // 03/06/2022
C_maxValue = new Timestamp(1646711448000L), // 03/07/2022
C_minValue = new Timestamp(1646625048000L), // 03/07/2022
C_maxValue = new Timestamp(1646711448000L), // 03/08/2022
C_num_nulls = 0)
),
Seq("file_1")),
arguments(
"date_format(C, 'MM/dd/yyyy') <= '03/06/2022'",
"date_format(C, 'MM/dd/yyyy') <= '03/07/2022'",
Seq(
IndexRow("file_1",
C_minValue = new Timestamp(1646711448000L), // 03/07/2022
C_maxValue = new Timestamp(1646797848000L), // 03/08/2022
C_minValue = new Timestamp(1646711448000L), // 03/08/2022
C_maxValue = new Timestamp(1646797848000L), // 03/09/2022
C_num_nulls = 0),
IndexRow("file_2",
C_minValue = new Timestamp(1646625048000L), // 03/06/2022
C_maxValue = new Timestamp(1646711448000L), // 03/07/2022
C_minValue = new Timestamp(1646625048000L), // 03/07/2022
C_maxValue = new Timestamp(1646711448000L), // 03/08/2022
C_num_nulls = 0)
),
Seq("file_2")),
arguments(
"'03/06/2022' >= date_format(C, 'MM/dd/yyyy')",
"'03/07/2022' >= date_format(C, 'MM/dd/yyyy')",
Seq(
IndexRow("file_1",
C_minValue = new Timestamp(1646711448000L), // 03/07/2022
C_maxValue = new Timestamp(1646797848000L), // 03/08/2022
C_minValue = new Timestamp(1646711448000L), // 03/08/2022
C_maxValue = new Timestamp(1646797848000L), // 03/09/2022
C_num_nulls = 0),
IndexRow("file_2",
C_minValue = new Timestamp(1646625048000L), // 03/06/2022
C_maxValue = new Timestamp(1646711448000L), // 03/07/2022
C_minValue = new Timestamp(1646625048000L), // 03/07/2022
C_maxValue = new Timestamp(1646711448000L), // 03/08/2022
C_num_nulls = 0)
),
Seq("file_2")),
arguments(
"'03/08/2022' <= date_format(C, 'MM/dd/yyyy')",
"'03/09/2022' <= date_format(C, 'MM/dd/yyyy')",
Seq(
IndexRow("file_1",
C_minValue = new Timestamp(1646711448000L), // 03/07/2022
C_maxValue = new Timestamp(1646797848000L), // 03/08/2022
C_minValue = new Timestamp(1646711448000L), // 03/08/2022
C_maxValue = new Timestamp(1646797848000L), // 03/09/2022
C_num_nulls = 0),
IndexRow("file_2",
C_minValue = new Timestamp(1646625048000L), // 03/06/2022
C_maxValue = new Timestamp(1646711448000L), // 03/07/2022
C_minValue = new Timestamp(1646625048000L), // 03/07/2022
C_maxValue = new Timestamp(1646711448000L), // 03/08/2022
C_num_nulls = 0)
),
Seq("file_1")),
arguments(
"date_format(C, 'MM/dd/yyyy') >= '03/08/2022'",
"date_format(C, 'MM/dd/yyyy') >= '03/09/2022'",
Seq(
IndexRow("file_1",
C_minValue = new Timestamp(1646711448000L), // 03/07/2022
C_maxValue = new Timestamp(1646797848000L), // 03/08/2022
C_minValue = new Timestamp(1646711448000L), // 03/08/2022
C_maxValue = new Timestamp(1646797848000L), // 03/09/2022
C_num_nulls = 0),
IndexRow("file_2",
C_minValue = new Timestamp(1646625048000L), // 03/06/2022
C_maxValue = new Timestamp(1646711448000L), // 03/07/2022
C_minValue = new Timestamp(1646625048000L), // 03/07/2022
C_maxValue = new Timestamp(1646711448000L), // 03/08/2022
C_num_nulls = 0)
),
Seq("file_1")),
arguments(
"date_format(C, 'MM/dd/yyyy') IN ('03/08/2022')",
"date_format(C, 'MM/dd/yyyy') IN ('03/09/2022')",
Seq(
IndexRow("file_1",
C_minValue = new Timestamp(1646711448000L), // 03/07/2022
C_maxValue = new Timestamp(1646797848000L), // 03/08/2022
C_minValue = new Timestamp(1646711448000L), // 03/08/2022
C_maxValue = new Timestamp(1646797848000L), // 03/09/2022
C_num_nulls = 0),
IndexRow("file_2",
C_minValue = new Timestamp(1646625048000L), // 03/06/2022
C_maxValue = new Timestamp(1646711448000L), // 03/07/2022
C_minValue = new Timestamp(1646625048000L), // 03/07/2022
C_maxValue = new Timestamp(1646711448000L), // 03/08/2022
C_num_nulls = 0)
),
Seq("file_1")),
arguments(
"date_format(C, 'MM/dd/yyyy') NOT IN ('03/06/2022')",
"date_format(C, 'MM/dd/yyyy') NOT IN ('03/07/2022')",
Seq(
IndexRow("file_1",
C_minValue = new Timestamp(1646711448000L), // 03/07/2022
C_maxValue = new Timestamp(1646797848000L), // 03/08/2022
C_minValue = new Timestamp(1646711448000L), // 03/08/2022
C_maxValue = new Timestamp(1646797848000L), // 03/09/2022
C_num_nulls = 0),
IndexRow("file_2",
C_minValue = new Timestamp(1646625048000L), // 03/06/2022
C_maxValue = new Timestamp(1646625048000L), // 03/06/2022
C_minValue = new Timestamp(1646625048000L), // 03/07/2022
C_maxValue = new Timestamp(1646625048000L), // 03/07/2022
C_num_nulls = 0)
),
Seq("file_1")),