[HUDI-4353] Column stats data skipping for flink (#6026)
This commit is contained in:
@@ -18,11 +18,10 @@
|
||||
|
||||
package org.apache.hudi.sink.utils;
|
||||
|
||||
import org.apache.flink.configuration.Configuration;
|
||||
|
||||
import org.apache.hudi.configuration.FlinkOptions;
|
||||
import org.apache.hudi.hive.HiveSyncConfig;
|
||||
|
||||
import org.apache.flink.configuration.Configuration;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.lang.reflect.Method;
|
||||
|
||||
@@ -58,7 +58,7 @@ public class TestFileIndex {
|
||||
conf.setBoolean(FlinkOptions.METADATA_ENABLED, true);
|
||||
conf.setBoolean(FlinkOptions.HIVE_STYLE_PARTITIONING, hiveStylePartitioning);
|
||||
TestData.writeData(TestData.DATA_SET_INSERT, conf);
|
||||
FileIndex fileIndex = FileIndex.instance(new Path(tempFile.getAbsolutePath()), conf);
|
||||
FileIndex fileIndex = FileIndex.instance(new Path(tempFile.getAbsolutePath()), conf, TestConfigurations.ROW_TYPE);
|
||||
List<String> partitionKeys = Collections.singletonList("partition");
|
||||
List<Map<String, String>> partitions = fileIndex.getPartitions(partitionKeys, "default", hiveStylePartitioning);
|
||||
assertTrue(partitions.stream().allMatch(m -> m.size() == 1));
|
||||
@@ -79,7 +79,7 @@ public class TestFileIndex {
|
||||
conf.setString(FlinkOptions.KEYGEN_CLASS_NAME, NonpartitionedAvroKeyGenerator.class.getName());
|
||||
conf.setBoolean(FlinkOptions.METADATA_ENABLED, true);
|
||||
TestData.writeData(TestData.DATA_SET_INSERT, conf);
|
||||
FileIndex fileIndex = FileIndex.instance(new Path(tempFile.getAbsolutePath()), conf);
|
||||
FileIndex fileIndex = FileIndex.instance(new Path(tempFile.getAbsolutePath()), conf, TestConfigurations.ROW_TYPE);
|
||||
List<String> partitionKeys = Collections.singletonList("");
|
||||
List<Map<String, String>> partitions = fileIndex.getPartitions(partitionKeys, "default", false);
|
||||
assertThat(partitions.size(), is(0));
|
||||
@@ -94,7 +94,7 @@ public class TestFileIndex {
|
||||
void testFileListingEmptyTable(boolean enableMetadata) {
|
||||
Configuration conf = TestConfigurations.getDefaultConf(tempFile.getAbsolutePath());
|
||||
conf.setBoolean(FlinkOptions.METADATA_ENABLED, enableMetadata);
|
||||
FileIndex fileIndex = FileIndex.instance(new Path(tempFile.getAbsolutePath()), conf);
|
||||
FileIndex fileIndex = FileIndex.instance(new Path(tempFile.getAbsolutePath()), conf, TestConfigurations.ROW_TYPE);
|
||||
List<String> partitionKeys = Collections.singletonList("partition");
|
||||
List<Map<String, String>> partitions = fileIndex.getPartitions(partitionKeys, "default", false);
|
||||
assertThat(partitions.size(), is(0));
|
||||
|
||||
@@ -0,0 +1,98 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi.source.stats;
|
||||
|
||||
import org.apache.hudi.common.config.HoodieMetadataConfig;
|
||||
import org.apache.hudi.common.util.collection.Pair;
|
||||
import org.apache.hudi.configuration.FlinkOptions;
|
||||
import org.apache.hudi.utils.TestConfigurations;
|
||||
import org.apache.hudi.utils.TestData;
|
||||
|
||||
import org.apache.flink.configuration.Configuration;
|
||||
import org.apache.flink.table.data.GenericRowData;
|
||||
import org.apache.flink.table.data.RowData;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.api.io.TempDir;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.Arrays;
|
||||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import static org.hamcrest.CoreMatchers.is;
|
||||
import static org.hamcrest.MatcherAssert.assertThat;
|
||||
import static org.junit.jupiter.api.Assertions.assertThrows;
|
||||
|
||||
/**
|
||||
* Test cases for {@link ColumnStatsIndices}.
|
||||
*/
|
||||
public class TestColumnStatsIndices {
|
||||
@TempDir
|
||||
File tempFile;
|
||||
|
||||
@Test
|
||||
void testTransposeColumnStatsIndex() throws Exception {
|
||||
final String path = tempFile.getAbsolutePath();
|
||||
Configuration conf = TestConfigurations.getDefaultConf(path);
|
||||
conf.setBoolean(FlinkOptions.METADATA_ENABLED, true);
|
||||
conf.setBoolean(FlinkOptions.READ_DATA_SKIPPING_ENABLED, true);
|
||||
conf.setString("hoodie.metadata.index.column.stats.enable", "true");
|
||||
|
||||
HoodieMetadataConfig metadataConfig = HoodieMetadataConfig.newBuilder()
|
||||
.enable(true)
|
||||
.withMetadataIndexColumnStats(true)
|
||||
.build();
|
||||
TestData.writeData(TestData.DATA_SET_INSERT, conf);
|
||||
|
||||
// explicit query columns
|
||||
String[] queryColumns1 = {"uuid", "age"};
|
||||
List<RowData> indexRows1 = ColumnStatsIndices.readColumnStatsIndex(path, metadataConfig, queryColumns1);
|
||||
Pair<List<RowData>, String[]> transposedIndexTable1 = ColumnStatsIndices
|
||||
.transposeColumnStatsIndex(indexRows1, queryColumns1, TestConfigurations.ROW_TYPE);
|
||||
assertThat("The schema columns should sort by natural order",
|
||||
Arrays.toString(transposedIndexTable1.getRight()), is("[age, uuid]"));
|
||||
List<RowData> transposed1 = filterOutFileNames(transposedIndexTable1.getLeft());
|
||||
assertThat(transposed1.size(), is(4));
|
||||
final String expected = "["
|
||||
+ "+I(2,18,20,0,id5,id6,0), "
|
||||
+ "+I(2,23,33,0,id1,id2,0), "
|
||||
+ "+I(2,31,53,0,id3,id4,0), "
|
||||
+ "+I(2,44,56,0,id7,id8,0)]";
|
||||
assertThat(transposed1.toString(), is(expected));
|
||||
|
||||
// no query columns, only for tests
|
||||
assertThrows(IllegalArgumentException.class,
|
||||
() -> ColumnStatsIndices.readColumnStatsIndex(path, metadataConfig, new String[0]));
|
||||
}
|
||||
|
||||
private static List<RowData> filterOutFileNames(List<RowData> indexRows) {
|
||||
return indexRows.stream().map(row -> {
|
||||
GenericRowData gr = (GenericRowData) row;
|
||||
GenericRowData converted = new GenericRowData(gr.getArity() - 1);
|
||||
for (int i = 1; i < gr.getArity(); i++) {
|
||||
converted.setField(i - 1, gr.getField(i));
|
||||
}
|
||||
return converted;
|
||||
})
|
||||
// sort by age min values
|
||||
.sorted(Comparator.comparingInt(r -> r.getInt(1)))
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,374 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi.source.stats;
|
||||
|
||||
import org.apache.hudi.utils.TestData;
|
||||
|
||||
import org.apache.flink.table.api.DataTypes;
|
||||
import org.apache.flink.table.data.RowData;
|
||||
import org.apache.flink.table.data.StringData;
|
||||
import org.apache.flink.table.data.TimestampData;
|
||||
import org.apache.flink.table.expressions.FieldReferenceExpression;
|
||||
import org.apache.flink.table.expressions.ValueLiteralExpression;
|
||||
import org.apache.flink.table.types.DataType;
|
||||
import org.apache.flink.table.types.logical.RowType;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertFalse;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
|
||||
/**
|
||||
* Test cases for {@link ExpressionEvaluator}.
|
||||
*/
|
||||
public class TestExpressionEvaluator {
|
||||
private static final DataType ROW_DATA_TYPE = DataTypes.ROW(
|
||||
DataTypes.FIELD("f_tinyint", DataTypes.TINYINT()),
|
||||
DataTypes.FIELD("f_smallint", DataTypes.SMALLINT()),
|
||||
DataTypes.FIELD("f_int", DataTypes.INT()),
|
||||
DataTypes.FIELD("f_long", DataTypes.BIGINT()),
|
||||
DataTypes.FIELD("f_float", DataTypes.FLOAT()),
|
||||
DataTypes.FIELD("f_double", DataTypes.DOUBLE()),
|
||||
DataTypes.FIELD("f_boolean", DataTypes.BOOLEAN()),
|
||||
DataTypes.FIELD("f_decimal", DataTypes.DECIMAL(10, 2)),
|
||||
DataTypes.FIELD("f_bytes", DataTypes.VARBINARY(10)),
|
||||
DataTypes.FIELD("f_string", DataTypes.VARCHAR(10)),
|
||||
DataTypes.FIELD("f_time", DataTypes.TIME(3)),
|
||||
DataTypes.FIELD("f_date", DataTypes.DATE()),
|
||||
DataTypes.FIELD("f_timestamp", DataTypes.TIMESTAMP(3))
|
||||
).notNull();
|
||||
private static final DataType INDEX_ROW_DATA_TYPE = DataTypes.ROW(
|
||||
DataTypes.FIELD("file_name", DataTypes.STRING()),
|
||||
DataTypes.FIELD("value_cnt", DataTypes.BIGINT()),
|
||||
DataTypes.FIELD("f_int_min", DataTypes.INT()),
|
||||
DataTypes.FIELD("f_int_max", DataTypes.INT()),
|
||||
DataTypes.FIELD("f_int_null_cnt", DataTypes.BIGINT()),
|
||||
DataTypes.FIELD("f_string_min", DataTypes.VARCHAR(10)),
|
||||
DataTypes.FIELD("f_string_max", DataTypes.VARCHAR(10)),
|
||||
DataTypes.FIELD("f_string_null_cnt", DataTypes.BIGINT()),
|
||||
DataTypes.FIELD("f_timestamp_min", DataTypes.TIMESTAMP(3)),
|
||||
DataTypes.FIELD("f_timestamp_max", DataTypes.TIMESTAMP(3)),
|
||||
DataTypes.FIELD("f_timestamp_null_cnt", DataTypes.BIGINT())
|
||||
).notNull();
|
||||
|
||||
private static final RowType INDEX_ROW_TYPE = (RowType) INDEX_ROW_DATA_TYPE.getLogicalType();
|
||||
|
||||
@Test
|
||||
void testEqualTo() {
|
||||
ExpressionEvaluator.EqualTo equalTo = ExpressionEvaluator.EqualTo.getInstance();
|
||||
FieldReferenceExpression rExpr = new FieldReferenceExpression("f_int", DataTypes.INT(), 2, 2);
|
||||
ValueLiteralExpression vExpr = new ValueLiteralExpression(12);
|
||||
|
||||
RowData indexRow1 = intIndexRow(11, 13);
|
||||
equalTo.bindFieldReference(rExpr)
|
||||
.bindVal(vExpr)
|
||||
.bindColStats(indexRow1, queryFields(2), rExpr);
|
||||
assertTrue(equalTo.eval(), "11 < 12 < 13");
|
||||
|
||||
RowData indexRow2 = intIndexRow(12, 13);
|
||||
equalTo.bindColStats(indexRow2, queryFields(2), rExpr);
|
||||
assertTrue(equalTo.eval(), "12 <= 12 < 13");
|
||||
|
||||
RowData indexRow3 = intIndexRow(11, 12);
|
||||
equalTo.bindColStats(indexRow3, queryFields(2), rExpr);
|
||||
assertTrue(equalTo.eval(), "11 < 12 <= 12");
|
||||
|
||||
RowData indexRow4 = intIndexRow(10, 11);
|
||||
equalTo.bindColStats(indexRow4, queryFields(2), rExpr);
|
||||
assertFalse(equalTo.eval(), "11 < 12");
|
||||
|
||||
RowData indexRow5 = intIndexRow(13, 14);
|
||||
equalTo.bindColStats(indexRow5, queryFields(2), rExpr);
|
||||
assertFalse(equalTo.eval(), "12 < 13");
|
||||
|
||||
RowData indexRow6 = intIndexRow(null, null);
|
||||
equalTo.bindColStats(indexRow6, queryFields(2), rExpr);
|
||||
assertFalse(equalTo.eval(), "12 <> null");
|
||||
|
||||
equalTo.bindVal(new ValueLiteralExpression(null, DataTypes.INT()));
|
||||
assertFalse(equalTo.eval(), "null <> null");
|
||||
}
|
||||
|
||||
@Test
|
||||
void testNotEqualTo() {
|
||||
ExpressionEvaluator.NotEqualTo notEqualTo = ExpressionEvaluator.NotEqualTo.getInstance();
|
||||
FieldReferenceExpression rExpr = new FieldReferenceExpression("f_int", DataTypes.INT(), 2, 2);
|
||||
ValueLiteralExpression vExpr = new ValueLiteralExpression(12);
|
||||
|
||||
RowData indexRow1 = intIndexRow(11, 13);
|
||||
notEqualTo.bindFieldReference(rExpr)
|
||||
.bindVal(vExpr)
|
||||
.bindColStats(indexRow1, queryFields(2), rExpr);
|
||||
assertTrue(notEqualTo.eval(), "11 <> 12 && 12 <> 13");
|
||||
|
||||
RowData indexRow2 = intIndexRow(12, 13);
|
||||
notEqualTo.bindColStats(indexRow2, queryFields(2), rExpr);
|
||||
assertTrue(notEqualTo.eval(), "12 <> 13");
|
||||
|
||||
RowData indexRow3 = intIndexRow(11, 12);
|
||||
notEqualTo.bindColStats(indexRow3, queryFields(2), rExpr);
|
||||
assertTrue(notEqualTo.eval(), "11 <> 12");
|
||||
|
||||
RowData indexRow4 = intIndexRow(10, 11);
|
||||
notEqualTo.bindColStats(indexRow4, queryFields(2), rExpr);
|
||||
assertTrue(notEqualTo.eval(), "10 <> 12 and 11 < 12");
|
||||
|
||||
RowData indexRow5 = intIndexRow(13, 14);
|
||||
notEqualTo.bindColStats(indexRow5, queryFields(2), rExpr);
|
||||
assertTrue(notEqualTo.eval(), "12 <> 13 and 12 <> 14");
|
||||
|
||||
RowData indexRow6 = intIndexRow(null, null);
|
||||
notEqualTo.bindColStats(indexRow6, queryFields(2), rExpr);
|
||||
assertTrue(notEqualTo.eval(), "12 <> null");
|
||||
|
||||
notEqualTo.bindVal(new ValueLiteralExpression(null, DataTypes.INT()));
|
||||
assertTrue(notEqualTo.eval(), "null <> null");
|
||||
}
|
||||
|
||||
@Test
|
||||
void testIsNull() {
|
||||
ExpressionEvaluator.IsNull isNull = ExpressionEvaluator.IsNull.getInstance();
|
||||
FieldReferenceExpression rExpr = new FieldReferenceExpression("f_int", DataTypes.INT(), 2, 2);
|
||||
|
||||
RowData indexRow1 = intIndexRow(11, 13);
|
||||
isNull.bindFieldReference(rExpr)
|
||||
.bindColStats(indexRow1, queryFields(2), rExpr);
|
||||
assertTrue(isNull.eval(), "2 nulls");
|
||||
|
||||
RowData indexRow2 = intIndexRow(12, 13, 0L);
|
||||
isNull.bindColStats(indexRow2, queryFields(2), rExpr);
|
||||
assertFalse(isNull.eval(), "0 nulls");
|
||||
}
|
||||
|
||||
@Test
|
||||
void testIsNotNull() {
|
||||
ExpressionEvaluator.IsNotNull isNotNull = ExpressionEvaluator.IsNotNull.getInstance();
|
||||
FieldReferenceExpression rExpr = new FieldReferenceExpression("f_int", DataTypes.INT(), 2, 2);
|
||||
|
||||
RowData indexRow1 = intIndexRow(11, 13);
|
||||
isNotNull.bindFieldReference(rExpr)
|
||||
.bindColStats(indexRow1, queryFields(2), rExpr);
|
||||
assertTrue(isNotNull.eval(), "min 11 is not null");
|
||||
|
||||
RowData indexRow2 = intIndexRow(null, null, 0L);
|
||||
isNotNull.bindColStats(indexRow2, queryFields(2), rExpr);
|
||||
assertTrue(isNotNull.eval(), "min is null and 0 nulls");
|
||||
}
|
||||
|
||||
@Test
|
||||
void testLessThan() {
|
||||
ExpressionEvaluator.LessThan lessThan = ExpressionEvaluator.LessThan.getInstance();
|
||||
FieldReferenceExpression rExpr = new FieldReferenceExpression("f_int", DataTypes.INT(), 2, 2);
|
||||
ValueLiteralExpression vExpr = new ValueLiteralExpression(12);
|
||||
|
||||
RowData indexRow1 = intIndexRow(11, 13);
|
||||
lessThan.bindFieldReference(rExpr)
|
||||
.bindVal(vExpr)
|
||||
.bindColStats(indexRow1, queryFields(2), rExpr);
|
||||
assertTrue(lessThan.eval(), "12 < 13");
|
||||
|
||||
RowData indexRow2 = intIndexRow(12, 13);
|
||||
lessThan.bindColStats(indexRow2, queryFields(2), rExpr);
|
||||
assertFalse(lessThan.eval(), "min 12 = 12");
|
||||
|
||||
RowData indexRow3 = intIndexRow(11, 12);
|
||||
lessThan.bindColStats(indexRow3, queryFields(2), rExpr);
|
||||
assertTrue(lessThan.eval(), "11 < 12");
|
||||
|
||||
RowData indexRow4 = intIndexRow(10, 11);
|
||||
lessThan.bindColStats(indexRow4, queryFields(2), rExpr);
|
||||
assertTrue(lessThan.eval(), "11 < 12");
|
||||
|
||||
RowData indexRow5 = intIndexRow(13, 14);
|
||||
lessThan.bindColStats(indexRow5, queryFields(2), rExpr);
|
||||
assertFalse(lessThan.eval(), "12 < min 13");
|
||||
|
||||
RowData indexRow6 = intIndexRow(null, null);
|
||||
lessThan.bindColStats(indexRow6, queryFields(2), rExpr);
|
||||
assertFalse(lessThan.eval(), "12 <> null");
|
||||
|
||||
lessThan.bindVal(new ValueLiteralExpression(null, DataTypes.INT()));
|
||||
assertFalse(lessThan.eval(), "null <> null");
|
||||
}
|
||||
|
||||
@Test
|
||||
void testGreaterThan() {
|
||||
ExpressionEvaluator.GreaterThan greaterThan = ExpressionEvaluator.GreaterThan.getInstance();
|
||||
FieldReferenceExpression rExpr = new FieldReferenceExpression("f_int", DataTypes.INT(), 2, 2);
|
||||
ValueLiteralExpression vExpr = new ValueLiteralExpression(12);
|
||||
|
||||
RowData indexRow1 = intIndexRow(11, 13);
|
||||
greaterThan.bindFieldReference(rExpr)
|
||||
.bindVal(vExpr)
|
||||
.bindColStats(indexRow1, queryFields(2), rExpr);
|
||||
assertTrue(greaterThan.eval(), "12 < 13");
|
||||
|
||||
RowData indexRow2 = intIndexRow(12, 13);
|
||||
greaterThan.bindColStats(indexRow2, queryFields(2), rExpr);
|
||||
assertTrue(greaterThan.eval(), "12 < 13");
|
||||
|
||||
RowData indexRow3 = intIndexRow(11, 12);
|
||||
greaterThan.bindColStats(indexRow3, queryFields(2), rExpr);
|
||||
assertFalse(greaterThan.eval(), "max 12 = 12");
|
||||
|
||||
RowData indexRow4 = intIndexRow(10, 11);
|
||||
greaterThan.bindColStats(indexRow4, queryFields(2), rExpr);
|
||||
assertFalse(greaterThan.eval(), "max 11 < 12");
|
||||
|
||||
RowData indexRow5 = intIndexRow(13, 14);
|
||||
greaterThan.bindColStats(indexRow5, queryFields(2), rExpr);
|
||||
assertTrue(greaterThan.eval(), "12 < 13");
|
||||
|
||||
RowData indexRow6 = intIndexRow(null, null);
|
||||
greaterThan.bindColStats(indexRow6, queryFields(2), rExpr);
|
||||
assertFalse(greaterThan.eval(), "12 <> null");
|
||||
|
||||
greaterThan.bindVal(new ValueLiteralExpression(null, DataTypes.INT()));
|
||||
assertFalse(greaterThan.eval(), "null <> null");
|
||||
}
|
||||
|
||||
@Test
|
||||
void testLessThanOrEqual() {
|
||||
ExpressionEvaluator.LessThanOrEqual lessThanOrEqual = ExpressionEvaluator.LessThanOrEqual.getInstance();
|
||||
FieldReferenceExpression rExpr = new FieldReferenceExpression("f_int", DataTypes.INT(), 2, 2);
|
||||
ValueLiteralExpression vExpr = new ValueLiteralExpression(12);
|
||||
|
||||
RowData indexRow1 = intIndexRow(11, 13);
|
||||
lessThanOrEqual.bindFieldReference(rExpr)
|
||||
.bindVal(vExpr)
|
||||
.bindColStats(indexRow1, queryFields(2), rExpr);
|
||||
assertTrue(lessThanOrEqual.eval(), "11 < 12");
|
||||
|
||||
RowData indexRow2 = intIndexRow(12, 13);
|
||||
lessThanOrEqual.bindColStats(indexRow2, queryFields(2), rExpr);
|
||||
assertTrue(lessThanOrEqual.eval(), "min 12 = 12");
|
||||
|
||||
RowData indexRow3 = intIndexRow(11, 12);
|
||||
lessThanOrEqual.bindColStats(indexRow3, queryFields(2), rExpr);
|
||||
assertTrue(lessThanOrEqual.eval(), "max 12 = 12");
|
||||
|
||||
RowData indexRow4 = intIndexRow(10, 11);
|
||||
lessThanOrEqual.bindColStats(indexRow4, queryFields(2), rExpr);
|
||||
assertTrue(lessThanOrEqual.eval(), "max 11 < 12");
|
||||
|
||||
RowData indexRow5 = intIndexRow(13, 14);
|
||||
lessThanOrEqual.bindColStats(indexRow5, queryFields(2), rExpr);
|
||||
assertFalse(lessThanOrEqual.eval(), "12 < 13");
|
||||
|
||||
RowData indexRow6 = intIndexRow(null, null);
|
||||
lessThanOrEqual.bindColStats(indexRow6, queryFields(2), rExpr);
|
||||
assertFalse(lessThanOrEqual.eval(), "12 <> null");
|
||||
|
||||
lessThanOrEqual.bindVal(new ValueLiteralExpression(null, DataTypes.INT()));
|
||||
assertFalse(lessThanOrEqual.eval(), "null <> null");
|
||||
}
|
||||
|
||||
@Test
|
||||
void testGreaterThanOrEqual() {
|
||||
ExpressionEvaluator.GreaterThanOrEqual greaterThanOrEqual = ExpressionEvaluator.GreaterThanOrEqual.getInstance();
|
||||
FieldReferenceExpression rExpr = new FieldReferenceExpression("f_int", DataTypes.INT(), 2, 2);
|
||||
ValueLiteralExpression vExpr = new ValueLiteralExpression(12);
|
||||
|
||||
RowData indexRow1 = intIndexRow(11, 13);
|
||||
greaterThanOrEqual.bindFieldReference(rExpr)
|
||||
.bindVal(vExpr)
|
||||
.bindColStats(indexRow1, queryFields(2), rExpr);
|
||||
assertTrue(greaterThanOrEqual.eval(), "12 < 13");
|
||||
|
||||
RowData indexRow2 = intIndexRow(12, 13);
|
||||
greaterThanOrEqual.bindColStats(indexRow2, queryFields(2), rExpr);
|
||||
assertTrue(greaterThanOrEqual.eval(), "min 12 = 12");
|
||||
|
||||
RowData indexRow3 = intIndexRow(11, 12);
|
||||
greaterThanOrEqual.bindColStats(indexRow3, queryFields(2), rExpr);
|
||||
assertTrue(greaterThanOrEqual.eval(), "max 12 = 12");
|
||||
|
||||
RowData indexRow4 = intIndexRow(10, 11);
|
||||
greaterThanOrEqual.bindColStats(indexRow4, queryFields(2), rExpr);
|
||||
assertFalse(greaterThanOrEqual.eval(), "max 11 < 12");
|
||||
|
||||
RowData indexRow5 = intIndexRow(13, 14);
|
||||
greaterThanOrEqual.bindColStats(indexRow5, queryFields(2), rExpr);
|
||||
assertTrue(greaterThanOrEqual.eval(), "12 < 13");
|
||||
|
||||
RowData indexRow6 = intIndexRow(null, null);
|
||||
greaterThanOrEqual.bindColStats(indexRow6, queryFields(2), rExpr);
|
||||
assertFalse(greaterThanOrEqual.eval(), "12 <> null");
|
||||
|
||||
greaterThanOrEqual.bindVal(new ValueLiteralExpression(null, DataTypes.INT()));
|
||||
assertFalse(greaterThanOrEqual.eval(), "null <> null");
|
||||
}
|
||||
|
||||
@Test
|
||||
void testIn() {
|
||||
ExpressionEvaluator.In in = ExpressionEvaluator.In.getInstance();
|
||||
FieldReferenceExpression rExpr = new FieldReferenceExpression("f_int", DataTypes.INT(), 2, 2);
|
||||
|
||||
RowData indexRow1 = intIndexRow(11, 13);
|
||||
in.bindFieldReference(rExpr)
|
||||
.bindColStats(indexRow1, queryFields(2), rExpr);
|
||||
in.bindVals(12);
|
||||
assertTrue(in.eval(), "11 < 12 < 13");
|
||||
|
||||
RowData indexRow2 = intIndexRow(12, 13);
|
||||
in.bindColStats(indexRow2, queryFields(2), rExpr);
|
||||
assertTrue(in.eval(), "min 12 = 12");
|
||||
|
||||
RowData indexRow3 = intIndexRow(11, 12);
|
||||
in.bindColStats(indexRow3, queryFields(2), rExpr);
|
||||
assertTrue(in.eval(), "max 12 = 12");
|
||||
|
||||
RowData indexRow4 = intIndexRow(10, 11);
|
||||
in.bindColStats(indexRow4, queryFields(2), rExpr);
|
||||
assertFalse(in.eval(), "max 11 < 12");
|
||||
|
||||
RowData indexRow5 = intIndexRow(13, 14);
|
||||
in.bindColStats(indexRow5, queryFields(2), rExpr);
|
||||
assertFalse(in.eval(), "12 < 13");
|
||||
|
||||
RowData indexRow6 = intIndexRow(null, null);
|
||||
in.bindColStats(indexRow6, queryFields(2), rExpr);
|
||||
assertFalse(in.eval(), "12 <> null");
|
||||
|
||||
in.bindVals((Object) null);
|
||||
assertFalse(in.eval(), "null <> null");
|
||||
}
|
||||
|
||||
private static RowData intIndexRow(Integer minVal, Integer maxVal) {
|
||||
return intIndexRow(minVal, maxVal, 2L);
|
||||
}
|
||||
|
||||
private static RowData intIndexRow(Integer minVal, Integer maxVal, Long nullCnt) {
|
||||
return indexRow(StringData.fromString("f1"), 100L,
|
||||
minVal, maxVal, nullCnt,
|
||||
StringData.fromString("1"), StringData.fromString("100"), 5L,
|
||||
TimestampData.fromEpochMillis(1), TimestampData.fromEpochMillis(100), 3L);
|
||||
}
|
||||
|
||||
private static RowData indexRow(Object... fields) {
|
||||
return TestData.insertRow(INDEX_ROW_TYPE, fields);
|
||||
}
|
||||
|
||||
private static RowType.RowField[] queryFields(int... pos) {
|
||||
List<RowType.RowField> fields = ((RowType) ROW_DATA_TYPE.getLogicalType()).getFields();
|
||||
return Arrays.stream(pos).mapToObj(fields::get).toArray(RowType.RowField[]::new);
|
||||
}
|
||||
}
|
||||
@@ -1256,6 +1256,37 @@ public class ITTestHoodieDataSource extends AbstractTestBase {
|
||||
assertRowsEquals(partitionResult, "[+I[1, 2022-02-02]]");
|
||||
}
|
||||
|
||||
@Test
|
||||
void testWriteAndReadWithDataSkipping() {
|
||||
TableEnvironment tableEnv = batchTableEnv;
|
||||
String hoodieTableDDL = sql("t1")
|
||||
.option(FlinkOptions.PATH, tempFile.getAbsolutePath())
|
||||
.option(FlinkOptions.METADATA_ENABLED, true)
|
||||
.option("hoodie.metadata.index.column.stats.enable", true)
|
||||
.option(FlinkOptions.READ_DATA_SKIPPING_ENABLED, true)
|
||||
.end();
|
||||
tableEnv.executeSql(hoodieTableDDL);
|
||||
|
||||
execInsertSql(tableEnv, TestSQL.INSERT_T1);
|
||||
|
||||
List<Row> result1 = CollectionUtil.iterableToList(
|
||||
() -> tableEnv.sqlQuery("select * from t1").execute().collect());
|
||||
assertRowsEquals(result1, TestData.DATA_SET_SOURCE_INSERT);
|
||||
// apply filters
|
||||
List<Row> result2 = CollectionUtil.iterableToList(
|
||||
() -> tableEnv.sqlQuery("select * from t1 where uuid > 'id5' and age > 20").execute().collect());
|
||||
assertRowsEquals(result2, "["
|
||||
+ "+I[id7, Bob, 44, 1970-01-01T00:00:07, par4], "
|
||||
+ "+I[id8, Han, 56, 1970-01-01T00:00:08, par4]]");
|
||||
// filter by timestamp
|
||||
List<Row> result3 = CollectionUtil.iterableToList(
|
||||
() -> tableEnv.sqlQuery("select * from t1 where ts > TIMESTAMP '1970-01-01 00:00:05'").execute().collect());
|
||||
assertRowsEquals(result3, "["
|
||||
+ "+I[id6, Emma, 20, 1970-01-01T00:00:06, par3], "
|
||||
+ "+I[id7, Bob, 44, 1970-01-01T00:00:07, par4], "
|
||||
+ "+I[id8, Han, 56, 1970-01-01T00:00:08, par4]]");
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Utilities
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
@@ -28,6 +28,7 @@ import org.apache.flink.api.common.io.FileInputFormat;
|
||||
import org.apache.flink.api.common.io.InputFormat;
|
||||
import org.apache.flink.configuration.Configuration;
|
||||
import org.apache.flink.table.data.RowData;
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.api.function.ThrowingSupplier;
|
||||
@@ -38,7 +39,6 @@ import org.slf4j.LoggerFactory;
|
||||
import java.io.File;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Collectors;
|
||||
@@ -76,22 +76,18 @@ public class TestHoodieTableSource {
|
||||
Arrays.asList(conf.getString(FlinkOptions.PARTITION_PATH_FIELD).split(",")),
|
||||
"default-par",
|
||||
conf);
|
||||
Path[] paths = tableSource.getReadPaths();
|
||||
assertNotNull(paths);
|
||||
String[] names = Arrays.stream(paths).map(Path::getName)
|
||||
.sorted(Comparator.naturalOrder()).toArray(String[]::new);
|
||||
assertThat(Arrays.toString(names), is("[par1, par2, par3, par4]"));
|
||||
FileStatus[] fileStatuses = tableSource.getReadFiles();
|
||||
assertNotNull(fileStatuses);
|
||||
assertThat(fileStatuses.length, is(4));
|
||||
// apply partition pruning
|
||||
Map<String, String> partitions = new HashMap<>();
|
||||
partitions.put("partition", "par1");
|
||||
|
||||
tableSource.applyPartitions(Collections.singletonList(partitions));
|
||||
|
||||
Path[] paths2 = tableSource.getReadPaths();
|
||||
assertNotNull(paths2);
|
||||
String[] names2 = Arrays.stream(paths2).map(Path::getName)
|
||||
.sorted(Comparator.naturalOrder()).toArray(String[]::new);
|
||||
assertThat(Arrays.toString(names2), is("[par1]"));
|
||||
FileStatus[] fileStatuses2 = tableSource.getReadFiles();
|
||||
assertNotNull(fileStatuses2);
|
||||
assertThat(fileStatuses2.length, is(1));
|
||||
}
|
||||
|
||||
@Test
|
||||
|
||||
@@ -0,0 +1,53 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi.utils;
|
||||
|
||||
import org.apache.hudi.avro.model.HoodieMetadataRecord;
|
||||
import org.apache.hudi.metadata.HoodieMetadataPayload;
|
||||
import org.apache.hudi.util.AvroSchemaConverter;
|
||||
|
||||
import org.apache.avro.Schema;
|
||||
import org.apache.flink.table.types.DataType;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import static org.hamcrest.CoreMatchers.is;
|
||||
import static org.hamcrest.MatcherAssert.assertThat;
|
||||
|
||||
/**
|
||||
* Test cases for {@link org.apache.hudi.util.AvroSchemaConverter}.
|
||||
*/
|
||||
public class TestAvroSchemaConverter {
|
||||
@Test
|
||||
void testUnionSchemaWithMultipleRecordTypes() {
|
||||
Schema schema = HoodieMetadataRecord.SCHEMA$;
|
||||
DataType dataType = AvroSchemaConverter.convertToDataType(schema);
|
||||
int pos = HoodieMetadataRecord.SCHEMA$.getField(HoodieMetadataPayload.SCHEMA_FIELD_ID_COLUMN_STATS).pos();
|
||||
final String expected = "ROW<"
|
||||
+ "`fileName` STRING, "
|
||||
+ "`columnName` STRING, "
|
||||
+ "`minValue` ROW<`wrapper` RAW('java.lang.Object', ?) NOT NULL>, "
|
||||
+ "`maxValue` ROW<`wrapper` RAW('java.lang.Object', ?) NOT NULL>, "
|
||||
+ "`valueCount` BIGINT, "
|
||||
+ "`nullCount` BIGINT, "
|
||||
+ "`totalSize` BIGINT, "
|
||||
+ "`totalUncompressedSize` BIGINT, "
|
||||
+ "`isDeleted` BOOLEAN NOT NULL>";
|
||||
assertThat(dataType.getChildren().get(pos).toString(), is(expected));
|
||||
}
|
||||
}
|
||||
@@ -318,6 +318,9 @@ public class TestConfigurations {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Tool to construct the catalog DDL.
|
||||
*/
|
||||
public static class Catalog {
|
||||
private final String catalogName;
|
||||
private String catalogPath = ".";
|
||||
|
||||
@@ -97,6 +97,6 @@ public class TestUtils {
|
||||
|
||||
public static StreamReadMonitoringFunction getMonitorFunc(Configuration conf) {
|
||||
final String basePath = conf.getString(FlinkOptions.PATH);
|
||||
return new StreamReadMonitoringFunction(conf, new Path(basePath), 1024 * 1024L, null);
|
||||
return new StreamReadMonitoringFunction(conf, new Path(basePath), TestConfigurations.ROW_TYPE, 1024 * 1024L, null);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -18,11 +18,11 @@
|
||||
|
||||
package org.apache.hudi.utils;
|
||||
|
||||
import org.apache.flink.configuration.Configuration;
|
||||
import org.apache.hudi.common.table.view.FileSystemViewStorageConfig;
|
||||
import org.apache.hudi.common.table.view.FileSystemViewStorageType;
|
||||
import org.apache.hudi.util.ViewStorageProperties;
|
||||
|
||||
import org.apache.flink.configuration.Configuration;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.api.io.TempDir;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user