
[HUDI-2430] Make decimal compatible with hudi for flink writer (#3658)

commit 627f20f9c5 (parent d90fd1f68c)
Author: Danny Chan
Date:   2021-09-15 12:04:46 +08:00
Committed by: GitHub
5 changed files with 84 additions and 78 deletions

ParquetDecimalVector.java

@@ -19,12 +19,9 @@
 package org.apache.hudi.table.format.cow;
 
 import org.apache.flink.table.data.DecimalData;
-import org.apache.flink.table.data.DecimalDataUtils;
 import org.apache.flink.table.data.vector.BytesColumnVector;
 import org.apache.flink.table.data.vector.ColumnVector;
 import org.apache.flink.table.data.vector.DecimalColumnVector;
-import org.apache.flink.table.data.vector.IntColumnVector;
-import org.apache.flink.table.data.vector.LongColumnVector;
 
 /**
  * Parquet write decimal as int32 and int64 and binary, this class wrap the real vector to
@@ -43,22 +40,10 @@ public class ParquetDecimalVector implements DecimalColumnVector {
 
   @Override
   public DecimalData getDecimal(int i, int precision, int scale) {
-    if (DecimalDataUtils.is32BitDecimal(precision)) {
-      return DecimalData.fromUnscaledLong(
-          ((IntColumnVector) vector).getInt(i),
-          precision,
-          scale);
-    } else if (DecimalDataUtils.is64BitDecimal(precision)) {
-      return DecimalData.fromUnscaledLong(
-          ((LongColumnVector) vector).getLong(i),
-          precision,
-          scale);
-    } else {
-      return DecimalData.fromUnscaledBytes(
-          ((BytesColumnVector) vector).getBytes(i).getBytes(),
-          precision,
-          scale);
-    }
+    return DecimalData.fromUnscaledBytes(
+        ((BytesColumnVector) vector).getBytes(i).getBytes(),
+        precision,
+        scale);
   }
 
   @Override
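
A quick sanity check of the simplified read path above, not part of the patch: a decimal of any precision can be rebuilt from its unscaled two's-complement bytes, so no int32/int64 fast path is needed as long as the writer stored bytes. The hypothetical snippet below assumes only Flink's flink-table-common on the classpath.

import java.math.BigDecimal;

import org.apache.flink.table.data.DecimalData;

public class DecimalBytesRoundTrip {
  public static void main(String[] args) {
    // A "small" decimal that the old code would have routed through the int32 path.
    BigDecimal source = new BigDecimal("12345.67"); // precision 7, scale 2

    // Writer side: store the unscaled two's-complement bytes (what a bytes-backed
    // decimal column carries in the parquet file).
    byte[] unscaled = source.unscaledValue().toByteArray();

    // Reader side: rebuild the DecimalData straight from the bytes, no precision branching.
    DecimalData read = DecimalData.fromUnscaledBytes(unscaled, 7, 2);

    System.out.println(source.compareTo(read.toBigDecimal()) == 0); // prints true
  }
}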

ParquetSplitReaderUtil.java

@@ -31,7 +31,6 @@ import org.apache.flink.formats.parquet.vector.reader.LongColumnReader;
 import org.apache.flink.formats.parquet.vector.reader.ShortColumnReader;
 import org.apache.flink.formats.parquet.vector.reader.TimestampColumnReader;
 import org.apache.flink.table.data.DecimalData;
-import org.apache.flink.table.data.DecimalDataUtils;
 import org.apache.flink.table.data.TimestampData;
 import org.apache.flink.table.data.vector.ColumnVector;
 import org.apache.flink.table.data.vector.VectorizedColumnBatch;
@@ -46,7 +45,6 @@ import org.apache.flink.table.data.vector.heap.HeapShortVector;
 import org.apache.flink.table.data.vector.heap.HeapTimestampVector;
 import org.apache.flink.table.data.vector.writable.WritableColumnVector;
 import org.apache.flink.table.types.DataType;
-import org.apache.flink.table.types.logical.BigIntType;
 import org.apache.flink.table.types.logical.DecimalType;
 import org.apache.flink.table.types.logical.IntType;
 import org.apache.flink.table.types.logical.LogicalType;
@@ -197,23 +195,10 @@ public class ParquetSplitReaderUtil {
       DecimalData decimal = value == null
           ? null
           : Preconditions.checkNotNull(DecimalData.fromBigDecimal((BigDecimal) value, precision, scale));
-      ColumnVector internalVector;
-      if (DecimalDataUtils.is32BitDecimal(precision)) {
-        internalVector = createVectorFromConstant(
-            new IntType(),
-            decimal == null ? null : (int) decimal.toUnscaledLong(),
-            batchSize);
-      } else if (DecimalDataUtils.is64BitDecimal(precision)) {
-        internalVector = createVectorFromConstant(
-            new BigIntType(),
-            decimal == null ? null : decimal.toUnscaledLong(),
-            batchSize);
-      } else {
-        internalVector = createVectorFromConstant(
-            new VarBinaryType(),
-            decimal == null ? null : decimal.toUnscaledBytes(),
-            batchSize);
-      }
+      ColumnVector internalVector = createVectorFromConstant(
+          new VarBinaryType(),
+          decimal == null ? null : decimal.toUnscaledBytes(),
+          batchSize);
       return new ParquetDecimalVector(internalVector);
     case FLOAT:
       HeapFloatVector fv = new HeapFloatVector(batchSize);
@@ -365,29 +350,10 @@ public class ParquetSplitReaderUtil {
             "TIME_MICROS original type is not ");
         return new HeapTimestampVector(batchSize);
       case DECIMAL:
-        DecimalType decimalType = (DecimalType) fieldType;
-        if (DecimalDataUtils.is32BitDecimal(decimalType.getPrecision())) {
-          checkArgument(
-              (typeName == PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY
-                  || typeName == PrimitiveType.PrimitiveTypeName.INT32)
-                  && primitiveType.getOriginalType() == OriginalType.DECIMAL,
-              "Unexpected type: %s", typeName);
-          return new HeapIntVector(batchSize);
-        } else if (DecimalDataUtils.is64BitDecimal(decimalType.getPrecision())) {
-          checkArgument(
-              (typeName == PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY
-                  || typeName == PrimitiveType.PrimitiveTypeName.INT64)
-                  && primitiveType.getOriginalType() == OriginalType.DECIMAL,
-              "Unexpected type: %s", typeName);
-          return new HeapLongVector(batchSize);
-        } else {
-          checkArgument(
-              (typeName == PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY
-                  || typeName == PrimitiveType.PrimitiveTypeName.BINARY)
-                  && primitiveType.getOriginalType() == OriginalType.DECIMAL,
-              "Unexpected type: %s", typeName);
-          return new HeapBytesVector(batchSize);
-        }
+        checkArgument(typeName == PrimitiveType.PrimitiveTypeName.BINARY
+                && primitiveType.getOriginalType() == OriginalType.DECIMAL,
+            "Unexpected type: %s", typeName);
+        return new HeapBytesVector(batchSize);
       default:
         throw new UnsupportedOperationException(fieldType + " is not supported now.");
     }
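
The stricter check above only accepts BINARY with the DECIMAL annotation. A plausible reading, not stated in the commit itself, is that Hudi's Avro-based writer backs decimals with the bytes decimal logical type, which parquet-avro persists as an annotated BINARY column rather than int32/int64. The hypothetical sketch below, assuming only Avro on the classpath, shows that schema shape; it is not code from this commit.

import org.apache.avro.LogicalTypes;
import org.apache.avro.Schema;

public class DecimalAvroSchemaSketch {
  public static void main(String[] args) {
    // A bytes-backed Avro decimal; parquet-avro writes such a field as
    // BINARY annotated with DECIMAL, the shape the checkArgument above expects.
    Schema decimalBytes =
        LogicalTypes.decimal(20, 3).addToSchema(Schema.create(Schema.Type.BYTES));
    System.out.println(decimalBytes);
    // prints something like {"type":"bytes","logicalType":"decimal","precision":20,"scale":3}
  }
}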