[HUDI-2814] Make Z-index more generic Column-Stats Index (#4106)
This commit is contained in:
@@ -79,7 +79,7 @@ public class HoodieTableMetaClient implements Serializable {
|
||||
public static final String AUXILIARYFOLDER_NAME = METAFOLDER_NAME + Path.SEPARATOR + ".aux";
|
||||
public static final String BOOTSTRAP_INDEX_ROOT_FOLDER_PATH = AUXILIARYFOLDER_NAME + Path.SEPARATOR + ".bootstrap";
|
||||
public static final String HEARTBEAT_FOLDER_NAME = METAFOLDER_NAME + Path.SEPARATOR + ".heartbeat";
|
||||
public static final String ZINDEX_NAME = ".zindex";
|
||||
public static final String COLUMN_STATISTICS_INDEX_NAME = ".colstatsindex";
|
||||
public static final String BOOTSTRAP_INDEX_BY_PARTITION_FOLDER_PATH = BOOTSTRAP_INDEX_ROOT_FOLDER_PATH
|
||||
+ Path.SEPARATOR + ".partitions";
|
||||
public static final String BOOTSTRAP_INDEX_BY_FILE_ID_FOLDER_PATH = BOOTSTRAP_INDEX_ROOT_FOLDER_PATH + Path.SEPARATOR
|
||||
@@ -178,10 +178,10 @@ public class HoodieTableMetaClient implements Serializable {
|
||||
}
|
||||
|
||||
/**
|
||||
* @return z-index path
|
||||
* @return Column Statistics index path
|
||||
*/
|
||||
public String getZindexPath() {
|
||||
return new Path(metaPath, ZINDEX_NAME).toString();
|
||||
public String getColumnStatsIndexPath() {
|
||||
return new Path(metaPath, COLUMN_STATISTICS_INDEX_NAME).toString();
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -0,0 +1,191 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi.common.util;
|
||||
|
||||
import java.nio.charset.Charset;
|
||||
|
||||
/**
 * Static helpers for turning primitive values into fixed-width, big-endian byte arrays
 * whose unsigned lexicographic order (see {@link #compareTo}) follows the natural order
 * of the encoded values, plus bit-interleaving of such arrays.
 */
public class BinaryUtil {

  /**
   * Lexicographically compare two arrays, treating every byte as an unsigned value.
   * copy from hbase
   *
   * @param buffer1 left operand
   * @param buffer2 right operand
   * @param offset1 Where to start comparing in the left buffer
   * @param offset2 Where to start comparing in the right buffer
   * @param length1 How much to compare from the left buffer
   * @param length2 How much to compare from the right buffer
   * @return 0 if equal, < 0 if left is less than right, etc.
   */
  public static int compareTo(byte[] buffer1, int offset1, int length1,
                              byte[] buffer2, int offset2, int length2) {
    // Short circuit equal case
    if (buffer1 == buffer2
        && offset1 == offset2
        && length1 == length2) {
      return 0;
    }
    int end1 = offset1 + length1;
    int end2 = offset2 + length2;
    for (int i = offset1, j = offset2; i < end1 && j < end2; i++, j++) {
      // Mask to 0..255 so the subtraction below can never overflow
      int a = buffer1[i] & 0xff;
      int b = buffer2[j] & 0xff;
      if (a != b) {
        return a - b;
      }
    }
    // Common prefix is identical: the shorter range sorts first
    return length1 - length2;
  }

  /**
   * Normalizes the array to exactly 8 bytes.
   *
   * @param a source bytes
   * @return {@code a} itself (not a copy) when it already holds 8 bytes; a new array with the
   *         first 8 bytes when it is longer; a new array left-padded with zero bytes when shorter
   */
  public static byte[] paddingTo8Byte(byte[] a) {
    if (a.length == 8) {
      return a;
    }
    // A freshly allocated array is already zero-filled, no explicit padding loop is needed
    byte[] result = new byte[8];
    if (a.length > 8) {
      System.arraycopy(a, 0, result, 0, 8);
    } else {
      System.arraycopy(a, 0, result, 8 - a.length, a.length);
    }
    return result;
  }

  /**
   * Interleaving array bytes.
   * Interleaving means take one bit from the first matrix element, one bit
   * from the next, etc, then take the second bit from the first matrix
   * element, second bit from the second, all the way to the last bit of the
   * last element. Bits are taken most-significant first.
   *
   * @param buffer candidate elements to do interleaving; each must hold at least {@code size} bytes
   * @param size   byte size of every candidate element
   * @return array of {@code size * buffer.length} bytes holding the interleaved bits
   */
  public static byte[] interleaving(byte[][] buffer, int size) {
    int candidateSize = buffer.length;
    byte[] result = new byte[size * candidateSize];
    int resBitPos = 0;
    int totalBits = size * 8;
    for (int bitStep = 0; bitStep < totalBits; bitStep++) {
      int currentBytePos = bitStep / 8;
      int currentBitPos = bitStep % 8;

      for (int i = 0; i < candidateSize; i++) {
        int tempResBytePos = resBitPos / 8;
        int tempResBitPos = resBitPos % 8;
        result[tempResBytePos] = updatePos(result[tempResBytePos], tempResBitPos, buffer[i][currentBytePos], currentBitPos);
        resBitPos++;
      }
    }
    return result;
  }

  /**
   * Copies a single bit from {@code b} into {@code a}. Bit positions are counted from the
   * most significant bit (position 0) to the least significant bit (position 7).
   *
   * @param a    byte to update
   * @param apos position (0..7) of the bit in {@code a} to overwrite
   * @param b    byte to read the bit from
   * @param bpos position (0..7) of the bit in {@code b} to read
   * @return {@code a} with the bit at {@code apos} set to the value of {@code b}'s bit at {@code bpos}
   */
  public static byte updatePos(byte a, int apos, byte b, int bpos) {
    int targetMask = 1 << (7 - apos);
    // Test the source bit directly instead of shifting a (sign-extended) byte around:
    // right-shifting a byte whose top bit is set drags extra 1-bits in and used to make
    // an already-set target bit get cleared.
    boolean sourceBitSet = (b & (1 << (7 - bpos))) != 0;
    return (byte) (sourceBitSet ? (a | targetMask) : (a & ~targetMask));
  }

  /**
   * @param val value to encode
   * @return the 4 bytes of {@code val}, most significant byte first
   */
  public static byte[] toBytes(int val) {
    byte[] b = new byte[4];
    for (int i = 3; i > 0; i--) {
      b[i] = (byte) val;
      val >>>= 8;
    }
    b[0] = (byte) val;
    return b;
  }

  /**
   * @param val value to encode
   * @return the 8 bytes of {@code val}, most significant byte first
   */
  public static byte[] toBytes(long val) {
    long temp = val;
    byte[] b = new byte[8];
    for (int i = 7; i > 0; i--) {
      b[i] = (byte) temp;
      temp >>>= 8;
    }
    b[0] = (byte) temp;
    return b;
  }

  /**
   * @param d value to encode
   * @return the 8 bytes of the raw IEEE 754 bit pattern of {@code d}, most significant byte first
   */
  public static byte[] toBytes(final double d) {
    return toBytes(Double.doubleToRawLongBits(d));
  }

  /**
   * Encodes an int into 8 bytes: the sign bit is flipped so that negative values compare
   * below non-negative ones as unsigned bytes, then the 4 bytes are left-padded with zeros.
   */
  public static byte[] intTo8Byte(int a) {
    int temp = a;
    temp = temp ^ (1 << 31);
    return paddingTo8Byte(toBytes(temp));
  }

  /**
   * Places the byte, unchanged, in the last position of an otherwise zero 8-byte array.
   */
  public static byte[] byteTo8Byte(byte a) {
    return paddingTo8Byte(new byte[] { a });
  }

  /**
   * Encodes a long into 8 bytes with the sign bit flipped so that negative values compare
   * below non-negative ones as unsigned bytes.
   */
  public static byte[] longTo8Byte(long a) {
    long temp = a;
    temp = temp ^ (1L << 63);
    return toBytes(temp);
  }

  /**
   * Encodes a double into 8 bytes whose unsigned lexicographic order follows the numeric order.
   * Values with the sign bit set (negative numbers and {@code -0.0}) have every bit inverted;
   * all other values (including {@code 0.0}) have only the sign bit flipped, so that zero lands
   * between the negative and the positive range instead of below all negatives.
   */
  public static byte[] doubleTo8Byte(double a) {
    long bits = Double.doubleToRawLongBits(a);
    if (bits < 0) {
      // Sign bit set: larger magnitude must sort lower, hence invert everything
      bits = ~bits;
    } else {
      // Sign bit clear: lift above every encoded negative value
      bits ^= Long.MIN_VALUE;
    }
    return toBytes(bits);
  }

  /**
   * Encodes the string as UTF-8 and normalizes the result to 8 bytes
   * (see {@link #paddingTo8Byte(byte[])} for truncation/padding rules).
   */
  public static byte[] utf8To8Byte(String a) {
    return paddingTo8Byte(a.getBytes(Charset.forName("utf-8")));
  }

  /**
   * Converts the 8-byte UTF-8 representation of the string (see {@link #utf8To8Byte(String)})
   * into a long.
   */
  public static Long convertStringToLong(String a) {
    byte[] bytes = utf8To8Byte(a);
    return convertBytesToLong(bytes);
  }

  /**
   * Reads the bytes, normalized to 8 bytes, as a big-endian long.
   */
  public static long convertBytesToLong(byte[] bytes) {
    byte[] paddedBytes = paddingTo8Byte(bytes);
    long temp = 0L;
    for (int i = 7; i >= 0; i--) {
      // Byte i contributes at bit offset (7 - i) * 8; mask avoids sign extension
      temp = temp | (((long) paddedBytes[i] & 0xff) << (7 - i) * 8);
    }
    return temp;
  }
}
|
||||
|
||||
@@ -18,15 +18,17 @@
|
||||
|
||||
package org.apache.hudi.common.util;
|
||||
|
||||
import java.util.Properties;
|
||||
import org.apache.hudi.common.util.collection.Pair;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.Properties;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
@@ -35,6 +37,36 @@ public class CollectionUtils {
|
||||
|
||||
public static final Properties EMPTY_PROPERTIES = new Properties();
|
||||
|
||||
/**
|
||||
* Combines provided {@link List}s into one
|
||||
*/
|
||||
public static <E> List<E> combine(List<E> one, List<E> another) {
|
||||
ArrayList<E> combined = new ArrayList<>(one);
|
||||
combined.addAll(another);
|
||||
return combined;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns difference b/w {@code one} {@link Set} of elements and {@code another}
|
||||
*/
|
||||
public static <E> Set<E> diff(Set<E> one, Set<E> another) {
|
||||
Set<E> diff = new HashSet<>(one);
|
||||
diff.removeAll(another);
|
||||
return diff;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns difference b/w {@code one} {@link List} of elements and {@code another}
|
||||
*
|
||||
* NOTE: This is less optimal counterpart to {@link #diff(Set, Set)}, accepting {@link List}
|
||||
* as a holding collection to support duplicate elements use-cases
|
||||
*/
|
||||
public static <E> List<E> diff(List<E> one, List<E> another) {
|
||||
List<E> diff = new ArrayList<>(one);
|
||||
diff.removeAll(another);
|
||||
return diff;
|
||||
}
|
||||
|
||||
/**
|
||||
* Determines whether two iterators contain equal elements in the same order. More specifically,
|
||||
* this method returns {@code true} if {@code iterator1} and {@code iterator2} contain the same
|
||||
|
||||
@@ -0,0 +1,42 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi.common.util;
|
||||
|
||||
import javax.annotation.Nonnull;
|
||||
import java.util.Arrays;
|
||||
import java.util.Map;
|
||||
import java.util.function.Function;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
public final class TypeUtils {
|
||||
|
||||
private TypeUtils() {}
|
||||
|
||||
/**
|
||||
* Maps values from the provided Enum's {@link Class} into corresponding values,
|
||||
* extracted by provided {@code valueMapper}
|
||||
*/
|
||||
public static <EnumT extends Enum<EnumT>> Map<String, EnumT> getValueToEnumMap(
|
||||
@Nonnull Class<EnumT> klass,
|
||||
@Nonnull Function<EnumT, String> valueMapper
|
||||
) {
|
||||
return Arrays.stream(klass.getEnumConstants())
|
||||
.collect(Collectors.toMap(valueMapper, Function.identity()));
|
||||
}
|
||||
|
||||
}
|
||||
Reference in New Issue
Block a user