[HUDI-2102] Support hilbert curve for hudi (#3952)
Co-authored-by: Y Ethan Guo <ethan.guoyihua@gmail.com>
This commit is contained in:
@@ -542,4 +542,32 @@ public class HoodieClusteringConfig extends HoodieConfig {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* strategy types for optimize layout for hudi data.
|
||||
*/
|
||||
public enum BuildLayoutOptimizationStrategy {
|
||||
ZORDER("z-order"),
|
||||
HILBERT("hilbert");
|
||||
private final String value;
|
||||
|
||||
BuildLayoutOptimizationStrategy(String value) {
|
||||
this.value = value;
|
||||
}
|
||||
|
||||
public String toCustomString() {
|
||||
return value;
|
||||
}
|
||||
|
||||
public static BuildLayoutOptimizationStrategy fromValue(String value) {
|
||||
switch (value.toLowerCase(Locale.ROOT)) {
|
||||
case "z-order":
|
||||
return ZORDER;
|
||||
case "hilbert":
|
||||
return HILBERT;
|
||||
default:
|
||||
throw new HoodieException("Invalid value of Type.");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,52 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi.optimize;
|
||||
|
||||
import org.davidmoten.hilbert.HilbertCurve;
|
||||
|
||||
import java.math.BigInteger;
|
||||
|
||||
/**
|
||||
* Utils for Hilbert Curve.
|
||||
*/
|
||||
public class HilbertCurveUtils {
|
||||
public static byte[] indexBytes(HilbertCurve hilbertCurve, long[] points, int paddingNum) {
|
||||
BigInteger index = hilbertCurve.index(points);
|
||||
return paddingToNByte(index.toByteArray(), paddingNum);
|
||||
}
|
||||
|
||||
public static byte[] paddingToNByte(byte[] a, int paddingNum) {
|
||||
if (a.length == paddingNum) {
|
||||
return a;
|
||||
}
|
||||
if (a.length > paddingNum) {
|
||||
byte[] result = new byte[paddingNum];
|
||||
System.arraycopy(a, 0, result, 0, paddingNum);
|
||||
return result;
|
||||
}
|
||||
int paddingSize = paddingNum - a.length;
|
||||
byte[] result = new byte[paddingNum];
|
||||
for (int i = 0; i < paddingSize; i++) {
|
||||
result[i] = 0;
|
||||
}
|
||||
System.arraycopy(a, 0, result, paddingSize, a.length);
|
||||
return result;
|
||||
}
|
||||
}
|
||||
@@ -176,9 +176,14 @@ public class ZOrderingUtil {
|
||||
|
||||
public static Long convertStringToLong(String a) {
|
||||
byte[] bytes = utf8To8Byte(a);
|
||||
return convertBytesToLong(bytes);
|
||||
}
|
||||
|
||||
public static long convertBytesToLong(byte[] bytes) {
|
||||
byte[] paddedBytes = paddingTo8Byte(bytes);
|
||||
long temp = 0L;
|
||||
for (int i = 7; i >= 0; i--) {
|
||||
temp = temp | (((long)bytes[i] & 0xff) << (7 - i) * 8);
|
||||
temp = temp | (((long) paddedBytes[i] & 0xff) << (7 - i) * 8);
|
||||
}
|
||||
return temp;
|
||||
}
|
||||
|
||||
@@ -0,0 +1,38 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi.optimize;
|
||||
|
||||
import org.davidmoten.hilbert.HilbertCurve;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
|
||||
public class TestHilbertCurveUtils {
|
||||
|
||||
private static final HilbertCurve INSTANCE = HilbertCurve.bits(5).dimensions(2);
|
||||
|
||||
@Test
|
||||
public void testIndex() {
|
||||
long[] t = {1, 2};
|
||||
assertEquals(13, INSTANCE.index(t).intValue());
|
||||
long[] t1 = {0, 16};
|
||||
assertEquals(256, INSTANCE.index(t1).intValue());
|
||||
}
|
||||
}
|
||||
@@ -126,4 +126,29 @@ public class TestZOrderingUtil {
|
||||
this.originValue = originValue;
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testConvertBytesToLong() {
|
||||
long[] tests = new long[] {Long.MIN_VALUE, -1L, 0, 1L, Long.MAX_VALUE};
|
||||
for (int i = 0; i < tests.length; i++) {
|
||||
assertEquals(ZOrderingUtil.convertBytesToLong(convertLongToBytes(tests[i])), tests[i]);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testConvertBytesToLongWithPadding() {
|
||||
byte[] bytes = new byte[2];
|
||||
bytes[0] = 2;
|
||||
bytes[1] = 127;
|
||||
assertEquals(ZOrderingUtil.convertBytesToLong(bytes), 2 * 256 + 127);
|
||||
}
|
||||
|
||||
private byte[] convertLongToBytes(long num) {
|
||||
byte[] byteNum = new byte[8];
|
||||
for (int i = 0; i < 8; i++) {
|
||||
int offset = 64 - (i + 1) * 8;
|
||||
byteNum[i] = (byte) ((num >> offset) & 0xff);
|
||||
}
|
||||
return byteNum;
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user