1
0

[HUDI-2102] Support hilbert curve for hudi (#3952)

Co-authored-by: Y Ethan Guo <ethan.guoyihua@gmail.com>
This commit is contained in:
xiarixiaoyao
2021-11-27 15:20:19 +08:00
committed by GitHub
parent 2c7656c35f
commit 780a2ac5b2
18 changed files with 1015 additions and 30 deletions

View File

@@ -542,4 +542,32 @@ public class HoodieClusteringConfig extends HoodieConfig {
}
}
}
/**
 * Strategy types available for optimizing the layout of Hudi data.
 *
 * <p>Each constant carries a lower-case string form (for example {@code "z-order"}), which is
 * what {@link #toCustomString()} returns and what {@link #fromValue(String)} parses.
 */
public enum BuildLayoutOptimizationStrategy {
  ZORDER("z-order"),
  HILBERT("hilbert");

  private final String value;

  BuildLayoutOptimizationStrategy(String value) {
    this.value = value;
  }

  /**
   * Returns the string form of this strategy.
   *
   * @return the lower-case name of the strategy, e.g. {@code "z-order"} or {@code "hilbert"}
   */
  public String toCustomString() {
    return value;
  }

  /**
   * Resolves a strategy from its string form, ignoring case.
   *
   * @param value the string form of the strategy, e.g. {@code "Z-Order"} or {@code "hilbert"}
   * @return the matching strategy
   * @throws HoodieException if {@code value} is {@code null} or matches no known strategy
   */
  public static BuildLayoutOptimizationStrategy fromValue(String value) {
    if (value == null) {
      throw new HoodieException(invalidValueMessage(null));
    }
    // Locale.ROOT keeps the lower-casing independent of the JVM's default locale.
    String normalized = value.toLowerCase(Locale.ROOT);
    for (BuildLayoutOptimizationStrategy strategy : values()) {
      if (strategy.value.equals(normalized)) {
        return strategy;
      }
    }
    throw new HoodieException(invalidValueMessage(value));
  }

  /** Builds an error message naming the rejected input and every accepted value. */
  private static String invalidValueMessage(String rejected) {
    StringBuilder supported = new StringBuilder();
    for (BuildLayoutOptimizationStrategy strategy : values()) {
      if (supported.length() > 0) {
        supported.append(", ");
      }
      supported.append(strategy.value);
    }
    return "Invalid value of Type: '" + rejected + "'. Supported values: " + supported + ".";
  }
}
}

View File

@@ -0,0 +1,52 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.hudi.optimize;
import org.davidmoten.hilbert.HilbertCurve;
import java.math.BigInteger;
/**
 * Helpers for turning Hilbert curve indexes into fixed-width byte arrays.
 */
public class HilbertCurveUtils {

  /**
   * Computes the Hilbert index of {@code points} and returns it as exactly
   * {@code paddingNum} bytes.
   *
   * @param hilbertCurve the curve used to compute the index
   * @param points the coordinates to index
   * @param paddingNum the required length of the returned array
   * @return the bytes of the index, sized by {@link #paddingToNByte(byte[], int)}
   */
  public static byte[] indexBytes(HilbertCurve hilbertCurve, long[] points, int paddingNum) {
    BigInteger hilbertIndex = hilbertCurve.index(points);
    byte[] rawBytes = hilbertIndex.toByteArray();
    return paddingToNByte(rawBytes, paddingNum);
  }

  /**
   * Sizes {@code a} to exactly {@code paddingNum} bytes.
   *
   * <ul>
   *   <li>Same length: {@code a} itself is returned (no copy).</li>
   *   <li>Longer: only the first {@code paddingNum} bytes are kept.</li>
   *   <li>Shorter: {@code a} is right-aligned and the leading bytes are zero.</li>
   * </ul>
   *
   * <p>NOTE(review): the "longer" case drops trailing bytes; confirm this is intended for
   * inputs that come from {@code BigInteger.toByteArray()}.
   *
   * @param a the source bytes
   * @param paddingNum the required length of the result
   * @return an array of length {@code paddingNum}
   */
  public static byte[] paddingToNByte(byte[] a, int paddingNum) {
    if (a.length == paddingNum) {
      return a;
    }
    // A freshly allocated byte[] is already zero-filled, so no explicit padding loop is needed.
    byte[] sized = new byte[paddingNum];
    if (a.length > paddingNum) {
      System.arraycopy(a, 0, sized, 0, paddingNum);
    } else {
      System.arraycopy(a, 0, sized, paddingNum - a.length, a.length);
    }
    return sized;
  }
}

View File

@@ -176,9 +176,14 @@ public class ZOrderingUtil {
/**
 * Converts a string to a long by passing the result of {@code utf8To8Byte(a)} to
 * {@link #convertBytesToLong(byte[])}.
 *
 * @param a the string to convert
 * @return the boxed long produced from the string's bytes
 */
public static Long convertStringToLong(String a) {
  // presumably utf8To8Byte encodes the string and sizes it to 8 bytes; verify against its definition
  return convertBytesToLong(utf8To8Byte(a));
}
/**
 * Packs a byte array into a long, with index 0 as the most significant byte.
 *
 * <p>The input is first passed through {@code paddingTo8Byte}, and only the padded array is
 * read. The loop reads indexes 0 to 7, so indexing the raw input instead would throw
 * {@link ArrayIndexOutOfBoundsException} for arrays shorter than 8 bytes.
 *
 * @param bytes the bytes to convert
 * @return the long assembled from the eight padded bytes
 */
public static long convertBytesToLong(byte[] bytes) {
  byte[] paddedBytes = paddingTo8Byte(bytes);
  long temp = 0L;
  for (int i = 7; i >= 0; i--) {
    // Mask to avoid sign extension; byte i lands (7 - i) * 8 bits from the bottom.
    temp = temp | (((long) paddedBytes[i] & 0xff) << ((7 - i) * 8));
  }
  return temp;
}

View File

@@ -0,0 +1,38 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.hudi.optimize;
import org.davidmoten.hilbert.HilbertCurve;
import org.junit.jupiter.api.Test;
import static org.junit.jupiter.api.Assertions.assertEquals;
/**
 * Checks Hilbert indexes on a curve built with 5 bits and 2 dimensions.
 */
public class TestHilbertCurveUtils {

  private static final HilbertCurve CURVE = HilbertCurve.bits(5).dimensions(2);

  /** Verifies the index computed for two fixed points. */
  @Test
  public void testIndex() {
    long[] firstPoint = new long[] {1, 2};
    int firstIndex = CURVE.index(firstPoint).intValue();
    assertEquals(13, firstIndex);

    long[] secondPoint = new long[] {0, 16};
    int secondIndex = CURVE.index(secondPoint).intValue();
    assertEquals(256, secondIndex);
  }
}

View File

@@ -126,4 +126,29 @@ public class TestZOrderingUtil {
this.originValue = originValue;
}
}
/**
 * Round-trips boundary long values through {@code convertLongToBytes} and
 * {@code ZOrderingUtil.convertBytesToLong}, expecting the original value back.
 */
@Test
public void testConvertBytesToLong() {
  long[] tests = new long[] {Long.MIN_VALUE, -1L, 0, 1L, Long.MAX_VALUE};
  for (long expected : tests) {
    // assertEquals takes (expected, actual); keeping that order gives a correct failure message.
    assertEquals(expected, ZOrderingUtil.convertBytesToLong(convertLongToBytes(expected)));
  }
}
/**
 * Verifies that an input shorter than 8 bytes is converted as if its bytes were the
 * least significant ones: {2, 127} must yield 2 * 256 + 127.
 */
@Test
public void testConvertBytesToLongWithPadding() {
  byte[] bytes = new byte[2];
  bytes[0] = 2;
  bytes[1] = 127;
  // assertEquals takes (expected, actual); keeping that order gives a correct failure message.
  assertEquals(2L * 256 + 127, ZOrderingUtil.convertBytesToLong(bytes));
}
/**
 * Encodes a long as 8 bytes, with the most significant byte at index 0.
 *
 * @param num the value to encode
 * @return an 8-byte array holding {@code num}
 */
private byte[] convertLongToBytes(long num) {
  byte[] encoded = new byte[8];
  long remaining = num;
  // Fill from the last index backwards, taking the lowest 8 bits each time.
  for (int pos = 7; pos >= 0; pos--) {
    encoded[pos] = (byte) (remaining & 0xff);
    remaining >>= 8;
  }
  return encoded;
}
}