From 170ee884578a85db53b87cee87f5208083f142aa Mon Sep 17 00:00:00 2001 From: lamber-ken Date: Sat, 7 Mar 2020 03:27:40 -0600 Subject: [PATCH] [HUDI-553] Building/Running Hudi on higher java versions (#1369) --- hudi-cli/pom.xml | 5 ++ .../HoodieDynamicBoundedBloomFilter.java | 7 ++- .../bloom/filter/SimpleBloomFilter.java | 7 ++- .../hudi/common/util/Base64CodecUtil.java | 46 ++++++++++++++++++ .../hudi/common/util/TestBase64CodecUtil.java | 47 +++++++++++++++++++ hudi-spark/pom.xml | 5 ++ pom.xml | 2 +- 7 files changed, 110 insertions(+), 9 deletions(-) create mode 100644 hudi-common/src/main/java/org/apache/hudi/common/util/Base64CodecUtil.java create mode 100644 hudi-common/src/test/java/org/apache/hudi/common/util/TestBase64CodecUtil.java diff --git a/hudi-cli/pom.xml b/hudi-cli/pom.xml index 6f572bb96..71010cbd2 100644 --- a/hudi-cli/pom.xml +++ b/hudi-cli/pom.xml @@ -44,6 +44,11 @@ net.alchim31.maven scala-maven-plugin ${scala-maven-plugin.version} + + + -nobootcp + + diff --git a/hudi-common/src/main/java/org/apache/hudi/common/bloom/filter/HoodieDynamicBoundedBloomFilter.java b/hudi-common/src/main/java/org/apache/hudi/common/bloom/filter/HoodieDynamicBoundedBloomFilter.java index 46800cbae..4d66718e2 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/bloom/filter/HoodieDynamicBoundedBloomFilter.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/bloom/filter/HoodieDynamicBoundedBloomFilter.java @@ -18,12 +18,11 @@ package org.apache.hudi.common.bloom.filter; +import org.apache.hudi.common.util.Base64CodecUtil; import org.apache.hudi.exception.HoodieIndexException; import org.apache.hadoop.util.bloom.Key; -import javax.xml.bind.DatatypeConverter; - import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.DataInputStream; @@ -66,7 +65,7 @@ public class HoodieDynamicBoundedBloomFilter implements BloomFilter { */ HoodieDynamicBoundedBloomFilter(String serString, BloomFilterTypeCode typeCode) { // ignoring the 
type code for now, since we have just one version - byte[] bytes = DatatypeConverter.parseBase64Binary(serString); + byte[] bytes = Base64CodecUtil.decode(serString); DataInputStream dis = new DataInputStream(new ByteArrayInputStream(bytes)); try { internalDynamicBloomFilter = new InternalDynamicBloomFilter(); @@ -95,7 +94,7 @@ public class HoodieDynamicBoundedBloomFilter implements BloomFilter { internalDynamicBloomFilter.write(dos); byte[] bytes = baos.toByteArray(); dos.close(); - return DatatypeConverter.printBase64Binary(bytes); + return Base64CodecUtil.encode(bytes); } catch (IOException e) { throw new HoodieIndexException("Could not serialize BloomFilter instance", e); } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/bloom/filter/SimpleBloomFilter.java b/hudi-common/src/main/java/org/apache/hudi/common/bloom/filter/SimpleBloomFilter.java index a5e784811..cf8b066c7 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/bloom/filter/SimpleBloomFilter.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/bloom/filter/SimpleBloomFilter.java @@ -18,12 +18,11 @@ package org.apache.hudi.common.bloom.filter; +import org.apache.hudi.common.util.Base64CodecUtil; import org.apache.hudi.exception.HoodieIndexException; import org.apache.hadoop.util.bloom.Key; -import javax.xml.bind.DatatypeConverter; - import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.DataInput; @@ -66,7 +65,7 @@ public class SimpleBloomFilter implements BloomFilter { */ public SimpleBloomFilter(String serString) { this.filter = new org.apache.hadoop.util.bloom.BloomFilter(); - byte[] bytes = DatatypeConverter.parseBase64Binary(serString); + byte[] bytes = Base64CodecUtil.decode(serString); DataInputStream dis = new DataInputStream(new ByteArrayInputStream(bytes)); try { this.filter.readFields(dis); @@ -103,7 +102,7 @@ public class SimpleBloomFilter implements BloomFilter { filter.write(dos); byte[] bytes = baos.toByteArray(); 
dos.close(); - return DatatypeConverter.printBase64Binary(bytes); + return Base64CodecUtil.encode(bytes); } catch (IOException e) { throw new HoodieIndexException("Could not serialize BloomFilter instance", e); } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/Base64CodecUtil.java b/hudi-common/src/main/java/org/apache/hudi/common/util/Base64CodecUtil.java new file mode 100644 index 000000000..a86879ad6 --- /dev/null +++ b/hudi-common/src/main/java/org/apache/hudi/common/util/Base64CodecUtil.java @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.common.util; + +import java.nio.charset.StandardCharsets; +import java.util.Base64; + +public final class Base64CodecUtil { + + /** + * Decodes a Base64-encoded string into a byte array using the basic Base64 encoding scheme. + * + * @param serString the Base64-encoded string to decode + * @return A newly-allocated byte array containing the decoded bytes. + */ + public static byte[] decode(String serString) { + return Base64.getDecoder().decode(serString.getBytes(StandardCharsets.UTF_8)); + } + + /** + * Encodes the specified byte array with Base64 and returns the result as a String built using StandardCharsets.UTF_8.
+ * + * @param data byte[] source data + * @return base64 encoded data + */ + public static String encode(byte[] data) { + return new String(Base64.getEncoder().encode(data), StandardCharsets.UTF_8); + } + +} diff --git a/hudi-common/src/test/java/org/apache/hudi/common/util/TestBase64CodecUtil.java b/hudi-common/src/test/java/org/apache/hudi/common/util/TestBase64CodecUtil.java new file mode 100644 index 000000000..e62df4024 --- /dev/null +++ b/hudi-common/src/test/java/org/apache/hudi/common/util/TestBase64CodecUtil.java @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hudi.common.util; + +import org.junit.Assert; +import org.junit.Test; + +import java.nio.charset.StandardCharsets; +import java.util.UUID; + +public class TestBase64CodecUtil { + + @Test + public void testCodec() { + + int times = 100; + UUID uuid = UUID.randomUUID(); + + for (int i = 0; i < times; i++) { + + byte[] originalData = uuid.toString().getBytes(StandardCharsets.UTF_8); + + String encodeData = Base64CodecUtil.encode(originalData); + byte[] decodeData = Base64CodecUtil.decode(encodeData); + + Assert.assertArrayEquals(originalData, decodeData); + } + + } + +} diff --git a/hudi-spark/pom.xml b/hudi-spark/pom.xml index a66dfdced..b3231af38 100644 --- a/hudi-spark/pom.xml +++ b/hudi-spark/pom.xml @@ -42,6 +42,11 @@ net.alchim31.maven scala-maven-plugin ${scala-maven-plugin.version} + + + -nobootcp + + org.apache.maven.plugins diff --git a/pom.xml b/pom.xml index 33759cc94..4c2fb8bd5 100644 --- a/pom.xml +++ b/pom.xml @@ -94,7 +94,7 @@ 4.1.1 2.4.4 1.8.2 - 2.11.8 + 2.11.12 2.11 0.12 3.3.1