1
0

[HUDI-3993] Replacing UDF in Bulk Insert w/ RDD transformation (#5470)

This commit is contained in:
Alexey Kudinkin
2022-07-21 06:20:47 -07:00
committed by GitHub
parent c7fe3fd01d
commit a33bdd32e3
41 changed files with 1180 additions and 870 deletions

View File

@@ -1,35 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hudi;
/**
 * Static helpers for working with types.
 *
 * <p>This class only hosts static methods, so it is {@code final} and cannot be instantiated.
 */
public final class TypeUtils {

  private TypeUtils() {
    // Static utility holder: no instances.
  }

  /**
   * Performs an unchecked cast of {@code o} to the type {@code T} inferred at the call site.
   *
   * <p>Routing unsafe type-casts through this utility makes it possible to
   * <ul>
   *   <li>Search for such type-casts more easily (just searching for usages of this method)</li>
   *   <li>Avoid type-cast warnings from the compiler</li>
   * </ul>
   *
   * <p>Because {@code T} is erased, no type check happens inside this method; a mismatch
   * surfaces as a {@link ClassCastException} at the point where the caller uses the result
   * as {@code T}.
   *
   * @param o   the object to cast; may be {@code null}
   * @param <T> the target type expected by the caller
   * @return the very same reference {@code o}, typed as {@code T}
   */
  @SuppressWarnings("unchecked")
  public static <T> T unsafeCast(Object o) {
    return (T) o;
  }
}

View File

@@ -24,12 +24,19 @@ package org.apache.hudi.common.bloom;
public interface BloomFilter {
/**
* Add a key represented by a {@link String} to the {@link BloomFilter}.
*
* @param key the key to be added to the {@link BloomFilter}
*/
void add(String key);
/**
* Add a key, given as the bytes of a UTF-8 encoded string, to the {@link BloomFilter}.
*
* @param key the key bytes to be added to the {@link BloomFilter}
*/
void add(byte[] key);
/**
* Tests for key membership.
*

View File

@@ -78,7 +78,12 @@ public class HoodieDynamicBoundedBloomFilter implements BloomFilter {
/**
 * Adds a string key to this filter.
 *
 * <p>The key is encoded as UTF-8 and handed to {@link #add(byte[])}, which performs the
 * single insertion into the underlying filter.
 *
 * @param key the key to add
 */
@Override
public void add(String key) {
  add(key.getBytes(StandardCharsets.UTF_8));
}
/**
 * Adds a key, supplied as raw bytes, to this filter.
 *
 * @param keyBytes the bytes identifying the key; wrapped in a {@code Key} before insertion
 */
@Override
public void add(byte[] keyBytes) {
  final Key wrappedKey = new Key(keyBytes);
  internalDynamicBloomFilter.add(wrappedKey);
}
@Override

View File

@@ -77,10 +77,15 @@ public class SimpleBloomFilter implements BloomFilter {
@Override
public void add(String key) {
if (key == null) {
add(key.getBytes(StandardCharsets.UTF_8));
}
@Override
public void add(byte[] keyBytes) {
if (keyBytes == null) {
throw new NullPointerException("Key cannot be null");
}
filter.add(new Key(key.getBytes(StandardCharsets.UTF_8)));
filter.add(new Key(keyBytes));
}
@Override

View File

@@ -20,7 +20,7 @@ package org.apache.hudi.common.util;
import javax.annotation.Nonnull;
import static org.apache.hudi.TypeUtils.unsafeCast;
import static org.apache.hudi.common.util.TypeUtils.unsafeCast;
/**
* Utility that could hold exclusively only either of (hence the name):

View File

@@ -30,7 +30,17 @@ import java.util.Deque;
public class HoodieTimer {
// Ordered stack of TimeInfo's to make sure stopping the timer returns the correct elapsed time.
// Declared exactly once; the reference itself is never reassigned.
private final Deque<TimeInfo> timeInfoDeque = new ArrayDeque<>();
/**
* Creates a timer that is not started on construction (delegates with {@code shouldStart = false}).
*/
public HoodieTimer() {
this(false);
}
/**
* Creates a timer, optionally starting it right away.
*
* @param shouldStart when {@code true}, {@code startTimer()} is invoked as part of construction
*/
public HoodieTimer(boolean shouldStart) {
// NOTE(review): if startTimer() can be overridden, calling it here would run subclass code
// before the subclass is fully initialized; confirm it is effectively final.
if (shouldStart) {
startTimer();
}
}
static class TimeInfo {

View File

@@ -39,4 +39,16 @@ public final class TypeUtils {
.collect(Collectors.toMap(valueMapper, Function.identity()));
}
/**
* Performs an unchecked cast of {@code o} to the type {@code T} inferred at the call site.
*
* <p>Routing unsafe type-casts through this utility makes it possible to
* <ul>
* <li>Search for such type-casts more easily (just searching for usages of this method)</li>
* <li>Avoid type-cast warnings from the compiler</li>
* </ul>
*
* @param o the object to cast
* @param <T> the target type expected by the caller
* @return the same reference {@code o}, typed as {@code T}
*/
@SuppressWarnings("unchecked")
public static <T> T unsafeCast(Object o) {
return (T) o;
}
}

View File

@@ -68,6 +68,7 @@ public abstract class BaseKeyGenerator extends KeyGenerator {
@Override
public final List<String> getRecordKeyFieldNames() {
// For nested columns, pick top level column name
// TODO materialize
return getRecordKeyFields().stream().map(k -> {
int idx = k.indexOf('.');
return idx > 0 ? k.substring(0, idx) : k;

View File

@@ -75,9 +75,9 @@ import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import static org.apache.hudi.TypeUtils.unsafeCast;
import static org.apache.hudi.common.util.DateTimeUtils.instantToMicros;
import static org.apache.hudi.common.util.DateTimeUtils.microsToInstant;
import static org.apache.hudi.common.util.TypeUtils.unsafeCast;
import static org.apache.hudi.common.util.ValidationUtils.checkArgument;
import static org.apache.hudi.common.util.ValidationUtils.checkState;
import static org.apache.hudi.metadata.HoodieTableMetadata.RECORDKEY_PARTITION_LIST;

View File

@@ -898,7 +898,7 @@ public class HoodieTestDataGenerator implements AutoCloseable {
return anchorTs + r.nextLong() % 259200000L;
}
private static UUID genPseudoRandomUUID(Random r) {
public static UUID genPseudoRandomUUID(Random r) {
byte[] bytes = new byte[16];
r.nextBytes(bytes);