[HUDI-3993] Replacing UDF in Bulk Insert w/ RDD transformation (#5470)
This commit is contained in:
@@ -1,35 +0,0 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi;
|
||||
|
||||
public class TypeUtils {
|
||||
|
||||
/**
|
||||
* This utility abstracts unsafe type-casting in a way that makes it possible to
|
||||
* <ul>
|
||||
* <li>Search for such type-casts more easily (just searching for usages of this method)</li>
|
||||
* <li>Avoid type-cast warnings from the compiler</li>
|
||||
* </ul>
|
||||
*/
|
||||
@SuppressWarnings("unchecked")
|
||||
public static <T> T unsafeCast(Object o) {
|
||||
return (T) o;
|
||||
}
|
||||
|
||||
}
|
||||
@@ -24,12 +24,19 @@ package org.apache.hudi.common.bloom;
|
||||
public interface BloomFilter {
|
||||
|
||||
/**
|
||||
* Add a key to the {@link BloomFilter}.
|
||||
* Add a key represented by a {@link String} to the {@link BloomFilter}.
|
||||
*
|
||||
* @param key the key to be added to the {@link BloomFilter}
|
||||
*/
|
||||
void add(String key);
|
||||
|
||||
/**
|
||||
* Add a key's bytes, representing a UTF-8 encoded string, to the {@link BloomFilter}.
|
||||
*
|
||||
* @param key the key bytes to be added to the {@link BloomFilter}
|
||||
*/
|
||||
void add(byte[] key);
|
||||
|
||||
/**
|
||||
* Tests for key membership.
|
||||
*
|
||||
|
||||
@@ -78,7 +78,12 @@ public class HoodieDynamicBoundedBloomFilter implements BloomFilter {
|
||||
|
||||
@Override
|
||||
public void add(String key) {
|
||||
internalDynamicBloomFilter.add(new Key(key.getBytes(StandardCharsets.UTF_8)));
|
||||
add(key.getBytes(StandardCharsets.UTF_8));
|
||||
}
|
||||
|
||||
@Override
|
||||
public void add(byte[] keyBytes) {
|
||||
internalDynamicBloomFilter.add(new Key(keyBytes));
|
||||
}
|
||||
|
||||
@Override
|
||||
|
||||
@@ -77,10 +77,15 @@ public class SimpleBloomFilter implements BloomFilter {
|
||||
|
||||
@Override
|
||||
public void add(String key) {
|
||||
if (key == null) {
|
||||
add(key.getBytes(StandardCharsets.UTF_8));
|
||||
}
|
||||
|
||||
@Override
|
||||
public void add(byte[] keyBytes) {
|
||||
if (keyBytes == null) {
|
||||
throw new NullPointerException("Key cannot be null");
|
||||
}
|
||||
filter.add(new Key(key.getBytes(StandardCharsets.UTF_8)));
|
||||
filter.add(new Key(keyBytes));
|
||||
}
|
||||
|
||||
@Override
|
||||
|
||||
@@ -20,7 +20,7 @@ package org.apache.hudi.common.util;
|
||||
|
||||
import javax.annotation.Nonnull;
|
||||
|
||||
import static org.apache.hudi.TypeUtils.unsafeCast;
|
||||
import static org.apache.hudi.common.util.TypeUtils.unsafeCast;
|
||||
|
||||
/**
|
||||
* Utility that holds exactly one of the following (hence the name):
|
||||
|
||||
@@ -30,7 +30,17 @@ import java.util.Deque;
|
||||
public class HoodieTimer {
|
||||
|
||||
// Ordered stack of TimeInfo's to make sure stopping the timer returns the correct elapsed time
|
||||
Deque<TimeInfo> timeInfoDeque = new ArrayDeque<>();
|
||||
private final Deque<TimeInfo> timeInfoDeque = new ArrayDeque<>();
|
||||
|
||||
public HoodieTimer() {
|
||||
this(false);
|
||||
}
|
||||
|
||||
public HoodieTimer(boolean shouldStart) {
|
||||
if (shouldStart) {
|
||||
startTimer();
|
||||
}
|
||||
}
|
||||
|
||||
static class TimeInfo {
|
||||
|
||||
|
||||
@@ -39,4 +39,16 @@ public final class TypeUtils {
|
||||
.collect(Collectors.toMap(valueMapper, Function.identity()));
|
||||
}
|
||||
|
||||
/**
|
||||
* This utility abstracts unsafe type-casting in a way that makes it possible to
|
||||
* <ul>
|
||||
* <li>Search for such type-casts more easily (just searching for usages of this method)</li>
|
||||
* <li>Avoid type-cast warnings from the compiler</li>
|
||||
* </ul>
|
||||
*/
|
||||
@SuppressWarnings("unchecked")
|
||||
public static <T> T unsafeCast(Object o) {
|
||||
return (T) o;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -68,6 +68,7 @@ public abstract class BaseKeyGenerator extends KeyGenerator {
|
||||
@Override
|
||||
public final List<String> getRecordKeyFieldNames() {
|
||||
// For nested columns, pick top level column name
|
||||
// TODO materialize
|
||||
return getRecordKeyFields().stream().map(k -> {
|
||||
int idx = k.indexOf('.');
|
||||
return idx > 0 ? k.substring(0, idx) : k;
|
||||
|
||||
@@ -75,9 +75,9 @@ import java.util.function.Function;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import static org.apache.hudi.TypeUtils.unsafeCast;
|
||||
import static org.apache.hudi.common.util.DateTimeUtils.instantToMicros;
|
||||
import static org.apache.hudi.common.util.DateTimeUtils.microsToInstant;
|
||||
import static org.apache.hudi.common.util.TypeUtils.unsafeCast;
|
||||
import static org.apache.hudi.common.util.ValidationUtils.checkArgument;
|
||||
import static org.apache.hudi.common.util.ValidationUtils.checkState;
|
||||
import static org.apache.hudi.metadata.HoodieTableMetadata.RECORDKEY_PARTITION_LIST;
|
||||
|
||||
@@ -898,7 +898,7 @@ public class HoodieTestDataGenerator implements AutoCloseable {
|
||||
return anchorTs + r.nextLong() % 259200000L;
|
||||
}
|
||||
|
||||
private static UUID genPseudoRandomUUID(Random r) {
|
||||
public static UUID genPseudoRandomUUID(Random r) {
|
||||
byte[] bytes = new byte[16];
|
||||
r.nextBytes(bytes);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user