1
0

[HUDI-2757] Implement Hudi AWS Glue sync (#5076)

This commit is contained in:
Raymond Xu
2022-03-28 11:54:59 -07:00
committed by GitHub
parent 4ed84b216d
commit 6ccbae4d2a
25 changed files with 1151 additions and 204 deletions

View File

@@ -22,6 +22,7 @@ import org.apache.hudi.common.util.collection.Pair;
import java.lang.reflect.Array;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
@@ -32,12 +33,21 @@ import java.util.Objects;
import java.util.Properties;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import java.util.stream.Stream;
public class CollectionUtils {
public static final Properties EMPTY_PROPERTIES = new Properties();
/**
 * Checks whether the given collection is absent or holds no elements.
 *
 * @param c collection to inspect; may be {@code null}
 * @return {@code true} if {@code c} is {@code null} or empty, {@code false} otherwise
 */
public static boolean isNullOrEmpty(Collection<?> c) {
  // Guard against null first so that isEmpty() is never invoked on a null reference.
  if (Objects.isNull(c)) {
    return true;
  }
  return c.isEmpty();
}
/**
 * Checks whether the given collection is present and holds at least one element.
 *
 * @param c collection to inspect; may be {@code null}
 * @return {@code true} if {@code c} is neither {@code null} nor empty, {@code false} otherwise
 */
public static boolean nonEmpty(Collection<?> c) {
  // Logical negation of "null or empty", spelled out directly.
  return Objects.nonNull(c) && !c.isEmpty();
}
/**
* Combines provided arrays into one
*/
@@ -105,6 +115,21 @@ public class CollectionUtils {
return diff;
}
/**
 * Splits a list into consecutive batches of at most {@code batchSize} elements and exposes
 * them as a stream. Every batch except possibly the last holds exactly {@code batchSize}
 * elements; the last batch holds the remainder.
 *
 * <p>Each batch is whatever {@code list.subList(from, to)} returns, so no elements are copied
 * by this method itself.
 *
 * @param list      the list to split; must not be {@code null}
 * @param batchSize maximum number of elements per batch; must be positive
 * @param <E>       element type
 * @return a stream of batches in list order, or an empty stream when {@code list} is empty
 */
public static <E> Stream<List<E>> batchesAsStream(List<E> list, int batchSize) {
  ValidationUtils.checkArgument(batchSize > 0, "batch size must be positive.");
  final int size = list.size();
  if (size < 1) {
    return Stream.empty();
  }
  // Zero-based index of the final batch; all batches before it are completely filled.
  final int lastBatchIndex = (size - 1) / batchSize;
  return IntStream.rangeClosed(0, lastBatchIndex).mapToObj(idx -> {
    int from = idx * batchSize;
    // Only the final batch may be shorter, so it is clamped to the end of the list.
    int to = idx < lastBatchIndex ? from + batchSize : size;
    return list.subList(from, to);
  });
}
/**
 * Splits a list into consecutive batches of at most {@code batchSize} elements and returns
 * them as a list. This collects the result of
 * {@link #batchesAsStream(List, int)} into a {@link List}.
 *
 * @param list      the list to split
 * @param batchSize maximum number of elements per batch
 * @param <E>       element type
 * @return the batches in list order
 */
public static <E> List<List<E>> batches(List<E> list, int batchSize) {
  Stream<List<E>> batchStream = batchesAsStream(list, batchSize);
  return batchStream.collect(Collectors.toList());
}
/**
* Determines whether two iterators contain equal elements in the same order. More specifically,
* this method returns {@code true} if {@code iterator1} and {@code iterator2} contain the same

View File

@@ -0,0 +1,34 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.hudi.common.util;
import java.util.Map;
import java.util.Objects;
/**
 * Null-safe emptiness checks for {@link Map} instances.
 */
public class MapUtils {

  /**
   * Checks whether the given map is absent or holds no entries.
   *
   * @param m map to inspect; may be {@code null}
   * @return {@code true} if {@code m} is {@code null} or empty, {@code false} otherwise
   */
  public static boolean isNullOrEmpty(Map<?, ?> m) {
    // Guard against null first so that isEmpty() is never invoked on a null reference.
    if (Objects.isNull(m)) {
      return true;
    }
    return m.isEmpty();
  }

  /**
   * Checks whether the given map is present and holds at least one entry.
   *
   * @param m map to inspect; may be {@code null}
   * @return {@code true} if {@code m} is neither {@code null} nor empty, {@code false} otherwise
   */
  public static boolean nonEmpty(Map<?, ?> m) {
    return Objects.nonNull(m) && !m.isEmpty();
  }
}

View File

@@ -0,0 +1,56 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.hudi.common.util;
import org.junit.jupiter.api.Test;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import static org.apache.hudi.common.util.CollectionUtils.batches;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertThrows;
/**
 * Tests for the batching helper in {@link CollectionUtils}.
 */
class TestCollectionUtils {

  /**
   * Exercises {@code batches} with invalid batch sizes, an empty input, an input that divides
   * evenly into batches, and an input that leaves a shorter trailing batch.
   */
  @Test
  void getBatchesFromList() {
    // A negative or zero batch size must be rejected, even for an empty input list.
    assertThrows(IllegalArgumentException.class, () -> batches(Collections.emptyList(), -1));
    assertThrows(IllegalArgumentException.class, () -> batches(Collections.emptyList(), 0));

    // An empty input yields no batches at all.
    assertEquals(Collections.emptyList(), batches(Collections.emptyList(), 1));

    // Six elements in batches of three: two full batches.
    List<List<Integer>> evenSplit = batches(Arrays.asList(1, 2, 3, 4, 5, 6), 3);
    assertEquals(2, evenSplit.size());
    assertEquals(Arrays.asList(1, 2, 3), evenSplit.get(0));
    assertEquals(Arrays.asList(4, 5, 6), evenSplit.get(1));

    // Six elements in batches of five: one full batch plus a single-element remainder.
    List<List<Integer>> unevenSplit = batches(Arrays.asList(1, 2, 3, 4, 5, 6), 5);
    assertEquals(2, unevenSplit.size());
    assertEquals(Arrays.asList(1, 2, 3, 4, 5), unevenSplit.get(0));
    assertEquals(Collections.singletonList(6), unevenSplit.get(1));
  }
}