[HUDI-1364] Add HoodieJavaEngineContext to hudi-java-client (#2222)
This commit is contained in:
@@ -0,0 +1,86 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi.client.common;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hudi.client.common.function.SerializableConsumer;
|
||||
import org.apache.hudi.client.common.function.SerializableFunction;
|
||||
import org.apache.hudi.client.common.function.SerializablePairFunction;
|
||||
import org.apache.hudi.common.config.SerializableConfiguration;
|
||||
import org.apache.hudi.common.util.Option;
|
||||
|
||||
import scala.Tuple2;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import static java.util.stream.Collectors.toList;
|
||||
import static org.apache.hudi.client.common.function.FunctionWrapper.throwingFlatMapWrapper;
|
||||
import static org.apache.hudi.client.common.function.FunctionWrapper.throwingForeachWrapper;
|
||||
import static org.apache.hudi.client.common.function.FunctionWrapper.throwingMapToPairWrapper;
|
||||
import static org.apache.hudi.client.common.function.FunctionWrapper.throwingMapWrapper;
|
||||
|
||||
/**
|
||||
* A java engine implementation of HoodieEngineContext.
|
||||
*/
|
||||
public class HoodieJavaEngineContext extends HoodieEngineContext {
|
||||
|
||||
public HoodieJavaEngineContext(Configuration conf, TaskContextSupplier taskContextSupplier) {
|
||||
super(new SerializableConfiguration(conf), taskContextSupplier);
|
||||
}
|
||||
|
||||
@Override
|
||||
public <I, O> List<O> map(List<I> data, SerializableFunction<I, O> func, int parallelism) {
|
||||
return data.stream().parallel().map(throwingMapWrapper(func)).collect(toList());
|
||||
}
|
||||
|
||||
@Override
|
||||
public <I, O> List<O> flatMap(List<I> data, SerializableFunction<I, Stream<O>> func, int parallelism) {
|
||||
return data.stream().parallel().flatMap(throwingFlatMapWrapper(func)).collect(toList());
|
||||
}
|
||||
|
||||
@Override
|
||||
public <I> void foreach(List<I> data, SerializableConsumer<I> consumer, int parallelism) {
|
||||
data.stream().forEach(throwingForeachWrapper(consumer));
|
||||
}
|
||||
|
||||
@Override
|
||||
public <I, K, V> Map<K, V> mapToPair(List<I> data, SerializablePairFunction<I, K, V> func, Integer parallelism) {
|
||||
return data.stream().map(throwingMapToPairWrapper(func)).collect(
|
||||
Collectors.toMap(Tuple2::_1, Tuple2::_2, (oldVal, newVal) -> newVal)
|
||||
);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setProperty(EngineProperty key, String value) {
|
||||
// no operation for now
|
||||
}
|
||||
|
||||
@Override
|
||||
public Option<String> getProperty(EngineProperty key) {
|
||||
return Option.empty();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setJobStatus(String activeModule, String activityDescription) {
|
||||
// no operation for now
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,23 @@
|
||||
###
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
###
|
||||
log4j.rootLogger=INFO, A1
|
||||
# A1 is set to be a ConsoleAppender.
|
||||
log4j.appender.A1=org.apache.log4j.ConsoleAppender
|
||||
# A1 uses PatternLayout.
|
||||
log4j.appender.A1.layout=org.apache.log4j.PatternLayout
|
||||
log4j.appender.A1.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n
|
||||
@@ -0,0 +1,85 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi.client.common;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hudi.DummyTaskContextSupplier;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.api.Assertions;
|
||||
|
||||
import scala.Tuple2;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
public class TestHoodieJavaEngineContext {
|
||||
private HoodieJavaEngineContext context =
|
||||
new HoodieJavaEngineContext(new Configuration(), new DummyTaskContextSupplier());
|
||||
|
||||
@Test
|
||||
public void testMap() {
|
||||
List<Integer> mapList = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
|
||||
List<Integer> result = context.map(mapList, x -> x + 1, 2);
|
||||
result.removeAll(mapList);
|
||||
|
||||
Assertions.assertEquals(1, result.size());
|
||||
Assertions.assertEquals(11, result.get(0));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFlatMap() {
|
||||
List<String> list1 = Arrays.asList("a", "b", "c");
|
||||
List<String> list2 = Arrays.asList("d", "e", "f");
|
||||
List<String> list3 = Arrays.asList("g", "h", "i");
|
||||
|
||||
List<List<String>> inputList = new ArrayList<>();
|
||||
inputList.add(list1);
|
||||
inputList.add(list2);
|
||||
inputList.add(list3);
|
||||
|
||||
List<String> result = context.flatMap(inputList, Collection::stream, 2);
|
||||
|
||||
Assertions.assertEquals(9, result.size());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testForeach() {
|
||||
List<Integer> mapList = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
|
||||
List<Integer> result = new ArrayList<>(10);
|
||||
context.foreach(mapList, result::add, 2);
|
||||
|
||||
Assertions.assertEquals(result.size(), mapList.size());
|
||||
Assertions.assertTrue(result.containsAll(mapList));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMapToPair() {
|
||||
List<String> mapList = Arrays.asList("hudi_flink", "hudi_spark", "hudi_java");
|
||||
|
||||
Map<String, String> resultMap = context.mapToPair(mapList, x -> {
|
||||
String[] splits = x.split("_");
|
||||
return Tuple2.apply(splits[0], splits[1]);
|
||||
}, 2);
|
||||
|
||||
Assertions.assertNotNull(resultMap.get("hudi"));
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,31 @@
|
||||
###
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
###
|
||||
log4j.rootLogger=WARN, CONSOLE
|
||||
log4j.logger.org.apache=INFO
|
||||
log4j.logger.org.apache.hudi=DEBUG
|
||||
log4j.logger.org.apache.hadoop.hbase=ERROR
|
||||
|
||||
# CONSOLE is set to be a ConsoleAppender.
|
||||
log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender
|
||||
# CONSOLE uses PatternLayout.
|
||||
log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout
|
||||
log4j.appender.CONSOLE.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n
|
||||
log4j.appender.CONSOLE.filter.a=org.apache.log4j.varia.LevelRangeFilter
|
||||
log4j.appender.CONSOLE.filter.a.AcceptOnMatch=true
|
||||
log4j.appender.CONSOLE.filter.a.LevelMin=WARN
|
||||
log4j.appender.CONSOLE.filter.a.LevelMax=FATAL
|
||||
Reference in New Issue
Block a user