
[HUDI-532] Add java doc for the test classes of hudi test suite (#1901)

Mathieu
2020-08-26 08:49:01 +08:00
committed by GitHub
parent 7e68c42eb1
commit df8f099c99
15 changed files with 67 additions and 0 deletions

View File

@@ -49,6 +49,9 @@ import static org.awaitility.Awaitility.await;
 import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertNotEquals;
 
+/**
+ * Base test class for IT tests; helps to run commands and generate data.
+ */
 public abstract class ITTestBase {
 
   public static final Logger LOG = LogManager.getLogger(ITTestBase.class);

View File

@@ -52,6 +52,9 @@ import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.Test;
 import org.mockito.Mockito;
 
+/**
+ * Unit test against DeltaWriterAdapter, exercising writes to DFS files.
+ */
 public class TestDFSHoodieTestSuiteWriterAdapter extends UtilitiesTestBase {
 
   private FilebasedSchemaProvider schemaProvider;

View File

@@ -48,6 +48,9 @@ import org.junit.jupiter.api.BeforeAll;
 import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.Test;
 
+/**
+ * Unit test for {@link DeltaInputWriter}.
+ */
 public class TestFileDeltaInputWriter extends UtilitiesTestBase {
 
   private FilebasedSchemaProvider schemaProvider;

View File

@@ -30,6 +30,9 @@ import org.apache.hudi.integ.testsuite.dag.nodes.UpsertNode;
 import org.apache.hudi.integ.testsuite.dag.WorkflowDag;
 import org.junit.jupiter.api.Test;
 
+/**
+ * Unit test for the build process of {@link DagNode} and {@link WorkflowDag}.
+ */
 public class TestWorkflowBuilder {
 
   @Test

View File

@@ -23,6 +23,7 @@ import static junit.framework.TestCase.assertTrue;
 import java.util.Arrays;
 import java.util.List;
 import java.util.Map;
 import org.apache.avro.Schema.Field;
 import org.apache.avro.generic.GenericRecord;
 import org.apache.hudi.integ.testsuite.utils.TestUtils;
@@ -34,6 +35,9 @@ import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.Test;
 import scala.Tuple2;
 
+/**
+ * Test cases for {@link UpdateConverter} APIs.
+ */
 public class TestUpdateConverter {
 
   private JavaSparkContext jsc;
@@ -49,11 +53,16 @@ public class TestUpdateConverter {
     jsc.stop();
   }
 
+  /**
+   * Test {@link UpdateConverter} by generating random updates from existing records.
+   */
   @Test
   public void testGenerateUpdateRecordsFromInputRecords() throws Exception {
+    // 1. prepare input records
     JavaRDD<GenericRecord> inputRDD = TestUtils.makeRDD(jsc, 10);
     String schemaStr = inputRDD.take(1).get(0).getSchema().toString();
     int minPayloadSize = 1000;
     // 2. DFS converter reads existing records and generates random updates for the same row keys
     UpdateConverter updateConverter = new UpdateConverter(schemaStr, minPayloadSize,
         Arrays.asList("timestamp"), Arrays.asList("_row_key"));

View File

@@ -29,15 +29,20 @@ import org.apache.hudi.integ.testsuite.dag.nodes.ValidateNode;
 import org.apache.hudi.integ.testsuite.configuration.DeltaConfig.Config;
 import org.apache.spark.api.java.JavaRDD;
 
+/**
+ * An implementation of {@link WorkflowDagGenerator} that generates a complex WorkflowDag.
+ */
 public class ComplexDagGenerator implements WorkflowDagGenerator {
 
   @Override
   public WorkflowDag build() {
+    // root node
     DagNode root = new InsertNode(Config.newBuilder()
         .withNumRecordsToInsert(1000)
         .withNumInsertPartitions(3)
         .withRecordSize(1000).build());
 
+    // child node1
     DagNode child1 = new UpsertNode(Config.newBuilder()
         .withNumRecordsToUpdate(999)
         .withNumRecordsToInsert(1000)
@@ -46,6 +51,7 @@ public class ComplexDagGenerator implements WorkflowDagGenerator {
         .withNumInsertPartitions(1)
         .withRecordSize(10000).build());
 
+    // function used to build ValidateNode
     Function<List<DagNode<JavaRDD<WriteStatus>>>, Boolean> function = (dagNodes) -> {
       DagNode<JavaRDD<WriteStatus>> parent1 = dagNodes.get(0);
       List<WriteStatus> statuses = parent1.getResult().collect();
@@ -63,8 +69,11 @@ public class ComplexDagGenerator implements WorkflowDagGenerator {
           * parent2.getConfig().getNumInsertPartitions() + parent2.getConfig().getNumRecordsUpsert();
       return b1 & b2 & b3;
     };
 
+    // child node2
     DagNode child2 = new ValidateNode(Config.newBuilder().build(), function);
 
+    // create relationship between nodes
     root.addChildNode(child1);
     // child1.addParentNode(root);
     child1.addChildNode(child2);
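
To make the pattern above concrete, here is a minimal sketch of a generator that wires a single insert node to one upsert child. It mirrors the structure of ComplexDagGenerator but is not part of this commit; the WorkflowDag(List) constructor is an assumption based on how generators typically return their root nodes:

    // A hypothetical minimal generator, for illustration only.
    public class SimpleDagGenerator implements WorkflowDagGenerator {
      @Override
      public WorkflowDag build() {
        // one root insert node
        DagNode root = new InsertNode(Config.newBuilder()
            .withNumRecordsToInsert(100)
            .withNumInsertPartitions(1)
            .withRecordSize(1024).build());
        // one upsert child, reached via the root's child edge
        DagNode child = new UpsertNode(Config.newBuilder()
            .withNumRecordsToUpdate(50).build());
        root.addChildNode(child);
        // assumption: WorkflowDag is constructed from the list of root nodes
        return new WorkflowDag(Arrays.asList(root));
      }
    }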

View File

@@ -28,6 +28,9 @@ import org.apache.hudi.integ.testsuite.dag.nodes.InsertNode;
 import org.apache.hudi.integ.testsuite.configuration.DeltaConfig.Config;
 import org.apache.hudi.integ.testsuite.dag.nodes.HiveQueryNode;
 
+/**
+ * An implementation of {@link WorkflowDagGenerator} that generates a WorkflowDag with two Hive nodes as children.
+ */
 public class HiveSyncDagGenerator implements WorkflowDagGenerator {
 
   @Override
@Override @Override

View File

@@ -28,6 +28,10 @@ import org.apache.hudi.integ.testsuite.dag.nodes.InsertNode;
 import org.apache.hudi.integ.testsuite.configuration.DeltaConfig.Config;
 import org.apache.hudi.integ.testsuite.dag.nodes.HiveQueryNode;
 
+/**
+ * An implementation of {@link WorkflowDagGenerator} that generates a WorkflowDag with Hive nodes as children
+ * for a MOR table.
+ */
 public class HiveSyncDagGeneratorMOR implements WorkflowDagGenerator {
 
   @Override
@Override @Override

View File

@@ -29,6 +29,9 @@ import org.apache.hudi.utilities.testutils.UtilitiesTestBase;
 import org.junit.jupiter.api.Assertions;
 import org.junit.jupiter.api.Test;
 
+/**
+ * A test class for DAG utilities.
+ */
 public class TestDagUtils {
 
   private static final String COW_DAG_DOCKER_DEMO_RELATIVE_PATH = "/docker/demo/config/test-suite/complex-dag-cow.yaml";

View File

@@ -25,6 +25,9 @@ import org.apache.hudi.common.util.collection.Pair;
 import org.apache.hudi.utilities.testutils.UtilitiesTestBase;
 import org.junit.jupiter.api.Test;
 
+/**
+ * Unit test for {@link GenericRecordFullPayloadSizeEstimator}.
+ */
 public class TestGenericRecordPayloadEstimator {
 
   private static final String SOURCE_SCHEMA_DOCKER_DEMO_RELATIVE_PATH = "/docker/demo/config/test-suite/source.avsc";

View File

@@ -32,6 +32,9 @@ import org.apache.hudi.avro.HoodieAvroUtils;
 import org.apache.hudi.utilities.testutils.UtilitiesTestBase;
 import org.junit.jupiter.api.Test;
 
+/**
+ * Unit test for {@link GenericRecordFullPayloadGenerator} and {@link GenericRecordPartialPayloadGenerator}.
+ */
 public class TestGenericRecordPayloadGenerator {
 
   private static final String SOURCE_SCHEMA_DOCKER_DEMO_RELATIVE_PATH = "/docker/demo/config/test-suite/source.avsc";

View File

@@ -49,6 +49,9 @@ import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.params.provider.Arguments;
 import org.junit.jupiter.params.provider.MethodSource;
 
+/**
+ * Unit test against {@link HoodieTestSuiteJob}.
+ */
 public class TestHoodieTestSuiteJob extends UtilitiesTestBase {
 
   private static final String TEST_NAME_WITH_PARAMS = "[{index}] Test with useDeltaStreamer={0}, tableType={1}";

View File

@@ -36,6 +36,9 @@ import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.Disabled;
 import org.junit.jupiter.api.Test;
 
+/**
+ * Unit test for {@link DFSAvroDeltaInputReader}, issuing analyzeSingleFile and reading from it.
+ */
 public class TestDFSAvroDeltaInputReader extends UtilitiesTestBase {
 
   @BeforeAll

View File

@@ -40,6 +40,9 @@ import org.junit.jupiter.api.BeforeAll;
 import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.Test;
 
+/**
+ * Unit test for {@link DFSHoodieDatasetInputReader}.
+ */
 public class TestDFSHoodieDatasetInputReader extends UtilitiesTestBase {
 
   @BeforeAll

View File

@@ -28,6 +28,9 @@ import org.apache.spark.api.java.JavaRDD;
 import org.apache.spark.api.java.JavaSparkContext;
 import org.apache.spark.sql.SparkSession;
 
+/**
+ * A utility class for testing purposes.
+ */
 public class TestUtils {
 
   /**
@@ -45,6 +48,15 @@ public class TestUtils {
     return dataGenerator.generateGenericRecords(numRecords);
   }
 
+  /**
+   * Helper method to create Avro files and save them under the given base path.
+   *
+   * @param jsc Java Spark context.
+   * @param sparkSession Spark session, the entry point to programming Spark with the Dataset and DataFrame API.
+   * @param basePath The base path where files are written.
+   * @param numFiles The number of files to create.
+   * @param numRecordsPerFile The number of records per file.
+   */
   public static void createAvroFiles(JavaSparkContext jsc, SparkSession sparkSession, String basePath, int numFiles,
       int numRecordsPerFile) {
     Schema schema = HoodieTestDataGenerator.AVRO_SCHEMA;
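
As a usage illustration, a minimal sketch of a caller for these utilities follows. The local Spark setup and the temp path are assumptions for the example, not part of this commit:

    // Hypothetical caller, for illustration only.
    SparkSession spark = SparkSession.builder()
        .master("local[2]").appName("test-utils-demo").getOrCreate();
    JavaSparkContext jsc = new JavaSparkContext(spark.sparkContext());
    String basePath = "/tmp/test-avro-files"; // hypothetical temp path
    // Write 2 Avro files with 10 records each under basePath.
    TestUtils.createAvroFiles(jsc, spark, basePath, 2, 10);
    // Build an RDD of 10 generated GenericRecords, as used in TestUpdateConverter.
    JavaRDD<GenericRecord> records = TestUtils.makeRDD(jsc, 10);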