1
0

[HUDI-2253] Refactoring a few tests to reduce running time: DeltaStreamer and MultiDeltaStreamer tests, and bulk insert row writer tests (#3371)

Co-authored-by: Sivabalan Narayanan <nsb@Sivabalans-MBP.attlocal.net>
This commit is contained in:
Sivabalan Narayanan
2021-07-30 01:22:26 -04:00
committed by GitHub
parent c2370402ea
commit 7bdae69053
7 changed files with 266 additions and 233 deletions

View File

@@ -74,7 +74,7 @@ public class TestHoodieBulkInsertDataInternalWriter extends
HoodieWriteConfig cfg = getWriteConfig(populateMetaFields);
HoodieTable table = HoodieSparkTable.create(cfg, context, metaClient);
// execute N rounds
for (int i = 0; i < 3; i++) {
for (int i = 0; i < 2; i++) {
String instantTime = "00" + i;
// init writer
HoodieBulkInsertDataInternalWriter writer = new HoodieBulkInsertDataInternalWriter(table, cfg, instantTime, RANDOM.nextInt(100000), RANDOM.nextLong(), RANDOM.nextLong(),
@@ -82,7 +82,7 @@ public class TestHoodieBulkInsertDataInternalWriter extends
int size = 10 + RANDOM.nextInt(1000);
// write N rows to partition1, N rows to partition2 and N rows to partition3 ... Each batch should create a new RowCreateHandle and a new file
int batches = 5;
int batches = 3;
Dataset<Row> totalInputRows = null;
for (int j = 0; j < batches; j++) {

View File

@@ -30,6 +30,7 @@ import org.apache.spark.sql.Row;
import org.apache.spark.sql.catalyst.InternalRow;
import org.apache.spark.sql.sources.v2.DataSourceOptions;
import org.apache.spark.sql.sources.v2.writer.DataWriter;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
@@ -87,7 +88,7 @@ public class TestHoodieDataSourceInternalWriter extends
}
int size = 10 + RANDOM.nextInt(1000);
int batches = 5;
int batches = 2;
Dataset<Row> totalInputRows = null;
for (int j = 0; j < batches; j++) {
String partitionPath = HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[j % 3];
@@ -158,7 +159,7 @@ public class TestHoodieDataSourceInternalWriter extends
int partitionCounter = 0;
// execute N rounds
for (int i = 0; i < 5; i++) {
for (int i = 0; i < 2; i++) {
String instantTime = "00" + i;
// init writer
HoodieDataSourceInternalWriter dataSourceInternalWriter =
@@ -168,7 +169,7 @@ public class TestHoodieDataSourceInternalWriter extends
DataWriter<InternalRow> writer = dataSourceInternalWriter.createWriterFactory().createDataWriter(partitionCounter++, RANDOM.nextLong(), RANDOM.nextLong());
int size = 10 + RANDOM.nextInt(1000);
int batches = 5; // one batch per partition
int batches = 2; // one batch per partition
for (int j = 0; j < batches; j++) {
String partitionPath = HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[j % 3];
@@ -195,6 +196,8 @@ public class TestHoodieDataSourceInternalWriter extends
}
}
// Disabled because it takes up a lot of running time in CI.
@Disabled
@ParameterizedTest
@MethodSource("bulkInsertTypeParams")
public void testLargeWrites(boolean populateMetaFields) throws Exception {