1
0

[REVERT] "[HUDI-1058] Make delete marker configurable (#1819)" (#1914)

This reverts commit 433d7d2c98.
This commit is contained in:
Sivabalan Narayanan
2020-08-04 18:20:38 -04:00
committed by GitHub
parent 539621bd33
commit ab11ba43e1
14 changed files with 43 additions and 266 deletions

View File

@@ -1,97 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hudi.utilities.functional;
import org.apache.avro.generic.GenericRecord;
import org.apache.hadoop.fs.Path;
import org.apache.hudi.common.config.TypedProperties;
import org.apache.hudi.common.testutils.HoodieTestDataGenerator;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer;
import org.apache.hudi.utilities.sources.ParquetDFSSource;
import org.apache.hudi.utilities.testutils.UtilitiesTestBase;
import org.apache.spark.sql.Row;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import java.util.List;
import static org.junit.jupiter.api.Assertions.assertEquals;
public class TestDeltaStreamerWithOverwriteLatestAvroPayload extends UtilitiesTestBase {
private static String PARQUET_SOURCE_ROOT;
private static final String PROPS_FILENAME_TEST_PARQUET = "test-parquet-dfs-source.properties";
@BeforeAll
public static void initClass() throws Exception {
UtilitiesTestBase.initClass(true);
PARQUET_SOURCE_ROOT = dfsBasePath + "/parquetFiles";
// prepare the configs.
UtilitiesTestBase.Helpers.copyToDFS("delta-streamer-config/base.properties", dfs, dfsBasePath + "/base.properties");
UtilitiesTestBase.Helpers.copyToDFS("delta-streamer-config/sql-transformer.properties", dfs,
dfsBasePath + "/sql-transformer.properties");
UtilitiesTestBase.Helpers.copyToDFS("delta-streamer-config/source.avsc", dfs, dfsBasePath + "/source.avsc");
UtilitiesTestBase.Helpers.copyToDFS("delta-streamer-config/source-flattened.avsc", dfs, dfsBasePath + "/source-flattened.avsc");
UtilitiesTestBase.Helpers.copyToDFS("delta-streamer-config/target.avsc", dfs, dfsBasePath + "/target.avsc");
}
@Test
public void testOverwriteLatestAvroPayload() throws Exception {
// test defaultDeleteMarkerField
this.testOverwriteLatestAvroPayload(null);
// test userDefinedDeleteMarkerField
this.testOverwriteLatestAvroPayload("user_defined_delete_marker_field");
}
private void testOverwriteLatestAvroPayload(String deleteMarkerField) throws Exception {
String path = PARQUET_SOURCE_ROOT + "/1.parquet";
List<GenericRecord> records = HoodieTestDataGenerator.generateGenericRecords(5, false, 0);
Helpers.saveParquetToDFS(records, new Path(path));
TypedProperties parquetProps = new TypedProperties();
parquetProps.setProperty("include", "base.properties");
parquetProps.setProperty("hoodie.datasource.write.recordkey.field", "_row_key");
parquetProps.setProperty("hoodie.datasource.write.partitionpath.field", "not_there");
parquetProps.setProperty("hoodie.deltastreamer.source.dfs.root", PARQUET_SOURCE_ROOT);
if (deleteMarkerField != null) {
parquetProps.setProperty(HoodieWriteConfig.DELETE_MARKER_FIELD_PROP, deleteMarkerField);
}
Helpers.savePropsToDFS(parquetProps, dfs, dfsBasePath + "/" + PROPS_FILENAME_TEST_PARQUET);
String tableBasePath = dfsBasePath + "/test_overwrite_lastest_avro_payload_table";
HoodieDeltaStreamer deltaStreamer = new HoodieDeltaStreamer(
TestHoodieDeltaStreamer.TestHelpers.makeConfig(tableBasePath, HoodieDeltaStreamer.Operation.INSERT, ParquetDFSSource.class.getName(),
null, PROPS_FILENAME_TEST_PARQUET, false,
false, 100000, false, null, null, "timestamp"), jsc);
deltaStreamer.sync();
TestHoodieDeltaStreamer.TestHelpers.assertRecordCount(5, tableBasePath + "/*/*.parquet", sqlContext);
String path2 = PARQUET_SOURCE_ROOT + "/2.parquet";
List<GenericRecord> records2 = HoodieTestDataGenerator.generateGenericRecords(4, true, 1);
Helpers.saveParquetToDFS(records2, new Path(path2));
deltaStreamer.sync();
List<Row> rows = sqlContext.read().format("org.apache.hudi").load(tableBasePath + "/*/*.parquet").collectAsList();
assertEquals(1, rows.size());
assertEquals(records.get(4).get("_row_key"), rows.get(0).getString(2));
}
}

View File

@@ -55,10 +55,6 @@
},{
"name" : "nation",
"type" : "bytes"
},{
"name" : "user_defined_delete_marker_field",
"type" : "boolean",
"default" : false
},{
"name" : "current_date",
"type" : {

View File

@@ -16,4 +16,4 @@
# limitations under the License.
###
include=base.properties
hoodie.deltastreamer.transformer.sql=SELECT a.timestamp, a._row_key, a.rider, a.driver, a.begin_lat, a.begin_lon, a.end_lat, a.end_lon, a.distance_in_meters, a.seconds_since_epoch, a.weight, a.nation, a.user_defined_delete_marker_field, a.current_date, a.current_ts, a.height, a.city_to_state, a.fare, a.tip_history, a.`_hoodie_is_deleted`, CAST(1.0 AS DOUBLE) AS haversine_distance FROM <SRC> a
hoodie.deltastreamer.transformer.sql=SELECT a.timestamp, a._row_key, a.rider, a.driver, a.begin_lat, a.begin_lon, a.end_lat, a.end_lon, a.distance_in_meters, a.seconds_since_epoch, a.weight, a.nation, a.current_date, a.current_ts, a.height, a.city_to_state, a.fare, a.tip_history, a.`_hoodie_is_deleted`, CAST(1.0 AS DOUBLE) AS haversine_distance FROM <SRC> a

View File

@@ -55,10 +55,6 @@
}, {
"name" : "nation",
"type" : "bytes"
},{
"name" : "user_defined_delete_marker_field",
"type" : "boolean",
"default" : false
},{
"name" : "current_date",
"type" : {