[HUDI-114]: added option to overwrite payload implementation in hoodie.properties file
This commit is contained in:
committed by
Balaji Varadarajan
parent
5af3dc6aed
commit
3c90d252cc
@@ -1,61 +0,0 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi;
|
||||
|
||||
import org.apache.hudi.common.util.HoodieAvroUtils;
|
||||
import org.apache.hudi.exception.HoodieException;
|
||||
import org.apache.hudi.exception.HoodieIOException;
|
||||
|
||||
import org.apache.avro.generic.GenericRecord;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
|
||||
* Base class for all AVRO record based payloads, that can be ordered based on a field.
|
||||
*/
|
||||
public abstract class BaseAvroPayload implements Serializable {
|
||||
/**
|
||||
* Avro data extracted from the source converted to bytes.
|
||||
*/
|
||||
protected final byte[] recordBytes;
|
||||
|
||||
/**
|
||||
* For purposes of preCombining.
|
||||
*/
|
||||
protected final Comparable orderingVal;
|
||||
|
||||
/**
|
||||
* Instantiate {@link BaseAvroPayload}.
|
||||
*
|
||||
* @param record Generic record for the payload.
|
||||
* @param orderingVal {@link Comparable} to be used in pre combine.
|
||||
*/
|
||||
public BaseAvroPayload(GenericRecord record, Comparable orderingVal) {
|
||||
try {
|
||||
this.recordBytes = HoodieAvroUtils.avroToBytes(record);
|
||||
} catch (IOException io) {
|
||||
throw new HoodieIOException("Cannot convert GenericRecord to bytes", io);
|
||||
}
|
||||
this.orderingVal = orderingVal;
|
||||
if (orderingVal == null) {
|
||||
throw new HoodieException("Ordering value is null for record: " + record);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,78 +0,0 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi;
|
||||
|
||||
import org.apache.hudi.common.model.HoodieRecordPayload;
|
||||
import org.apache.hudi.common.util.HoodieAvroUtils;
|
||||
import org.apache.hudi.common.util.Option;
|
||||
|
||||
import org.apache.avro.Schema;
|
||||
import org.apache.avro.generic.GenericRecord;
|
||||
import org.apache.avro.generic.IndexedRecord;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* Default payload used for delta streamer.
|
||||
* <p>
|
||||
* 1. preCombine - Picks the latest delta record for a key, based on an ordering field 2.
|
||||
* combineAndGetUpdateValue/getInsertValue - Simply overwrites storage with latest delta record
|
||||
*/
|
||||
public class OverwriteWithLatestAvroPayload extends BaseAvroPayload
|
||||
implements HoodieRecordPayload<OverwriteWithLatestAvroPayload> {
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public OverwriteWithLatestAvroPayload(GenericRecord record, Comparable orderingVal) {
|
||||
super(record, orderingVal);
|
||||
}
|
||||
|
||||
public OverwriteWithLatestAvroPayload(Option<GenericRecord> record) {
|
||||
this(record.get(), (record1) -> 0); // natural order
|
||||
}
|
||||
|
||||
@Override
|
||||
public OverwriteWithLatestAvroPayload preCombine(OverwriteWithLatestAvroPayload another) {
|
||||
// pick the payload with greatest ordering value
|
||||
if (another.orderingVal.compareTo(orderingVal) > 0) {
|
||||
return another;
|
||||
} else {
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Option<IndexedRecord> combineAndGetUpdateValue(IndexedRecord currentValue, Schema schema) throws IOException {
|
||||
|
||||
GenericRecord genericRecord = (GenericRecord) getInsertValue(schema).get();
|
||||
// combining strategy here trivially ignores currentValue on disk and writes this record
|
||||
Object deleteMarker = genericRecord.get("_hoodie_is_deleted");
|
||||
if (deleteMarker instanceof Boolean && (boolean) deleteMarker) {
|
||||
return Option.empty();
|
||||
} else {
|
||||
return Option.of(genericRecord);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Option<IndexedRecord> getInsertValue(Schema schema) throws IOException {
|
||||
return Option.of(HoodieAvroUtils.bytesToAvro(recordBytes, schema));
|
||||
}
|
||||
}
|
||||
@@ -20,6 +20,7 @@ package org.apache.hudi;
|
||||
|
||||
import org.apache.hudi.common.model.HoodieKey;
|
||||
import org.apache.hudi.common.model.HoodieRecord;
|
||||
import org.apache.hudi.common.model.OverwriteWithLatestAvroPayload;
|
||||
import org.apache.hudi.common.util.HoodieAvroUtils;
|
||||
import org.apache.hudi.common.util.Option;
|
||||
import org.apache.hudi.exception.HoodieIOException;
|
||||
|
||||
@@ -18,7 +18,7 @@
|
||||
|
||||
package org.apache.hudi.payload;
|
||||
|
||||
import org.apache.hudi.OverwriteWithLatestAvroPayload;
|
||||
import org.apache.hudi.common.model.OverwriteWithLatestAvroPayload;
|
||||
import org.apache.hudi.common.util.Option;
|
||||
|
||||
import org.apache.avro.Schema;
|
||||
|
||||
@@ -18,6 +18,7 @@
|
||||
package org.apache.hudi
|
||||
|
||||
import org.apache.hudi.common.model.HoodieTableType
|
||||
import org.apache.hudi.common.model.OverwriteWithLatestAvroPayload
|
||||
import org.apache.hudi.hive.SlashEncodedDayPartitionValueExtractor
|
||||
|
||||
/**
|
||||
|
||||
@@ -120,7 +120,7 @@ private[hudi] object HoodieSparkSqlWriter {
|
||||
// Create the table if not present
|
||||
if (!exists) {
|
||||
HoodieTableMetaClient.initTableType(sparkContext.hadoopConfiguration, path.get, storageType,
|
||||
tblName.get, "archived")
|
||||
tblName.get, "archived", parameters(PAYLOAD_CLASS_OPT_KEY))
|
||||
}
|
||||
|
||||
// Create a HoodieWriteClient & issue the write.
|
||||
|
||||
@@ -16,10 +16,10 @@
|
||||
*/
|
||||
|
||||
import org.apache.avro.generic.GenericRecord
|
||||
import org.apache.hudi.common.model.EmptyHoodieRecordPayload
|
||||
import org.apache.hudi.common.model.{EmptyHoodieRecordPayload, OverwriteWithLatestAvroPayload}
|
||||
import org.apache.hudi.common.util.{Option, SchemaTestUtil, TypedProperties}
|
||||
import org.apache.hudi.exception.{HoodieException, HoodieKeyException}
|
||||
import org.apache.hudi.{ComplexKeyGenerator, DataSourceWriteOptions, OverwriteWithLatestAvroPayload, SimpleKeyGenerator}
|
||||
import org.apache.hudi.{ComplexKeyGenerator, DataSourceWriteOptions, SimpleKeyGenerator}
|
||||
import org.junit.Assert._
|
||||
import org.junit.{Before, Test}
|
||||
import org.scalatest.junit.AssertionsForJUnit
|
||||
|
||||
Reference in New Issue
Block a user