1
0

[HUDI-1290] Add Debezium Source for deltastreamer (#4063)

* add source for postgres debezium

* Add tests for debezium payload

* Fix test

* Fix test

* Add tests for debezium source

* Add tests for debezium source

* Fix schema for debezium

* Fix checkstyle issues

* Fix config issue for schema registry

* Add mysql source for debezium

* Fix checkstyle issues an tests

* Improve code for merging toasted values

* Improve code for merging toasted values

Co-authored-by: Rajesh Mahindra <rmahindra@Rajeshs-MacBook-Pro.local>
This commit is contained in:
rmahindra123
2021-11-24 17:57:02 -08:00
committed by GitHub
parent abc0175cf7
commit 83f8ed2ae3
13 changed files with 1520 additions and 1 deletions

View File

@@ -0,0 +1,89 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hudi.common.model.debezium;
import org.apache.hudi.common.model.OverwriteWithLatestAvroPayload;
import org.apache.hudi.common.util.Option;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.generic.IndexedRecord;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import java.io.IOException;
/**
* Base class that provides support for seamlessly applying changes captured via Debezium.
* <p>
* Debezium change event types are determined for the op field in the payload
* <p>
* - For inserts, op=i
* - For deletes, op=d
* - For updates, op=u
* - For snapshort inserts, op=r
* <p>
* This payload implementation will issue matching insert, delete, updates against the hudi table
*/
public abstract class AbstractDebeziumAvroPayload extends OverwriteWithLatestAvroPayload {
private static final Logger LOG = LogManager.getLogger(AbstractDebeziumAvroPayload.class);
public AbstractDebeziumAvroPayload(GenericRecord record, Comparable orderingVal) {
super(record, orderingVal);
}
public AbstractDebeziumAvroPayload(Option<GenericRecord> record) {
super(record);
}
@Override
public Option<IndexedRecord> getInsertValue(Schema schema) throws IOException {
IndexedRecord insertRecord = getInsertRecord(schema);
return handleDeleteOperation(insertRecord);
}
@Override
public Option<IndexedRecord> combineAndGetUpdateValue(IndexedRecord currentValue, Schema schema) throws IOException {
// Step 1: If the time occurrence of the current record in storage is higher than the time occurrence of the
// insert record (including a delete record), pick the current record.
if (shouldPickCurrentRecord(currentValue, getInsertRecord(schema), schema)) {
return Option.of(currentValue);
}
// Step 2: Pick the insert record (as a delete record if its a deleted event)
return getInsertValue(schema);
}
protected abstract boolean shouldPickCurrentRecord(IndexedRecord currentRecord, IndexedRecord insertRecord, Schema schema) throws IOException;
private Option<IndexedRecord> handleDeleteOperation(IndexedRecord insertRecord) {
boolean delete = false;
if (insertRecord instanceof GenericRecord) {
GenericRecord record = (GenericRecord) insertRecord;
Object value = record.get(DebeziumConstants.FLATTENED_OP_COL_NAME);
delete = value != null && value.toString().equalsIgnoreCase(DebeziumConstants.DELETE_OP);
}
return delete ? Option.empty() : Option.of(insertRecord);
}
private IndexedRecord getInsertRecord(Schema schema) throws IOException {
return super.getInsertValue(schema).get();
}
}

View File

@@ -0,0 +1,81 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hudi.common.model.debezium;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
/**
* Constants used by {@link DebeziumSource} and {@link DebeziumAvroPayload}.
*/
public class DebeziumConstants {
// INPUT COLUMNS
public static final String INCOMING_BEFORE_FIELD = "before";
public static final String INCOMING_AFTER_FIELD = "after";
public static final String INCOMING_SOURCE_FIELD = "source";
public static final String INCOMING_OP_FIELD = "op";
public static final String INCOMING_TS_MS_FIELD = "ts_ms";
public static final String INCOMING_SOURCE_NAME_FIELD = "source.name";
public static final String INCOMING_SOURCE_TS_MS_FIELD = "source.ts_ms";
public static final String INCOMING_SOURCE_TXID_FIELD = "source.txId";
// INPUT COLUMNS SPECIFIC TO MYSQL
public static final String INCOMING_SOURCE_FILE_FIELD = "source.file";
public static final String INCOMING_SOURCE_POS_FIELD = "source.pos";
public static final String INCOMING_SOURCE_ROW_FIELD = "source.row";
// INPUT COLUMNS SPECIFIC TO POSTGRES
public static final String INCOMING_SOURCE_LSN_FIELD = "source.lsn";
public static final String INCOMING_SOURCE_XMIN_FIELD = "source.xmin";
// OUTPUT COLUMNS
public static final String FLATTENED_OP_COL_NAME = "_change_operation_type";
public static final String UPSTREAM_PROCESSING_TS_COL_NAME = "_upstream_event_processed_ts_ms";
public static final String FLATTENED_SHARD_NAME = "db_shard_source_partition";
public static final String FLATTENED_TS_COL_NAME = "_event_origin_ts_ms";
public static final String FLATTENED_TX_ID_COL_NAME = "_event_tx_id";
// OUTPUT COLUMNS SPECIFIC TO MYSQL
public static final String FLATTENED_FILE_COL_NAME = "_event_bin_file";
public static final String FLATTENED_POS_COL_NAME = "_event_pos";
public static final String FLATTENED_ROW_COL_NAME = "_event_row";
public static final String ADDED_SEQ_COL_NAME = "_event_seq";
// OUTPUT COLUMNS SPECIFIC TO POSTGRES
public static final String FLATTENED_LSN_COL_NAME = "_event_lsn";
public static final String FLATTENED_XMIN_COL_NAME = "_event_xmin";
// Other Constants
public static final String DELETE_OP = "d";
// List of meta data columns
public static List<String> META_COLUMNS = Collections.unmodifiableList(Arrays.asList(
FLATTENED_OP_COL_NAME,
UPSTREAM_PROCESSING_TS_COL_NAME,
FLATTENED_TS_COL_NAME,
FLATTENED_TX_ID_COL_NAME,
FLATTENED_LSN_COL_NAME,
FLATTENED_XMIN_COL_NAME,
FLATTENED_SHARD_NAME
));
}

View File

@@ -0,0 +1,67 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hudi.common.model.debezium;
import org.apache.hudi.common.util.Option;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.generic.IndexedRecord;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import java.io.IOException;
/**
* Provides support for seamlessly applying changes captured via Debezium for MysqlDB.
* <p>
* Debezium change event types are determined for the op field in the payload
* <p>
* - For inserts, op=i
* - For deletes, op=d
* - For updates, op=u
* - For snapshort inserts, op=r
* <p>
* This payload implementation will issue matching insert, delete, updates against the hudi table
*/
public class MySqlDebeziumAvroPayload extends AbstractDebeziumAvroPayload {
private static final Logger LOG = LogManager.getLogger(MySqlDebeziumAvroPayload.class);
public MySqlDebeziumAvroPayload(GenericRecord record, Comparable orderingVal) {
super(record, orderingVal);
}
public MySqlDebeziumAvroPayload(Option<GenericRecord> record) {
super(record);
}
private String extractSeq(IndexedRecord record) {
return ((CharSequence) ((GenericRecord) record).get(DebeziumConstants.ADDED_SEQ_COL_NAME)).toString();
}
@Override
protected boolean shouldPickCurrentRecord(IndexedRecord currentRecord, IndexedRecord insertRecord, Schema schema) throws IOException {
String currentSourceSeq = extractSeq(currentRecord);
String insertSourceSeq = extractSeq(insertRecord);
// Pick the current value in storage only if its Seq (file+pos) is latest
// compared to the Seq (file+pos) of the insert value
return insertSourceSeq.compareTo(currentSourceSeq) < 0;
}
}

View File

@@ -0,0 +1,130 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hudi.common.model.debezium;
import org.apache.hudi.common.util.Option;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.generic.IndexedRecord;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.util.List;
/**
* Provides support for seamlessly applying changes captured via Debezium for PostgresDB.
* <p>
* Debezium change event types are determined for the op field in the payload
* <p>
* - For inserts, op=i
* - For deletes, op=d
* - For updates, op=u
* - For snapshort inserts, op=r
* <p>
* This payload implementation will issue matching insert, delete, updates against the hudi table
*/
public class PostgresDebeziumAvroPayload extends AbstractDebeziumAvroPayload {
private static final Logger LOG = LogManager.getLogger(PostgresDebeziumAvroPayload.class);
public static final String DEBEZIUM_TOASTED_VALUE = "__debezium_unavailable_value";
public PostgresDebeziumAvroPayload(GenericRecord record, Comparable orderingVal) {
super(record, orderingVal);
}
public PostgresDebeziumAvroPayload(Option<GenericRecord> record) {
super(record);
}
private Long extractLSN(IndexedRecord record) {
GenericRecord genericRecord = (GenericRecord) record;
return (Long) genericRecord.get(DebeziumConstants.FLATTENED_LSN_COL_NAME);
}
@Override
protected boolean shouldPickCurrentRecord(IndexedRecord currentRecord, IndexedRecord insertRecord, Schema schema) throws IOException {
Long currentSourceLSN = extractLSN(currentRecord);
Long insertSourceLSN = extractLSN(insertRecord);
// Pick the current value in storage only if its LSN is latest compared to the LSN of the insert value
return insertSourceLSN < currentSourceLSN;
}
@Override
public Option<IndexedRecord> combineAndGetUpdateValue(IndexedRecord currentValue, Schema schema) throws IOException {
// Specific to Postgres: If the updated record has TOASTED columns,
// we will need to keep the previous value for those columns
// see https://debezium.io/documentation/reference/connectors/postgresql.html#postgresql-toasted-values
Option<IndexedRecord> insertOrDeleteRecord = super.combineAndGetUpdateValue(currentValue, schema);
if (insertOrDeleteRecord.isPresent()) {
mergeToastedValuesIfPresent(insertOrDeleteRecord.get(), currentValue);
}
return insertOrDeleteRecord;
}
private void mergeToastedValuesIfPresent(IndexedRecord incomingRecord, IndexedRecord currentRecord) {
List<Schema.Field> fields = incomingRecord.getSchema().getFields();
fields.forEach(field -> {
// There are only four avro data types that have unconstrained sizes, which are
// NON-NULLABLE STRING, NULLABLE STRING, NON-NULLABLE BYTES, NULLABLE BYTES
if (((GenericData.Record) incomingRecord).get(field.name()) != null
&& (containsStringToastedValues(incomingRecord, field) || containsBytesToastedValues(incomingRecord, field))) {
((GenericData.Record) incomingRecord).put(field.name(), ((GenericData.Record) currentRecord).get(field.name()));
}
});
}
/**
* Returns true if a column is either of type string or a union of one or more strings that contain a debezium toasted value.
*
* @param incomingRecord The incoming avro record
* @param field the column of interest
* @return
*/
private boolean containsStringToastedValues(IndexedRecord incomingRecord, Schema.Field field) {
return ((field.schema().getType() == Schema.Type.STRING
|| (field.schema().getType() == Schema.Type.UNION && field.schema().getTypes().stream().anyMatch(s -> s.getType() == Schema.Type.STRING)))
// Check length first as an optimization
&& ((CharSequence) ((GenericData.Record) incomingRecord).get(field.name())).length() == DEBEZIUM_TOASTED_VALUE.length()
&& DEBEZIUM_TOASTED_VALUE.equals(((CharSequence) ((GenericData.Record) incomingRecord).get(field.name())).toString()));
}
/**
* Returns true if a column is either of type bytes or a union of one or more bytes that contain a debezium toasted value.
*
* @param incomingRecord The incoming avro record
* @param field the column of interest
* @return
*/
private boolean containsBytesToastedValues(IndexedRecord incomingRecord, Schema.Field field) {
return ((field.schema().getType() == Schema.Type.BYTES
|| (field.schema().getType() == Schema.Type.UNION && field.schema().getTypes().stream().anyMatch(s -> s.getType() == Schema.Type.BYTES)))
// Check length first as an optimization
&& ((ByteBuffer) ((GenericData.Record) incomingRecord).get(field.name())).array().length == DEBEZIUM_TOASTED_VALUE.length()
&& DEBEZIUM_TOASTED_VALUE.equals(new String(((ByteBuffer) ((GenericData.Record) incomingRecord).get(field.name())).array(), StandardCharsets.UTF_8)));
}
}

View File

@@ -0,0 +1,131 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or mo contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hudi.common.model.debezium;
import org.apache.hudi.common.util.Option;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.generic.IndexedRecord;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import java.io.IOException;
import java.util.Arrays;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
public class TestMySqlDebeziumAvroPayload {
private static final String KEY_FIELD_NAME = "Key";
private Schema avroSchema;
@BeforeEach
void setUp() {
this.avroSchema = Schema.createRecord(Arrays.asList(
new Schema.Field(KEY_FIELD_NAME, Schema.create(Schema.Type.INT), "", 0),
new Schema.Field(DebeziumConstants.FLATTENED_OP_COL_NAME, Schema.create(Schema.Type.STRING), "", null),
new Schema.Field(DebeziumConstants.ADDED_SEQ_COL_NAME, Schema.create(Schema.Type.STRING), "", null)
));
}
@Test
public void testInsert() throws IOException {
GenericRecord insertRecord = createRecord(0, Operation.INSERT, "00001.111");
MySqlDebeziumAvroPayload payload = new MySqlDebeziumAvroPayload(insertRecord, "00001.111");
validateRecord(payload.getInsertValue(avroSchema), 0, Operation.INSERT, "00001.111");
}
@Test
public void testPreCombine() {
GenericRecord insertRecord = createRecord(0, Operation.INSERT, "00002.111");
MySqlDebeziumAvroPayload insertPayload = new MySqlDebeziumAvroPayload(insertRecord, "00002.111");
GenericRecord updateRecord = createRecord(0, Operation.UPDATE, "00001.111");
MySqlDebeziumAvroPayload updatePayload = new MySqlDebeziumAvroPayload(updateRecord, "00001.111");
GenericRecord deleteRecord = createRecord(0, Operation.DELETE, "00002.11");
MySqlDebeziumAvroPayload deletePayload = new MySqlDebeziumAvroPayload(deleteRecord, "00002.11");
assertEquals(insertPayload, insertPayload.preCombine(updatePayload));
assertEquals(deletePayload, deletePayload.preCombine(updatePayload));
assertEquals(insertPayload, deletePayload.preCombine(insertPayload));
}
@Test
public void testMergeWithUpdate() throws IOException {
GenericRecord updateRecord = createRecord(1, Operation.UPDATE, "00002.11");
MySqlDebeziumAvroPayload payload = new MySqlDebeziumAvroPayload(updateRecord, "00002.11");
GenericRecord existingRecord = createRecord(1, Operation.INSERT, "00001.111");
Option<IndexedRecord> mergedRecord = payload.combineAndGetUpdateValue(existingRecord, avroSchema);
validateRecord(mergedRecord, 1, Operation.UPDATE, "00002.11");
GenericRecord lateRecord = createRecord(1, Operation.UPDATE, "00000.222");
payload = new MySqlDebeziumAvroPayload(lateRecord, "00000.222");
mergedRecord = payload.combineAndGetUpdateValue(existingRecord, avroSchema);
validateRecord(mergedRecord, 1, Operation.INSERT, "00001.111");
}
@Test
public void testMergeWithDelete() throws IOException {
GenericRecord deleteRecord = createRecord(2, Operation.DELETE, "00002.11");
MySqlDebeziumAvroPayload payload = new MySqlDebeziumAvroPayload(deleteRecord, "00002.11");
GenericRecord existingRecord = createRecord(2, Operation.UPDATE, "00001.111");
Option<IndexedRecord> mergedRecord = payload.combineAndGetUpdateValue(existingRecord, avroSchema);
// expect nothing to be committed to table
assertFalse(mergedRecord.isPresent());
GenericRecord lateRecord = createRecord(2, Operation.DELETE, "00000.222");
payload = new MySqlDebeziumAvroPayload(lateRecord, "00000.222");
mergedRecord = payload.combineAndGetUpdateValue(existingRecord, avroSchema);
validateRecord(mergedRecord, 2, Operation.UPDATE, "00001.111");
}
private GenericRecord createRecord(int primaryKeyValue, Operation op, String seqValue) {
GenericRecord record = new GenericData.Record(avroSchema);
record.put(KEY_FIELD_NAME, primaryKeyValue);
record.put(DebeziumConstants.FLATTENED_OP_COL_NAME, op.op);
record.put(DebeziumConstants.ADDED_SEQ_COL_NAME, seqValue);
return record;
}
private void validateRecord(Option<IndexedRecord> iRecord, int primaryKeyValue, Operation op, String seqValue) {
IndexedRecord record = iRecord.get();
assertEquals(primaryKeyValue, (int) record.get(0));
assertEquals(op.op, record.get(1).toString());
assertEquals(seqValue, record.get(2).toString());
}
private enum Operation {
INSERT("c"),
UPDATE("u"),
DELETE("d");
public final String op;
Operation(String op) {
this.op = op;
}
}
}

View File

@@ -0,0 +1,181 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or mo contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hudi.common.model.debezium;
import org.apache.hudi.common.util.Option;
import org.apache.avro.Schema;
import org.apache.avro.SchemaBuilder;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.generic.IndexedRecord;
import org.apache.avro.util.Utf8;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertNull;
public class TestPostgresDebeziumAvroPayload {
private static final String KEY_FIELD_NAME = "Key";
private Schema avroSchema;
@BeforeEach
void setUp() {
this.avroSchema = Schema.createRecord(Arrays.asList(
new Schema.Field(KEY_FIELD_NAME, Schema.create(Schema.Type.INT), "", 0),
new Schema.Field(DebeziumConstants.FLATTENED_OP_COL_NAME, Schema.create(Schema.Type.STRING), "", null),
new Schema.Field(DebeziumConstants.FLATTENED_LSN_COL_NAME, Schema.create(Schema.Type.LONG), "", null)
));
}
@Test
public void testInsert() throws IOException {
GenericRecord insertRecord = createRecord(0, Operation.INSERT, 100L);
PostgresDebeziumAvroPayload payload = new PostgresDebeziumAvroPayload(insertRecord, 100L);
validateRecord(payload.getInsertValue(avroSchema), 0, Operation.INSERT, 100L);
}
@Test
public void testPreCombine() {
GenericRecord insertRecord = createRecord(0, Operation.INSERT, 120L);
PostgresDebeziumAvroPayload insertPayload = new PostgresDebeziumAvroPayload(insertRecord, 120L);
GenericRecord updateRecord = createRecord(0, Operation.UPDATE, 99L);
PostgresDebeziumAvroPayload updatePayload = new PostgresDebeziumAvroPayload(updateRecord, 99L);
GenericRecord deleteRecord = createRecord(0, Operation.DELETE, 111L);
PostgresDebeziumAvroPayload deletePayload = new PostgresDebeziumAvroPayload(deleteRecord, 111L);
assertEquals(insertPayload, insertPayload.preCombine(updatePayload));
assertEquals(deletePayload, deletePayload.preCombine(updatePayload));
assertEquals(insertPayload, deletePayload.preCombine(insertPayload));
}
@Test
public void testMergeWithUpdate() throws IOException {
GenericRecord updateRecord = createRecord(1, Operation.UPDATE, 100L);
PostgresDebeziumAvroPayload payload = new PostgresDebeziumAvroPayload(updateRecord, 100L);
GenericRecord existingRecord = createRecord(1, Operation.INSERT, 99L);
Option<IndexedRecord> mergedRecord = payload.combineAndGetUpdateValue(existingRecord, avroSchema);
validateRecord(mergedRecord, 1, Operation.UPDATE, 100L);
GenericRecord lateRecord = createRecord(1, Operation.UPDATE, 98L);
payload = new PostgresDebeziumAvroPayload(lateRecord, 98L);
mergedRecord = payload.combineAndGetUpdateValue(existingRecord, avroSchema);
validateRecord(mergedRecord, 1, Operation.INSERT, 99L);
}
@Test
public void testMergeWithDelete() throws IOException {
GenericRecord deleteRecord = createRecord(2, Operation.DELETE, 100L);
PostgresDebeziumAvroPayload payload = new PostgresDebeziumAvroPayload(deleteRecord, 100L);
GenericRecord existingRecord = createRecord(2, Operation.UPDATE, 99L);
Option<IndexedRecord> mergedRecord = payload.combineAndGetUpdateValue(existingRecord, avroSchema);
// expect nothing to be committed to table
assertFalse(mergedRecord.isPresent());
GenericRecord lateRecord = createRecord(2, Operation.DELETE, 98L);
payload = new PostgresDebeziumAvroPayload(lateRecord, 98L);
mergedRecord = payload.combineAndGetUpdateValue(existingRecord, avroSchema);
validateRecord(mergedRecord, 2, Operation.UPDATE, 99L);
}
@Test
public void testMergeWithToastedValues() throws IOException {
Schema avroSchema = SchemaBuilder.builder()
.record("test_schema")
.namespace("test_namespace")
.fields()
.name(DebeziumConstants.FLATTENED_LSN_COL_NAME).type().longType().noDefault()
.name("string_col").type().stringType().noDefault()
.name("byte_col").type().bytesType().noDefault()
.name("string_null_col_1").type().nullable().stringType().noDefault()
.name("byte_null_col_1").type().nullable().bytesType().noDefault()
.name("string_null_col_2").type().nullable().stringType().noDefault()
.name("byte_null_col_2").type().nullable().bytesType().noDefault()
.endRecord();
GenericRecord oldVal = new GenericData.Record(avroSchema);
oldVal.put(DebeziumConstants.FLATTENED_LSN_COL_NAME, 100L);
oldVal.put("string_col", "valid string value");
oldVal.put("byte_col", ByteBuffer.wrap("valid byte value".getBytes()));
oldVal.put("string_null_col_1", "valid string value");
oldVal.put("byte_null_col_1", ByteBuffer.wrap("valid byte value".getBytes()));
oldVal.put("string_null_col_2", null);
oldVal.put("byte_null_col_2", null);
GenericRecord newVal = new GenericData.Record(avroSchema);
newVal.put(DebeziumConstants.FLATTENED_LSN_COL_NAME, 105L);
newVal.put("string_col", PostgresDebeziumAvroPayload.DEBEZIUM_TOASTED_VALUE);
newVal.put("byte_col", ByteBuffer.wrap(PostgresDebeziumAvroPayload.DEBEZIUM_TOASTED_VALUE.getBytes()));
newVal.put("string_null_col_1", null);
newVal.put("byte_null_col_1", null);
newVal.put("string_null_col_2", "valid string value");
newVal.put("byte_null_col_2", ByteBuffer.wrap("valid byte value".getBytes()));
PostgresDebeziumAvroPayload payload = new PostgresDebeziumAvroPayload(Option.of(newVal));
GenericRecord outputRecord = (GenericRecord) payload
.combineAndGetUpdateValue(oldVal, avroSchema).get();
assertEquals("valid string value", outputRecord.get("string_col"));
assertEquals("valid byte value", new String(((ByteBuffer) outputRecord.get("byte_col")).array(), StandardCharsets.UTF_8));
assertNull(outputRecord.get("string_null_col_1"));
assertNull(outputRecord.get("byte_null_col_1"));
assertEquals("valid string value", ((Utf8) outputRecord.get("string_null_col_2")).toString());
assertEquals("valid byte value", new String(((ByteBuffer) outputRecord.get("byte_null_col_2")).array(), StandardCharsets.UTF_8));
}
private GenericRecord createRecord(int primaryKeyValue, Operation op, long lsnValue) {
GenericRecord record = new GenericData.Record(avroSchema);
record.put(KEY_FIELD_NAME, primaryKeyValue);
record.put(DebeziumConstants.FLATTENED_OP_COL_NAME, op.op);
record.put(DebeziumConstants.FLATTENED_LSN_COL_NAME, lsnValue);
return record;
}
private void validateRecord(Option<IndexedRecord> iRecord, int primaryKeyValue, Operation op, long lsnValue) {
IndexedRecord record = iRecord.get();
assertEquals(primaryKeyValue, (int) record.get(0));
assertEquals(op.op, record.get(1).toString());
assertEquals(lsnValue, (long) record.get(2));
}
private enum Operation {
INSERT("c"),
UPDATE("u"),
DELETE("d");
public final String op;
Operation(String op) {
this.op = op;
}
}
}