[HUDI-509] Renaming code in sync with cWiki restructuring (#1212)
- Storage Type replaced with Table Type (remaining instances)
- View types replaced with query types
- ReadOptimized view referred to as Snapshot Query
- TableFileSystemView sub-interfaces renamed to BaseFileOnly and Slice Views
- HoodieDataFile renamed to HoodieBaseFile
- Hive Sync tool will register RO tables for MOR with a `_ro` suffix
- Datasource/Deltastreamer options renamed accordingly
- Support fallback to old config values as well, so migration is painless
- Config for controlling `_ro` suffix addition
- Renaming DataFile to BaseFile across DTOs, HoodieFileSlice and AbstractTableFileSystemView
This commit is contained in:
@@ -118,14 +118,14 @@ public class ITTestHoodieDemo extends ITTestBase {
|
||||
private void ingestFirstBatchAndHiveSync() throws Exception {
|
||||
List<String> cmds = new ImmutableList.Builder<String>()
|
||||
.add("spark-submit --class org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer " + HUDI_UTILITIES_BUNDLE
|
||||
+ " --storage-type COPY_ON_WRITE "
|
||||
+ " --table-type COPY_ON_WRITE "
|
||||
+ " --source-class org.apache.hudi.utilities.sources.JsonDFSSource --source-ordering-field ts "
|
||||
+ " --target-base-path " + COW_BASE_PATH + " --target-table " + COW_TABLE_NAME
|
||||
+ " --props /var/demo/config/dfs-source.properties "
|
||||
+ " --schemaprovider-class org.apache.hudi.utilities.schema.FilebasedSchemaProvider "
|
||||
+ String.format(HIVE_SYNC_CMD_FMT, "dt", COW_TABLE_NAME))
|
||||
.add("spark-submit --class org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer " + HUDI_UTILITIES_BUNDLE
|
||||
+ " --storage-type MERGE_ON_READ "
|
||||
+ " --table-type MERGE_ON_READ "
|
||||
+ " --source-class org.apache.hudi.utilities.sources.JsonDFSSource --source-ordering-field ts "
|
||||
+ " --target-base-path " + MOR_BASE_PATH + " --target-table " + MOR_TABLE_NAME
|
||||
+ " --props /var/demo/config/dfs-source.properties "
|
||||
@@ -139,7 +139,7 @@ public class ITTestHoodieDemo extends ITTestBase {
|
||||
private void testHiveAfterFirstBatch() throws Exception {
|
||||
Pair<String, String> stdOutErrPair = executeHiveCommandFile(HIVE_TBLCHECK_COMMANDS);
|
||||
assertStdOutContains(stdOutErrPair, "| stock_ticks_cow |");
|
||||
assertStdOutContains(stdOutErrPair, "| stock_ticks_mor |");
|
||||
assertStdOutContains(stdOutErrPair, "| stock_ticks_mor_ro |");
|
||||
assertStdOutContains(stdOutErrPair, "| stock_ticks_mor_rt |");
|
||||
|
||||
assertStdOutContains(stdOutErrPair,
|
||||
@@ -159,7 +159,8 @@ public class ITTestHoodieDemo extends ITTestBase {
|
||||
private void testSparkSQLAfterFirstBatch() throws Exception {
|
||||
Pair<String, String> stdOutErrPair = executeSparkSQLCommand(SPARKSQL_BATCH1_COMMANDS, true);
|
||||
assertStdOutContains(stdOutErrPair, "|default |stock_ticks_cow |false |\n"
|
||||
+ "|default |stock_ticks_mor |false |\n|default |stock_ticks_mor_rt |false |");
|
||||
+ "|default |stock_ticks_mor_ro |false |\n" +
|
||||
"|default |stock_ticks_mor_rt |false |");
|
||||
assertStdOutContains(stdOutErrPair,
|
||||
"+------+-------------------+\n|GOOG |2018-08-31 10:29:00|\n+------+-------------------+", 3);
|
||||
assertStdOutContains(stdOutErrPair, "|GOOG |2018-08-31 09:59:00|6330 |1230.5 |1230.02 |", 3);
|
||||
@@ -170,14 +171,14 @@ public class ITTestHoodieDemo extends ITTestBase {
|
||||
List<String> cmds = new ImmutableList.Builder<String>()
|
||||
.add("hdfs dfs -copyFromLocal -f " + INPUT_BATCH_PATH2 + " " + HDFS_BATCH_PATH2)
|
||||
.add("spark-submit --class org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer " + HUDI_UTILITIES_BUNDLE
|
||||
+ " --storage-type COPY_ON_WRITE "
|
||||
+ " --table-type COPY_ON_WRITE "
|
||||
+ " --source-class org.apache.hudi.utilities.sources.JsonDFSSource --source-ordering-field ts "
|
||||
+ " --target-base-path " + COW_BASE_PATH + " --target-table " + COW_TABLE_NAME
|
||||
+ " --props /var/demo/config/dfs-source.properties "
|
||||
+ " --schemaprovider-class org.apache.hudi.utilities.schema.FilebasedSchemaProvider "
|
||||
+ String.format(HIVE_SYNC_CMD_FMT, "dt", COW_TABLE_NAME))
|
||||
.add("spark-submit --class org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer " + HUDI_UTILITIES_BUNDLE
|
||||
+ " --storage-type MERGE_ON_READ "
|
||||
+ " --table-type MERGE_ON_READ "
|
||||
+ " --source-class org.apache.hudi.utilities.sources.JsonDFSSource --source-ordering-field ts "
|
||||
+ " --target-base-path " + MOR_BASE_PATH + " --target-table " + MOR_TABLE_NAME
|
||||
+ " --props /var/demo/config/dfs-source.properties "
|
||||
@@ -291,8 +292,10 @@ public class ITTestHoodieDemo extends ITTestBase {
|
||||
Pair<String, String> stdOutErrPair = executeSparkSQLCommand(SPARKSQL_INCREMENTAL_COMMANDS, true);
|
||||
assertStdOutContains(stdOutErrPair, "|GOOG |2018-08-31 10:59:00|9021 |1227.1993|1227.215|");
|
||||
assertStdOutContains(stdOutErrPair, "|default |stock_ticks_cow |false |\n"
|
||||
+ "|default |stock_ticks_derived_mor |false |\n|default |stock_ticks_derived_mor_rt|false |\n"
|
||||
+ "|default |stock_ticks_mor |false |\n|default |stock_ticks_mor_rt |false |\n"
|
||||
+ "|default |stock_ticks_derived_mor_ro|false |\n"
|
||||
+ "|default |stock_ticks_derived_mor_rt|false |\n"
|
||||
+ "|default |stock_ticks_mor_ro |false |\n"
|
||||
+ "|default |stock_ticks_mor_rt |false |\n"
|
||||
+ "| |stock_ticks_cow_incr |true |");
|
||||
assertStdOutContains(stdOutErrPair, "|count(1)|\n+--------+\n|99 |", 2);
|
||||
}
|
||||
|
||||
@@ -18,6 +18,7 @@
|
||||
|
||||
package org.apache.hudi.integ;
|
||||
|
||||
import org.apache.hudi.common.util.Option;
|
||||
import org.apache.hudi.common.util.collection.Pair;
|
||||
import org.apache.hudi.common.model.HoodieTableType;
|
||||
|
||||
@@ -144,38 +145,46 @@ public class ITTestHoodieSanity extends ITTestBase {
|
||||
}
|
||||
executeCommandStringInDocker(ADHOC_1_CONTAINER, cmd, true);
|
||||
|
||||
String snapshotTableName = tableType.equals(HoodieTableType.MERGE_ON_READ.name()) ?
|
||||
hiveTableName + "_rt" : hiveTableName;
|
||||
Option<String> roTableName = tableType.equals(HoodieTableType.MERGE_ON_READ.name()) ?
|
||||
Option.of(hiveTableName +"_ro") : Option.empty();
|
||||
|
||||
// Ensure table does exist
|
||||
stdOutErr = executeHiveCommand("show tables like '" + hiveTableName + "'");
|
||||
Assert.assertEquals("Table exists", hiveTableName, stdOutErr.getLeft());
|
||||
stdOutErr = executeHiveCommand("show tables like '" + snapshotTableName + "'");
|
||||
Assert.assertEquals("Table exists", snapshotTableName, stdOutErr.getLeft());
|
||||
|
||||
// Ensure row count is 80 (without duplicates) (100 - 20 deleted)
|
||||
stdOutErr = executeHiveCommand("select count(1) from " + hiveTableName);
|
||||
Assert.assertEquals("Expecting 100 rows to be present in the new table", 80,
|
||||
stdOutErr = executeHiveCommand("select count(1) from " + snapshotTableName);
|
||||
Assert.assertEquals("Expecting 80 rows to be present in the snapshot table", 80,
|
||||
Integer.parseInt(stdOutErr.getLeft().trim()));
|
||||
|
||||
// If is MOR table, ensure realtime table row count is 100 - 20 = 80 (without duplicates)
|
||||
if (tableType.equals(HoodieTableType.MERGE_ON_READ.name())) {
|
||||
stdOutErr = executeHiveCommand("select count(1) from " + hiveTableName + "_rt");
|
||||
Assert.assertEquals("Expecting 100 rows to be present in the realtime table,", 80,
|
||||
if (roTableName.isPresent()) {
|
||||
stdOutErr = executeHiveCommand("select count(1) from " + roTableName.get());
|
||||
Assert.assertEquals("Expecting 80 rows to be present in the snapshot table", 80,
|
||||
Integer.parseInt(stdOutErr.getLeft().trim()));
|
||||
}
|
||||
|
||||
// Make the HDFS dataset non-hoodie and run the same query
|
||||
// Checks for interoperability with non-hoodie tables
|
||||
|
||||
// Make the HDFS dataset non-hoodie and run the same query; Checks for interoperability with non-hoodie tables
|
||||
// Delete Hoodie directory to make it non-hoodie dataset
|
||||
executeCommandStringInDocker(ADHOC_1_CONTAINER, "hdfs dfs -rm -r " + hdfsPath + "/.hoodie", true);
|
||||
|
||||
// Run the count query again. Without Hoodie, all versions are included. So we get a wrong count
|
||||
stdOutErr = executeHiveCommand("select count(1) from " + hiveTableName);
|
||||
if (tableType.equals(HoodieTableType.MERGE_ON_READ.name())) {
|
||||
stdOutErr = executeHiveCommand("select count(1) from " + roTableName.get());
|
||||
} else {
|
||||
stdOutErr = executeHiveCommand("select count(1) from " + snapshotTableName);
|
||||
}
|
||||
Assert.assertEquals("Expecting 280 rows to be present in the new table", 280,
|
||||
Integer.parseInt(stdOutErr.getLeft().trim()));
|
||||
}
|
||||
|
||||
private void dropHiveTables(String hiveTableName, String tableType) throws Exception {
|
||||
executeHiveCommand("drop table if exists " + hiveTableName);
|
||||
if (tableType.equals(HoodieTableType.MERGE_ON_READ.name())) {
|
||||
executeHiveCommand("drop table if exists " + hiveTableName + "_rt");
|
||||
executeHiveCommand("drop table if exists " + hiveTableName + "_ro");
|
||||
} else {
|
||||
executeHiveCommand("drop table if exists " + hiveTableName);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user