Fix regression which broke HoodieInputFormat handling of non-hoodie datasets
This commit is contained in:
committed by
vinoth chandar
parent
1fca9b21cc
commit
8485b9e263
@@ -23,6 +23,7 @@ import com.uber.hoodie.common.table.HoodieTimeline;
|
|||||||
import com.uber.hoodie.common.table.TableFileSystemView;
|
import com.uber.hoodie.common.table.TableFileSystemView;
|
||||||
import com.uber.hoodie.common.table.timeline.HoodieInstant;
|
import com.uber.hoodie.common.table.timeline.HoodieInstant;
|
||||||
import com.uber.hoodie.common.table.view.HoodieTableFileSystemView;
|
import com.uber.hoodie.common.table.view.HoodieTableFileSystemView;
|
||||||
|
import com.uber.hoodie.exception.DatasetNotFoundException;
|
||||||
import com.uber.hoodie.exception.HoodieIOException;
|
import com.uber.hoodie.exception.HoodieIOException;
|
||||||
import com.uber.hoodie.exception.InvalidDatasetException;
|
import com.uber.hoodie.exception.InvalidDatasetException;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
@@ -161,7 +162,7 @@ public class HoodieInputFormat extends MapredParquetInputFormat implements Confi
|
|||||||
metadata = getTableMetaClient(status.getPath().getFileSystem(conf),
|
metadata = getTableMetaClient(status.getPath().getFileSystem(conf),
|
||||||
status.getPath().getParent());
|
status.getPath().getParent());
|
||||||
nonHoodieBasePath = null;
|
nonHoodieBasePath = null;
|
||||||
} catch (InvalidDatasetException e) {
|
} catch (DatasetNotFoundException | InvalidDatasetException e) {
|
||||||
LOG.info("Handling a non-hoodie path " + status.getPath());
|
LOG.info("Handling a non-hoodie path " + status.getPath());
|
||||||
metadata = null;
|
metadata = null;
|
||||||
nonHoodieBasePath = status.getPath().getParent().toString();
|
nonHoodieBasePath = status.getPath().getParent().toString();
|
||||||
|
|||||||
@@ -66,6 +66,9 @@ public class ITTestHoodieSanity extends ITTestBase {
|
|||||||
*/
|
*/
|
||||||
public void testRunHoodieJavaAppOnCOWTable(String hiveTableName, boolean singlePartitionKey) throws Exception {
|
public void testRunHoodieJavaAppOnCOWTable(String hiveTableName, boolean singlePartitionKey) throws Exception {
|
||||||
|
|
||||||
|
String hdfsPath = "/" + hiveTableName;
|
||||||
|
String hdfsUrl = "hdfs://namenode" + hdfsPath;
|
||||||
|
|
||||||
// Drop Table if it exists
|
// Drop Table if it exists
|
||||||
{
|
{
|
||||||
String[] hiveDropCmd = getHiveConsoleCommand("drop table if exists " + hiveTableName);
|
String[] hiveDropCmd = getHiveConsoleCommand("drop table if exists " + hiveTableName);
|
||||||
@@ -91,6 +94,7 @@ public class ITTestHoodieSanity extends ITTestBase {
|
|||||||
cmd = new String[]{
|
cmd = new String[]{
|
||||||
HOODIE_JAVA_APP,
|
HOODIE_JAVA_APP,
|
||||||
"--hive-sync",
|
"--hive-sync",
|
||||||
|
"--table-path", hdfsUrl,
|
||||||
"--hive-url", HIVE_SERVER_JDBC_URL,
|
"--hive-url", HIVE_SERVER_JDBC_URL,
|
||||||
"--hive-table", hiveTableName
|
"--hive-table", hiveTableName
|
||||||
};
|
};
|
||||||
@@ -98,6 +102,7 @@ public class ITTestHoodieSanity extends ITTestBase {
|
|||||||
cmd = new String[]{
|
cmd = new String[]{
|
||||||
HOODIE_JAVA_APP,
|
HOODIE_JAVA_APP,
|
||||||
"--hive-sync",
|
"--hive-sync",
|
||||||
|
"--table-path", hdfsUrl,
|
||||||
"--hive-url", HIVE_SERVER_JDBC_URL,
|
"--hive-url", HIVE_SERVER_JDBC_URL,
|
||||||
"--use-multi-partition-keys",
|
"--use-multi-partition-keys",
|
||||||
"--hive-table", hiveTableName
|
"--hive-table", hiveTableName
|
||||||
@@ -135,5 +140,30 @@ public class ITTestHoodieSanity extends ITTestBase {
|
|||||||
Assert.assertEquals("Expecting 100 rows to be present in the new table", 100,
|
Assert.assertEquals("Expecting 100 rows to be present in the new table", 100,
|
||||||
Integer.parseInt(stdout.trim()));
|
Integer.parseInt(stdout.trim()));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Make the HDFS dataset non-hoodie and run the same query
|
||||||
|
// Checks for interoperability with non-hoodie tables
|
||||||
|
{
|
||||||
|
// Delete Hoodie directory to make it non-hoodie dataset
|
||||||
|
String[] cmd = new String[]{
|
||||||
|
"hadoop", "fs", "-rm", "-r", hdfsPath + "/.hoodie"
|
||||||
|
};
|
||||||
|
TestExecStartResultCallback callback =
|
||||||
|
executeCommandInDocker(ADHOC_1_CONTAINER, cmd, true);
|
||||||
|
String stderr = callback.getStderr().toString().trim();
|
||||||
|
String stdout = callback.getStdout().toString().trim();
|
||||||
|
LOG.info("Got output for (" + Arrays.toString(cmd) + ") : (" + stdout + ")");
|
||||||
|
LOG.info("Got error output for (" + Arrays.toString(cmd) + ") : (" + stderr + ")");
|
||||||
|
|
||||||
|
// Run the count query again. With the .hoodie directory removed, all file versions are read, so the count is inflated (200 instead of 100)
|
||||||
|
String[] hiveTableCheck = getHiveConsoleCommand("select count(1) from " + hiveTableName);
|
||||||
|
callback = executeCommandInDocker(ADHOC_1_CONTAINER, hiveTableCheck, true);
|
||||||
|
stderr = callback.getStderr().toString().trim();
|
||||||
|
stdout = callback.getStdout().toString().trim();
|
||||||
|
LOG.info("Got output for (" + Arrays.toString(hiveTableCheck) + ") : (" + stdout + ")");
|
||||||
|
LOG.info("Got error output for (" + Arrays.toString(hiveTableCheck) + ") : (" + stderr + ")");
|
||||||
|
Assert.assertEquals("Expecting 200 rows to be present in the new table", 200,
|
||||||
|
Integer.parseInt(stdout.trim()));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user