1
0

[HUDI-3643] Fix hive count exception when the table is empty and the path depth is less than 3 (#5051)

This commit is contained in:
董可伦
2022-04-07 19:21:03 +08:00
committed by GitHub
parent 9d744bb35c
commit 6a8396420c
5 changed files with 63 additions and 4 deletions

View File

@@ -71,7 +71,6 @@ public class HoodieHiveUtils {
public static final String DEFAULT_SCAN_MODE = SNAPSHOT_SCAN_MODE;
public static final int DEFAULT_MAX_COMMITS = 1;
public static final int MAX_COMMIT_ALL = -1;
public static final int DEFAULT_LEVELS_TO_BASEPATH = 3;
public static final Pattern HOODIE_CONSUME_MODE_PATTERN_STRING = Pattern.compile("hoodie\\.(.*)\\.consume\\.mode");
public static final String GLOBALLY_CONSISTENT_READ_TIMESTAMP = "last_replication_timestamp";

View File

@@ -46,6 +46,7 @@ import org.apache.hudi.common.table.view.TableFileSystemView;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.StringUtils;
import org.apache.hudi.exception.HoodieIOException;
import org.apache.hudi.exception.TableNotFoundException;
import org.apache.hudi.hadoop.FileStatusWithBootstrapBaseFile;
import org.apache.hudi.hadoop.HoodieHFileInputFormat;
import org.apache.hudi.hadoop.HoodieParquetInputFormat;
@@ -68,6 +69,7 @@ import java.util.stream.Collectors;
import static org.apache.hudi.common.config.HoodieMetadataConfig.DEFAULT_METADATA_ENABLE_FOR_READERS;
import static org.apache.hudi.common.config.HoodieMetadataConfig.ENABLE;
import static org.apache.hudi.common.table.HoodieTableMetaClient.METAFOLDER_NAME;
public class HoodieInputFormatUtils {
@@ -324,14 +326,24 @@ public class HoodieInputFormatUtils {
* Extract HoodieTableMetaClient from a partition path (not base path)
*/
public static HoodieTableMetaClient getTableMetaClientForBasePathUnchecked(Configuration conf, Path partitionPath) throws IOException {
Path baseDir = partitionPath;
FileSystem fs = partitionPath.getFileSystem(conf);
int levels = HoodieHiveUtils.DEFAULT_LEVELS_TO_BASEPATH;
if (HoodiePartitionMetadata.hasPartitionMetadata(fs, partitionPath)) {
HoodiePartitionMetadata metadata = new HoodiePartitionMetadata(fs, partitionPath);
metadata.readFromFS();
levels = metadata.getPartitionDepth();
int levels = metadata.getPartitionDepth();
baseDir = HoodieHiveUtils.getNthParent(partitionPath, levels);
} else {
for (int i = 0; i < partitionPath.depth(); i++) {
if (fs.exists(new Path(baseDir, METAFOLDER_NAME))) {
break;
} else if (i == partitionPath.depth() - 1) {
throw new TableNotFoundException(partitionPath.toString());
} else {
baseDir = baseDir.getParent();
}
}
}
Path baseDir = HoodieHiveUtils.getNthParent(partitionPath, levels);
LOG.info("Reading hoodie metadata from path " + baseDir.toString());
return HoodieTableMetaClient.builder().setConf(fs.getConf()).setBasePath(baseDir.toString()).build();
}

View File

@@ -162,6 +162,21 @@ public class TestHoodieHFileInputFormat {
assertEquals(10, inputSplits.length);
}
@Test
public void testInputFormatLoadWithEmptyTable() throws IOException {
// initial hoodie table
String bathPathStr = "/tmp/test_empty_table";
HoodieTestUtils.init(HoodieTestUtils.getDefaultHadoopConf(), bathPathStr, HoodieTableType.COPY_ON_WRITE,
baseFileFormat);
// Add the paths
FileInputFormat.setInputPaths(jobConf, bathPathStr);
FileStatus[] files = inputFormat.listStatus(jobConf);
assertEquals(0, files.length);
InputSplit[] inputSplits = inputFormat.getSplits(jobConf, 0);
assertEquals(0, inputSplits.length);
}
@Test
public void testInputFormatUpdates() throws IOException {
// initial commit

View File

@@ -167,6 +167,21 @@ public class TestHoodieParquetInputFormat {
assertEquals(10, files.length);
}
@Test
public void testInputFormatLoadWithEmptyTable() throws IOException {
// initial hoodie table
String bathPathStr = "/tmp/test_empty_table";
HoodieTestUtils.init(HoodieTestUtils.getDefaultHadoopConf(), bathPathStr, HoodieTableType.COPY_ON_WRITE,
baseFileFormat);
// Add the paths
FileInputFormat.setInputPaths(jobConf, bathPathStr);
FileStatus[] files = inputFormat.listStatus(jobConf);
assertEquals(0, files.length);
InputSplit[] inputSplits = inputFormat.getSplits(jobConf, 0);
assertEquals(0, inputSplits.length);
}
@Test
public void testInputFormatUpdates() throws IOException {
// initial commit

View File

@@ -56,6 +56,12 @@ public class TestInputPathHandler {
// non Hoodie table
public static final String TRIPS_STATS_TEST_NAME = "trips_stats";
// empty snapshot table
public static final String EMPTY_SNAPSHOT_TEST_NAME = "empty_snapshot";
// empty incremental table
public static final String EMPTY_INCREMENTAL_TEST_NAME = "empty_incremental";
@TempDir
static java.nio.file.Path parentPath;
@@ -67,6 +73,8 @@ public class TestInputPathHandler {
private static String basePathTable2 = null;
private static String basePathTable3 = null;
private static String basePathTable4 = null; // non hoodie Path
private static String basePathTable5 = null;
private static String basePathTable6 = null;
private static List<String> incrementalTables;
private static List<Path> incrementalPaths;
private static List<Path> snapshotPaths;
@@ -110,6 +118,9 @@ public class TestInputPathHandler {
basePathTable2 = parentPath.resolve(MODEL_TRIPS_TEST_NAME).toAbsolutePath().toString();
basePathTable3 = parentPath.resolve(ETL_TRIPS_TEST_NAME).toAbsolutePath().toString();
basePathTable4 = parentPath.resolve(TRIPS_STATS_TEST_NAME).toAbsolutePath().toString();
String tempPath = "/tmp/";
basePathTable5 = tempPath + EMPTY_SNAPSHOT_TEST_NAME;
basePathTable6 = tempPath + EMPTY_INCREMENTAL_TEST_NAME;
dfs.mkdirs(new Path(basePathTable1));
initTableType(dfs.getConf(), basePathTable1, RAW_TRIPS_TEST_NAME, HoodieTableType.MERGE_ON_READ);
@@ -126,6 +137,12 @@ public class TestInputPathHandler {
dfs.mkdirs(new Path(basePathTable4));
nonHoodiePaths.addAll(generatePartitions(dfs, basePathTable4));
initTableType(dfs.getConf(), basePathTable5, EMPTY_SNAPSHOT_TEST_NAME, HoodieTableType.COPY_ON_WRITE);
snapshotPaths.add(new Path(basePathTable5));
initTableType(dfs.getConf(), basePathTable6, EMPTY_INCREMENTAL_TEST_NAME, HoodieTableType.MERGE_ON_READ);
incrementalPaths.add(new Path(basePathTable6));
inputPaths.addAll(incrementalPaths);
inputPaths.addAll(snapshotPaths);
inputPaths.addAll(nonHoodiePaths);
@@ -133,6 +150,7 @@ public class TestInputPathHandler {
incrementalTables = new ArrayList<>();
incrementalTables.add(RAW_TRIPS_TEST_NAME);
incrementalTables.add(MODEL_TRIPS_TEST_NAME);
incrementalTables.add(EMPTY_INCREMENTAL_TEST_NAME);
}
static HoodieTableMetaClient initTableType(Configuration hadoopConf, String basePath,