1
0

[HUDI-1315] Adding builder for HoodieTableMetaClient initialization (#2534)

This commit is contained in:
Sivabalan Narayanan
2021-02-19 20:54:26 -05:00
committed by GitHub
parent 0d91c451b0
commit c9fcf964b2
64 changed files with 241 additions and 203 deletions

View File

@@ -70,7 +70,7 @@ public class HoodieDataSourceHelpers {
*/
@PublicAPIMethod(maturity = ApiMaturityLevel.STABLE)
public static HoodieTimeline allCompletedCommitsCompactions(FileSystem fs, String basePath) {
HoodieTableMetaClient metaClient = new HoodieTableMetaClient(fs.getConf(), basePath, true);
HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder().setConf(fs.getConf()).setBasePath(basePath).setLoadActiveTimelineOnLoad(true).build();
if (metaClient.getTableType().equals(HoodieTableType.MERGE_ON_READ)) {
return metaClient.getActiveTimeline().getTimelineOfActions(
CollectionUtils.createSet(HoodieActiveTimeline.COMMIT_ACTION,

View File

@@ -84,7 +84,7 @@ class DefaultSource extends RelationProvider
val tablePath = DataSourceUtils.getTablePath(fs, globPaths.toArray)
log.info("Obtained hudi table path: " + tablePath)
val metaClient = new HoodieTableMetaClient(fs.getConf, tablePath)
val metaClient = HoodieTableMetaClient.builder().setConf(fs.getConf).setBasePath(tablePath).build()
val isBootstrappedTable = metaClient.getTableConfig.getBootstrapBasePath.isPresent
log.info("Is bootstrapped table => " + isBootstrappedTable)
@@ -104,7 +104,7 @@ class DefaultSource extends RelationProvider
} else if(parameters(QUERY_TYPE_OPT_KEY).equals(QUERY_TYPE_READ_OPTIMIZED_OPT_VAL)) {
getBaseFileOnlyView(sqlContext, parameters, schema, readPaths, isBootstrappedTable, globPaths, metaClient)
} else if (parameters(QUERY_TYPE_OPT_KEY).equals(QUERY_TYPE_INCREMENTAL_OPT_VAL)) {
val metaClient = new HoodieTableMetaClient(fs.getConf, tablePath)
val metaClient = HoodieTableMetaClient.builder().setConf(fs.getConf).setBasePath(tablePath).build()
if (metaClient.getTableType.equals(HoodieTableType.MERGE_ON_READ)) {
new MergeOnReadIncrementalRelation(sqlContext, optParams, schema, metaClient)
} else {
@@ -202,8 +202,8 @@ class DefaultSource extends RelationProvider
if (path.isEmpty || path.get == null) {
throw new HoodieException(s"'path' must be specified.")
}
val metaClient = new HoodieTableMetaClient(
sqlContext.sparkSession.sessionState.newHadoopConf(), path.get)
val metaClient = HoodieTableMetaClient.builder().setConf(
sqlContext.sparkSession.sessionState.newHadoopConf()).setBasePath(path.get).build()
val schemaResolver = new TableSchemaResolver(metaClient)
val sqlSchema =
try {

View File

@@ -500,7 +500,8 @@ private[hudi] object HoodieSparkSqlWriter {
hoodieTableConfigOpt: Option[HoodieTableConfig]): HoodieTableConfig = {
if (tableExists) {
hoodieTableConfigOpt.getOrElse(
new HoodieTableMetaClient(sparkContext.hadoopConfiguration, tablePath).getTableConfig)
HoodieTableMetaClient.builder().setConf(sparkContext.hadoopConfiguration).setBasePath(tablePath)
.build().getTableConfig)
} else {
null
}

View File

@@ -175,8 +175,8 @@ class HoodieStreamingSink(sqlContext: SQLContext,
}))
// First time, scan .hoodie folder and get all pending compactions
val metaClient = new HoodieTableMetaClient(sqlContext.sparkContext.hadoopConfiguration,
client.getConfig.getBasePath)
val metaClient = HoodieTableMetaClient.builder().setConf(sqlContext.sparkContext.hadoopConfiguration)
.setBasePath(client.getConfig.getBasePath).build()
val pendingInstants :java.util.List[HoodieInstant] =
CompactionUtils.getPendingCompactionInstantTimes(metaClient)
pendingInstants.foreach((h : HoodieInstant) => asyncCompactorService.enqueuePendingCompaction(h))

View File

@@ -59,7 +59,7 @@ class HoodieStreamSource(
val fs = path.getFileSystem(hadoopConf)
TablePathUtils.getTablePath(fs, path).get()
}
private lazy val metaClient = new HoodieTableMetaClient(hadoopConf, tablePath.toString)
private lazy val metaClient = HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(tablePath.toString).build()
private lazy val tableType = metaClient.getTableType
@transient private var lastOffset: HoodieSourceOffset = _

View File

@@ -187,7 +187,7 @@ public class HoodieJavaStreamingApp {
executor.shutdownNow();
}
HoodieTableMetaClient metaClient = new HoodieTableMetaClient(jssc.hadoopConfiguration(), tablePath);
HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder().setConf(jssc.hadoopConfiguration()).setBasePath(tablePath).build();
if (tableType.equals(HoodieTableType.MERGE_ON_READ.name())) {
// Ensure we have successfully completed one compaction commit
ValidationUtils.checkArgument(metaClient.getActiveTimeline().getCommitTimeline().getInstants().count() == 1);
@@ -249,7 +249,7 @@ public class HoodieJavaStreamingApp {
if (timeline.countInstants() >= numCommits) {
return;
}
HoodieTableMetaClient metaClient = new HoodieTableMetaClient(fs.getConf(), tablePath, true);
HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder().setConf(fs.getConf()).setBasePath(tablePath).setLoadActiveTimelineOnLoad(true).build();
System.out.println("Instants :" + metaClient.getActiveTimeline().getInstants().collect(Collectors.toList()));
} catch (TableNotFoundException te) {
LOG.info("Got table not found exception. Retrying");

View File

@@ -210,7 +210,8 @@ class TestCOWDataSource extends HoodieClientTestBase {
.mode(SaveMode.Append)
.save(basePath)
val metaClient = new HoodieTableMetaClient(spark.sparkContext.hadoopConfiguration, basePath, true)
val metaClient = HoodieTableMetaClient.builder().setConf(spark.sparkContext.hadoopConfiguration).setBasePath(basePath)
.setLoadActiveTimelineOnLoad(true).build();
val commits = metaClient.getActiveTimeline.filterCompletedInstants().getInstants.toArray
.map(instant => (instant.asInstanceOf[HoodieInstant]).getAction)
assertEquals(2, commits.size)
@@ -235,7 +236,8 @@ class TestCOWDataSource extends HoodieClientTestBase {
.mode(SaveMode.Overwrite)
.save(basePath)
val metaClient = new HoodieTableMetaClient(spark.sparkContext.hadoopConfiguration, basePath, true)
val metaClient = HoodieTableMetaClient.builder().setConf(spark.sparkContext.hadoopConfiguration).setBasePath(basePath)
.setLoadActiveTimelineOnLoad(true).build()
val commits = metaClient.getActiveTimeline.filterCompletedInstants().getInstants.toArray
.map(instant => (instant.asInstanceOf[HoodieInstant]).getAction)
assertEquals(2, commits.size)
@@ -289,7 +291,8 @@ class TestCOWDataSource extends HoodieClientTestBase {
val filterSecondPartitionCount = recordsForPartitionColumn.filter(row => row.get(0).equals(HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH)).size
assertEquals(7, filterSecondPartitionCount)
val metaClient = new HoodieTableMetaClient(spark.sparkContext.hadoopConfiguration, basePath, true)
val metaClient = HoodieTableMetaClient.builder().setConf(spark.sparkContext.hadoopConfiguration).setBasePath(basePath)
.setLoadActiveTimelineOnLoad(true).build()
val commits = metaClient.getActiveTimeline.filterCompletedInstants().getInstants.toArray
.map(instant => instant.asInstanceOf[HoodieInstant].getAction)
assertEquals(3, commits.size)
@@ -339,7 +342,8 @@ class TestCOWDataSource extends HoodieClientTestBase {
val filterSecondPartitionCount = recordsForPartitionColumn.filter(row => row.get(0).equals(HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH)).size
assertEquals(7, filterSecondPartitionCount)
val metaClient = new HoodieTableMetaClient(spark.sparkContext.hadoopConfiguration, basePath, true)
val metaClient = HoodieTableMetaClient.builder().setConf(spark.sparkContext.hadoopConfiguration).setBasePath(basePath)
.setLoadActiveTimelineOnLoad(true).build()
val commits = metaClient.getActiveTimeline.filterCompletedInstants().getInstants.toArray
.map(instant => instant.asInstanceOf[HoodieInstant].getAction)
assertEquals(2, commits.size)

View File

@@ -177,7 +177,8 @@ class TestStructuredStreaming extends HoodieClientTestBase {
numInstants = timeline.countInstants
success = true
}
val metaClient = new HoodieTableMetaClient(fs.getConf, tablePath, true)
val metaClient = HoodieTableMetaClient.builder().setConf(fs.getConf).setBasePath(tablePath)
.setLoadActiveTimelineOnLoad(true).build()
} catch {
case te: TableNotFoundException =>
log.info("Got table not found exception. Retrying")
@@ -253,12 +254,14 @@ class TestStructuredStreaming extends HoodieClientTestBase {
if (HoodieDataSourceHelpers.allCompletedCommitsCompactions(fs, destPath).getCompletedReplaceTimeline().countInstants() > 0) {
assertEquals(3, HoodieDataSourceHelpers.listCommitsSince(fs, destPath, "000").size())
// check have at least one file group
this.metaClient = new HoodieTableMetaClient(fs.getConf, destPath, true)
this.metaClient = HoodieTableMetaClient.builder().setConf(fs.getConf).setBasePath(destPath)
.setLoadActiveTimelineOnLoad(true).build()
assertTrue(getLatestFileGroupsFileId(partitionOfRecords).size > 0)
} else {
assertEquals(currNumCommits, HoodieDataSourceHelpers.listCommitsSince(fs, destPath, "000").size())
// check have more than one file group
this.metaClient = new HoodieTableMetaClient(fs.getConf, destPath, true)
this.metaClient = HoodieTableMetaClient.builder().setConf(fs.getConf).setBasePath(destPath)
.setLoadActiveTimelineOnLoad(true).build()
assertTrue(getLatestFileGroupsFileId(partitionOfRecords).size > 1)
}