[HUDI-1315] Adding builder for HoodieTableMetaClient initialization (#2534)
This commit is contained in:
committed by
GitHub
parent
0d91c451b0
commit
c9fcf964b2
@@ -70,7 +70,7 @@ public class HoodieDataSourceHelpers {
|
||||
*/
|
||||
@PublicAPIMethod(maturity = ApiMaturityLevel.STABLE)
|
||||
public static HoodieTimeline allCompletedCommitsCompactions(FileSystem fs, String basePath) {
|
||||
HoodieTableMetaClient metaClient = new HoodieTableMetaClient(fs.getConf(), basePath, true);
|
||||
HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder().setConf(fs.getConf()).setBasePath(basePath).setLoadActiveTimelineOnLoad(true).build();
|
||||
if (metaClient.getTableType().equals(HoodieTableType.MERGE_ON_READ)) {
|
||||
return metaClient.getActiveTimeline().getTimelineOfActions(
|
||||
CollectionUtils.createSet(HoodieActiveTimeline.COMMIT_ACTION,
|
||||
|
||||
@@ -84,7 +84,7 @@ class DefaultSource extends RelationProvider
|
||||
val tablePath = DataSourceUtils.getTablePath(fs, globPaths.toArray)
|
||||
log.info("Obtained hudi table path: " + tablePath)
|
||||
|
||||
val metaClient = new HoodieTableMetaClient(fs.getConf, tablePath)
|
||||
val metaClient = HoodieTableMetaClient.builder().setConf(fs.getConf).setBasePath(tablePath).build()
|
||||
val isBootstrappedTable = metaClient.getTableConfig.getBootstrapBasePath.isPresent
|
||||
log.info("Is bootstrapped table => " + isBootstrappedTable)
|
||||
|
||||
@@ -104,7 +104,7 @@ class DefaultSource extends RelationProvider
|
||||
} else if(parameters(QUERY_TYPE_OPT_KEY).equals(QUERY_TYPE_READ_OPTIMIZED_OPT_VAL)) {
|
||||
getBaseFileOnlyView(sqlContext, parameters, schema, readPaths, isBootstrappedTable, globPaths, metaClient)
|
||||
} else if (parameters(QUERY_TYPE_OPT_KEY).equals(QUERY_TYPE_INCREMENTAL_OPT_VAL)) {
|
||||
val metaClient = new HoodieTableMetaClient(fs.getConf, tablePath)
|
||||
val metaClient = HoodieTableMetaClient.builder().setConf(fs.getConf).setBasePath(tablePath).build()
|
||||
if (metaClient.getTableType.equals(HoodieTableType.MERGE_ON_READ)) {
|
||||
new MergeOnReadIncrementalRelation(sqlContext, optParams, schema, metaClient)
|
||||
} else {
|
||||
@@ -202,8 +202,8 @@ class DefaultSource extends RelationProvider
|
||||
if (path.isEmpty || path.get == null) {
|
||||
throw new HoodieException(s"'path' must be specified.")
|
||||
}
|
||||
val metaClient = new HoodieTableMetaClient(
|
||||
sqlContext.sparkSession.sessionState.newHadoopConf(), path.get)
|
||||
val metaClient = HoodieTableMetaClient.builder().setConf(
|
||||
sqlContext.sparkSession.sessionState.newHadoopConf()).setBasePath(path.get).build()
|
||||
val schemaResolver = new TableSchemaResolver(metaClient)
|
||||
val sqlSchema =
|
||||
try {
|
||||
|
||||
@@ -500,7 +500,8 @@ private[hudi] object HoodieSparkSqlWriter {
|
||||
hoodieTableConfigOpt: Option[HoodieTableConfig]): HoodieTableConfig = {
|
||||
if (tableExists) {
|
||||
hoodieTableConfigOpt.getOrElse(
|
||||
new HoodieTableMetaClient(sparkContext.hadoopConfiguration, tablePath).getTableConfig)
|
||||
HoodieTableMetaClient.builder().setConf(sparkContext.hadoopConfiguration).setBasePath(tablePath)
|
||||
.build().getTableConfig)
|
||||
} else {
|
||||
null
|
||||
}
|
||||
|
||||
@@ -175,8 +175,8 @@ class HoodieStreamingSink(sqlContext: SQLContext,
|
||||
}))
|
||||
|
||||
// First time, scan .hoodie folder and get all pending compactions
|
||||
val metaClient = new HoodieTableMetaClient(sqlContext.sparkContext.hadoopConfiguration,
|
||||
client.getConfig.getBasePath)
|
||||
val metaClient = HoodieTableMetaClient.builder().setConf(sqlContext.sparkContext.hadoopConfiguration)
|
||||
.setBasePath(client.getConfig.getBasePath).build()
|
||||
val pendingInstants :java.util.List[HoodieInstant] =
|
||||
CompactionUtils.getPendingCompactionInstantTimes(metaClient)
|
||||
pendingInstants.foreach((h : HoodieInstant) => asyncCompactorService.enqueuePendingCompaction(h))
|
||||
|
||||
@@ -59,7 +59,7 @@ class HoodieStreamSource(
|
||||
val fs = path.getFileSystem(hadoopConf)
|
||||
TablePathUtils.getTablePath(fs, path).get()
|
||||
}
|
||||
private lazy val metaClient = new HoodieTableMetaClient(hadoopConf, tablePath.toString)
|
||||
private lazy val metaClient = HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(tablePath.toString).build()
|
||||
private lazy val tableType = metaClient.getTableType
|
||||
|
||||
@transient private var lastOffset: HoodieSourceOffset = _
|
||||
|
||||
@@ -187,7 +187,7 @@ public class HoodieJavaStreamingApp {
|
||||
executor.shutdownNow();
|
||||
}
|
||||
|
||||
HoodieTableMetaClient metaClient = new HoodieTableMetaClient(jssc.hadoopConfiguration(), tablePath);
|
||||
HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder().setConf(jssc.hadoopConfiguration()).setBasePath(tablePath).build();
|
||||
if (tableType.equals(HoodieTableType.MERGE_ON_READ.name())) {
|
||||
// Ensure we have successfully completed one compaction commit
|
||||
ValidationUtils.checkArgument(metaClient.getActiveTimeline().getCommitTimeline().getInstants().count() == 1);
|
||||
@@ -249,7 +249,7 @@ public class HoodieJavaStreamingApp {
|
||||
if (timeline.countInstants() >= numCommits) {
|
||||
return;
|
||||
}
|
||||
HoodieTableMetaClient metaClient = new HoodieTableMetaClient(fs.getConf(), tablePath, true);
|
||||
HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder().setConf(fs.getConf()).setBasePath(tablePath).setLoadActiveTimelineOnLoad(true).build();
|
||||
System.out.println("Instants :" + metaClient.getActiveTimeline().getInstants().collect(Collectors.toList()));
|
||||
} catch (TableNotFoundException te) {
|
||||
LOG.info("Got table not found exception. Retrying");
|
||||
|
||||
@@ -210,7 +210,8 @@ class TestCOWDataSource extends HoodieClientTestBase {
|
||||
.mode(SaveMode.Append)
|
||||
.save(basePath)
|
||||
|
||||
val metaClient = new HoodieTableMetaClient(spark.sparkContext.hadoopConfiguration, basePath, true)
|
||||
val metaClient = HoodieTableMetaClient.builder().setConf(spark.sparkContext.hadoopConfiguration).setBasePath(basePath)
|
||||
.setLoadActiveTimelineOnLoad(true).build();
|
||||
val commits = metaClient.getActiveTimeline.filterCompletedInstants().getInstants.toArray
|
||||
.map(instant => (instant.asInstanceOf[HoodieInstant]).getAction)
|
||||
assertEquals(2, commits.size)
|
||||
@@ -235,7 +236,8 @@ class TestCOWDataSource extends HoodieClientTestBase {
|
||||
.mode(SaveMode.Overwrite)
|
||||
.save(basePath)
|
||||
|
||||
val metaClient = new HoodieTableMetaClient(spark.sparkContext.hadoopConfiguration, basePath, true)
|
||||
val metaClient = HoodieTableMetaClient.builder().setConf(spark.sparkContext.hadoopConfiguration).setBasePath(basePath)
|
||||
.setLoadActiveTimelineOnLoad(true).build()
|
||||
val commits = metaClient.getActiveTimeline.filterCompletedInstants().getInstants.toArray
|
||||
.map(instant => (instant.asInstanceOf[HoodieInstant]).getAction)
|
||||
assertEquals(2, commits.size)
|
||||
@@ -289,7 +291,8 @@ class TestCOWDataSource extends HoodieClientTestBase {
|
||||
val filterSecondPartitionCount = recordsForPartitionColumn.filter(row => row.get(0).equals(HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH)).size
|
||||
assertEquals(7, filterSecondPartitionCount)
|
||||
|
||||
val metaClient = new HoodieTableMetaClient(spark.sparkContext.hadoopConfiguration, basePath, true)
|
||||
val metaClient = HoodieTableMetaClient.builder().setConf(spark.sparkContext.hadoopConfiguration).setBasePath(basePath)
|
||||
.setLoadActiveTimelineOnLoad(true).build()
|
||||
val commits = metaClient.getActiveTimeline.filterCompletedInstants().getInstants.toArray
|
||||
.map(instant => instant.asInstanceOf[HoodieInstant].getAction)
|
||||
assertEquals(3, commits.size)
|
||||
@@ -339,7 +342,8 @@ class TestCOWDataSource extends HoodieClientTestBase {
|
||||
val filterSecondPartitionCount = recordsForPartitionColumn.filter(row => row.get(0).equals(HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH)).size
|
||||
assertEquals(7, filterSecondPartitionCount)
|
||||
|
||||
val metaClient = new HoodieTableMetaClient(spark.sparkContext.hadoopConfiguration, basePath, true)
|
||||
val metaClient = HoodieTableMetaClient.builder().setConf(spark.sparkContext.hadoopConfiguration).setBasePath(basePath)
|
||||
.setLoadActiveTimelineOnLoad(true).build()
|
||||
val commits = metaClient.getActiveTimeline.filterCompletedInstants().getInstants.toArray
|
||||
.map(instant => instant.asInstanceOf[HoodieInstant].getAction)
|
||||
assertEquals(2, commits.size)
|
||||
|
||||
@@ -177,7 +177,8 @@ class TestStructuredStreaming extends HoodieClientTestBase {
|
||||
numInstants = timeline.countInstants
|
||||
success = true
|
||||
}
|
||||
val metaClient = new HoodieTableMetaClient(fs.getConf, tablePath, true)
|
||||
val metaClient = HoodieTableMetaClient.builder().setConf(fs.getConf).setBasePath(tablePath)
|
||||
.setLoadActiveTimelineOnLoad(true).build()
|
||||
} catch {
|
||||
case te: TableNotFoundException =>
|
||||
log.info("Got table not found exception. Retrying")
|
||||
@@ -253,12 +254,14 @@ class TestStructuredStreaming extends HoodieClientTestBase {
|
||||
if (HoodieDataSourceHelpers.allCompletedCommitsCompactions(fs, destPath).getCompletedReplaceTimeline().countInstants() > 0) {
|
||||
assertEquals(3, HoodieDataSourceHelpers.listCommitsSince(fs, destPath, "000").size())
|
||||
// check have at least one file group
|
||||
this.metaClient = new HoodieTableMetaClient(fs.getConf, destPath, true)
|
||||
this.metaClient = HoodieTableMetaClient.builder().setConf(fs.getConf).setBasePath(destPath)
|
||||
.setLoadActiveTimelineOnLoad(true).build()
|
||||
assertTrue(getLatestFileGroupsFileId(partitionOfRecords).size > 0)
|
||||
} else {
|
||||
assertEquals(currNumCommits, HoodieDataSourceHelpers.listCommitsSince(fs, destPath, "000").size())
|
||||
// check have more than one file group
|
||||
this.metaClient = new HoodieTableMetaClient(fs.getConf, destPath, true)
|
||||
this.metaClient = HoodieTableMetaClient.builder().setConf(fs.getConf).setBasePath(destPath)
|
||||
.setLoadActiveTimelineOnLoad(true).build()
|
||||
assertTrue(getLatestFileGroupsFileId(partitionOfRecords).size > 1)
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user