[HUDI-508] Standardizing on "Table" instead of "Dataset" across code (#1197)
- Docs were talking about storage types before, cWiki moved to "Table" - Most of code already has HoodieTable, HoodieTableMetaClient - correct naming - Replacing remaining uses of dataset across code/comments - Few usages in comments and use of Spark SQL Dataset remain unscathed
This commit is contained in:
@@ -28,7 +28,7 @@ import org.apache.hudi.common.util.TypedProperties;
|
||||
import org.apache.hudi.config.HoodieCompactionConfig;
|
||||
import org.apache.hudi.config.HoodieIndexConfig;
|
||||
import org.apache.hudi.config.HoodieWriteConfig;
|
||||
import org.apache.hudi.exception.DatasetNotFoundException;
|
||||
import org.apache.hudi.exception.TableNotFoundException;
|
||||
import org.apache.hudi.exception.HoodieException;
|
||||
import org.apache.hudi.exception.HoodieNotSupportedException;
|
||||
import org.apache.hudi.hive.HiveSyncConfig;
|
||||
@@ -186,8 +186,8 @@ public class DataSourceUtils {
|
||||
client = new HoodieReadClient<>(jssc, writeConfig, timelineService);
|
||||
return client.tagLocation(incomingHoodieRecords)
|
||||
.filter(r -> !((HoodieRecord<HoodieRecordPayload>) r).isCurrentLocationKnown());
|
||||
} catch (DatasetNotFoundException e) {
|
||||
// this will be executed when there is no hoodie dataset yet
|
||||
} catch (TableNotFoundException e) {
|
||||
// this will be executed when there is no hoodie table yet
|
||||
// so no dups to drop
|
||||
return incomingHoodieRecords;
|
||||
} finally {
|
||||
|
||||
@@ -36,7 +36,7 @@ import java.util.stream.Collectors;
|
||||
public class HoodieDataSourceHelpers {
|
||||
|
||||
/**
|
||||
* Checks if the Hoodie dataset has new data since given timestamp. This can be subsequently fed to an incremental
|
||||
* Checks if the Hoodie table has new data since given timestamp. This can be subsequently fed to an incremental
|
||||
* view read, to perform incremental processing.
|
||||
*/
|
||||
public static boolean hasNewCommits(FileSystem fs, String basePath, String commitTimestamp) {
|
||||
|
||||
@@ -38,7 +38,7 @@ import java.io.IOException;
|
||||
* - For updates against the source table, records contain full after image with `Op=U`
|
||||
* - For deletes against the source table, records contain full before image with `Op=D`
|
||||
*
|
||||
* This payload implementation will issue matching insert, delete, updates against the hudi dataset
|
||||
* This payload implementation will issue matching insert, delete, updates against the hudi table
|
||||
*
|
||||
*/
|
||||
public class AWSDmsAvroPayload extends OverwriteWithLatestAvroPayload {
|
||||
|
||||
@@ -26,7 +26,7 @@ import org.apache.hudi.hive.SlashEncodedDayPartitionValueExtractor
|
||||
*/
|
||||
|
||||
/**
|
||||
* Options supported for reading hoodie datasets.
|
||||
* Options supported for reading hoodie tables.
|
||||
*/
|
||||
object DataSourceReadOptions {
|
||||
/**
|
||||
@@ -73,7 +73,7 @@ object DataSourceReadOptions {
|
||||
}
|
||||
|
||||
/**
|
||||
* Options supported for writing hoodie datasets.
|
||||
* Options supported for writing hoodie tables.
|
||||
*/
|
||||
object DataSourceWriteOptions {
|
||||
/**
|
||||
@@ -100,7 +100,7 @@ object DataSourceWriteOptions {
|
||||
val DEFAULT_STORAGE_TYPE_OPT_VAL = COW_STORAGE_TYPE_OPT_VAL
|
||||
|
||||
/**
|
||||
* Hive table name, to register the dataset into.
|
||||
* Hive table name, to register the table into.
|
||||
*
|
||||
* Default: None (mandatory)
|
||||
*/
|
||||
|
||||
@@ -29,7 +29,7 @@ import org.apache.spark.sql.types.StructType
|
||||
import org.apache.spark.sql.{DataFrame, SQLContext, SaveMode}
|
||||
|
||||
/**
|
||||
* Hoodie Spark Datasource, for reading and writing hoodie datasets
|
||||
* Hoodie Spark Datasource, for reading and writing hoodie tables
|
||||
*
|
||||
*/
|
||||
class DefaultSource extends RelationProvider
|
||||
|
||||
@@ -105,19 +105,19 @@ private[hudi] object HoodieSparkSqlWriter {
|
||||
|
||||
// Handle various save modes
|
||||
if (mode == SaveMode.ErrorIfExists && exists) {
|
||||
throw new HoodieException(s"hoodie dataset at $basePath already exists.")
|
||||
throw new HoodieException(s"hoodie table at $basePath already exists.")
|
||||
}
|
||||
if (mode == SaveMode.Ignore && exists) {
|
||||
log.warn(s"hoodie dataset at $basePath already exists. Ignoring & not performing actual writes.")
|
||||
log.warn(s"hoodie table at $basePath already exists. Ignoring & not performing actual writes.")
|
||||
(true, common.util.Option.empty())
|
||||
}
|
||||
if (mode == SaveMode.Overwrite && exists) {
|
||||
log.warn(s"hoodie dataset at $basePath already exists. Deleting existing data & overwriting with new data.")
|
||||
log.warn(s"hoodie table at $basePath already exists. Deleting existing data & overwriting with new data.")
|
||||
fs.delete(basePath, true)
|
||||
exists = false
|
||||
}
|
||||
|
||||
// Create the dataset if not present
|
||||
// Create the table if not present
|
||||
if (!exists) {
|
||||
HoodieTableMetaClient.initTableType(sparkContext.hadoopConfiguration, path.get, storageType,
|
||||
tblName.get, "archived")
|
||||
@@ -164,7 +164,7 @@ private[hudi] object HoodieSparkSqlWriter {
|
||||
val hoodieKeysToDelete = genericRecords.map(gr => keyGenerator.getKey(gr)).toJavaRDD()
|
||||
|
||||
if (!exists) {
|
||||
throw new HoodieException(s"hoodie dataset at $basePath does not exist")
|
||||
throw new HoodieException(s"hoodie table at $basePath does not exist")
|
||||
}
|
||||
|
||||
// Create a HoodieWriteClient & issue the delete.
|
||||
|
||||
@@ -48,9 +48,9 @@ class IncrementalRelation(val sqlContext: SQLContext,
|
||||
|
||||
val fs = new Path(basePath).getFileSystem(sqlContext.sparkContext.hadoopConfiguration)
|
||||
val metaClient = new HoodieTableMetaClient(sqlContext.sparkContext.hadoopConfiguration, basePath, true)
|
||||
// MOR datasets not supported yet
|
||||
// MOR tables not supported yet
|
||||
if (metaClient.getTableType.equals(HoodieTableType.MERGE_ON_READ)) {
|
||||
throw new HoodieException("Incremental view not implemented yet, for merge-on-read datasets")
|
||||
throw new HoodieException("Incremental view not implemented yet, for merge-on-read tables")
|
||||
}
|
||||
// TODO : Figure out a valid HoodieWriteConfig
|
||||
val hoodieTable = HoodieTable.getHoodieTable(metaClient, HoodieWriteConfig.newBuilder().withPath(basePath).build(),
|
||||
|
||||
@@ -45,7 +45,7 @@ import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Sample program that writes & reads hoodie datasets via the Spark datasource.
|
||||
* Sample program that writes & reads hoodie tables via the Spark datasource.
|
||||
*/
|
||||
public class HoodieJavaApp {
|
||||
|
||||
|
||||
@@ -45,7 +45,7 @@ import java.util.concurrent.Executors;
|
||||
import java.util.concurrent.Future;
|
||||
|
||||
/**
|
||||
* Sample program that writes & reads hoodie datasets via the Spark datasource streaming.
|
||||
* Sample program that writes & reads hoodie tables via the Spark datasource streaming.
|
||||
*/
|
||||
public class HoodieJavaStreamingApp {
|
||||
|
||||
|
||||
Reference in New Issue
Block a user