1
0

[HUDI-508] Standardizing on "Table" instead of "Dataset" across code (#1197)

- Docs previously referred to storage types; the cWiki has moved to "Table"
 - Most of code already has HoodieTable, HoodieTableMetaClient - correct naming
 - Replacing remaining uses of "dataset" across code/comments
 - Few usages in comments and use of Spark SQL DataSet remain unscathed
This commit is contained in:
vinoth chandar
2020-01-07 12:52:32 -08:00
committed by GitHub
parent 8306f749a2
commit 9706f659db
73 changed files with 298 additions and 298 deletions

View File

@@ -28,7 +28,7 @@ import org.apache.hudi.common.util.TypedProperties;
import org.apache.hudi.config.HoodieCompactionConfig;
import org.apache.hudi.config.HoodieIndexConfig;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.exception.DatasetNotFoundException;
import org.apache.hudi.exception.TableNotFoundException;
import org.apache.hudi.exception.HoodieException;
import org.apache.hudi.exception.HoodieNotSupportedException;
import org.apache.hudi.hive.HiveSyncConfig;
@@ -186,8 +186,8 @@ public class DataSourceUtils {
client = new HoodieReadClient<>(jssc, writeConfig, timelineService);
return client.tagLocation(incomingHoodieRecords)
.filter(r -> !((HoodieRecord<HoodieRecordPayload>) r).isCurrentLocationKnown());
} catch (DatasetNotFoundException e) {
// this will be executed when there is no hoodie dataset yet
} catch (TableNotFoundException e) {
// this will be executed when there is no hoodie table yet
// so no dups to drop
return incomingHoodieRecords;
} finally {

View File

@@ -36,7 +36,7 @@ import java.util.stream.Collectors;
public class HoodieDataSourceHelpers {
/**
* Checks if the Hoodie dataset has new data since given timestamp. This can be subsequently fed to an incremental
* Checks if the Hoodie table has new data since given timestamp. This can be subsequently fed to an incremental
* view read, to perform incremental processing.
*/
public static boolean hasNewCommits(FileSystem fs, String basePath, String commitTimestamp) {

View File

@@ -38,7 +38,7 @@ import java.io.IOException;
* - For updates against the source table, records contain full after image with `Op=U`
* - For deletes against the source table, records contain full before image with `Op=D`
*
* This payload implementation will issue matching insert, delete, updates against the hudi dataset
* This payload implementation will issue matching insert, delete, updates against the hudi table
*
*/
public class AWSDmsAvroPayload extends OverwriteWithLatestAvroPayload {

View File

@@ -26,7 +26,7 @@ import org.apache.hudi.hive.SlashEncodedDayPartitionValueExtractor
*/
/**
* Options supported for reading hoodie datasets.
* Options supported for reading hoodie tables.
*/
object DataSourceReadOptions {
/**
@@ -73,7 +73,7 @@ object DataSourceReadOptions {
}
/**
* Options supported for writing hoodie datasets.
* Options supported for writing hoodie tables.
*/
object DataSourceWriteOptions {
/**
@@ -100,7 +100,7 @@ object DataSourceWriteOptions {
val DEFAULT_STORAGE_TYPE_OPT_VAL = COW_STORAGE_TYPE_OPT_VAL
/**
* Hive table name, to register the dataset into.
* Hive table name, to register the table into.
*
* Default: None (mandatory)
*/

View File

@@ -29,7 +29,7 @@ import org.apache.spark.sql.types.StructType
import org.apache.spark.sql.{DataFrame, SQLContext, SaveMode}
/**
* Hoodie Spark Datasource, for reading and writing hoodie datasets
* Hoodie Spark Datasource, for reading and writing hoodie tables
*
*/
class DefaultSource extends RelationProvider

View File

@@ -105,19 +105,19 @@ private[hudi] object HoodieSparkSqlWriter {
// Handle various save modes
if (mode == SaveMode.ErrorIfExists && exists) {
throw new HoodieException(s"hoodie dataset at $basePath already exists.")
throw new HoodieException(s"hoodie table at $basePath already exists.")
}
if (mode == SaveMode.Ignore && exists) {
log.warn(s"hoodie dataset at $basePath already exists. Ignoring & not performing actual writes.")
log.warn(s"hoodie table at $basePath already exists. Ignoring & not performing actual writes.")
(true, common.util.Option.empty())
}
if (mode == SaveMode.Overwrite && exists) {
log.warn(s"hoodie dataset at $basePath already exists. Deleting existing data & overwriting with new data.")
log.warn(s"hoodie table at $basePath already exists. Deleting existing data & overwriting with new data.")
fs.delete(basePath, true)
exists = false
}
// Create the dataset if not present
// Create the table if not present
if (!exists) {
HoodieTableMetaClient.initTableType(sparkContext.hadoopConfiguration, path.get, storageType,
tblName.get, "archived")
@@ -164,7 +164,7 @@ private[hudi] object HoodieSparkSqlWriter {
val hoodieKeysToDelete = genericRecords.map(gr => keyGenerator.getKey(gr)).toJavaRDD()
if (!exists) {
throw new HoodieException(s"hoodie dataset at $basePath does not exist")
throw new HoodieException(s"hoodie table at $basePath does not exist")
}
// Create a HoodieWriteClient & issue the delete.

View File

@@ -48,9 +48,9 @@ class IncrementalRelation(val sqlContext: SQLContext,
val fs = new Path(basePath).getFileSystem(sqlContext.sparkContext.hadoopConfiguration)
val metaClient = new HoodieTableMetaClient(sqlContext.sparkContext.hadoopConfiguration, basePath, true)
// MOR datasets not supported yet
// MOR tables not supported yet
if (metaClient.getTableType.equals(HoodieTableType.MERGE_ON_READ)) {
throw new HoodieException("Incremental view not implemented yet, for merge-on-read datasets")
throw new HoodieException("Incremental view not implemented yet, for merge-on-read tables")
}
// TODO : Figure out a valid HoodieWriteConfig
val hoodieTable = HoodieTable.getHoodieTable(metaClient, HoodieWriteConfig.newBuilder().withPath(basePath).build(),

View File

@@ -45,7 +45,7 @@ import java.util.ArrayList;
import java.util.List;
/**
* Sample program that writes & reads hoodie datasets via the Spark datasource.
* Sample program that writes & reads hoodie tables via the Spark datasource.
*/
public class HoodieJavaApp {

View File

@@ -45,7 +45,7 @@ import java.util.concurrent.Executors;
import java.util.concurrent.Future;
/**
* Sample program that writes & reads hoodie datasets via the Spark datasource streaming.
* Sample program that writes & reads hoodie tables via the Spark datasource streaming.
*/
public class HoodieJavaStreamingApp {