[HUDI-508] Standardizing on "Table" instead of "Dataset" across code (#1197)
- Docs were talking about storage types before, cWiki moved to "Table" - Most of code already has HoodieTable, HoodieTableMetaClient - correct naming - Replacing remaining uses of dataset across code/comments - Few usages in comments and use of Spark SQL Dataset remain unscathed
This commit is contained in:
@@ -28,7 +28,7 @@ import org.apache.hudi.common.util.TypedProperties;
|
||||
import org.apache.hudi.config.HoodieCompactionConfig;
|
||||
import org.apache.hudi.config.HoodieIndexConfig;
|
||||
import org.apache.hudi.config.HoodieWriteConfig;
|
||||
import org.apache.hudi.exception.DatasetNotFoundException;
|
||||
import org.apache.hudi.exception.TableNotFoundException;
|
||||
import org.apache.hudi.exception.HoodieException;
|
||||
import org.apache.hudi.exception.HoodieNotSupportedException;
|
||||
import org.apache.hudi.hive.HiveSyncConfig;
|
||||
@@ -186,8 +186,8 @@ public class DataSourceUtils {
|
||||
client = new HoodieReadClient<>(jssc, writeConfig, timelineService);
|
||||
return client.tagLocation(incomingHoodieRecords)
|
||||
.filter(r -> !((HoodieRecord<HoodieRecordPayload>) r).isCurrentLocationKnown());
|
||||
} catch (DatasetNotFoundException e) {
|
||||
// this will be executed when there is no hoodie dataset yet
|
||||
} catch (TableNotFoundException e) {
|
||||
// this will be executed when there is no hoodie table yet
|
||||
// so no dups to drop
|
||||
return incomingHoodieRecords;
|
||||
} finally {
|
||||
|
||||
@@ -36,7 +36,7 @@ import java.util.stream.Collectors;
|
||||
public class HoodieDataSourceHelpers {
|
||||
|
||||
/**
|
||||
* Checks if the Hoodie dataset has new data since given timestamp. This can be subsequently fed to an incremental
|
||||
* Checks if the Hoodie table has new data since given timestamp. This can be subsequently fed to an incremental
|
||||
* view read, to perform incremental processing.
|
||||
*/
|
||||
public static boolean hasNewCommits(FileSystem fs, String basePath, String commitTimestamp) {
|
||||
|
||||
@@ -38,7 +38,7 @@ import java.io.IOException;
|
||||
* - For updates against the source table, records contain full after image with `Op=U`
|
||||
* - For deletes against the source table, records contain full before image with `Op=D`
|
||||
*
|
||||
* This payload implementation will issue matching insert, delete, updates against the hudi dataset
|
||||
* This payload implementation will issue matching insert, delete, updates against the hudi table
|
||||
*
|
||||
*/
|
||||
public class AWSDmsAvroPayload extends OverwriteWithLatestAvroPayload {
|
||||
|
||||
@@ -26,7 +26,7 @@ import org.apache.hudi.hive.SlashEncodedDayPartitionValueExtractor
|
||||
*/
|
||||
|
||||
/**
|
||||
* Options supported for reading hoodie datasets.
|
||||
* Options supported for reading hoodie tables.
|
||||
*/
|
||||
object DataSourceReadOptions {
|
||||
/**
|
||||
@@ -73,7 +73,7 @@ object DataSourceReadOptions {
|
||||
}
|
||||
|
||||
/**
|
||||
* Options supported for writing hoodie datasets.
|
||||
* Options supported for writing hoodie tables.
|
||||
*/
|
||||
object DataSourceWriteOptions {
|
||||
/**
|
||||
@@ -100,7 +100,7 @@ object DataSourceWriteOptions {
|
||||
val DEFAULT_STORAGE_TYPE_OPT_VAL = COW_STORAGE_TYPE_OPT_VAL
|
||||
|
||||
/**
|
||||
* Hive table name, to register the dataset into.
|
||||
* Hive table name, to register the table into.
|
||||
*
|
||||
* Default: None (mandatory)
|
||||
*/
|
||||
|
||||
@@ -29,7 +29,7 @@ import org.apache.spark.sql.types.StructType
|
||||
import org.apache.spark.sql.{DataFrame, SQLContext, SaveMode}
|
||||
|
||||
/**
|
||||
* Hoodie Spark Datasource, for reading and writing hoodie datasets
|
||||
* Hoodie Spark Datasource, for reading and writing hoodie tables
|
||||
*
|
||||
*/
|
||||
class DefaultSource extends RelationProvider
|
||||
|
||||
@@ -105,19 +105,19 @@ private[hudi] object HoodieSparkSqlWriter {
|
||||
|
||||
// Handle various save modes
|
||||
if (mode == SaveMode.ErrorIfExists && exists) {
|
||||
throw new HoodieException(s"hoodie dataset at $basePath already exists.")
|
||||
throw new HoodieException(s"hoodie table at $basePath already exists.")
|
||||
}
|
||||
if (mode == SaveMode.Ignore && exists) {
|
||||
log.warn(s"hoodie dataset at $basePath already exists. Ignoring & not performing actual writes.")
|
||||
log.warn(s"hoodie table at $basePath already exists. Ignoring & not performing actual writes.")
|
||||
(true, common.util.Option.empty())
|
||||
}
|
||||
if (mode == SaveMode.Overwrite && exists) {
|
||||
log.warn(s"hoodie dataset at $basePath already exists. Deleting existing data & overwriting with new data.")
|
||||
log.warn(s"hoodie table at $basePath already exists. Deleting existing data & overwriting with new data.")
|
||||
fs.delete(basePath, true)
|
||||
exists = false
|
||||
}
|
||||
|
||||
// Create the dataset if not present
|
||||
// Create the table if not present
|
||||
if (!exists) {
|
||||
HoodieTableMetaClient.initTableType(sparkContext.hadoopConfiguration, path.get, storageType,
|
||||
tblName.get, "archived")
|
||||
@@ -164,7 +164,7 @@ private[hudi] object HoodieSparkSqlWriter {
|
||||
val hoodieKeysToDelete = genericRecords.map(gr => keyGenerator.getKey(gr)).toJavaRDD()
|
||||
|
||||
if (!exists) {
|
||||
throw new HoodieException(s"hoodie dataset at $basePath does not exist")
|
||||
throw new HoodieException(s"hoodie table at $basePath does not exist")
|
||||
}
|
||||
|
||||
// Create a HoodieWriteClient & issue the delete.
|
||||
|
||||
@@ -48,9 +48,9 @@ class IncrementalRelation(val sqlContext: SQLContext,
|
||||
|
||||
val fs = new Path(basePath).getFileSystem(sqlContext.sparkContext.hadoopConfiguration)
|
||||
val metaClient = new HoodieTableMetaClient(sqlContext.sparkContext.hadoopConfiguration, basePath, true)
|
||||
// MOR datasets not supported yet
|
||||
// MOR tables not supported yet
|
||||
if (metaClient.getTableType.equals(HoodieTableType.MERGE_ON_READ)) {
|
||||
throw new HoodieException("Incremental view not implemented yet, for merge-on-read datasets")
|
||||
throw new HoodieException("Incremental view not implemented yet, for merge-on-read tables")
|
||||
}
|
||||
// TODO : Figure out a valid HoodieWriteConfig
|
||||
val hoodieTable = HoodieTable.getHoodieTable(metaClient, HoodieWriteConfig.newBuilder().withPath(basePath).build(),
|
||||
|
||||
@@ -45,7 +45,7 @@ import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Sample program that writes & reads hoodie datasets via the Spark datasource.
|
||||
* Sample program that writes & reads hoodie tables via the Spark datasource.
|
||||
*/
|
||||
public class HoodieJavaApp {
|
||||
|
||||
|
||||
@@ -45,7 +45,7 @@ import java.util.concurrent.Executors;
|
||||
import java.util.concurrent.Future;
|
||||
|
||||
/**
|
||||
* Sample program that writes & reads hoodie datasets via the Spark datasource streaming.
|
||||
* Sample program that writes & reads hoodie tables via the Spark datasource streaming.
|
||||
*/
|
||||
public class HoodieJavaStreamingApp {
|
||||
|
||||
|
||||
Reference in New Issue
Block a user