[HUDI-508] Standardizing on "Table" instead of "Dataset" across code (#1197)

- Docs were talking about storage types before, cWiki moved to "Table" - Most of code already has HoodieTable, HoodieTableMetaClient - correct naming - Replacing renaming use of dataset across code/comments - Few usages in comments and use of Spark SQL DataSet remain unscathed
2020-01-07 12:52:32 -08:00
parent 8306f749a2
commit 9706f659db
73 changed files with 298 additions and 298 deletions
--- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieParquetInputFormat.java
+++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieParquetInputFormat.java
@@ -25,9 +25,9 @@ import org.apache.hudi.common.table.HoodieTimeline;
 import org.apache.hudi.common.table.TableFileSystemView.ReadOptimizedView;
 import org.apache.hudi.common.table.timeline.HoodieInstant;
 import org.apache.hudi.common.table.view.HoodieTableFileSystemView;
-import org.apache.hudi.exception.DatasetNotFoundException;
+import org.apache.hudi.exception.TableNotFoundException;
 import org.apache.hudi.exception.HoodieIOException;
-import org.apache.hudi.exception.InvalidDatasetException;
+import org.apache.hudi.exception.InvalidTableException;

 import org.apache.hadoop.conf.Configurable;
 import org.apache.hadoop.conf.Configuration;
@@ -54,8 +54,8 @@ import java.util.stream.Collectors;

 /**
 * HoodieInputFormat which understands the Hoodie File Structure and filters files based on the Hoodie Mode. If paths
- * that does not correspond to a hoodie dataset then they are passed in as is (as what FileInputFormat.listStatus()
- * would do). The JobConf could have paths from multipe Hoodie/Non-Hoodie datasets
+ * that does not correspond to a hoodie table then they are passed in as is (as what FileInputFormat.listStatus()
+ * would do). The JobConf could have paths from multipe Hoodie/Non-Hoodie tables
 */
@UseFileSplitsFromInputFormat
 public class HoodieParquetInputFormat extends MapredParquetInputFormat implements Configurable {
@@ -159,7 +159,7 @@ public class HoodieParquetInputFormat extends MapredParquetInputFormat implement
        try {
          metadata = getTableMetaClient(status.getPath().getFileSystem(conf), status.getPath().getParent());
          nonHoodieBasePath = null;
-        } catch (DatasetNotFoundException | InvalidDatasetException e) {
+        } catch (TableNotFoundException | InvalidTableException e) {
          LOG.info("Handling a non-hoodie path " + status.getPath());
          metadata = null;
          nonHoodieBasePath = status.getPath().getParent().toString();
--- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieROTablePathFilter.java
+++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieROTablePathFilter.java
@@ -22,7 +22,7 @@ import org.apache.hudi.common.model.HoodieDataFile;
 import org.apache.hudi.common.model.HoodiePartitionMetadata;
 import org.apache.hudi.common.table.HoodieTableMetaClient;
 import org.apache.hudi.common.table.view.HoodieTableFileSystemView;
-import org.apache.hudi.exception.DatasetNotFoundException;
+import org.apache.hudi.exception.TableNotFoundException;
 import org.apache.hudi.exception.HoodieException;

 import org.apache.hadoop.conf.Configuration;
@@ -39,11 +39,11 @@ import java.util.List;
 import java.util.stream.Collectors;

 /**
- * Given a path is a part of - Hoodie dataset = accepts ONLY the latest version of each path - Non-Hoodie dataset = then
+ * Given a path is a part of - Hoodie table = accepts ONLY the latest version of each path - Non-Hoodie table = then
 * always accept
 * <p>
 * We can set this filter, on a query engine's Hadoop Config and if it respects path filters, then you should be able to
- * query both hoodie and non-hoodie datasets as you would normally do.
+ * query both hoodie and non-hoodie tables as you would normally do.
 * <p>
 * hadoopConf.setClass("mapreduce.input.pathFilter.class", org.apache.hudi.hadoop .HoodieROTablePathFilter.class,
 * org.apache.hadoop.fs.PathFilter.class)
@@ -59,7 +59,7 @@ public class HoodieROTablePathFilter implements PathFilter, Serializable {
  private HashMap<String, HashSet<Path>> hoodiePathCache;

  /**
-   * Paths that are known to be non-hoodie datasets.
+   * Paths that are known to be non-hoodie tables.
   */
  private HashSet<String> nonHoodiePathCache;

@@ -156,7 +156,7 @@ public class HoodieROTablePathFilter implements PathFilter, Serializable {
                hoodiePathCache.get(folder.toString()).contains(path)));
          }
          return hoodiePathCache.get(folder.toString()).contains(path);
-        } catch (DatasetNotFoundException e) {
+        } catch (TableNotFoundException e) {
          // Non-hoodie path, accept it.
          if (LOG.isDebugEnabled()) {
            LOG.debug(String.format("(1) Caching non-hoodie path under %s \n", folder.toString()));
--- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieParquetRealtimeInputFormat.java
+++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieParquetRealtimeInputFormat.java
@@ -60,7 +60,7 @@ import java.util.stream.Collectors;
 import java.util.stream.Stream;

 /**
- * Input Format, that provides a real-time view of data in a Hoodie dataset.
+ * Input Format, that provides a real-time view of data in a Hoodie table.
 */
@UseFileSplitsFromInputFormat
 public class HoodieParquetRealtimeInputFormat extends HoodieParquetInputFormat implements Configurable {