
[HUDI-508] Standardizing on "Table" instead of "Dataset" across code (#1197)

- Docs were talking about "storage types" before; the cWiki has moved to "Table"
- Most of the code already has HoodieTable, HoodieTableMetaClient - the correct naming
- Replacing remaining uses of "dataset" across code/comments
- A few usages in comments, and uses of Spark SQL Dataset, are left unscathed
vinoth chandar authored 2020-01-07 12:52:32 -08:00 (committed by GitHub)
parent 8306f749a2 · commit 9706f659db
73 changed files with 298 additions and 298 deletions

View File

@@ -30,7 +30,7 @@ import java.io.IOException;
/**
* This is a payload to wrap a existing Hoodie Avro Record. Useful to create a HoodieRecord over existing GenericRecords
- * in a hoodie datasets (useful in compactions)
+ * in a hoodie tables (useful in compactions)
*/
public class HoodieAvroPayload implements HoodieRecordPayload<HoodieAvroPayload> {

View File

@@ -28,7 +28,7 @@ import java.io.Serializable;
import java.util.Map;
/**
- * Every Hoodie dataset has an implementation of the <code>HoodieRecordPayload</code> This abstracts out callbacks which
+ * Every Hoodie table has an implementation of the <code>HoodieRecordPayload</code> This abstracts out callbacks which
* depend on record specific logic.
*/
public interface HoodieRecordPayload<T extends HoodieRecordPayload> extends Serializable {
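To make the payload contract concrete, here is a minimal sketch of a custom implementation that always prefers the incoming record. The method signatures are assumptions based on this era of the interface, and a production payload would typically hold serialized Avro bytes rather than a live record so it stays cheaply serializable:

import org.apache.avro.Schema;
import org.apache.avro.generic.IndexedRecord;
import org.apache.hudi.common.util.Option;
import java.io.IOException;

public class OverwriteLatestPayload implements HoodieRecordPayload<OverwriteLatestPayload> {

  private final IndexedRecord record;

  public OverwriteLatestPayload(IndexedRecord record) {
    this.record = record;
  }

  @Override
  public OverwriteLatestPayload preCombine(OverwriteLatestPayload another) {
    // Among multiple incoming records with the same key, keep this one.
    return this;
  }

  @Override
  public Option<IndexedRecord> combineAndGetUpdateValue(IndexedRecord currentValue, Schema schema)
      throws IOException {
    // Ignore what is already on storage and take the incoming record as-is.
    return getInsertValue(schema);
  }

  @Override
  public Option<IndexedRecord> getInsertValue(Schema schema) throws IOException {
    return Option.of(record);
  }
}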

View File

@@ -27,7 +27,7 @@ import java.util.HashMap;
import java.util.Map;
/**
- * This class holds statistics about files belonging to a dataset.
+ * This class holds statistics about files belonging to a table.
*/
public class HoodieRollingStatMetadata implements Serializable {

View File

@@ -41,7 +41,7 @@ import java.util.stream.Collectors;
/**
* Configurations on the Hoodie Table like type of ingestion, storage formats, hive table name etc Configurations are
* loaded from hoodie.properties, these properties are usually set during initializing a path as hoodie base path and
- * never changes during the lifetime of a hoodie dataset.
+ * never changes during the lifetime of a hoodie table.
*
* @see HoodieTableMetaClient
* @since 0.3.0
@@ -71,7 +71,7 @@ public class HoodieTableConfig implements Serializable {
public HoodieTableConfig(FileSystem fs, String metaPath) {
Properties props = new Properties();
Path propertyPath = new Path(metaPath, HOODIE_PROPERTIES_FILE);
LOG.info("Loading dataset properties from " + propertyPath);
LOG.info("Loading table properties from " + propertyPath);
try {
try (FSDataInputStream inputStream = fs.open(propertyPath)) {
props.load(inputStream);
@@ -144,7 +144,7 @@ public class HoodieTableConfig implements Serializable {
* Read the payload class for HoodieRecords from the table properties.
*/
public String getPayloadClass() {
- // There could be datasets written with payload class from com.uber.hoodie. Need to transparently
+ // There could be tables written with payload class from com.uber.hoodie. Need to transparently
// change to org.apache.hudi
return props.getProperty(HOODIE_PAYLOAD_CLASS_PROP_NAME, DEFAULT_PAYLOAD_CLASS).replace("com.uber.hoodie",
"org.apache.hudi");
@@ -182,7 +182,7 @@ public class HoodieTableConfig implements Serializable {
}
/**
- * Get the relative path of archive log folder under metafolder, for this dataset.
+ * Get the relative path of archive log folder under metafolder, for this table.
*/
public String getArchivelogFolder() {
return props.getProperty(HOODIE_ARCHIVELOG_FOLDER_PROP_NAME, DEFAULT_ARCHIVELOG_FOLDER);

View File

@@ -30,7 +30,7 @@ import org.apache.hudi.common.util.FSUtils;
import org.apache.hudi.common.util.FailSafeConsistencyGuard;
import org.apache.hudi.common.util.NoOpConsistencyGuard;
import org.apache.hudi.common.util.Option;
- import org.apache.hudi.exception.DatasetNotFoundException;
+ import org.apache.hudi.exception.TableNotFoundException;
import org.apache.hudi.exception.HoodieException;
import com.google.common.base.Preconditions;
@@ -84,7 +84,7 @@ public class HoodieTableMetaClient implements Serializable {
private HoodieArchivedTimeline archivedTimeline;
private ConsistencyGuardConfig consistencyGuardConfig = ConsistencyGuardConfig.newBuilder().build();
- public HoodieTableMetaClient(Configuration conf, String basePath) throws DatasetNotFoundException {
+ public HoodieTableMetaClient(Configuration conf, String basePath) throws TableNotFoundException {
// Do not load any timeline by default
this(conf, basePath, false);
}
@@ -96,7 +96,7 @@ public class HoodieTableMetaClient implements Serializable {
public HoodieTableMetaClient(Configuration conf, String basePath, boolean loadActiveTimelineOnLoad,
ConsistencyGuardConfig consistencyGuardConfig, Option<TimelineLayoutVersion> layoutVersion)
- throws DatasetNotFoundException {
+ throws TableNotFoundException {
LOG.info("Loading HoodieTableMetaClient from " + basePath);
this.basePath = basePath;
this.consistencyGuardConfig = consistencyGuardConfig;
@@ -105,7 +105,7 @@ public class HoodieTableMetaClient implements Serializable {
this.metaPath = new Path(basePath, METAFOLDER_NAME).toString();
Path metaPathDir = new Path(this.metaPath);
this.fs = getFs();
- DatasetNotFoundException.checkValidDataset(fs, basePathDir, metaPathDir);
+ TableNotFoundException.checkTableValidity(fs, basePathDir, metaPathDir);
this.tableConfig = new HoodieTableConfig(fs, metaPath);
this.tableType = tableConfig.getTableType();
this.timelineLayoutVersion = layoutVersion.orElse(tableConfig.getTimelineLayoutVersion());
@@ -281,7 +281,7 @@ public class HoodieTableMetaClient implements Serializable {
}
/**
- * Helper method to initialize a dataset, with given basePath, tableType, name, archiveFolder.
+ * Helper method to initialize a table, with given basePath, tableType, name, archiveFolder.
*/
public static HoodieTableMetaClient initTableType(Configuration hadoopConf, String basePath, String tableType,
String tableName, String archiveLogFolder) throws IOException {
@@ -314,17 +314,17 @@ public class HoodieTableMetaClient implements Serializable {
if (null != timelineLayoutVersion) {
properties.put(HoodieTableConfig.HOODIE_TIMELINE_LAYOUT_VERSION, String.valueOf(timelineLayoutVersion));
}
- return HoodieTableMetaClient.initDatasetAndGetMetaClient(hadoopConf, basePath, properties);
+ return HoodieTableMetaClient.initTableAndGetMetaClient(hadoopConf, basePath, properties);
}
/**
- * Helper method to initialize a given path as a hoodie dataset with configs passed in as as Properties.
+ * Helper method to initialize a given path as a hoodie table with configs passed in as as Properties.
*
* @return Instance of HoodieTableMetaClient
*/
- public static HoodieTableMetaClient initDatasetAndGetMetaClient(Configuration hadoopConf, String basePath,
+ public static HoodieTableMetaClient initTableAndGetMetaClient(Configuration hadoopConf, String basePath,
Properties props) throws IOException {
LOG.info("Initializing " + basePath + " as hoodie dataset " + basePath);
LOG.info("Initializing " + basePath + " as hoodie table " + basePath);
Path basePathDir = new Path(basePath);
final FileSystem fs = FSUtils.getFs(basePath, hadoopConf);
if (!fs.exists(basePathDir)) {
@@ -379,7 +379,7 @@ public class HoodieTableMetaClient implements Serializable {
return getActiveTimeline().getCommitTimeline();
case MERGE_ON_READ:
// We need to include the parquet files written out in delta commits
- // Include commit action to be able to start doing a MOR over a COW dataset - no
+ // Include commit action to be able to start doing a MOR over a COW table - no
// migration required
return getActiveTimeline().getCommitsTimeline();
default:
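A sketch of initializing a path as a hoodie table through the renamed helper, mirroring the HoodieTestUtils usage further down; the base path and table name are placeholders, and HoodieTableType is assumed to carry the COPY_ON_WRITE constant:

// Assumes org.apache.hadoop.conf.Configuration, java.util.Properties, and the
// org.apache.hudi.common table/config classes referenced below are imported.
Configuration hadoopConf = new Configuration();
Properties properties = new Properties();
properties.setProperty(HoodieTableConfig.HOODIE_TABLE_NAME_PROP_NAME, "trips");
properties.setProperty(HoodieTableConfig.HOODIE_TABLE_TYPE_PROP_NAME, HoodieTableType.COPY_ON_WRITE.name());
HoodieTableMetaClient metaClient =
    HoodieTableMetaClient.initTableAndGetMetaClient(hadoopConf, "/tmp/hoodie/trips", properties);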

View File

@@ -31,7 +31,7 @@ import java.util.function.Predicate;
import java.util.stream.Stream;
/**
- * HoodieTimeline is a view of meta-data instants in the hoodie dataset. Instants are specific points in time
+ * HoodieTimeline is a view of meta-data instants in the hoodie table. Instants are specific points in time
* represented as HoodieInstant.
* <p>
* Timelines are immutable once created and operations create new instance of timelines which filter on the instants and

View File

@@ -47,7 +47,7 @@ import java.util.stream.Collectors;
import java.util.stream.Stream;
/**
- * Represents the Active Timeline for the HoodieDataset. Instants for the last 12 hours (configurable) is in the
+ * Represents the Active Timeline for the Hoodie table. Instants for the last 12 hours (configurable) is in the
* ActiveTimeline and the rest are Archived. ActiveTimeline is a special timeline that allows for creation of instants
* on the timeline.
* <p>

View File

@@ -38,7 +38,7 @@ import java.util.function.Function;
import java.util.stream.Collectors;
/**
- * Represents the Archived Timeline for the HoodieDataset. Instants for the last 12 hours (configurable) is in the
+ * Represents the Archived Timeline for the Hoodie table. Instants for the last 12 hours (configurable) is in the
* ActiveTimeline and the rest are in ArchivedTimeline.
* <p>
* </p>

View File

@@ -30,7 +30,7 @@ import java.util.Map;
import java.util.Objects;
/**
- * A Hoodie Instant represents a action done on a hoodie dataset. All actions start with a inflight instant and then
+ * A Hoodie Instant represents a action done on a hoodie table. All actions start with a inflight instant and then
* create a completed instant after done.
*
* @see HoodieTimeline

View File

@@ -60,7 +60,7 @@ import java.util.stream.Stream;
/**
* Common thread-safe implementation for multiple TableFileSystemView Implementations. Provides uniform handling of (a)
* Loading file-system views from underlying file-system (b) Pending compaction operations and changing file-system
- * views based on that (c) Thread-safety in loading and managing file system views for this dataset. (d) resetting
+ * views based on that (c) Thread-safety in loading and managing file system views for this table. (d) resetting
* file-system views The actual mechanism of fetching file slices from different view storages is delegated to
* sub-classes.
*/

View File

@@ -30,16 +30,16 @@ import org.apache.log4j.Logger;
import java.util.concurrent.ConcurrentHashMap;
/**
- * A container that can potentially hold one or more dataset's file-system views. There is one view for each dataset.
+ * A container that can potentially hold one or more table's file-system views. There is one view for each table.
* This is a view built against a timeline containing completed actions. In an embedded timeline-server mode, this
- * typically holds only one dataset's view. In a stand-alone server mode, this can hold more than one dataset's views.
+ * typically holds only one table's view. In a stand-alone server mode, this can hold more than one table's views.
*
* FileSystemView can be stored "locally" using the following storage mechanisms: a. In Memory b. Spillable Map c.
* RocksDB
*
* But there can be cases where the file-system view is managed remoted. For example : Embedded Timeline Server). In
* this case, the clients will configure a remote filesystem view client (RemoteHoodieTableFileSystemView) for the
- * dataset which can connect to the remote file system view and fetch views. THere are 2 modes here : REMOTE_FIRST and
+ * table which can connect to the remote file system view and fetch views. THere are 2 modes here : REMOTE_FIRST and
* REMOTE_ONLY REMOTE_FIRST : The file-system view implementation on client side will act as a remote proxy. In case, if
* there is problem (or exceptions) querying remote file-system view, a backup local file-system view(using either one
* of in-memory, spillable, rocksDB) is used to server file-system view queries REMOTE_ONLY : In this case, there is no
@@ -102,11 +102,11 @@ public class FileSystemViewManager {
// FACTORY METHODS FOR CREATING FILE-SYSTEM VIEWS
/**
- * Create RocksDB based file System view for a dataset.
+ * Create RocksDB based file System view for a table.
*
* @param conf Hadoop Configuration
* @param viewConf View Storage Configuration
- * @param basePath Base Path of dataset
+ * @param basePath Base Path of table
* @return
*/
private static RocksDbBasedFileSystemView createRocksDBBasedFileSystemView(SerializableConfiguration conf,
@@ -117,11 +117,11 @@ public class FileSystemViewManager {
}
/**
- * Create a spillable Map based file System view for a dataset.
+ * Create a spillable Map based file System view for a table.
*
* @param conf Hadoop Configuration
* @param viewConf View Storage Configuration
- * @param basePath Base Path of dataset
+ * @param basePath Base Path of table
* @return
*/
private static SpillableMapBasedFileSystemView createSpillableMapBasedFileSystemView(SerializableConfiguration conf,
@@ -133,11 +133,11 @@ public class FileSystemViewManager {
}
/**
- * Create an in-memory file System view for a dataset.
+ * Create an in-memory file System view for a table.
*
* @param conf Hadoop Configuration
* @param viewConf View Storage Configuration
- * @param basePath Base Path of dataset
+ * @param basePath Base Path of table
* @return
*/
private static HoodieTableFileSystemView createInMemoryFileSystemView(SerializableConfiguration conf,
@@ -149,11 +149,11 @@ public class FileSystemViewManager {
}
/**
- * Create a remote file System view for a dataset.
+ * Create a remote file System view for a table.
*
* @param conf Hadoop Configuration
* @param viewConf View Storage Configuration
- * @param metaClient Hoodie Table MetaClient for the dataset.
+ * @param metaClient Hoodie Table MetaClient for the table.
* @return
*/
private static RemoteHoodieTableFileSystemView createRemoteFileSystemView(SerializableConfiguration conf,

View File

@@ -94,7 +94,7 @@ public class RemoteHoodieTableFileSystemView implements SyncableFileSystemView,
public static final String TIMELINE = String.format("%s/%s", BASE_URL, "timeline/instants/all");
// POST Requests
- public static final String REFRESH_DATASET = String.format("%s/%s", BASE_URL, "refresh/");
+ public static final String REFRESH_TABLE = String.format("%s/%s", BASE_URL, "refresh/");
public static final String PARTITION_PARAM = "partition";
public static final String BASEPATH_PARAM = "basepath";
@@ -381,7 +381,7 @@ public class RemoteHoodieTableFileSystemView implements SyncableFileSystemView,
public boolean refresh() {
Map<String, String> paramsMap = getParams();
try {
- return executeRequest(REFRESH_DATASET, paramsMap, new TypeReference<Boolean>() {}, RequestMethod.POST);
+ return executeRequest(REFRESH_TABLE, paramsMap, new TypeReference<Boolean>() {}, RequestMethod.POST);
} catch (IOException e) {
throw new HoodieRemoteException(e);
}
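The rename does not change the endpoint itself; a standalone illustration, with a placeholder since BASE_URL's value is not shown in this diff:

String baseUrl = "/v1/hoodie/view"; // placeholder; the real BASE_URL is defined elsewhere in this class
String refreshTable = String.format("%s/%s", baseUrl, "refresh/");
// refreshTable == "/v1/hoodie/view/refresh/", invoked as a POST by refresh() above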

View File

@@ -46,7 +46,7 @@ import java.util.stream.Collectors;
import java.util.stream.Stream;
/**
- * A file-system view implementation on top of embedded Rocks DB store. For each DataSet : 3 column Family is added for
+ * A file-system view implementation on top of embedded Rocks DB store. For each table : 3 column Family is added for
* storing (1) File-Slices and Data Files for View lookups (2) Pending compaction operations (3) Partitions tracked
*
* Fine-grained retrieval API to fetch latest file-slice and data-file which are common operations for
@@ -135,7 +135,7 @@ public class RocksDbBasedFileSystemView extends IncrementalTimelineSyncFileSyste
@Override
protected void resetViewState() {
LOG.info("Deleting all rocksdb data associated with dataset filesystem view");
LOG.info("Deleting all rocksdb data associated with table filesystem view");
rocksDB.close();
rocksDB = new RocksDBDAO(metaClient.getBasePath(), config.getRocksdbBasePath());
}

View File

@@ -29,7 +29,7 @@ import java.util.List;
/**
* Helper class to generate Key and column names for rocksdb based view
*
- * For RocksDB, 3 colFamilies are used for storing file-system view for each dataset. (a) View (b) Partitions Cached (c)
+ * For RocksDB, 3 colFamilies are used for storing file-system view for each table. (a) View (b) Partitions Cached (c)
* Pending Compactions
*
*

View File

@@ -22,7 +22,7 @@ import java.io.IOException;
/**
* <p>
- * Exception thrown for dataset IO-related failures.
+ * Exception thrown for table IO-related failures.
* </p>
*/
public class HoodieIOException extends HoodieException {

View File

@@ -19,15 +19,15 @@
package org.apache.hudi.exception;
/**
- * Exception thrown to indicate that a hoodie dataset is invalid.
+ * Exception thrown to indicate that a hoodie table is invalid.
*/
- public class InvalidDatasetException extends HoodieException {
+ public class InvalidTableException extends HoodieException {
- public InvalidDatasetException(String basePath) {
+ public InvalidTableException(String basePath) {
super(getErrorMessage(basePath));
}
private static String getErrorMessage(String basePath) {
return "Invalid Hoodie Dataset. " + basePath;
return "Invalid Hoodie Table. " + basePath;
}
}
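For example, based on the message construction above:

throw new InvalidTableException("/tmp/not-a-hoodie-table");
// propagates a HoodieException with message "Invalid Hoodie Table. /tmp/not-a-hoodie-table"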

View File

@@ -25,35 +25,35 @@ import java.io.IOException;
/**
* <p>
- * Exception thrown to indicate that a hoodie dataset was not found on the path provided.
+ * Exception thrown to indicate that a hoodie table was not found on the path provided.
* <p>
*/
- public class DatasetNotFoundException extends HoodieException {
+ public class TableNotFoundException extends HoodieException {
- public DatasetNotFoundException(String basePath) {
+ public TableNotFoundException(String basePath) {
super(getErrorMessage(basePath));
}
private static String getErrorMessage(String basePath) {
return "Hoodie dataset not found in path " + basePath;
return "Hoodie table not found in path " + basePath;
}
- public static void checkValidDataset(FileSystem fs, Path basePathDir, Path metaPathDir)
- throws DatasetNotFoundException {
+ public static void checkTableValidity(FileSystem fs, Path basePathDir, Path metaPathDir)
+ throws TableNotFoundException {
// Check if the base path is found
try {
if (!fs.exists(basePathDir) || !fs.isDirectory(basePathDir)) {
- throw new DatasetNotFoundException(basePathDir.toString());
+ throw new TableNotFoundException(basePathDir.toString());
}
// Check if the meta path is found
if (!fs.exists(metaPathDir) || !fs.isDirectory(metaPathDir)) {
- throw new DatasetNotFoundException(metaPathDir.toString());
+ throw new TableNotFoundException(metaPathDir.toString());
}
} catch (IllegalArgumentException e) {
// if the base path is file:///, then we have a IllegalArgumentException
- throw new DatasetNotFoundException(metaPathDir.toString());
+ throw new TableNotFoundException(metaPathDir.toString());
} catch (IOException e) {
throw new HoodieIOException("Could not check if dataset " + basePathDir + " is valid dataset", e);
throw new HoodieIOException("Could not check if " + basePathDir + " is a valid table", e);
}
}
}
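A sketch of how the renamed check guards meta-client construction; the paths are placeholders and the meta folder name is assumed to be the standard ".hoodie":

Configuration conf = new Configuration();
FileSystem fs = FSUtils.getFs("/tmp/hoodie/trips", conf);
Path basePathDir = new Path("/tmp/hoodie/trips");
Path metaPathDir = new Path("/tmp/hoodie/trips/.hoodie"); // assumed METAFOLDER_NAME
// Throws TableNotFoundException if either directory is missing or not a directory:
TableNotFoundException.checkTableValidity(fs, basePathDir, metaPathDir);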

View File

@@ -118,7 +118,7 @@ public class HoodieTestUtils {
properties.setProperty(HoodieTableConfig.HOODIE_TABLE_NAME_PROP_NAME, RAW_TRIPS_TEST_NAME);
properties.setProperty(HoodieTableConfig.HOODIE_TABLE_TYPE_PROP_NAME, tableType.name());
properties.setProperty(HoodieTableConfig.HOODIE_PAYLOAD_CLASS_PROP_NAME, HoodieAvroPayload.class.getName());
- return HoodieTableMetaClient.initDatasetAndGetMetaClient(hadoopConf, basePath, properties);
+ return HoodieTableMetaClient.initTableAndGetMetaClient(hadoopConf, basePath, properties);
}
public static String makeNewCommitTime() {

View File

@@ -353,7 +353,7 @@ public class TestIncrementalFSViewSync extends HoodieCommonTestHarness {
* @param isDeltaCommit is Delta Commit ?
* @param instantsToFiles List of files associated with each instant
* @param rolledBackInstants List of rolled-back instants
- * @param emptyRestoreInstant Restore instant at which dataset becomes empty
+ * @param emptyRestoreInstant Restore instant at which table becomes empty
*/
private void testRestore(SyncableFileSystemView view, List<String> newRestoreInstants, boolean isDeltaCommit,
Map<String, List<String>> instantsToFiles, List<String> rolledBackInstants, String emptyRestoreInstant,