[HUDI-508] Standardizing on "Table" instead of "Dataset" across code (#1197)
- Docs were talking about storage types before; cWiki moved to "Table" - Most of the code already has HoodieTable, HoodieTableMetaClient - correct naming - Replacing/renaming uses of dataset across code/comments - A few usages in comments and uses of Spark SQL DataSet remain unscathed
This commit is contained in:
@@ -30,7 +30,7 @@ import java.io.IOException;
|
||||
|
||||
/**
|
||||
* This is a payload to wrap an existing Hoodie Avro Record. Useful to create a HoodieRecord over existing GenericRecords
|
||||
* in a hoodie datasets (useful in compactions)
|
||||
* in a hoodie table (useful in compactions)
|
||||
*/
|
||||
public class HoodieAvroPayload implements HoodieRecordPayload<HoodieAvroPayload> {
|
||||
|
||||
|
||||
@@ -28,7 +28,7 @@ import java.io.Serializable;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Every Hoodie dataset has an implementation of the <code>HoodieRecordPayload</code> This abstracts out callbacks which
|
||||
* Every Hoodie table has an implementation of the <code>HoodieRecordPayload</code>. This abstracts out callbacks which
|
||||
* depend on record specific logic.
|
||||
*/
|
||||
public interface HoodieRecordPayload<T extends HoodieRecordPayload> extends Serializable {
|
||||
|
||||
@@ -27,7 +27,7 @@ import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* This class holds statistics about files belonging to a dataset.
|
||||
* This class holds statistics about files belonging to a table.
|
||||
*/
|
||||
public class HoodieRollingStatMetadata implements Serializable {
|
||||
|
||||
|
||||
@@ -41,7 +41,7 @@ import java.util.stream.Collectors;
|
||||
/**
|
||||
* Configurations on the Hoodie Table like type of ingestion, storage formats, hive table name etc. Configurations are
|
||||
* loaded from hoodie.properties, these properties are usually set during initializing a path as hoodie base path and
|
||||
* never changes during the lifetime of a hoodie dataset.
|
||||
* never change during the lifetime of a hoodie table.
|
||||
*
|
||||
* @see HoodieTableMetaClient
|
||||
* @since 0.3.0
|
||||
@@ -71,7 +71,7 @@ public class HoodieTableConfig implements Serializable {
|
||||
public HoodieTableConfig(FileSystem fs, String metaPath) {
|
||||
Properties props = new Properties();
|
||||
Path propertyPath = new Path(metaPath, HOODIE_PROPERTIES_FILE);
|
||||
LOG.info("Loading dataset properties from " + propertyPath);
|
||||
LOG.info("Loading table properties from " + propertyPath);
|
||||
try {
|
||||
try (FSDataInputStream inputStream = fs.open(propertyPath)) {
|
||||
props.load(inputStream);
|
||||
@@ -144,7 +144,7 @@ public class HoodieTableConfig implements Serializable {
|
||||
* Read the payload class for HoodieRecords from the table properties.
|
||||
*/
|
||||
public String getPayloadClass() {
|
||||
// There could be datasets written with payload class from com.uber.hoodie. Need to transparently
|
||||
// There could be tables written with payload class from com.uber.hoodie. Need to transparently
|
||||
// change to org.apache.hudi
|
||||
return props.getProperty(HOODIE_PAYLOAD_CLASS_PROP_NAME, DEFAULT_PAYLOAD_CLASS).replace("com.uber.hoodie",
|
||||
"org.apache.hudi");
|
||||
@@ -182,7 +182,7 @@ public class HoodieTableConfig implements Serializable {
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the relative path of archive log folder under metafolder, for this dataset.
|
||||
* Get the relative path of archive log folder under metafolder, for this table.
|
||||
*/
|
||||
public String getArchivelogFolder() {
|
||||
return props.getProperty(HOODIE_ARCHIVELOG_FOLDER_PROP_NAME, DEFAULT_ARCHIVELOG_FOLDER);
|
||||
|
||||
@@ -30,7 +30,7 @@ import org.apache.hudi.common.util.FSUtils;
|
||||
import org.apache.hudi.common.util.FailSafeConsistencyGuard;
|
||||
import org.apache.hudi.common.util.NoOpConsistencyGuard;
|
||||
import org.apache.hudi.common.util.Option;
|
||||
import org.apache.hudi.exception.DatasetNotFoundException;
|
||||
import org.apache.hudi.exception.TableNotFoundException;
|
||||
import org.apache.hudi.exception.HoodieException;
|
||||
|
||||
import com.google.common.base.Preconditions;
|
||||
@@ -84,7 +84,7 @@ public class HoodieTableMetaClient implements Serializable {
|
||||
private HoodieArchivedTimeline archivedTimeline;
|
||||
private ConsistencyGuardConfig consistencyGuardConfig = ConsistencyGuardConfig.newBuilder().build();
|
||||
|
||||
public HoodieTableMetaClient(Configuration conf, String basePath) throws DatasetNotFoundException {
|
||||
public HoodieTableMetaClient(Configuration conf, String basePath) throws TableNotFoundException {
|
||||
// Do not load any timeline by default
|
||||
this(conf, basePath, false);
|
||||
}
|
||||
@@ -96,7 +96,7 @@ public class HoodieTableMetaClient implements Serializable {
|
||||
|
||||
public HoodieTableMetaClient(Configuration conf, String basePath, boolean loadActiveTimelineOnLoad,
|
||||
ConsistencyGuardConfig consistencyGuardConfig, Option<TimelineLayoutVersion> layoutVersion)
|
||||
throws DatasetNotFoundException {
|
||||
throws TableNotFoundException {
|
||||
LOG.info("Loading HoodieTableMetaClient from " + basePath);
|
||||
this.basePath = basePath;
|
||||
this.consistencyGuardConfig = consistencyGuardConfig;
|
||||
@@ -105,7 +105,7 @@ public class HoodieTableMetaClient implements Serializable {
|
||||
this.metaPath = new Path(basePath, METAFOLDER_NAME).toString();
|
||||
Path metaPathDir = new Path(this.metaPath);
|
||||
this.fs = getFs();
|
||||
DatasetNotFoundException.checkValidDataset(fs, basePathDir, metaPathDir);
|
||||
TableNotFoundException.checkTableValidity(fs, basePathDir, metaPathDir);
|
||||
this.tableConfig = new HoodieTableConfig(fs, metaPath);
|
||||
this.tableType = tableConfig.getTableType();
|
||||
this.timelineLayoutVersion = layoutVersion.orElse(tableConfig.getTimelineLayoutVersion());
|
||||
@@ -281,7 +281,7 @@ public class HoodieTableMetaClient implements Serializable {
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper method to initialize a dataset, with given basePath, tableType, name, archiveFolder.
|
||||
* Helper method to initialize a table, with given basePath, tableType, name, archiveFolder.
|
||||
*/
|
||||
public static HoodieTableMetaClient initTableType(Configuration hadoopConf, String basePath, String tableType,
|
||||
String tableName, String archiveLogFolder) throws IOException {
|
||||
@@ -314,17 +314,17 @@ public class HoodieTableMetaClient implements Serializable {
|
||||
if (null != timelineLayoutVersion) {
|
||||
properties.put(HoodieTableConfig.HOODIE_TIMELINE_LAYOUT_VERSION, String.valueOf(timelineLayoutVersion));
|
||||
}
|
||||
return HoodieTableMetaClient.initDatasetAndGetMetaClient(hadoopConf, basePath, properties);
|
||||
return HoodieTableMetaClient.initTableAndGetMetaClient(hadoopConf, basePath, properties);
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper method to initialize a given path as a hoodie dataset with configs passed in as as Properties.
|
||||
* Helper method to initialize a given path as a hoodie table with configs passed in as Properties.
|
||||
*
|
||||
* @return Instance of HoodieTableMetaClient
|
||||
*/
|
||||
public static HoodieTableMetaClient initDatasetAndGetMetaClient(Configuration hadoopConf, String basePath,
|
||||
public static HoodieTableMetaClient initTableAndGetMetaClient(Configuration hadoopConf, String basePath,
|
||||
Properties props) throws IOException {
|
||||
LOG.info("Initializing " + basePath + " as hoodie dataset " + basePath);
|
||||
LOG.info("Initializing " + basePath + " as hoodie table " + basePath);
|
||||
Path basePathDir = new Path(basePath);
|
||||
final FileSystem fs = FSUtils.getFs(basePath, hadoopConf);
|
||||
if (!fs.exists(basePathDir)) {
|
||||
@@ -379,7 +379,7 @@ public class HoodieTableMetaClient implements Serializable {
|
||||
return getActiveTimeline().getCommitTimeline();
|
||||
case MERGE_ON_READ:
|
||||
// We need to include the parquet files written out in delta commits
|
||||
// Include commit action to be able to start doing a MOR over a COW dataset - no
|
||||
// Include commit action to be able to start doing a MOR over a COW table - no
|
||||
// migration required
|
||||
return getActiveTimeline().getCommitsTimeline();
|
||||
default:
|
||||
|
||||
@@ -31,7 +31,7 @@ import java.util.function.Predicate;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
/**
|
||||
* HoodieTimeline is a view of meta-data instants in the hoodie dataset. Instants are specific points in time
|
||||
* HoodieTimeline is a view of meta-data instants in the hoodie table. Instants are specific points in time
|
||||
* represented as HoodieInstant.
|
||||
* <p>
|
||||
* Timelines are immutable once created and operations create new instance of timelines which filter on the instants and
|
||||
|
||||
@@ -47,7 +47,7 @@ import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
/**
|
||||
* Represents the Active Timeline for the HoodieDataset. Instants for the last 12 hours (configurable) is in the
|
||||
* Represents the Active Timeline for the Hoodie table. Instants for the last 12 hours (configurable) are in the
|
||||
* ActiveTimeline and the rest are Archived. ActiveTimeline is a special timeline that allows for creation of instants
|
||||
* on the timeline.
|
||||
* <p>
|
||||
|
||||
@@ -38,7 +38,7 @@ import java.util.function.Function;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
/**
|
||||
* Represents the Archived Timeline for the HoodieDataset. Instants for the last 12 hours (configurable) is in the
|
||||
* Represents the Archived Timeline for the Hoodie table. Instants for the last 12 hours (configurable) are in the
|
||||
* ActiveTimeline and the rest are in ArchivedTimeline.
|
||||
* <p>
|
||||
* </p>
|
||||
|
||||
@@ -30,7 +30,7 @@ import java.util.Map;
|
||||
import java.util.Objects;
|
||||
|
||||
/**
|
||||
* A Hoodie Instant represents a action done on a hoodie dataset. All actions start with a inflight instant and then
|
||||
* A Hoodie Instant represents an action done on a hoodie table. All actions start with an inflight instant and then
|
||||
* create a completed instant after done.
|
||||
*
|
||||
* @see HoodieTimeline
|
||||
|
||||
@@ -60,7 +60,7 @@ import java.util.stream.Stream;
|
||||
/**
|
||||
* Common thread-safe implementation for multiple TableFileSystemView Implementations. Provides uniform handling of (a)
|
||||
* Loading file-system views from underlying file-system (b) Pending compaction operations and changing file-system
|
||||
* views based on that (c) Thread-safety in loading and managing file system views for this dataset. (d) resetting
|
||||
* views based on that (c) Thread-safety in loading and managing file system views for this table. (d) resetting
|
||||
* file-system views The actual mechanism of fetching file slices from different view storages is delegated to
|
||||
* sub-classes.
|
||||
*/
|
||||
|
||||
@@ -30,16 +30,16 @@ import org.apache.log4j.Logger;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
|
||||
/**
|
||||
* A container that can potentially hold one or more dataset's file-system views. There is one view for each dataset.
|
||||
* A container that can potentially hold one or more tables' file-system views. There is one view for each table.
|
||||
* This is a view built against a timeline containing completed actions. In an embedded timeline-server mode, this
|
||||
* typically holds only one dataset's view. In a stand-alone server mode, this can hold more than one dataset's views.
|
||||
* typically holds only one table's view. In a stand-alone server mode, this can hold more than one table's views.
|
||||
*
|
||||
* FileSystemView can be stored "locally" using the following storage mechanisms: a. In Memory b. Spillable Map c.
|
||||
* RocksDB
|
||||
*
|
||||
* But there can be cases where the file-system view is managed remotely (for example: Embedded Timeline Server). In
|
||||
* this case, the clients will configure a remote filesystem view client (RemoteHoodieTableFileSystemView) for the
|
||||
* dataset which can connect to the remote file system view and fetch views. THere are 2 modes here : REMOTE_FIRST and
|
||||
* table which can connect to the remote file system view and fetch views. There are 2 modes here: REMOTE_FIRST and
|
||||
* REMOTE_ONLY. REMOTE_FIRST: The file-system view implementation on client side will act as a remote proxy. In case, if
|
||||
* there is problem (or exceptions) querying remote file-system view, a backup local file-system view(using either one
|
||||
* of in-memory, spillable, rocksDB) is used to serve file-system view queries. REMOTE_ONLY: In this case, there is no
|
||||
@@ -102,11 +102,11 @@ public class FileSystemViewManager {
|
||||
// FACTORY METHODS FOR CREATING FILE-SYSTEM VIEWS
|
||||
|
||||
/**
|
||||
* Create RocksDB based file System view for a dataset.
|
||||
* Create RocksDB based file System view for a table.
|
||||
*
|
||||
* @param conf Hadoop Configuration
|
||||
* @param viewConf View Storage Configuration
|
||||
* @param basePath Base Path of dataset
|
||||
* @param basePath Base Path of table
|
||||
* @return
|
||||
*/
|
||||
private static RocksDbBasedFileSystemView createRocksDBBasedFileSystemView(SerializableConfiguration conf,
|
||||
@@ -117,11 +117,11 @@ public class FileSystemViewManager {
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a spillable Map based file System view for a dataset.
|
||||
* Create a spillable Map based file System view for a table.
|
||||
*
|
||||
* @param conf Hadoop Configuration
|
||||
* @param viewConf View Storage Configuration
|
||||
* @param basePath Base Path of dataset
|
||||
* @param basePath Base Path of table
|
||||
* @return
|
||||
*/
|
||||
private static SpillableMapBasedFileSystemView createSpillableMapBasedFileSystemView(SerializableConfiguration conf,
|
||||
@@ -133,11 +133,11 @@ public class FileSystemViewManager {
|
||||
}
|
||||
|
||||
/**
|
||||
* Create an in-memory file System view for a dataset.
|
||||
* Create an in-memory file System view for a table.
|
||||
*
|
||||
* @param conf Hadoop Configuration
|
||||
* @param viewConf View Storage Configuration
|
||||
* @param basePath Base Path of dataset
|
||||
* @param basePath Base Path of table
|
||||
* @return
|
||||
*/
|
||||
private static HoodieTableFileSystemView createInMemoryFileSystemView(SerializableConfiguration conf,
|
||||
@@ -149,11 +149,11 @@ public class FileSystemViewManager {
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a remote file System view for a dataset.
|
||||
* Create a remote file System view for a table.
|
||||
*
|
||||
* @param conf Hadoop Configuration
|
||||
* @param viewConf View Storage Configuration
|
||||
* @param metaClient Hoodie Table MetaClient for the dataset.
|
||||
* @param metaClient Hoodie Table MetaClient for the table.
|
||||
* @return
|
||||
*/
|
||||
private static RemoteHoodieTableFileSystemView createRemoteFileSystemView(SerializableConfiguration conf,
|
||||
|
||||
@@ -94,7 +94,7 @@ public class RemoteHoodieTableFileSystemView implements SyncableFileSystemView,
|
||||
public static final String TIMELINE = String.format("%s/%s", BASE_URL, "timeline/instants/all");
|
||||
|
||||
// POST Requests
|
||||
public static final String REFRESH_DATASET = String.format("%s/%s", BASE_URL, "refresh/");
|
||||
public static final String REFRESH_TABLE = String.format("%s/%s", BASE_URL, "refresh/");
|
||||
|
||||
public static final String PARTITION_PARAM = "partition";
|
||||
public static final String BASEPATH_PARAM = "basepath";
|
||||
@@ -381,7 +381,7 @@ public class RemoteHoodieTableFileSystemView implements SyncableFileSystemView,
|
||||
public boolean refresh() {
|
||||
Map<String, String> paramsMap = getParams();
|
||||
try {
|
||||
return executeRequest(REFRESH_DATASET, paramsMap, new TypeReference<Boolean>() {}, RequestMethod.POST);
|
||||
return executeRequest(REFRESH_TABLE, paramsMap, new TypeReference<Boolean>() {}, RequestMethod.POST);
|
||||
} catch (IOException e) {
|
||||
throw new HoodieRemoteException(e);
|
||||
}
|
||||
|
||||
@@ -46,7 +46,7 @@ import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
/**
|
||||
* A file-system view implementation on top of embedded Rocks DB store. For each DataSet : 3 column Family is added for
|
||||
* A file-system view implementation on top of embedded Rocks DB store. For each table, 3 column families are added for
|
||||
* storing (1) File-Slices and Data Files for View lookups (2) Pending compaction operations (3) Partitions tracked
|
||||
*
|
||||
* Fine-grained retrieval API to fetch latest file-slice and data-file which are common operations for
|
||||
@@ -135,7 +135,7 @@ public class RocksDbBasedFileSystemView extends IncrementalTimelineSyncFileSyste
|
||||
|
||||
@Override
|
||||
protected void resetViewState() {
|
||||
LOG.info("Deleting all rocksdb data associated with dataset filesystem view");
|
||||
LOG.info("Deleting all rocksdb data associated with table filesystem view");
|
||||
rocksDB.close();
|
||||
rocksDB = new RocksDBDAO(metaClient.getBasePath(), config.getRocksdbBasePath());
|
||||
}
|
||||
|
||||
@@ -29,7 +29,7 @@ import java.util.List;
|
||||
/**
|
||||
* Helper class to generate Key and column names for rocksdb based view
|
||||
*
|
||||
* For RocksDB, 3 colFamilies are used for storing file-system view for each dataset. (a) View (b) Partitions Cached (c)
|
||||
* For RocksDB, 3 colFamilies are used for storing file-system view for each table. (a) View (b) Partitions Cached (c)
|
||||
* Pending Compactions
|
||||
*
|
||||
*
|
||||
|
||||
@@ -22,7 +22,7 @@ import java.io.IOException;
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* Exception thrown for dataset IO-related failures.
|
||||
* Exception thrown for table IO-related failures.
|
||||
* </p>
|
||||
*/
|
||||
public class HoodieIOException extends HoodieException {
|
||||
|
||||
@@ -19,15 +19,15 @@
|
||||
package org.apache.hudi.exception;
|
||||
|
||||
/**
|
||||
* Exception thrown to indicate that a hoodie dataset is invalid.
|
||||
* Exception thrown to indicate that a hoodie table is invalid.
|
||||
*/
|
||||
public class InvalidDatasetException extends HoodieException {
|
||||
public class InvalidTableException extends HoodieException {
|
||||
|
||||
public InvalidDatasetException(String basePath) {
|
||||
public InvalidTableException(String basePath) {
|
||||
super(getErrorMessage(basePath));
|
||||
}
|
||||
|
||||
private static String getErrorMessage(String basePath) {
|
||||
return "Invalid Hoodie Dataset. " + basePath;
|
||||
return "Invalid Hoodie Table. " + basePath;
|
||||
}
|
||||
}
|
||||
@@ -25,35 +25,35 @@ import java.io.IOException;
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* Exception thrown to indicate that a hoodie dataset was not found on the path provided.
|
||||
* Exception thrown to indicate that a hoodie table was not found on the path provided.
|
||||
* </p>
|
||||
*/
|
||||
public class DatasetNotFoundException extends HoodieException {
|
||||
public class TableNotFoundException extends HoodieException {
|
||||
|
||||
public DatasetNotFoundException(String basePath) {
|
||||
public TableNotFoundException(String basePath) {
|
||||
super(getErrorMessage(basePath));
|
||||
}
|
||||
|
||||
private static String getErrorMessage(String basePath) {
|
||||
return "Hoodie dataset not found in path " + basePath;
|
||||
return "Hoodie table not found in path " + basePath;
|
||||
}
|
||||
|
||||
public static void checkValidDataset(FileSystem fs, Path basePathDir, Path metaPathDir)
|
||||
throws DatasetNotFoundException {
|
||||
public static void checkTableValidity(FileSystem fs, Path basePathDir, Path metaPathDir)
|
||||
throws TableNotFoundException {
|
||||
// Check if the base path is found
|
||||
try {
|
||||
if (!fs.exists(basePathDir) || !fs.isDirectory(basePathDir)) {
|
||||
throw new DatasetNotFoundException(basePathDir.toString());
|
||||
throw new TableNotFoundException(basePathDir.toString());
|
||||
}
|
||||
// Check if the meta path is found
|
||||
if (!fs.exists(metaPathDir) || !fs.isDirectory(metaPathDir)) {
|
||||
throw new DatasetNotFoundException(metaPathDir.toString());
|
||||
throw new TableNotFoundException(metaPathDir.toString());
|
||||
}
|
||||
} catch (IllegalArgumentException e) {
|
||||
// if the base path is file:///, then we have a IllegalArgumentException
|
||||
throw new DatasetNotFoundException(metaPathDir.toString());
|
||||
throw new TableNotFoundException(metaPathDir.toString());
|
||||
} catch (IOException e) {
|
||||
throw new HoodieIOException("Could not check if dataset " + basePathDir + " is valid dataset", e);
|
||||
throw new HoodieIOException("Could not check if " + basePathDir + " is a valid table", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -118,7 +118,7 @@ public class HoodieTestUtils {
|
||||
properties.setProperty(HoodieTableConfig.HOODIE_TABLE_NAME_PROP_NAME, RAW_TRIPS_TEST_NAME);
|
||||
properties.setProperty(HoodieTableConfig.HOODIE_TABLE_TYPE_PROP_NAME, tableType.name());
|
||||
properties.setProperty(HoodieTableConfig.HOODIE_PAYLOAD_CLASS_PROP_NAME, HoodieAvroPayload.class.getName());
|
||||
return HoodieTableMetaClient.initDatasetAndGetMetaClient(hadoopConf, basePath, properties);
|
||||
return HoodieTableMetaClient.initTableAndGetMetaClient(hadoopConf, basePath, properties);
|
||||
}
|
||||
|
||||
public static String makeNewCommitTime() {
|
||||
|
||||
@@ -353,7 +353,7 @@ public class TestIncrementalFSViewSync extends HoodieCommonTestHarness {
|
||||
* @param isDeltaCommit is Delta Commit ?
|
||||
* @param instantsToFiles List of files associated with each instant
|
||||
* @param rolledBackInstants List of rolled-back instants
|
||||
* @param emptyRestoreInstant Restore instant at which dataset becomes empty
|
||||
* @param emptyRestoreInstant Restore instant at which table becomes empty
|
||||
*/
|
||||
private void testRestore(SyncableFileSystemView view, List<String> newRestoreInstants, boolean isDeltaCommit,
|
||||
Map<String, List<String>> instantsToFiles, List<String> rolledBackInstants, String emptyRestoreInstant,
|
||||
|
||||
Reference in New Issue
Block a user