[HUDI-1138] Add timeline-server-based marker file strategy for improving marker-related latency (#3233)
- Can be enabled for cloud stores like S3. Not supported for hdfs yet, due to partial write failures.
This commit is contained in:
@@ -64,9 +64,9 @@ import org.apache.hudi.metrics.HoodieMetrics;
|
||||
import org.apache.hudi.table.BulkInsertPartitioner;
|
||||
import org.apache.hudi.table.HoodieTable;
|
||||
import org.apache.hudi.table.HoodieTimelineArchiveLog;
|
||||
import org.apache.hudi.table.MarkerFiles;
|
||||
import org.apache.hudi.table.action.HoodieWriteMetadata;
|
||||
import org.apache.hudi.table.action.savepoint.SavepointHelpers;
|
||||
import org.apache.hudi.table.marker.WriteMarkersFactory;
|
||||
|
||||
import com.codahale.metrics.Timer;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
@@ -430,7 +430,8 @@ public abstract class AbstractHoodieWriteClient<T extends HoodieRecordPayload, I
|
||||
protected void postCommit(HoodieTable<T, I, K, O> table, HoodieCommitMetadata metadata, String instantTime, Option<Map<String, String>> extraMetadata) {
|
||||
try {
|
||||
// Delete the marker directory for the instant.
|
||||
new MarkerFiles(table, instantTime).quietDeleteMarkerDir(context, config.getMarkersDeleteParallelism());
|
||||
WriteMarkersFactory.get(config.getMarkersType(), table, instantTime)
|
||||
.quietDeleteMarkerDir(context, config.getMarkersDeleteParallelism());
|
||||
// We cannot have unbounded commit files. Archive commits if we have to archive
|
||||
HoodieTimelineArchiveLog archiveLog = new HoodieTimelineArchiveLog(config, table);
|
||||
archiveLog.archiveIfRequired(context);
|
||||
|
||||
@@ -67,11 +67,7 @@ public class EmbeddedTimelineServerHelper {
|
||||
LOG.info("Starting Timeline service !!");
|
||||
Option<String> hostAddr = context.getProperty(EngineProperty.EMBEDDED_SERVER_HOST);
|
||||
EmbeddedTimelineService timelineService = new EmbeddedTimelineService(
|
||||
context, hostAddr.orElse(null),config.getEmbeddedTimelineServerPort(),
|
||||
config.getMetadataConfig(), config.getCommonConfig(),
|
||||
config.getClientSpecifiedViewStorageConfig(), config.getBasePath(),
|
||||
config.getEmbeddedTimelineServerThreads(), config.getEmbeddedTimelineServerCompressOutput(),
|
||||
config.getEmbeddedTimelineServerUseAsync());
|
||||
context, hostAddr.orElse(null), config);
|
||||
timelineService.startServer();
|
||||
updateWriteConfigWithTimelineServer(timelineService, config);
|
||||
return timelineService;
|
||||
|
||||
@@ -18,14 +18,15 @@
|
||||
|
||||
package org.apache.hudi.client.embedded;
|
||||
|
||||
import org.apache.hudi.common.config.HoodieCommonConfig;
|
||||
import org.apache.hudi.common.config.HoodieMetadataConfig;
|
||||
import org.apache.hudi.common.engine.HoodieEngineContext;
|
||||
import org.apache.hudi.common.config.SerializableConfiguration;
|
||||
import org.apache.hudi.common.fs.FSUtils;
|
||||
import org.apache.hudi.common.table.view.FileSystemViewManager;
|
||||
import org.apache.hudi.common.table.view.FileSystemViewStorageConfig;
|
||||
import org.apache.hudi.common.table.view.FileSystemViewStorageType;
|
||||
import org.apache.hudi.common.util.NetworkUtils;
|
||||
import org.apache.hudi.config.HoodieWriteConfig;
|
||||
import org.apache.hudi.table.marker.MarkerType;
|
||||
import org.apache.hudi.timeline.service.TimelineService;
|
||||
|
||||
import org.apache.log4j.LogManager;
|
||||
@@ -41,53 +42,55 @@ public class EmbeddedTimelineService {
|
||||
private static final Logger LOG = LogManager.getLogger(EmbeddedTimelineService.class);
|
||||
|
||||
private int serverPort;
|
||||
private int preferredPort;
|
||||
private String hostAddr;
|
||||
private HoodieEngineContext context;
|
||||
private final SerializableConfiguration hadoopConf;
|
||||
private final FileSystemViewStorageConfig config;
|
||||
private final HoodieMetadataConfig metadataConfig;
|
||||
private final HoodieCommonConfig commonConfig;
|
||||
private final HoodieWriteConfig writeConfig;
|
||||
private final String basePath;
|
||||
|
||||
private final int numThreads;
|
||||
private final boolean shouldCompressOutput;
|
||||
private final boolean useAsync;
|
||||
private transient FileSystemViewManager viewManager;
|
||||
private transient TimelineService server;
|
||||
|
||||
public EmbeddedTimelineService(HoodieEngineContext context, String embeddedTimelineServiceHostAddr, int embeddedTimelineServerPort,
|
||||
HoodieMetadataConfig metadataConfig, HoodieCommonConfig commonConfig, FileSystemViewStorageConfig config, String basePath,
|
||||
int numThreads, boolean compressOutput, boolean useAsync) {
|
||||
public EmbeddedTimelineService(HoodieEngineContext context, String embeddedTimelineServiceHostAddr, HoodieWriteConfig writeConfig) {
|
||||
setHostAddr(embeddedTimelineServiceHostAddr);
|
||||
this.context = context;
|
||||
this.config = config;
|
||||
this.basePath = basePath;
|
||||
this.metadataConfig = metadataConfig;
|
||||
this.commonConfig = commonConfig;
|
||||
this.writeConfig = writeConfig;
|
||||
this.basePath = writeConfig.getBasePath();
|
||||
this.hadoopConf = context.getHadoopConf();
|
||||
this.viewManager = createViewManager();
|
||||
this.preferredPort = embeddedTimelineServerPort;
|
||||
this.numThreads = numThreads;
|
||||
this.shouldCompressOutput = compressOutput;
|
||||
this.useAsync = useAsync;
|
||||
}
|
||||
|
||||
private FileSystemViewManager createViewManager() {
|
||||
// Using passed-in configs to build view storage configs
|
||||
FileSystemViewStorageConfig.Builder builder =
|
||||
FileSystemViewStorageConfig.newBuilder().fromProperties(config.getProps());
|
||||
FileSystemViewStorageConfig.newBuilder().fromProperties(writeConfig.getClientSpecifiedViewStorageConfig().getProps());
|
||||
FileSystemViewStorageType storageType = builder.build().getStorageType();
|
||||
if (storageType.equals(FileSystemViewStorageType.REMOTE_ONLY)
|
||||
|| storageType.equals(FileSystemViewStorageType.REMOTE_FIRST)) {
|
||||
// Reset to default if set to Remote
|
||||
builder.withStorageType(FileSystemViewStorageType.MEMORY);
|
||||
}
|
||||
return FileSystemViewManager.createViewManager(context, metadataConfig, builder.build(), commonConfig, basePath);
|
||||
return FileSystemViewManager.createViewManager(context, writeConfig.getMetadataConfig(), builder.build(), writeConfig.getCommonConfig(), basePath);
|
||||
}
|
||||
|
||||
public void startServer() throws IOException {
|
||||
server = new TimelineService(preferredPort, viewManager, hadoopConf.newCopy(), numThreads, shouldCompressOutput, useAsync);
|
||||
TimelineService.Config.Builder timelineServiceConfBuilder = TimelineService.Config.builder()
|
||||
.serverPort(writeConfig.getEmbeddedTimelineServerPort())
|
||||
.numThreads(writeConfig.getEmbeddedTimelineServerThreads())
|
||||
.compress(writeConfig.getEmbeddedTimelineServerCompressOutput())
|
||||
.async(writeConfig.getEmbeddedTimelineServerUseAsync());
|
||||
// Only passing marker-related write configs to timeline server
|
||||
// if timeline-server-based markers are used.
|
||||
if (writeConfig.getMarkersType() == MarkerType.TIMELINE_SERVER_BASED) {
|
||||
timelineServiceConfBuilder
|
||||
.enableMarkerRequests(true)
|
||||
.markerBatchNumThreads(writeConfig.getMarkersTimelineServerBasedBatchNumThreads())
|
||||
.markerBatchIntervalMs(writeConfig.getMarkersTimelineServerBasedBatchIntervalMs())
|
||||
.markerParallelism(writeConfig.getMarkersDeleteParallelism());
|
||||
}
|
||||
|
||||
server = new TimelineService(context, hadoopConf.newCopy(), timelineServiceConfBuilder.build(),
|
||||
FSUtils.getFs(basePath, hadoopConf.newCopy()), viewManager);
|
||||
serverPort = server.startService();
|
||||
LOG.info("Started embedded timeline server at " + hostAddr + ":" + serverPort);
|
||||
}
|
||||
@@ -106,8 +109,9 @@ public class EmbeddedTimelineService {
|
||||
* Retrieves proper view storage configs for remote clients to access this service.
|
||||
*/
|
||||
public FileSystemViewStorageConfig getRemoteFileSystemViewConfig() {
|
||||
FileSystemViewStorageType viewStorageType = config.shouldEnableBackupForRemoteFileSystemView()
|
||||
? FileSystemViewStorageType.REMOTE_FIRST : FileSystemViewStorageType.REMOTE_ONLY;
|
||||
FileSystemViewStorageType viewStorageType = writeConfig.getClientSpecifiedViewStorageConfig()
|
||||
.shouldEnableBackupForRemoteFileSystemView()
|
||||
? FileSystemViewStorageType.REMOTE_FIRST : FileSystemViewStorageType.REMOTE_ONLY;
|
||||
return FileSystemViewStorageConfig.newBuilder().withStorageType(viewStorageType)
|
||||
.withRemoteServerHost(hostAddr).withRemoteServerPort(serverPort).build();
|
||||
}
|
||||
|
||||
@@ -46,6 +46,7 @@ import org.apache.hudi.metrics.MetricsReporterType;
|
||||
import org.apache.hudi.metrics.datadog.DatadogHttpClient.ApiSite;
|
||||
import org.apache.hudi.table.action.compact.CompactionTriggerStrategy;
|
||||
import org.apache.hudi.table.action.compact.strategy.CompactionStrategy;
|
||||
import org.apache.hudi.table.marker.MarkerType;
|
||||
|
||||
import org.apache.hadoop.hbase.io.compress.Compression;
|
||||
import org.apache.orc.CompressionKind;
|
||||
@@ -227,6 +228,30 @@ public class HoodieWriteConfig extends HoodieConfig {
|
||||
+ "files from lake storage, before committing the write. Reduce this value, if the high number of tasks incur delays for smaller tables "
|
||||
+ "or low latency writes.");
|
||||
|
||||
public static final ConfigProperty<String> MARKERS_TYPE_PROP = ConfigProperty
|
||||
.key("hoodie.write.markers.type")
|
||||
.defaultValue(MarkerType.DIRECT.toString())
|
||||
.sinceVersion("0.9.0")
|
||||
.withDocumentation("Marker type to use. Two modes are supported: "
|
||||
+ "- DIRECT: individual marker file corresponding to each data file is directly "
|
||||
+ "created by the writer. "
|
||||
+ "- TIMELINE_SERVER_BASED: marker operations are all handled at the timeline service "
|
||||
+ "which serves as a proxy. New marker entries are batch processed and stored "
|
||||
+ "in a limited number of underlying files for efficiency.");
|
||||
|
||||
public static final ConfigProperty<Integer> MARKERS_TIMELINE_SERVER_BASED_BATCH_NUM_THREADS_PROP = ConfigProperty
|
||||
.key("hoodie.markers.timeline_server_based.batch.num_threads")
|
||||
.defaultValue(20)
|
||||
.sinceVersion("0.9.0")
|
||||
.withDocumentation("Number of threads to use for batch processing marker "
|
||||
+ "creation requests at the timeline server");
|
||||
|
||||
public static final ConfigProperty<Long> MARKERS_TIMELINE_SERVER_BASED_BATCH_INTERVAL_MS_PROP = ConfigProperty
|
||||
.key("hoodie.markers.timeline_server_based.batch.interval_ms")
|
||||
.defaultValue(50L)
|
||||
.sinceVersion("0.9.0")
|
||||
.withDocumentation("The batch interval in milliseconds for marker creation batch processing");
|
||||
|
||||
public static final ConfigProperty<String> MARKERS_DELETE_PARALLELISM = ConfigProperty
|
||||
.key("hoodie.markers.delete.parallelism")
|
||||
.defaultValue("100")
|
||||
@@ -542,6 +567,19 @@ public class HoodieWriteConfig extends HoodieConfig {
|
||||
return getInt(FINALIZE_WRITE_PARALLELISM);
|
||||
}
|
||||
|
||||
public MarkerType getMarkersType() {
|
||||
String markerType = getString(MARKERS_TYPE_PROP);
|
||||
return MarkerType.valueOf(markerType.toUpperCase());
|
||||
}
|
||||
|
||||
public int getMarkersTimelineServerBasedBatchNumThreads() {
|
||||
return getInt(MARKERS_TIMELINE_SERVER_BASED_BATCH_NUM_THREADS_PROP);
|
||||
}
|
||||
|
||||
public long getMarkersTimelineServerBasedBatchIntervalMs() {
|
||||
return getLong(MARKERS_TIMELINE_SERVER_BASED_BATCH_INTERVAL_MS_PROP);
|
||||
}
|
||||
|
||||
public int getMarkersDeleteParallelism() {
|
||||
return getInt(MARKERS_DELETE_PARALLELISM);
|
||||
}
|
||||
@@ -1556,6 +1594,21 @@ public class HoodieWriteConfig extends HoodieConfig {
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder withMarkersType(String markerType) {
|
||||
writeConfig.setValue(MARKERS_TYPE_PROP, markerType);
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder withMarkersTimelineServerBasedBatchNumThreads(int numThreads) {
|
||||
writeConfig.setValue(MARKERS_TIMELINE_SERVER_BASED_BATCH_NUM_THREADS_PROP, String.valueOf(numThreads));
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder withMarkersTimelineServerBasedBatchIntervalMs(long intervalMs) {
|
||||
writeConfig.setValue(MARKERS_TIMELINE_SERVER_BASED_BATCH_INTERVAL_MS_PROP, String.valueOf(intervalMs));
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder withMarkersDeleteParallelism(int parallelism) {
|
||||
writeConfig.setValue(MARKERS_DELETE_PARALLELISM, String.valueOf(parallelism));
|
||||
return this;
|
||||
|
||||
@@ -33,7 +33,7 @@ import org.apache.hudi.exception.HoodieIOException;
|
||||
import org.apache.hudi.io.storage.HoodieFileWriter;
|
||||
import org.apache.hudi.io.storage.HoodieFileWriterFactory;
|
||||
import org.apache.hudi.table.HoodieTable;
|
||||
import org.apache.hudi.table.MarkerFiles;
|
||||
import org.apache.hudi.table.marker.WriteMarkersFactory;
|
||||
|
||||
import org.apache.avro.Schema;
|
||||
import org.apache.avro.generic.GenericRecord;
|
||||
@@ -177,8 +177,8 @@ public abstract class HoodieWriteHandle<T extends HoodieRecordPayload, I, K, O>
|
||||
* @param partitionPath Partition path
|
||||
*/
|
||||
protected void createMarkerFile(String partitionPath, String dataFileName) {
|
||||
MarkerFiles markerFiles = new MarkerFiles(hoodieTable, instantTime);
|
||||
markerFiles.create(partitionPath, dataFileName, getIOType());
|
||||
WriteMarkersFactory.get(config.getMarkersType(), hoodieTable, instantTime)
|
||||
.create(partitionPath, dataFileName, getIOType());
|
||||
}
|
||||
|
||||
public Schema getWriterSchemaWithMetaFields() {
|
||||
|
||||
@@ -68,6 +68,9 @@ import org.apache.hudi.index.HoodieIndex;
|
||||
import org.apache.hudi.metadata.HoodieTableMetadata;
|
||||
import org.apache.hudi.table.action.HoodieWriteMetadata;
|
||||
import org.apache.hudi.table.action.bootstrap.HoodieBootstrapWriteMetadata;
|
||||
import org.apache.hudi.table.marker.WriteMarkers;
|
||||
import org.apache.hudi.table.marker.WriteMarkersFactory;
|
||||
|
||||
import org.apache.log4j.LogManager;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
@@ -482,7 +485,7 @@ public abstract class HoodieTable<T extends HoodieRecordPayload, I, K, O> implem
|
||||
/**
|
||||
* Returns the possible invalid data file name with given marker files.
|
||||
*/
|
||||
protected Set<String> getInvalidDataPaths(MarkerFiles markers) throws IOException {
|
||||
protected Set<String> getInvalidDataPaths(WriteMarkers markers) throws IOException {
|
||||
return markers.createdAndMergedDataPaths(context, config.getFinalizeWriteParallelism());
|
||||
}
|
||||
|
||||
@@ -504,7 +507,7 @@ public abstract class HoodieTable<T extends HoodieRecordPayload, I, K, O> implem
|
||||
// Reconcile marker and data files with WriteStats so that partially written data-files due to failed
|
||||
// (but succeeded on retry) tasks are removed.
|
||||
String basePath = getMetaClient().getBasePath();
|
||||
MarkerFiles markers = new MarkerFiles(this, instantTs);
|
||||
WriteMarkers markers = WriteMarkersFactory.get(config.getMarkersType(), this, instantTs);
|
||||
|
||||
if (!markers.doesMarkerDirExist()) {
|
||||
// can happen if it was an empty write say.
|
||||
|
||||
@@ -49,6 +49,9 @@ import org.apache.hudi.exception.HoodieCommitException;
|
||||
import org.apache.hudi.exception.HoodieException;
|
||||
import org.apache.hudi.exception.HoodieIOException;
|
||||
import org.apache.hudi.metadata.HoodieTableMetadata;
|
||||
import org.apache.hudi.table.marker.WriteMarkers;
|
||||
import org.apache.hudi.table.marker.WriteMarkersFactory;
|
||||
|
||||
import org.apache.log4j.LogManager;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
@@ -306,7 +309,7 @@ public class HoodieTimelineArchiveLog<T extends HoodieAvroPayload, I, K, O> {
|
||||
LOG.warn("Unable to delete file(s) for " + hoodieInstant.getFileName() + ", replaced files possibly deleted by cleaner");
|
||||
}
|
||||
try {
|
||||
deleteAnyLeftOverMarkerFiles(context, hoodieInstant);
|
||||
deleteAnyLeftOverMarkers(context, hoodieInstant);
|
||||
records.add(convertToAvroRecord(hoodieInstant));
|
||||
if (records.size() >= this.config.getCommitArchivalBatchSize()) {
|
||||
writeToFile(wrapperSchema, records);
|
||||
@@ -324,9 +327,9 @@ public class HoodieTimelineArchiveLog<T extends HoodieAvroPayload, I, K, O> {
|
||||
}
|
||||
}
|
||||
|
||||
private void deleteAnyLeftOverMarkerFiles(HoodieEngineContext context, HoodieInstant instant) {
|
||||
MarkerFiles markerFiles = new MarkerFiles(table, instant.getTimestamp());
|
||||
if (markerFiles.deleteMarkerDir(context, config.getMarkersDeleteParallelism())) {
|
||||
private void deleteAnyLeftOverMarkers(HoodieEngineContext context, HoodieInstant instant) {
|
||||
WriteMarkers writeMarkers = WriteMarkersFactory.get(config.getMarkersType(), table, instant.getTimestamp());
|
||||
if (writeMarkers.deleteMarkerDir(context, config.getMarkersDeleteParallelism())) {
|
||||
LOG.info("Cleaned up left over marker directory for instant :" + instant);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -36,8 +36,9 @@ import org.apache.hudi.config.HoodieWriteConfig;
|
||||
import org.apache.hudi.exception.HoodieIOException;
|
||||
import org.apache.hudi.exception.HoodieRollbackException;
|
||||
import org.apache.hudi.table.HoodieTable;
|
||||
import org.apache.hudi.table.MarkerFiles;
|
||||
import org.apache.hudi.table.action.BaseActionExecutor;
|
||||
import org.apache.hudi.table.marker.WriteMarkersFactory;
|
||||
|
||||
import org.apache.log4j.LogManager;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
@@ -110,8 +111,9 @@ public abstract class BaseRollbackActionExecutor<T extends HoodieRecordPayload,
|
||||
finishRollback(rollbackMetadata);
|
||||
}
|
||||
|
||||
// Finally, remove the marker files post rollback.
|
||||
new MarkerFiles(table, instantToRollback.getTimestamp()).quietDeleteMarkerDir(context, config.getMarkersDeleteParallelism());
|
||||
// Finally, remove the markers post rollback.
|
||||
WriteMarkersFactory.get(config.getMarkersType(), table, instantToRollback.getTimestamp())
|
||||
.quietDeleteMarkerDir(context, config.getMarkersDeleteParallelism());
|
||||
|
||||
return rollbackMetadata;
|
||||
}
|
||||
|
||||
@@ -16,16 +16,18 @@
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi.table;
|
||||
package org.apache.hudi.table.marker;
|
||||
|
||||
import org.apache.hudi.common.config.SerializableConfiguration;
|
||||
import org.apache.hudi.common.engine.HoodieEngineContext;
|
||||
import org.apache.hudi.common.fs.FSUtils;
|
||||
import org.apache.hudi.common.model.IOType;
|
||||
import org.apache.hudi.common.table.HoodieTableMetaClient;
|
||||
import org.apache.hudi.common.util.ValidationUtils;
|
||||
import org.apache.hudi.common.util.HoodieTimer;
|
||||
import org.apache.hudi.common.util.Option;
|
||||
import org.apache.hudi.exception.HoodieException;
|
||||
import org.apache.hudi.exception.HoodieIOException;
|
||||
import org.apache.hudi.table.HoodieTable;
|
||||
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
@@ -36,7 +38,6 @@ import org.apache.log4j.LogManager;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Serializable;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashSet;
|
||||
@@ -45,41 +46,28 @@ import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
/**
|
||||
* Operates on marker files for a given write action (commit, delta commit, compaction).
|
||||
* Marker operations of directly accessing the file system to create and delete
|
||||
* marker files. Each data file has a corresponding marker file.
|
||||
*/
|
||||
public class MarkerFiles implements Serializable {
|
||||
public class DirectWriteMarkers extends WriteMarkers {
|
||||
|
||||
private static final Logger LOG = LogManager.getLogger(MarkerFiles.class);
|
||||
|
||||
private final String instantTime;
|
||||
private static final Logger LOG = LogManager.getLogger(DirectWriteMarkers.class);
|
||||
private final transient FileSystem fs;
|
||||
private final transient Path markerDirPath;
|
||||
private final String basePath;
|
||||
|
||||
public MarkerFiles(FileSystem fs, String basePath, String markerFolderPath, String instantTime) {
|
||||
this.instantTime = instantTime;
|
||||
public DirectWriteMarkers(FileSystem fs, String basePath, String markerFolderPath, String instantTime) {
|
||||
super(basePath, markerFolderPath, instantTime);
|
||||
this.fs = fs;
|
||||
this.markerDirPath = new Path(markerFolderPath);
|
||||
this.basePath = basePath;
|
||||
}
|
||||
|
||||
public MarkerFiles(HoodieTable table, String instantTime) {
|
||||
public DirectWriteMarkers(HoodieTable table, String instantTime) {
|
||||
this(table.getMetaClient().getFs(),
|
||||
table.getMetaClient().getBasePath(),
|
||||
table.getMetaClient().getMarkerFolderPath(instantTime),
|
||||
instantTime);
|
||||
}
|
||||
|
||||
public void quietDeleteMarkerDir(HoodieEngineContext context, int parallelism) {
|
||||
try {
|
||||
deleteMarkerDir(context, parallelism);
|
||||
} catch (HoodieIOException ioe) {
|
||||
LOG.warn("Error deleting marker directory for instant " + instantTime, ioe);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Delete Marker directory corresponding to an instant.
|
||||
* Deletes Marker directory corresponding to an instant.
|
||||
*
|
||||
* @param context HoodieEngineContext.
|
||||
* @param parallelism parallelism for deletion.
|
||||
@@ -112,10 +100,15 @@ public class MarkerFiles implements Serializable {
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return {@code true} if marker directory exists; {@code false} otherwise.
|
||||
* @throws IOException
|
||||
*/
|
||||
public boolean doesMarkerDirExist() throws IOException {
|
||||
return fs.exists(markerDirPath);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Set<String> createdAndMergedDataPaths(HoodieEngineContext context, int parallelism) throws IOException {
|
||||
Set<String> dataFiles = new HashSet<>();
|
||||
|
||||
@@ -157,15 +150,12 @@ public class MarkerFiles implements Serializable {
|
||||
|
||||
private String translateMarkerToDataPath(String markerPath) {
|
||||
String rPath = stripMarkerFolderPrefix(markerPath);
|
||||
return MarkerFiles.stripMarkerSuffix(rPath);
|
||||
return stripMarkerSuffix(rPath);
|
||||
}
|
||||
|
||||
public static String stripMarkerSuffix(String path) {
|
||||
return path.substring(0, path.indexOf(HoodieTableMetaClient.MARKER_EXTN));
|
||||
}
|
||||
|
||||
public List<String> allMarkerFilePaths() throws IOException {
|
||||
List<String> markerFiles = new ArrayList<>();
|
||||
@Override
|
||||
public Set<String> allMarkerFilePaths() throws IOException {
|
||||
Set<String> markerFiles = new HashSet<>();
|
||||
if (doesMarkerDirExist()) {
|
||||
FSUtils.processFiles(fs, markerDirPath.toString(), fileStatus -> {
|
||||
markerFiles.add(stripMarkerFolderPrefix(fileStatus.getPath().toString()));
|
||||
@@ -175,70 +165,30 @@ public class MarkerFiles implements Serializable {
|
||||
return markerFiles;
|
||||
}
|
||||
|
||||
private String stripMarkerFolderPrefix(String fullMarkerPath) {
|
||||
ValidationUtils.checkArgument(fullMarkerPath.contains(HoodieTableMetaClient.MARKER_EXTN));
|
||||
String markerRootPath = Path.getPathWithoutSchemeAndAuthority(
|
||||
new Path(String.format("%s/%s/%s", basePath, HoodieTableMetaClient.TEMPFOLDER_NAME, instantTime))).toString();
|
||||
int begin = fullMarkerPath.indexOf(markerRootPath);
|
||||
ValidationUtils.checkArgument(begin >= 0,
|
||||
"Not in marker dir. Marker Path=" + fullMarkerPath + ", Expected Marker Root=" + markerRootPath);
|
||||
return fullMarkerPath.substring(begin + markerRootPath.length() + 1);
|
||||
}
|
||||
|
||||
/**
|
||||
* The marker path will be <base-path>/.hoodie/.temp/<instant_ts>/2019/04/25/filename.marker.writeIOType.
|
||||
*/
|
||||
public Path create(String partitionPath, String dataFileName, IOType type) {
|
||||
@Override
|
||||
protected Option<Path> create(String partitionPath, String dataFileName, IOType type, boolean checkIfExists) {
|
||||
HoodieTimer timer = new HoodieTimer().startTimer();
|
||||
Path markerPath = getMarkerPath(partitionPath, dataFileName, type);
|
||||
Path dirPath = markerPath.getParent();
|
||||
try {
|
||||
LOG.info("Creating Marker Path=" + markerPath);
|
||||
fs.create(markerPath, false).close();
|
||||
if (!fs.exists(dirPath)) {
|
||||
fs.mkdirs(dirPath); // create a new partition as needed.
|
||||
}
|
||||
} catch (IOException e) {
|
||||
throw new HoodieException("Failed to create marker file " + markerPath, e);
|
||||
throw new HoodieIOException("Failed to make dir " + dirPath, e);
|
||||
}
|
||||
return markerPath;
|
||||
}
|
||||
|
||||
/**
|
||||
* The marker path will be <base-path>/.hoodie/.temp/<instant_ts>/2019/04/25/filename.marker.writeIOType.
|
||||
*
|
||||
* @return true if the marker file creates successfully,
|
||||
* false if it already exists
|
||||
*/
|
||||
public boolean createIfNotExists(String partitionPath, String dataFileName, IOType type) {
|
||||
Path markerPath = getMarkerPath(partitionPath, dataFileName, type);
|
||||
try {
|
||||
if (fs.exists(markerPath)) {
|
||||
if (checkIfExists && fs.exists(markerPath)) {
|
||||
LOG.warn("Marker Path=" + markerPath + " already exists, cancel creation");
|
||||
return false;
|
||||
return Option.empty();
|
||||
}
|
||||
LOG.info("Creating Marker Path=" + markerPath);
|
||||
fs.create(markerPath, false).close();
|
||||
} catch (IOException e) {
|
||||
throw new HoodieException("Failed to create marker file " + markerPath, e);
|
||||
}
|
||||
return true;
|
||||
LOG.info("[direct] Created marker file " + markerPath.toString()
|
||||
+ " in " + timer.endTimer() + " ms");
|
||||
return Option.of(markerPath);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the marker path. Would create the partition path first if not exists.
|
||||
*
|
||||
* @param partitionPath The partition path
|
||||
* @param dataFileName The data file name
|
||||
* @param type The IO type
|
||||
* @return path of the marker file
|
||||
*/
|
||||
private Path getMarkerPath(String partitionPath, String dataFileName, IOType type) {
|
||||
Path path = FSUtils.getPartitionPath(markerDirPath, partitionPath);
|
||||
try {
|
||||
if (!fs.exists(path)) {
|
||||
fs.mkdirs(path); // create a new partition as needed.
|
||||
}
|
||||
} catch (IOException e) {
|
||||
throw new HoodieIOException("Failed to make dir " + path, e);
|
||||
}
|
||||
String markerFileName = String.format("%s%s.%s", dataFileName, HoodieTableMetaClient.MARKER_EXTN, type.name());
|
||||
return new Path(path, markerFileName);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,27 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi.table.marker;
|
||||
|
||||
/**
|
||||
* Marker type indicating how markers are stored in the file system.
|
||||
*/
|
||||
public enum MarkerType {
  /**
   * An individual marker file corresponding to each data file is created directly by the writer.
   */
  DIRECT,

  /**
   * Marker operations are handled at the timeline service, which serves as a proxy; new marker
   * entries are batch processed and stored in a limited number of underlying files.
   */
  TIMELINE_SERVER_BASED;
}
|
||||
@@ -0,0 +1,179 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi.table.marker;
|
||||
|
||||
import org.apache.hudi.common.engine.HoodieEngineContext;
|
||||
import org.apache.hudi.common.model.IOType;
|
||||
import org.apache.hudi.common.util.HoodieTimer;
|
||||
import org.apache.hudi.common.util.Option;
|
||||
import org.apache.hudi.exception.HoodieRemoteException;
|
||||
import org.apache.hudi.table.HoodieTable;
|
||||
|
||||
import com.fasterxml.jackson.core.type.TypeReference;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.http.client.fluent.Request;
|
||||
import org.apache.http.client.fluent.Response;
|
||||
import org.apache.http.client.utils.URIBuilder;
|
||||
import org.apache.log4j.LogManager;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import static org.apache.hudi.common.table.marker.MarkerOperation.ALL_MARKERS_URL;
|
||||
import static org.apache.hudi.common.table.marker.MarkerOperation.CREATE_AND_MERGE_MARKERS_URL;
|
||||
import static org.apache.hudi.common.table.marker.MarkerOperation.CREATE_MARKER_URL;
|
||||
import static org.apache.hudi.common.table.marker.MarkerOperation.DELETE_MARKER_DIR_URL;
|
||||
import static org.apache.hudi.common.table.marker.MarkerOperation.MARKERS_DIR_EXISTS_URL;
|
||||
import static org.apache.hudi.common.table.marker.MarkerOperation.MARKER_DIR_PATH_PARAM;
|
||||
import static org.apache.hudi.common.table.marker.MarkerOperation.MARKER_NAME_PARAM;
|
||||
|
||||
/**
|
||||
* Marker operations of using timeline server as a proxy to create and delete markers.
|
||||
* Each data file has a corresponding marker entry, which is stored in a limited number of
|
||||
* underlying files maintained by the timeline server (each file contains multiple marker
|
||||
* entries).
|
||||
*/
|
||||
public class TimelineServerBasedWriteMarkers extends WriteMarkers {
|
||||
private static final Logger LOG = LogManager.getLogger(TimelineServerBasedWriteMarkers.class);
|
||||
private final ObjectMapper mapper;
|
||||
private final String timelineServerHost;
|
||||
private final int timelineServerPort;
|
||||
private final int timeoutSecs;
|
||||
|
||||
/**
 * Builds write markers for the given table and instant.
 *
 * <p>The base path and marker folder path come from the table's meta client; the timeline
 * server host, port and client timeout come from the view storage config of the table's
 * write config.
 *
 * @param table       table whose meta client and config supply the paths and server settings
 * @param instantTime instant the markers belong to
 */
public TimelineServerBasedWriteMarkers(HoodieTable table, String instantTime) {
  this(
      table.getMetaClient().getBasePath(),
      table.getMetaClient().getMarkerFolderPath(instantTime),
      instantTime,
      table.getConfig().getViewStorageConfig().getRemoteViewServerHost(),
      table.getConfig().getViewStorageConfig().getRemoteViewServerPort(),
      table.getConfig().getViewStorageConfig().getRemoteTimelineClientTimeoutSecs());
}
|
||||
|
||||
/**
 * Package-private constructor taking every setting explicitly.
 *
 * @param basePath           base path, passed to the superclass
 * @param markerFolderPath   marker folder path, passed to the superclass
 * @param instantTime        instant the markers belong to, passed to the superclass
 * @param timelineServerHost host of the timeline server to send requests to
 * @param timelineServerPort port of the timeline server to send requests to
 * @param timeoutSecs        timeout setting, in seconds, kept for requests to the timeline server
 */
TimelineServerBasedWriteMarkers(String basePath, String markerFolderPath, String instantTime,
                                String timelineServerHost, int timelineServerPort, int timeoutSecs) {
  super(basePath, markerFolderPath, instantTime);
  this.timelineServerHost = timelineServerHost;
  this.timelineServerPort = timelineServerPort;
  this.timeoutSecs = timeoutSecs;
  this.mapper = new ObjectMapper();
}
|
||||
|
||||
@Override
|
||||
public boolean deleteMarkerDir(HoodieEngineContext context, int parallelism) {
|
||||
Map<String, String> paramsMap = Collections.singletonMap(MARKER_DIR_PATH_PARAM, markerDirPath.toString());
|
||||
try {
|
||||
return executeRequestToTimelineServer(
|
||||
DELETE_MARKER_DIR_URL, paramsMap, new TypeReference<Boolean>() {}, RequestMethod.POST);
|
||||
} catch (IOException e) {
|
||||
throw new HoodieRemoteException("Failed to delete marker directory " + markerDirPath.toString(), e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean doesMarkerDirExist() {
|
||||
Map<String, String> paramsMap = Collections.singletonMap(MARKER_DIR_PATH_PARAM, markerDirPath.toString());
|
||||
try {
|
||||
return executeRequestToTimelineServer(
|
||||
MARKERS_DIR_EXISTS_URL, paramsMap, new TypeReference<Boolean>() {}, RequestMethod.GET);
|
||||
} catch (IOException e) {
|
||||
throw new HoodieRemoteException("Failed to check marker directory " + markerDirPath.toString(), e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Set<String> createdAndMergedDataPaths(HoodieEngineContext context, int parallelism) throws IOException {
|
||||
Map<String, String> paramsMap = Collections.singletonMap(MARKER_DIR_PATH_PARAM, markerDirPath.toString());
|
||||
try {
|
||||
Set<String> markerPaths = executeRequestToTimelineServer(
|
||||
CREATE_AND_MERGE_MARKERS_URL, paramsMap, new TypeReference<Set<String>>() {}, RequestMethod.GET);
|
||||
return markerPaths.stream().map(WriteMarkers::stripMarkerSuffix).collect(Collectors.toSet());
|
||||
} catch (IOException e) {
|
||||
throw new HoodieRemoteException("Failed to get CREATE and MERGE data file paths in "
|
||||
+ markerDirPath.toString(), e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Set<String> allMarkerFilePaths() {
|
||||
Map<String, String> paramsMap = Collections.singletonMap(MARKER_DIR_PATH_PARAM, markerDirPath.toString());
|
||||
try {
|
||||
return executeRequestToTimelineServer(
|
||||
ALL_MARKERS_URL, paramsMap, new TypeReference<Set<String>>() {}, RequestMethod.GET);
|
||||
} catch (IOException e) {
|
||||
throw new HoodieRemoteException("Failed to get all markers in " + markerDirPath.toString(), e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Option<Path> create(String partitionPath, String dataFileName, IOType type, boolean checkIfExists) {
|
||||
HoodieTimer timer = new HoodieTimer().startTimer();
|
||||
String markerFileName = getMarkerFileName(dataFileName, type);
|
||||
|
||||
Map<String, String> paramsMap = new HashMap<>();
|
||||
paramsMap.put(MARKER_DIR_PATH_PARAM, markerDirPath.toString());
|
||||
paramsMap.put(MARKER_NAME_PARAM, partitionPath + "/" + markerFileName);
|
||||
boolean success;
|
||||
try {
|
||||
success = executeRequestToTimelineServer(
|
||||
CREATE_MARKER_URL, paramsMap, new TypeReference<Boolean>() {}, RequestMethod.POST);
|
||||
} catch (IOException e) {
|
||||
throw new HoodieRemoteException("Failed to create marker file " + partitionPath + "/" + markerFileName, e);
|
||||
}
|
||||
LOG.info("[timeline-server-based] Created marker file " + partitionPath + "/" + markerFileName
|
||||
+ " in " + timer.endTimer() + " ms");
|
||||
if (success) {
|
||||
return Option.of(new Path(new Path(markerDirPath, partitionPath), markerFileName));
|
||||
} else {
|
||||
return Option.empty();
|
||||
}
|
||||
}
|
||||
|
||||
private <T> T executeRequestToTimelineServer(String requestPath, Map<String, String> queryParameters,
|
||||
TypeReference reference, RequestMethod method) throws IOException {
|
||||
URIBuilder builder =
|
||||
new URIBuilder().setHost(timelineServerHost).setPort(timelineServerPort).setPath(requestPath).setScheme("http");
|
||||
|
||||
queryParameters.forEach(builder::addParameter);
|
||||
|
||||
String url = builder.toString();
|
||||
LOG.info("Sending request : (" + url + ")");
|
||||
Response response;
|
||||
int timeout = this.timeoutSecs * 1000; // msec
|
||||
switch (method) {
|
||||
case GET:
|
||||
response = Request.Get(url).connectTimeout(timeout).socketTimeout(timeout).execute();
|
||||
break;
|
||||
case POST:
|
||||
default:
|
||||
response = Request.Post(url).connectTimeout(timeout).socketTimeout(timeout).execute();
|
||||
break;
|
||||
}
|
||||
String content = response.returnContent().asString();
|
||||
return (T) mapper.readValue(content, reference);
|
||||
}
|
||||
|
||||
private enum RequestMethod {
|
||||
GET, POST
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,186 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi.table.marker;
|
||||
|
||||
import org.apache.hudi.common.engine.HoodieEngineContext;
|
||||
import org.apache.hudi.common.fs.FSUtils;
|
||||
import org.apache.hudi.common.model.IOType;
|
||||
import org.apache.hudi.common.table.HoodieTableMetaClient;
|
||||
import org.apache.hudi.common.util.Option;
|
||||
import org.apache.hudi.common.util.ValidationUtils;
|
||||
import org.apache.hudi.exception.HoodieIOException;
|
||||
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.log4j.LogManager;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Serializable;
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* Operates on markers for a given write action (commit, delta commit, compaction).
|
||||
*
|
||||
* This abstract class provides abstract methods of different marker operations, so that
|
||||
* different marker write mechanism can be implemented.
|
||||
*/
|
||||
public abstract class WriteMarkers implements Serializable {
|
||||
|
||||
private static final Logger LOG = LogManager.getLogger(WriteMarkers.class);
|
||||
|
||||
protected final String basePath;
|
||||
protected final transient Path markerDirPath;
|
||||
protected final String instantTime;
|
||||
|
||||
public WriteMarkers(String basePath, String markerFolderPath, String instantTime) {
|
||||
this.basePath = basePath;
|
||||
this.markerDirPath = new Path(markerFolderPath);
|
||||
this.instantTime = instantTime;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a marker without checking if the marker already exists.
|
||||
*
|
||||
* @param partitionPath partition path in the table
|
||||
* @param dataFileName data file name
|
||||
* @param type write IO type
|
||||
* @return the marker path
|
||||
*/
|
||||
public Option<Path> create(String partitionPath, String dataFileName, IOType type) {
|
||||
return create(partitionPath, dataFileName, type, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a marker if the marker does not exist.
|
||||
*
|
||||
* @param partitionPath partition path in the table
|
||||
* @param dataFileName data file name
|
||||
* @param type write IO type
|
||||
* @return the marker path or empty option if already exists
|
||||
*/
|
||||
public Option<Path> createIfNotExists(String partitionPath, String dataFileName, IOType type) {
|
||||
return create(partitionPath, dataFileName, type, true);
|
||||
}
|
||||
|
||||
/**
|
||||
* Quietly deletes the marker directory.
|
||||
*
|
||||
* @param context {@code HoodieEngineContext} instance.
|
||||
* @param parallelism parallelism for deleting the marker files in the directory.
|
||||
*/
|
||||
public void quietDeleteMarkerDir(HoodieEngineContext context, int parallelism) {
|
||||
try {
|
||||
deleteMarkerDir(context, parallelism);
|
||||
} catch (HoodieIOException ioe) {
|
||||
LOG.warn("Error deleting marker directory for instant " + instantTime, ioe);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Strips the marker file suffix from the input path, i.e., ".marker.[IO_type]".
|
||||
*
|
||||
* @param path file path
|
||||
* @return Stripped path
|
||||
*/
|
||||
public static String stripMarkerSuffix(String path) {
|
||||
return path.substring(0, path.indexOf(HoodieTableMetaClient.MARKER_EXTN));
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the marker file name, in the format of "[data_file_name].marker.[IO_type]".
|
||||
*
|
||||
* @param dataFileName data file name
|
||||
* @param type IO type
|
||||
* @return the marker file name
|
||||
*/
|
||||
protected String getMarkerFileName(String dataFileName, IOType type) {
|
||||
return String.format("%s%s.%s", dataFileName, HoodieTableMetaClient.MARKER_EXTN, type.name());
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the marker path. Would create the partition path first if not exists
|
||||
*
|
||||
* @param partitionPath The partition path
|
||||
* @param dataFileName The data file name
|
||||
* @param type The IO type
|
||||
* @return path of the marker file
|
||||
*/
|
||||
protected Path getMarkerPath(String partitionPath, String dataFileName, IOType type) {
|
||||
Path path = FSUtils.getPartitionPath(markerDirPath, partitionPath);
|
||||
String markerFileName = getMarkerFileName(dataFileName, type);
|
||||
return new Path(path, markerFileName);
|
||||
}
|
||||
|
||||
/**
|
||||
* Strips the folder prefix of the marker file path.
|
||||
*
|
||||
* @param fullMarkerPath the full path of the marker file
|
||||
* @return marker file name
|
||||
*/
|
||||
protected String stripMarkerFolderPrefix(String fullMarkerPath) {
|
||||
ValidationUtils.checkArgument(fullMarkerPath.contains(HoodieTableMetaClient.MARKER_EXTN));
|
||||
String markerRootPath = Path.getPathWithoutSchemeAndAuthority(
|
||||
new Path(String.format("%s/%s/%s", basePath, HoodieTableMetaClient.TEMPFOLDER_NAME, instantTime))).toString();
|
||||
int begin =
|
||||
fullMarkerPath.indexOf(markerRootPath);
|
||||
ValidationUtils.checkArgument(begin >= 0,
|
||||
"Not in marker dir. Marker Path=" + fullMarkerPath + ", Expected Marker Root=" + markerRootPath);
|
||||
return fullMarkerPath.substring(begin + markerRootPath.length() + 1);
|
||||
}
|
||||
|
||||
/**
|
||||
* Deletes the marker directory.
|
||||
*
|
||||
* @param context {@code HoodieEngineContext} instance.
|
||||
* @param parallelism parallelism for deleting the marker files in the directory.
|
||||
* @return {@true} if successful; {@false} otherwise.
|
||||
*/
|
||||
public abstract boolean deleteMarkerDir(HoodieEngineContext context, int parallelism);
|
||||
|
||||
/**
|
||||
* @return {@true} if the marker directory exists in the file system; {@false} otherwise.
|
||||
* @throws IOException
|
||||
*/
|
||||
public abstract boolean doesMarkerDirExist() throws IOException;
|
||||
|
||||
/**
|
||||
* @param context {@code HoodieEngineContext} instance.
|
||||
* @param parallelism parallelism for reading the marker files in the directory.
|
||||
* @return all the data file paths of write IO type "CREATE" and "MERGE"
|
||||
* @throws IOException
|
||||
*/
|
||||
public abstract Set<String> createdAndMergedDataPaths(HoodieEngineContext context, int parallelism) throws IOException;
|
||||
|
||||
/**
|
||||
* @return all the marker paths
|
||||
* @throws IOException
|
||||
*/
|
||||
public abstract Set<String> allMarkerFilePaths() throws IOException;
|
||||
|
||||
/**
|
||||
* Creates a marker.
|
||||
*
|
||||
* @param partitionPath partition path in the table
|
||||
* @param dataFileName data file name
|
||||
* @param type write IO type
|
||||
* @param checkIfExists whether to check if the marker already exists
|
||||
* @return the marker path or empty option if already exists and {@code checkIfExists} is true
|
||||
*/
|
||||
abstract Option<Path> create(String partitionPath, String dataFileName, IOType type, boolean checkIfExists);
|
||||
}
|
||||
@@ -0,0 +1,58 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi.table.marker;
|
||||
|
||||
import org.apache.hudi.common.fs.FSUtils;
|
||||
import org.apache.hudi.common.fs.StorageSchemes;
|
||||
import org.apache.hudi.exception.HoodieException;
|
||||
import org.apache.hudi.table.HoodieTable;
|
||||
|
||||
import org.apache.log4j.LogManager;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
/**
|
||||
* A factory to generate {@code WriteMarkers} instance based on the {@code MarkerType}.
|
||||
*/
|
||||
public class WriteMarkersFactory {
|
||||
private static final Logger LOG = LogManager.getLogger(WriteMarkersFactory.class);
|
||||
|
||||
/**
|
||||
* @param markerType the type of markers to use
|
||||
* @param table {@code HoodieTable} instance
|
||||
* @param instantTime current instant time
|
||||
* @return {@code WriteMarkers} instance based on the {@code MarkerType}
|
||||
*/
|
||||
public static WriteMarkers get(MarkerType markerType, HoodieTable table, String instantTime) {
|
||||
LOG.debug("Instantiated MarkerFiles with marker type: " + markerType.toString());
|
||||
switch (markerType) {
|
||||
case DIRECT:
|
||||
return new DirectWriteMarkers(table, instantTime);
|
||||
case TIMELINE_SERVER_BASED:
|
||||
String basePath = table.getMetaClient().getBasePath();
|
||||
if (StorageSchemes.HDFS.getScheme().equals(
|
||||
FSUtils.getFs(basePath, table.getContext().getHadoopConf().newCopy()).getScheme())) {
|
||||
throw new HoodieException("Timeline-server-based markers are not supported for HDFS: "
|
||||
+ "base path " + basePath);
|
||||
}
|
||||
return new TimelineServerBasedWriteMarkers(table, instantTime);
|
||||
default:
|
||||
throw new HoodieException("The marker type \"" + markerType.name() + "\" is not supported.");
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user