diff --git a/hoodie-client/src/main/java/com/uber/hoodie/HoodieWriteClient.java b/hoodie-client/src/main/java/com/uber/hoodie/HoodieWriteClient.java index ddde0b964..230a60324 100644 --- a/hoodie-client/src/main/java/com/uber/hoodie/HoodieWriteClient.java +++ b/hoodie-client/src/main/java/com/uber/hoodie/HoodieWriteClient.java @@ -848,7 +848,6 @@ public class HoodieWriteClient implements Seriali String commitActionType = table.getCommitActionType(); activeTimeline.createInflight( new HoodieInstant(true, commitActionType, commitTime)); - table.initializeFinalizeWrite(); } /** diff --git a/hoodie-client/src/main/java/com/uber/hoodie/io/HoodieCreateHandle.java b/hoodie-client/src/main/java/com/uber/hoodie/io/HoodieCreateHandle.java index d4bb198ae..8d933d40c 100644 --- a/hoodie-client/src/main/java/com/uber/hoodie/io/HoodieCreateHandle.java +++ b/hoodie-client/src/main/java/com/uber/hoodie/io/HoodieCreateHandle.java @@ -130,11 +130,7 @@ public class HoodieCreateHandle extends HoodieIOH stat.setNumDeletes(recordsDeleted); stat.setPrevCommit(HoodieWriteStat.NULL_COMMIT); stat.setFileId(status.getFileId()); - String relativePath = path.toString().replace(new Path(config.getBasePath()) + "/", ""); - stat.setPath(relativePath); - if (tempPath != null) { - stat.setTempPath(tempPath.toString().replace(new Path(config.getBasePath()) + "/", "")); - } + stat.setPaths(new Path(config.getBasePath()), path, tempPath); stat.setTotalWriteBytes(FSUtils.getFileSize(fs, getStorageWriterPath())); stat.setTotalWriteErrors(status.getFailedRecords().size()); status.setStat(stat); diff --git a/hoodie-client/src/main/java/com/uber/hoodie/table/HoodieCopyOnWriteTable.java b/hoodie-client/src/main/java/com/uber/hoodie/table/HoodieCopyOnWriteTable.java index c03f75f78..511c867fa 100644 --- a/hoodie-client/src/main/java/com/uber/hoodie/table/HoodieCopyOnWriteTable.java +++ b/hoodie-client/src/main/java/com/uber/hoodie/table/HoodieCopyOnWriteTable.java @@ -576,24 +576,6 @@ public class 
HoodieCopyOnWriteTable extends Hoodi return stats; } - @Override - public void initializeFinalizeWrite() { - if (!config.shouldUseTempFolderForCopyOnWrite()) { - return; - } - - // create temporary folder if needed - final FileSystem fs = FSUtils.getFs(); - final Path temporaryFolder = new Path(config.getBasePath(), HoodieTableMetaClient.TEMPFOLDER_NAME); - try { - if (!fs.exists(temporaryFolder)) { - fs.mkdirs(temporaryFolder); - } - } catch (IOException e) { - throw new HoodieIOException("Failed to create temporary folder: " + temporaryFolder); - } - } - @Override @SuppressWarnings("unchecked") public Optional finalizeWrite(JavaSparkContext jsc, List writeStatuses) { diff --git a/hoodie-client/src/main/java/com/uber/hoodie/table/HoodieMergeOnReadTable.java b/hoodie-client/src/main/java/com/uber/hoodie/table/HoodieMergeOnReadTable.java index e10dde669..9cab45bc7 100644 --- a/hoodie-client/src/main/java/com/uber/hoodie/table/HoodieMergeOnReadTable.java +++ b/hoodie-client/src/main/java/com/uber/hoodie/table/HoodieMergeOnReadTable.java @@ -250,11 +250,6 @@ public class HoodieMergeOnReadTable extends return allRollbackStats; } - @Override - public void initializeFinalizeWrite() { - // do nothing for MOR tables - } - @Override public Optional finalizeWrite(JavaSparkContext jsc, List writeStatuses) { // do nothing for MOR tables diff --git a/hoodie-client/src/main/java/com/uber/hoodie/table/HoodieTable.java b/hoodie-client/src/main/java/com/uber/hoodie/table/HoodieTable.java index ca9f37762..c395450bb 100644 --- a/hoodie-client/src/main/java/com/uber/hoodie/table/HoodieTable.java +++ b/hoodie-client/src/main/java/com/uber/hoodie/table/HoodieTable.java @@ -273,11 +273,6 @@ public abstract class HoodieTable implements Seri public abstract List rollback(JavaSparkContext jsc, List commits) throws IOException; - /** - * Initialize resources needed for finalize write. 
-   */
-  public abstract void initializeFinalizeWrite();
-
   /**
    * Finalize the written data files
    *
diff --git a/hoodie-common/src/main/java/com/uber/hoodie/common/model/HoodieWriteStat.java b/hoodie-common/src/main/java/com/uber/hoodie/common/model/HoodieWriteStat.java
index c8f241062..dccfd3105 100644
--- a/hoodie-common/src/main/java/com/uber/hoodie/common/model/HoodieWriteStat.java
+++ b/hoodie-common/src/main/java/com/uber/hoodie/common/model/HoodieWriteStat.java
@@ -20,6 +20,7 @@ import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
 
 import javax.annotation.Nullable;
 import java.io.Serializable;
+import org.apache.hadoop.fs.Path;
 
 /**
  * Statistics about a single Hoodie write operation.
@@ -212,6 +213,35 @@ public class HoodieWriteStat implements Serializable {
     return this.tempPath;
   }
 
+  /**
+   * Set path and tempPath relative to the given basePath.
+   *
+   * <p>Only a leading {@code basePath + "/"} is removed; a path that does not start with that
+   * prefix is stored unchanged.
+   *
+   * @param basePath base path the stored values are made relative to
+   * @param path full path to record as {@code path}
+   * @param tempPath full path to record as {@code tempPath}; when {@code null} the current
+   *     tempPath value is left untouched
+   */
+  public void setPaths(Path basePath, Path path, Path tempPath) {
+    this.path = stripBasePath(basePath, path);
+    if (tempPath != null) {
+      this.tempPath = stripBasePath(basePath, tempPath);
+    }
+  }
+
+  /**
+   * Returns fullPath without its leading "basePath/" prefix, or unchanged if it has none.
+   */
+  private static String stripBasePath(Path basePath, Path fullPath) {
+    // String#replace would also delete later occurrences of the prefix inside the path,
+    // e.g. base "/data" and file "/data/2017/data/f" would come out as "2017f".
+    final String prefix = basePath + "/";
+    final String full = fullPath.toString();
+    return full.startsWith(prefix) ? full.substring(prefix.length()) : full;
+  }
+
   @Override
   public String toString() {
     return new StringBuilder()
diff --git a/hoodie-common/src/main/java/com/uber/hoodie/common/table/HoodieTableMetaClient.java b/hoodie-common/src/main/java/com/uber/hoodie/common/table/HoodieTableMetaClient.java
index c776abf38..4c5168f60 100644
--- a/hoodie-common/src/main/java/com/uber/hoodie/common/table/HoodieTableMetaClient.java
+++ b/hoodie-common/src/main/java/com/uber/hoodie/common/table/HoodieTableMetaClient.java
@@ -233,6 +233,11 @@ public class HoodieTableMetaClient implements Serializable {
       }
     }
 
+    // Always create temporaryFolder which is needed for finalizeWrite for Hoodie tables
+    final Path temporaryFolder = new Path(basePath, HoodieTableMetaClient.TEMPFOLDER_NAME);
+    if (!fs.exists(temporaryFolder)) {
+      fs.mkdirs(temporaryFolder);
+    }
     HoodieTableConfig.createHoodieProperties(fs, metaPathDir, props);
     HoodieTableMetaClient metaClient =
new HoodieTableMetaClient(fs.getConf(), basePath);
     log.info("Finished initializing Table of type " + metaClient.getTableConfig().getTableType()
diff --git a/hoodie-common/src/test/java/com/uber/hoodie/common/model/TestHoodieWriteStat.java b/hoodie-common/src/test/java/com/uber/hoodie/common/model/TestHoodieWriteStat.java
new file mode 100644
index 000000000..77c7d21de
--- /dev/null
+++ b/hoodie-common/src/test/java/com/uber/hoodie/common/model/TestHoodieWriteStat.java
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2017 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *          http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.uber.hoodie.common.model;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+
+import com.uber.hoodie.common.table.HoodieTableMetaClient;
+import com.uber.hoodie.common.util.FSUtils;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+import java.util.UUID;
+import org.apache.hadoop.fs.Path;
+import org.junit.Test;
+
+/**
+ * Unit test for {@link HoodieWriteStat}.
+ */
+public class TestHoodieWriteStat {
+
+  /**
+   * Checks that the values stored by setPaths resolve back to the original paths when joined
+   * with the base path, and that getTempPath() is null when no temp path is given.
+   */
+  @Test
+  public void testSetPaths() {
+    final Path root = new Path("/data/tables/some-hoodie-table");
+    final String partition = "2017/12/31";
+    final String instant = new SimpleDateFormat("yyyyMMddHHmmss").format(new Date());
+    final String uniqueName = UUID.randomUUID().toString();
+    final int taskPartition = Integer.MAX_VALUE;
+    final int stage = Integer.MAX_VALUE;
+    final long attempt = Long.MAX_VALUE;
+
+    final String dataFileName = FSUtils.makeDataFileName(instant, taskPartition, uniqueName);
+    final Path dataFile = new Path(new Path(root, partition), dataFileName);
+
+    final String tempFileName = FSUtils.makeTempDataFileName(partition, instant,
+        taskPartition, uniqueName, stage, attempt);
+    final Path tempFile = new Path(new Path(root, HoodieTableMetaClient.TEMPFOLDER_NAME),
+        tempFileName);
+
+    // both the final path and the temp path are supplied
+    HoodieWriteStat withTemp = new HoodieWriteStat();
+    withTemp.setPaths(root, dataFile, tempFile);
+    assertEquals(dataFile, new Path(root, withTemp.getPath()));
+    assertEquals(tempFile, new Path(root, withTemp.getTempPath()));
+
+    // test for null tempFilePath
+    HoodieWriteStat withoutTemp = new HoodieWriteStat();
+    withoutTemp.setPaths(root, dataFile, null);
+    assertEquals(dataFile, new Path(root, withoutTemp.getPath()));
+    assertNull(withoutTemp.getTempPath());
+  }
+}