
[HUDI-455] Redo hudi-client log statements using SLF4J (#1145)

* [HUDI-455] Redo hudi-client log statements using SLF4J
hejinbiao123
2019-12-31 13:49:34 +08:00
committed by vinoyang
parent ab6ae5cebb
commit e637d9ed26
26 changed files with 242 additions and 245 deletions
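
Reviewer note: the diff below applies one mechanical pattern across hudi-client. Each class swaps the log4j Logger/LogManager pair for the SLF4J facade, and messages built by string concatenation become parameterized messages. A minimal before/after sketch of that pattern (the class name SomeHoodieComponent is illustrative, not taken from the diff):

    // Before: log4j API, the message string is concatenated on every call
    import org.apache.log4j.LogManager;
    import org.apache.log4j.Logger;

    public class SomeHoodieComponent {
      private static final Logger LOG = LogManager.getLogger(SomeHoodieComponent.class);

      void commit(String commitTime) {
        LOG.info("Committed " + commitTime);
      }
    }

    // After: SLF4J facade, {} placeholders are substituted only when INFO is enabled
    import org.slf4j.Logger;
    import org.slf4j.LoggerFactory;

    public class SomeHoodieComponent {
      private static final Logger LOG = LoggerFactory.getLogger(SomeHoodieComponent.class);

      void commit(String commitTime) {
        LOG.info("Committed {}", commitTime);
      }
    }

The pom hunk that follows only adds slf4j-api; SLF4J is a facade, so the actual logging backend is still supplied separately at packaging time (presumably through the existing log4j dependency and a matching binding, which this hunk does not show).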

View File

@@ -85,6 +85,11 @@
       <groupId>log4j</groupId>
       <artifactId>log4j</artifactId>
     </dependency>
+    <dependency>
+      <groupId>org.slf4j</groupId>
+      <artifactId>slf4j-api</artifactId>
+      <version>${slf4j.version}</version>
+    </dependency>
     <!-- Parquet -->
     <dependency>

View File

@@ -26,9 +26,9 @@ import org.apache.hudi.common.util.Option;
 import org.apache.hudi.config.HoodieWriteConfig;
 import org.apache.hadoop.fs.FileSystem;
-import org.apache.log4j.LogManager;
-import org.apache.log4j.Logger;
 import org.apache.spark.api.java.JavaSparkContext;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 import java.io.IOException;
 import java.io.Serializable;
@@ -39,7 +39,7 @@ import java.io.Serializable;
  */
 public abstract class AbstractHoodieClient implements Serializable, AutoCloseable {
-  private static final Logger LOG = LogManager.getLogger(AbstractHoodieClient.class);
+  private static final Logger LOG = LoggerFactory.getLogger(AbstractHoodieClient.class);
   protected final transient FileSystem fs;
   protected final transient JavaSparkContext jsc;

View File

@@ -45,9 +45,9 @@ import org.apache.hudi.func.OperationResult;
 import com.google.common.base.Preconditions;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.Path;
-import org.apache.log4j.LogManager;
-import org.apache.log4j.Logger;
 import org.apache.spark.api.java.JavaSparkContext;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 import java.io.FileNotFoundException;
 import java.io.IOException;
@@ -65,7 +65,7 @@ import static org.apache.hudi.common.table.HoodieTimeline.COMPACTION_ACTION;
  */
 public class CompactionAdminClient extends AbstractHoodieClient {
-  private static final Logger LOG = LogManager.getLogger(CompactionAdminClient.class);
+  private static final Logger LOG = LoggerFactory.getLogger(CompactionAdminClient.class);
   public CompactionAdminClient(JavaSparkContext jsc, String basePath) {
     super(jsc, HoodieWriteConfig.newBuilder().withPath(basePath).build());
@@ -359,13 +359,14 @@ public class CompactionAdminClient extends AbstractHoodieClient {
     if (!dryRun) {
       return jsc.parallelize(renameActions, parallelism).map(lfPair -> {
         try {
-          LOG.info("RENAME " + lfPair.getLeft().getPath() + " => " + lfPair.getRight().getPath());
+          LOG.info("RENAME {} => {}", lfPair.getLeft().getPath(), lfPair.getRight().getPath());
           renameLogFile(metaClient, lfPair.getLeft(), lfPair.getRight());
           return new RenameOpResult(lfPair, true, Option.empty());
         } catch (IOException e) {
           LOG.error("Error renaming log file", e);
-          LOG.error("\n\n\n***NOTE Compaction is in inconsistent state. Try running \"compaction repair "
-              + lfPair.getLeft().getBaseCommitTime() + "\" to recover from failure ***\n\n\n");
+          LOG.error("\n\n\n***NOTE Compaction is in inconsistent state. "
+              + "Try running \"compaction repair {} \" to recover from failure ***\n\n\n",
+              lfPair.getLeft().getBaseCommitTime());
           return new RenameOpResult(lfPair, false, Option.of(e));
         }
       }).collect();
@@ -396,7 +397,7 @@ public class CompactionAdminClient extends AbstractHoodieClient {
     HoodieCompactionPlan plan = getCompactionPlan(metaClient, compactionInstant);
     if (plan.getOperations() != null) {
       LOG.info(
-          "Number of Compaction Operations :" + plan.getOperations().size() + " for instant :" + compactionInstant);
+          "Number of Compaction Operations :{} for instant :{}", plan.getOperations().size(), compactionInstant);
       List<CompactionOperation> ops = plan.getOperations().stream()
           .map(CompactionOperation::convertFromAvroRecordInstance).collect(Collectors.toList());
       return jsc.parallelize(ops, parallelism).flatMap(op -> {
@@ -410,7 +411,7 @@ public class CompactionAdminClient extends AbstractHoodieClient {
         }
       }).collect();
     }
-    LOG.warn("No operations for compaction instant : " + compactionInstant);
+    LOG.warn("No operations for compaction instant : {}", compactionInstant);
     return new ArrayList<>();
   }

View File

@@ -39,16 +39,16 @@ import org.apache.hudi.table.HoodieTable;
 import com.codahale.metrics.Timer;
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Preconditions;
-import org.apache.log4j.LogManager;
-import org.apache.log4j.Logger;
 import org.apache.spark.api.java.JavaSparkContext;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 import java.io.IOException;
 import java.util.List;
 public class HoodieCleanClient<T extends HoodieRecordPayload> extends AbstractHoodieClient {
-  private static final Logger LOG = LogManager.getLogger(HoodieCleanClient.class);
+  private static final Logger LOG = LoggerFactory.getLogger(HoodieCleanClient.class);
   private final transient HoodieMetrics metrics;
   public HoodieCleanClient(JavaSparkContext jsc, HoodieWriteConfig clientConfig, HoodieMetrics metrics) {
@@ -85,7 +85,7 @@ public class HoodieCleanClient<T extends HoodieRecordPayload> extends AbstractHo
     // If there are inflight(failed) or previously requested clean operation, first perform them
     table.getCleanTimeline().filterInflightsAndRequested().getInstants().forEach(hoodieInstant -> {
-      LOG.info("There were previously unfinished cleaner operations. Finishing Instant=" + hoodieInstant);
+      LOG.info("There were previously unfinished cleaner operations. Finishing Instant={}", hoodieInstant);
       runClean(table, hoodieInstant);
     });
@@ -122,7 +122,7 @@ public class HoodieCleanClient<T extends HoodieRecordPayload> extends AbstractHo
     // Save to both aux and timeline folder
     try {
       table.getActiveTimeline().saveToCleanRequested(cleanInstant, AvroUtils.serializeCleanerPlan(cleanerPlan));
-      LOG.info("Requesting Cleaning with instant time " + cleanInstant);
+      LOG.info("Requesting Cleaning with instant time {}", cleanInstant);
     } catch (IOException e) {
       LOG.error("Got exception when saving cleaner requested file", e);
       throw new HoodieIOException(e.getMessage(), e);
@@ -173,20 +173,20 @@ public class HoodieCleanClient<T extends HoodieRecordPayload> extends AbstractHo
       Option<Long> durationInMs = Option.empty();
       if (context != null) {
         durationInMs = Option.of(metrics.getDurationInMs(context.stop()));
-        LOG.info("cleanerElaspsedTime (Minutes): " + durationInMs.get() / (1000 * 60));
+        LOG.info("cleanerElaspsedTime (Minutes): {}", durationInMs.get() / (1000 * 60));
       }
       HoodieTableMetaClient metaClient = createMetaClient(true);
       // Create the metadata and save it
       HoodieCleanMetadata metadata =
           CleanerUtils.convertCleanMetadata(metaClient, cleanInstant.getTimestamp(), durationInMs, cleanStats);
-      LOG.info("Cleaned " + metadata.getTotalFilesDeleted() + " files. Earliest Retained :" + metadata.getEarliestCommitToRetain());
+      LOG.info("Cleaned {} files. Earliest Retained : {}", metadata.getTotalFilesDeleted(), metadata.getEarliestCommitToRetain());
       metrics.updateCleanMetrics(durationInMs.orElseGet(() -> -1L), metadata.getTotalFilesDeleted());
       table.getActiveTimeline().transitionCleanInflightToComplete(
           new HoodieInstant(true, HoodieTimeline.CLEAN_ACTION, cleanInstant.getTimestamp()),
           AvroUtils.serializeCleanMetadata(metadata));
-      LOG.info("Marked clean started on " + cleanInstant.getTimestamp() + " as complete");
+      LOG.info("Marked clean started on {} as complete", cleanInstant.getTimestamp());
       return metadata;
     } catch (IOException e) {
       throw new HoodieIOException("Failed to clean up after commit", e);

View File

@@ -35,8 +35,6 @@ import org.apache.hudi.exception.HoodieIndexException;
 import org.apache.hudi.index.HoodieIndex;
 import org.apache.hudi.table.HoodieTable;
-import org.apache.log4j.LogManager;
-import org.apache.log4j.Logger;
 import org.apache.spark.SparkConf;
 import org.apache.spark.api.java.JavaPairRDD;
 import org.apache.spark.api.java.JavaRDD;
@@ -51,6 +49,8 @@ import java.util.List;
 import java.util.Set;
 import java.util.stream.Collectors;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 import scala.Tuple2;
 /**
@@ -58,7 +58,7 @@ import scala.Tuple2;
  */
 public class HoodieReadClient<T extends HoodieRecordPayload> extends AbstractHoodieClient {
-  private static final Logger LOG = LogManager.getLogger(HoodieReadClient.class);
+  private static final Logger LOG = LoggerFactory.getLogger(HoodieReadClient.class);
   /**
    * TODO: We need to persist the index type into hoodie.properties and be able to access the index just with a simple

View File

@@ -67,8 +67,6 @@ import com.codahale.metrics.Timer;
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Preconditions;
 import com.google.common.collect.ImmutableMap;
-import org.apache.log4j.LogManager;
-import org.apache.log4j.Logger;
 import org.apache.spark.Partitioner;
 import org.apache.spark.SparkConf;
 import org.apache.spark.api.java.JavaRDD;
@@ -86,6 +84,8 @@ import java.util.Map;
 import java.util.stream.Collectors;
 import java.util.stream.IntStream;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 import scala.Tuple2;
 /**
@@ -96,7 +96,7 @@ import scala.Tuple2;
  */
 public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHoodieClient {
-  private static final Logger LOG = LogManager.getLogger(HoodieWriteClient.class);
+  private static final Logger LOG = LoggerFactory.getLogger(HoodieWriteClient.class);
   private static final String UPDATE_STR = "update";
   private static final String LOOKUP_STR = "lookup";
   private final boolean rollbackPending;
@@ -399,13 +399,13 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
   private void commitOnAutoCommit(String commitTime, JavaRDD<WriteStatus> resultRDD, String actionType) {
     if (config.shouldAutoCommit()) {
-      LOG.info("Auto commit enabled: Committing " + commitTime);
+      LOG.info("Auto commit enabled: Committing {}", commitTime);
       boolean commitResult = commit(commitTime, resultRDD, Option.empty(), actionType);
       if (!commitResult) {
         throw new HoodieCommitException("Failed to commit " + commitTime);
       }
     } else {
-      LOG.info("Auto commit disabled for " + commitTime);
+      LOG.info("Auto commit disabled for {}", commitTime);
     }
   }
@@ -454,13 +454,13 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
     if (preppedRecords.getStorageLevel() == StorageLevel.NONE()) {
       preppedRecords.persist(StorageLevel.MEMORY_AND_DISK_SER());
     } else {
-      LOG.info("RDD PreppedRecords was persisted at: " + preppedRecords.getStorageLevel());
+      LOG.info("RDD PreppedRecords was persisted at: {}", preppedRecords.getStorageLevel());
     }
     WorkloadProfile profile = null;
     if (hoodieTable.isWorkloadProfileNeeded()) {
       profile = new WorkloadProfile(preppedRecords);
-      LOG.info("Workload profile :" + profile);
+      LOG.info("Workload profile : {}", profile);
       saveWorkloadProfileMetadataToInflight(profile, hoodieTable, commitTime);
     }
@@ -526,7 +526,7 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
   private boolean commit(String commitTime, JavaRDD<WriteStatus> writeStatuses,
       Option<Map<String, String>> extraMetadata, String actionType) {
-    LOG.info("Commiting " + commitTime);
+    LOG.info("Commiting {}", commitTime);
     // Create a Hoodie table which encapsulated the commits and files visible
     HoodieTable<T> table = HoodieTable.getHoodieTable(createMetaClient(true), config, jsc);
@@ -573,7 +573,7 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
             metadata, actionType);
         writeContext = null;
       }
-      LOG.info("Committed " + commitTime);
+      LOG.info("Committed {}", commitTime);
     } catch (IOException e) {
       throw new HoodieCommitException("Failed to complete commit " + config.getBasePath() + " at time " + commitTime,
           e);
@@ -607,7 +607,7 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
     }
     String latestCommit = table.getCompletedCommitsTimeline().lastInstant().get().getTimestamp();
-    LOG.info("Savepointing latest commit " + latestCommit);
+    LOG.info("Savepointing latest commit {}", latestCommit);
     return savepoint(latestCommit, user, comment);
   }
@@ -658,7 +658,7 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
             config.shouldAssumeDatePartitioning()))
         .mapToPair((PairFunction<String, String, List<String>>) partitionPath -> {
           // Scan all partitions files with this commit time
-          LOG.info("Collecting latest files in partition path " + partitionPath);
+          LOG.info("Collecting latest files in partition path {}", partitionPath);
           ReadOptimizedView view = table.getROFileSystemView();
           List<String> latestFiles = view.getLatestDataFilesBeforeOrOn(partitionPath, commitTime)
               .map(HoodieDataFile::getFileName).collect(Collectors.toList());
@@ -672,7 +672,7 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
       table.getActiveTimeline()
           .saveAsComplete(new HoodieInstant(true, HoodieTimeline.SAVEPOINT_ACTION, commitTime),
               AvroUtils.serializeSavepointMetadata(metadata));
-      LOG.info("Savepoint " + commitTime + " created");
+      LOG.info("Savepoint {} created", commitTime);
       return true;
     } catch (IOException e) {
       throw new HoodieSavepointException("Failed to savepoint " + commitTime, e);
@@ -696,13 +696,13 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
     HoodieInstant savePoint = new HoodieInstant(false, HoodieTimeline.SAVEPOINT_ACTION, savepointTime);
     boolean isSavepointPresent = table.getCompletedSavepointTimeline().containsInstant(savePoint);
     if (!isSavepointPresent) {
-      LOG.warn("No savepoint present " + savepointTime);
+      LOG.warn("No savepoint present {}", savepointTime);
       return;
     }
     activeTimeline.revertToInflight(savePoint);
     activeTimeline.deleteInflight(new HoodieInstant(true, HoodieTimeline.SAVEPOINT_ACTION, savepointTime));
-    LOG.info("Savepoint " + savepointTime + " deleted");
+    LOG.info("Savepoint {} deleted", savepointTime);
   }
   /**
@@ -730,7 +730,7 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
     } else {
       throw new IllegalArgumentException("Compaction is not in requested state " + compactionTime);
     }
-    LOG.info("Compaction " + compactionTime + " deleted");
+    LOG.info("Compaction {} deleted", compactionTime);
   }
   /**
@@ -758,7 +758,7 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
     List<String> commitsToRollback = commitTimeline.findInstantsAfter(savepointTime, Integer.MAX_VALUE).getInstants()
         .map(HoodieInstant::getTimestamp).collect(Collectors.toList());
-    LOG.info("Rolling back commits " + commitsToRollback);
+    LOG.info("Rolling back commits {}", commitsToRollback);
     restoreToInstant(savepointTime);
@@ -818,7 +818,7 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
         // delete these files when it does not see a corresponding instant file under .hoodie
         List<HoodieRollbackStat> statsForCompaction = doRollbackAndGetStats(instant);
         instantsToStats.put(instant.getTimestamp(), statsForCompaction);
-        LOG.info("Deleted compaction instant " + instant);
+        LOG.info("Deleted compaction instant {}", instant);
         break;
       default:
         throw new IllegalArgumentException("invalid action name " + instant.getAction());
@@ -859,7 +859,7 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
     if (commitTimeline.empty() && inflightAndRequestedCommitTimeline.empty()) {
       // nothing to rollback
-      LOG.info("No commits to rollback " + commitToRollback);
+      LOG.info("No commits to rollback {}", commitToRollback);
     }
     // Make sure only the last n commits are being rolled back
@@ -881,13 +881,13 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
     List<HoodieRollbackStat> stats = table.rollback(jsc, instantToRollback, true);
-    LOG.info("Deleted inflight commits " + commitToRollback);
+    LOG.info("Deleted inflight commits {}", commitToRollback);
     // cleanup index entries
     if (!index.rollbackCommit(commitToRollback)) {
       throw new HoodieRollbackException("Rollback index changes failed, for time :" + commitToRollback);
     }
-    LOG.info("Index rolled back for commits " + commitToRollback);
+    LOG.info("Index rolled back for commits {}", commitToRollback);
     return stats;
   }
@@ -908,7 +908,7 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
     table.getActiveTimeline().saveAsComplete(
         new HoodieInstant(true, HoodieTimeline.ROLLBACK_ACTION, startRollbackTime),
         AvroUtils.serializeRollbackMetadata(rollbackMetadata));
-    LOG.info("Commits " + commitsToRollback + " rollback is complete");
+    LOG.info("Commits {} rollback is complete", commitsToRollback);
     if (!table.getActiveTimeline().getCleanerTimeline().empty()) {
       LOG.info("Cleaning up older rollback meta files");
@@ -936,7 +936,7 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
         AvroUtils.convertRestoreMetadata(startRestoreTime, durationInMs, commitsToRollback, commitToStats);
     table.getActiveTimeline().saveAsComplete(new HoodieInstant(true, HoodieTimeline.RESTORE_ACTION, startRestoreTime),
         AvroUtils.serializeRestoreMetadata(restoreMetadata));
-    LOG.info("Commits " + commitsToRollback + " rollback is complete. Restored dataset to " + restoreToInstant);
+    LOG.info("Commits {} rollback is complete. Restored dataset to {}", commitsToRollback, restoreToInstant);
     if (!table.getActiveTimeline().getCleanerTimeline().empty()) {
       LOG.info("Cleaning up older restore meta files");
@@ -1028,7 +1028,7 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
   }
   private void startCommit(String instantTime) {
-    LOG.info("Generate a new instant time " + instantTime);
+    LOG.info("Generate a new instant time {}", instantTime);
     HoodieTableMetaClient metaClient = createMetaClient(true);
     // if there are pending compactions, their instantTime must not be greater than that of this instant time
     metaClient.getActiveTimeline().filterPendingCompactionTimeline().lastInstant().ifPresent(latestPending -> {
@@ -1048,7 +1048,7 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
   */
  public Option<String> scheduleCompaction(Option<Map<String, String>> extraMetadata) throws IOException {
    String instantTime = HoodieActiveTimeline.createNewInstantTime();
-   LOG.info("Generate a new instant time " + instantTime);
+   LOG.info("Generate a new instant time {}", instantTime);
    boolean notEmpty = scheduleCompactionAtInstant(instantTime, extraMetadata);
    return notEmpty ? Option.of(instantTime) : Option.empty();
  }
@@ -1292,9 +1292,9 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
             + config.getBasePath() + " at time " + compactionCommitTime, e);
       }
     }
-    LOG.info("Compacted successfully on commit " + compactionCommitTime);
+    LOG.info("Compacted successfully on commit {}", compactionCommitTime);
   } else {
-    LOG.info("Compaction did not run for commit " + compactionCommitTime);
+    LOG.info("Compaction did not run for commit {}", compactionCommitTime);
   }
 }
@@ -1305,7 +1305,7 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
   if (finalizeCtx != null) {
     Option<Long> durationInMs = Option.of(metrics.getDurationInMs(finalizeCtx.stop()));
     durationInMs.ifPresent(duration -> {
-      LOG.info("Finalize write elapsed time (milliseconds): " + duration);
+      LOG.info("Finalize write elapsed time (milliseconds): {}", duration);
       metrics.updateFinalizeWriteMetrics(duration, stats.size());
     });
   }
@@ -1347,7 +1347,7 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
     });
   });
-  LOG.info("Committing Compaction " + compactionCommitTime + ". Finished with result " + metadata);
+  LOG.info("Committing Compaction {}. Finished with result {}", compactionCommitTime, metadata);
   HoodieActiveTimeline activeTimeline = metaClient.getActiveTimeline();
   try {
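
A caveat worth noting about the parameterized calls used throughout HoodieWriteClient above: SLF4J defers formatting the message string, but the argument expressions themselves are still evaluated at the call site. For cheap arguments such as commitTime that cost is negligible; where an argument would be expensive to compute, the usual SLF4J level-check idiom still applies. A small sketch (computeExpensiveSummary() is a hypothetical method, not part of this codebase):

    // Cheap argument: pass it directly, formatting is skipped when INFO is off
    LOG.info("Committed {}", commitTime);

    // Expensive argument: guard the call, otherwise computeExpensiveSummary()
    // would run even when DEBUG logging is disabled
    if (LOG.isDebugEnabled()) {
      LOG.debug("Commit summary: {}", computeExpensiveSummary());
    }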

View File

@@ -26,9 +26,9 @@ import org.apache.hudi.common.util.NetworkUtils;
 import org.apache.hudi.timeline.service.TimelineService;
 import org.apache.hadoop.conf.Configuration;
-import org.apache.log4j.LogManager;
-import org.apache.log4j.Logger;
 import org.apache.spark.SparkConf;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 import java.io.IOException;
@@ -37,7 +37,7 @@ import java.io.IOException;
  */
 public class EmbeddedTimelineService {
-  private static final Logger LOG = LogManager.getLogger(EmbeddedTimelineService.class);
+  private static final Logger LOG = LoggerFactory.getLogger(EmbeddedTimelineService.class);
   private int serverPort;
   private String hostAddr;
@@ -72,13 +72,13 @@ public class EmbeddedTimelineService {
   public void startServer() throws IOException {
     server = new TimelineService(0, viewManager, hadoopConf.newCopy());
     serverPort = server.startService();
-    LOG.info("Started embedded timeline server at " + hostAddr + ":" + serverPort);
+    LOG.info("Started embedded timeline server at {} : {}", hostAddr, serverPort);
   }
   private void setHostAddrFromSparkConf(SparkConf sparkConf) {
     String hostAddr = sparkConf.get("spark.driver.host", null);
     if (hostAddr != null) {
-      LOG.info("Overriding hostIp to (" + hostAddr + ") found in spark-conf. It was " + this.hostAddr);
+      LOG.info("Overriding hostIp to ({}) found in spark-conf. It was {}", hostAddr, this.hostAddr);
       this.hostAddr = hostAddr;
     } else {
       LOG.warn("Unable to find driver bind address from spark config");

View File

@@ -20,12 +20,12 @@ package org.apache.hudi.index.hbase;
 import org.apache.hudi.config.HoodieWriteConfig;
-import org.apache.log4j.LogManager;
-import org.apache.log4j.Logger;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 public class DefaultHBaseQPSResourceAllocator implements HBaseIndexQPSResourceAllocator {
   private HoodieWriteConfig hoodieWriteConfig;
-  private static final Logger LOG = LogManager.getLogger(DefaultHBaseQPSResourceAllocator.class);
+  private static final Logger LOG = LoggerFactory.getLogger(DefaultHBaseQPSResourceAllocator.class);
   public DefaultHBaseQPSResourceAllocator(HoodieWriteConfig hoodieWriteConfig) {
     this.hoodieWriteConfig = hoodieWriteConfig;
@@ -46,7 +46,7 @@ public class DefaultHBaseQPSResourceAllocator implements HBaseIndexQPSResourceAl
   @Override
   public void releaseQPSResources() {
     // Do nothing, as there are no resources locked in default implementation
-    LOG.info(String.format("Release QPS resources called for %s with default implementation, do nothing",
-        this.hoodieWriteConfig.getHbaseTableName()));
+    LOG.info("Release QPS resources called for {} with default implementation, do nothing",
+        this.hoodieWriteConfig.getHbaseTableName());
   }
 }

View File

@@ -49,8 +49,6 @@ import org.apache.hadoop.hbase.client.Put;
 import org.apache.hadoop.hbase.client.RegionLocator;
 import org.apache.hadoop.hbase.client.Result;
 import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.log4j.LogManager;
-import org.apache.log4j.Logger;
 import org.apache.spark.SparkConf;
 import org.apache.spark.api.java.JavaPairRDD;
 import org.apache.spark.api.java.JavaRDD;
@@ -64,6 +62,8 @@ import java.util.Iterator;
 import java.util.LinkedList;
 import java.util.List;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 import scala.Tuple2;
 /**
@@ -82,7 +82,7 @@ public class HBaseIndex<T extends HoodieRecordPayload> extends HoodieIndex<T> {
   private static final byte[] PARTITION_PATH_COLUMN = Bytes.toBytes("partition_path");
   private static final int SLEEP_TIME_MILLISECONDS = 100;
-  private static final Logger LOG = LogManager.getLogger(HBaseIndex.class);
+  private static final Logger LOG = LoggerFactory.getLogger(HBaseIndex.class);
   private static Connection hbaseConnection = null;
   private HBaseIndexQPSResourceAllocator hBaseIndexQPSResourceAllocator = null;
   private float qpsFraction;
@@ -114,7 +114,7 @@ public class HBaseIndex<T extends HoodieRecordPayload> extends HoodieIndex<T> {
   @VisibleForTesting
   public HBaseIndexQPSResourceAllocator createQPSResourceAllocator(HoodieWriteConfig config) {
     try {
-      LOG.info("createQPSResourceAllocator :" + config.getHBaseQPSResourceAllocatorClass());
+      LOG.info("createQPSResourceAllocator : {}", config.getHBaseQPSResourceAllocatorClass());
       final HBaseIndexQPSResourceAllocator resourceAllocator = (HBaseIndexQPSResourceAllocator) ReflectionUtils
           .loadClass(config.getHBaseQPSResourceAllocatorClass(), config);
       return resourceAllocator;
@@ -323,7 +323,7 @@ public class HBaseIndex<T extends HoodieRecordPayload> extends HoodieIndex<T> {
         doPutsAndDeletes(hTable, puts, deletes);
       } catch (Exception e) {
         Exception we = new Exception("Error updating index for " + writeStatus, e);
-        LOG.error(we);
+        LOG.error("Error updating index for {}", writeStatus, e);
         writeStatus.setGlobalError(we);
       }
       writeStatusList.add(writeStatus);
@@ -373,7 +373,7 @@ public class HBaseIndex<T extends HoodieRecordPayload> extends HoodieIndex<T> {
       HoodieTable<T> hoodieTable) {
     final HBaseIndexQPSResourceAllocator hBaseIndexQPSResourceAllocator = createQPSResourceAllocator(this.config);
     setPutBatchSize(writeStatusRDD, hBaseIndexQPSResourceAllocator, jsc);
-    LOG.info("multiPutBatchSize: before hbase puts" + multiPutBatchSize);
+    LOG.info("multiPutBatchSize: before HBase puts {}", multiPutBatchSize);
     JavaRDD<WriteStatus> writeStatusJavaRDD = writeStatusRDD.mapPartitionsWithIndex(updateLocationFunction(), true);
     // caching the index updated status RDD
     writeStatusJavaRDD = writeStatusJavaRDD.persist(config.getWriteStatusStorageLevel());
@@ -401,15 +401,15 @@ public class HBaseIndex<T extends HoodieRecordPayload> extends HoodieIndex<T> {
       this.numRegionServersForTable = getNumRegionServersAliveForTable();
       final float desiredQPSFraction =
           hBaseIndexQPSResourceAllocator.calculateQPSFractionForPutsTime(numPuts, this.numRegionServersForTable);
-      LOG.info("Desired QPSFraction :" + desiredQPSFraction);
-      LOG.info("Number HBase puts :" + numPuts);
-      LOG.info("Hbase Puts Parallelism :" + hbasePutsParallelism);
+      LOG.info("Desired QPSFraction : {}", desiredQPSFraction);
+      LOG.info("Number HBase puts : {}", numPuts);
+      LOG.info("HBase Puts Parallelism : {}", hbasePutsParallelism);
       final float availableQpsFraction =
           hBaseIndexQPSResourceAllocator.acquireQPSResources(desiredQPSFraction, numPuts);
       LOG.info("Allocated QPS Fraction :" + availableQpsFraction);
       multiPutBatchSize = putBatchSizeCalculator.getBatchSize(numRegionServersForTable, maxQpsPerRegionServer,
           hbasePutsParallelism, maxExecutors, SLEEP_TIME_MILLISECONDS, availableQpsFraction);
-      LOG.info("multiPutBatchSize :" + multiPutBatchSize);
+      LOG.info("multiPutBatchSize : {}", multiPutBatchSize);
     }
   }
@@ -423,7 +423,7 @@ public class HBaseIndex<T extends HoodieRecordPayload> extends HoodieIndex<T> {
   public static class HbasePutBatchSizeCalculator implements Serializable {
     private static final int MILLI_SECONDS_IN_A_SECOND = 1000;
-    private static final Logger LOG = LogManager.getLogger(HbasePutBatchSizeCalculator.class);
+    private static final Logger LOG = LoggerFactory.getLogger(HbasePutBatchSizeCalculator.class);
     /**
      * Calculate putBatch size so that sum of requests across multiple jobs in a second does not exceed
@@ -465,15 +465,15 @@ public class HBaseIndex<T extends HoodieRecordPayload> extends HoodieIndex<T> {
       int maxParallelPuts = Math.max(1, Math.min(numTasks, maxExecutors));
       int maxReqsSentPerTaskPerSec = MILLI_SECONDS_IN_A_SECOND / sleepTimeMs;
       int multiPutBatchSize = Math.max(1, maxReqPerSec / (maxParallelPuts * maxReqsSentPerTaskPerSec));
-      LOG.info("HbaseIndexThrottling: qpsFraction :" + qpsFraction);
-      LOG.info("HbaseIndexThrottling: numRSAlive :" + numRSAlive);
-      LOG.info("HbaseIndexThrottling: maxReqPerSec :" + maxReqPerSec);
-      LOG.info("HbaseIndexThrottling: numTasks :" + numTasks);
-      LOG.info("HbaseIndexThrottling: maxExecutors :" + maxExecutors);
-      LOG.info("HbaseIndexThrottling: maxParallelPuts :" + maxParallelPuts);
-      LOG.info("HbaseIndexThrottling: maxReqsSentPerTaskPerSec :" + maxReqsSentPerTaskPerSec);
-      LOG.info("HbaseIndexThrottling: numRegionServersForTable :" + numRegionServersForTable);
-      LOG.info("HbaseIndexThrottling: multiPutBatchSize :" + multiPutBatchSize);
+      LOG.info("HBaseIndexThrottling: qpsFraction : {}", qpsFraction);
+      LOG.info("HBaseIndexThrottling: numRSAlive : {}", numRSAlive);
+      LOG.info("HBaseIndexThrottling: maxReqPerSec : {}", maxReqPerSec);
+      LOG.info("HBaseIndexThrottling: numTasks : {}", numTasks);
+      LOG.info("HBaseIndexThrottling: maxExecutors : {}", maxExecutors);
+      LOG.info("HBaseIndexThrottling: maxParallelPuts : {}", maxParallelPuts);
+      LOG.info("HBaseIndexThrottling: maxReqsSentPerTaskPerSec : {}", maxReqsSentPerTaskPerSec);
+      LOG.info("HBaseIndexThrottling: numRegionServersForTable : {}", numRegionServersForTable);
+      LOG.info("HBaseIndexThrottling: multiPutBatchSize : {}", multiPutBatchSize);
       return multiPutBatchSize;
     }
   }
@@ -488,7 +488,7 @@ public class HBaseIndex<T extends HoodieRecordPayload> extends HoodieIndex<T> {
           .toIntExact(regionLocator.getAllRegionLocations().stream().map(e -> e.getServerName()).distinct().count());
       return numRegionServersForTable;
     } catch (IOException e) {
-      LOG.error(e);
+      LOG.error("Error while connecting HBase:", e);
       throw new RuntimeException(e);
     }
   }
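
The HBaseIndex change from LOG.error(we) to a parameterized call relies on documented SLF4J behavior: when the last argument is a Throwable and has no matching {} placeholder, it is treated as the exception to log (stack trace included) rather than substituted into the message. A small sketch of that convention (doPutsAndDeletes stands in for the real call in the hunk above):

    try {
      doPutsAndDeletes(hTable, puts, deletes);
    } catch (Exception e) {
      // "{}" is filled with writeStatus; the trailing e has no placeholder, so SLF4J
      // logs it as the exception, matching what log4j's LOG.error(message, e) produced
      LOG.error("Error updating index for {}", writeStatus, e);
    }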

View File

@@ -47,10 +47,10 @@ import com.google.common.collect.Maps;
 import org.apache.avro.generic.GenericRecord;
 import org.apache.avro.generic.IndexedRecord;
 import org.apache.hadoop.fs.Path;
-import org.apache.log4j.LogManager;
-import org.apache.log4j.Logger;
 import org.apache.spark.TaskContext;
 import org.apache.spark.util.SizeEstimator;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 import java.io.IOException;
 import java.util.ArrayList;
@@ -64,7 +64,7 @@ import java.util.concurrent.atomic.AtomicLong;
  */
 public class HoodieAppendHandle<T extends HoodieRecordPayload> extends HoodieWriteHandle<T> {
-  private static final Logger LOG = LogManager.getLogger(HoodieAppendHandle.class);
+  private static final Logger LOG = LoggerFactory.getLogger(HoodieAppendHandle.class);
   // This acts as the sequenceID for records written
   private static AtomicLong recordIndex = new AtomicLong(1);
   private final String fileId;
@@ -123,7 +123,7 @@ public class HoodieAppendHandle<T extends HoodieRecordPayload> extends HoodieWri
     } else {
       // This means there is no base data file, start appending to a new log file
       fileSlice = Option.of(new FileSlice(partitionPath, baseInstantTime, this.fileId));
-      LOG.info("New InsertHandle for partition :" + partitionPath);
+      LOG.info("New InsertHandle for partition : {}", partitionPath);
     }
     writeStatus.getStat().setPrevCommit(baseInstantTime);
     writeStatus.setFileId(fileId);
@@ -137,7 +137,7 @@ public class HoodieAppendHandle<T extends HoodieRecordPayload> extends HoodieWri
       ((HoodieDeltaWriteStat) writeStatus.getStat()).setLogVersion(currentLogFile.getLogVersion());
       ((HoodieDeltaWriteStat) writeStatus.getStat()).setLogOffset(writer.getCurrentSize());
     } catch (Exception e) {
-      LOG.error("Error in update task at commit " + instantTime, e);
+      LOG.error("Error in update task at commit {}", instantTime, e);
       writeStatus.setGlobalError(e);
       throw new HoodieUpsertException("Failed to initialize HoodieAppendHandle for FileId: " + fileId + " on commit "
           + instantTime + " on HDFS path " + hoodieTable.getMetaClient().getBasePath() + partitionPath, e);
@@ -179,7 +179,7 @@ public class HoodieAppendHandle<T extends HoodieRecordPayload> extends HoodieWri
       hoodieRecord.deflate();
       return avroRecord;
     } catch (Exception e) {
-      LOG.error("Error writing record " + hoodieRecord, e);
+      LOG.error("Error writing record {}", hoodieRecord, e);
       writeStatus.markFailure(hoodieRecord, e, recordMetadata);
     }
     return Option.empty();
@@ -232,7 +232,7 @@ public class HoodieAppendHandle<T extends HoodieRecordPayload> extends HoodieWri
       // Not throwing exception from here, since we don't want to fail the entire job
       // for a single record
       writeStatus.markFailure(record, t, recordMetadata);
-      LOG.error("Error writing record " + record, t);
+      LOG.error("Error writing record {}", record, t);
     }
   }
@@ -259,8 +259,8 @@ public class HoodieAppendHandle<T extends HoodieRecordPayload> extends HoodieWri
       runtimeStats.setTotalUpsertTime(timer.endTimer());
       stat.setRuntimeStats(runtimeStats);
-      LOG.info(String.format("AppendHandle for partitionPath %s fileID %s, took %d ms.", stat.getPartitionPath(),
-          stat.getFileId(), runtimeStats.getTotalUpsertTime()));
+      LOG.info("AppendHandle for partitionPath {} fileID {}, took {} ms.", stat.getPartitionPath(),
+          stat.getFileId(), runtimeStats.getTotalUpsertTime());
       return writeStatus;
     } catch (IOException e) {
@@ -308,7 +308,7 @@ public class HoodieAppendHandle<T extends HoodieRecordPayload> extends HoodieWri
     if (numberOfRecords >= (int) (maxBlockSize / averageRecordSize)) {
       // Recompute averageRecordSize before writing a new block and update existing value with
       // avg of new and old
-      LOG.info("AvgRecordSize => " + averageRecordSize);
+      LOG.info("AvgRecordSize => {}", averageRecordSize);
       averageRecordSize = (averageRecordSize + SizeEstimator.estimate(record)) / 2;
       doAppend(header);
       estimatedNumberOfBytesWritten += averageRecordSize * numberOfRecords;

View File

@@ -39,8 +39,8 @@ import org.apache.hudi.config.HoodieWriteConfig;
 import org.apache.hudi.exception.HoodieIOException;
 import org.apache.hudi.table.HoodieTable;
-import org.apache.log4j.LogManager;
-import org.apache.log4j.Logger;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 import java.io.IOException;
 import java.io.Serializable;
@@ -61,7 +61,7 @@ import java.util.stream.Collectors;
  */
 public class HoodieCleanHelper<T extends HoodieRecordPayload<T>> implements Serializable {
-  private static final Logger LOG = LogManager.getLogger(HoodieCleanHelper.class);
+  private static final Logger LOG = LoggerFactory.getLogger(HoodieCleanHelper.class);
   private final SyncableFileSystemView fileSystemView;
   private final HoodieTimeline commitTimeline;
@@ -100,8 +100,7 @@ public class HoodieCleanHelper<T extends HoodieRecordPayload<T>> implements Seri
     if ((cleanMetadata.getEarliestCommitToRetain() != null)
         && (cleanMetadata.getEarliestCommitToRetain().length() > 0)) {
       LOG.warn("Incremental Cleaning mode is enabled. Looking up partition-paths that have since changed "
-          + "since last cleaned at " + cleanMetadata.getEarliestCommitToRetain()
-          + ". New Instant to retain : " + newInstantToRetain);
+          + "since last cleaned at {}. New Instant to retain : {}", cleanMetadata.getEarliestCommitToRetain(), newInstantToRetain);
       return hoodieTable.getCompletedCommitsTimeline().getInstants().filter(instant -> {
         return HoodieTimeline.compareTimestamps(instant.getTimestamp(), cleanMetadata.getEarliestCommitToRetain(),
             HoodieTimeline.GREATER_OR_EQUAL) && HoodieTimeline.compareTimestamps(instant.getTimestamp(),
@@ -129,8 +128,7 @@ public class HoodieCleanHelper<T extends HoodieRecordPayload<T>> implements Seri
    * single file (i.e run it with versionsRetained = 1)
    */
   private List<String> getFilesToCleanKeepingLatestVersions(String partitionPath) throws IOException {
-    LOG.info("Cleaning " + partitionPath + ", retaining latest " + config.getCleanerFileVersionsRetained()
-        + " file versions. ");
+    LOG.info("Cleaning {}, retaining latest {} file versions. ", partitionPath, config.getCleanerFileVersionsRetained());
     List<HoodieFileGroup> fileGroups = fileSystemView.getAllFileGroups(partitionPath).collect(Collectors.toList());
     List<String> deletePaths = new ArrayList<>();
     // Collect all the datafiles savepointed by all the savepoints
@@ -189,7 +187,7 @@ public class HoodieCleanHelper<T extends HoodieRecordPayload<T>> implements Seri
    */
   private List<String> getFilesToCleanKeepingLatestCommits(String partitionPath) throws IOException {
     int commitsRetained = config.getCleanerCommitsRetained();
-    LOG.info("Cleaning " + partitionPath + ", retaining latest " + commitsRetained + " commits. ");
+    LOG.info("Cleaning {}, retaining latest {} commits. ", partitionPath, commitsRetained);
     List<String> deletePaths = new ArrayList<>();
     // Collect all the datafiles savepointed by all the savepoints
@@ -276,7 +274,7 @@ public class HoodieCleanHelper<T extends HoodieRecordPayload<T>> implements Seri
     } else {
       throw new IllegalArgumentException("Unknown cleaning policy : " + policy.name());
     }
-    LOG.info(deletePaths.size() + " patterns used to delete in partition path:" + partitionPath);
+    LOG.info("{} patterns used to delete in partition path: {}", deletePaths.size(), partitionPath);
     return deletePaths;
   }

View File

@@ -54,9 +54,9 @@ import com.google.common.collect.Sets;
import org.apache.avro.Schema; import org.apache.avro.Schema;
import org.apache.avro.generic.IndexedRecord; import org.apache.avro.generic.IndexedRecord;
import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.Path;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.JavaSparkContext;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
@@ -71,7 +71,7 @@ import java.util.stream.Stream;
*/ */
public class HoodieCommitArchiveLog { public class HoodieCommitArchiveLog {
private static final Logger LOG = LogManager.getLogger(HoodieCommitArchiveLog.class); private static final Logger LOG = LoggerFactory.getLogger(HoodieCommitArchiveLog.class);
private final Path archiveFilePath; private final Path archiveFilePath;
private final HoodieTableMetaClient metaClient; private final HoodieTableMetaClient metaClient;
@@ -118,9 +118,9 @@ public class HoodieCommitArchiveLog {
boolean success = true; boolean success = true;
if (!instantsToArchive.isEmpty()) { if (!instantsToArchive.isEmpty()) {
this.writer = openWriter(); this.writer = openWriter();
LOG.info("Archiving instants " + instantsToArchive); LOG.info("Archiving instants {}", instantsToArchive);
archive(instantsToArchive); archive(instantsToArchive);
LOG.info("Deleting archived instants " + instantsToArchive); LOG.info("Deleting archived instants {}", instantsToArchive);
success = deleteArchivedInstants(instantsToArchive); success = deleteArchivedInstants(instantsToArchive);
} else { } else {
LOG.info("No Instants to archive"); LOG.info("No Instants to archive");
@@ -189,14 +189,14 @@ public class HoodieCommitArchiveLog {
} }
private boolean deleteArchivedInstants(List<HoodieInstant> archivedInstants) throws IOException { private boolean deleteArchivedInstants(List<HoodieInstant> archivedInstants) throws IOException {
LOG.info("Deleting instants " + archivedInstants); LOG.info("Deleting instants {}", archivedInstants);
boolean success = true; boolean success = true;
for (HoodieInstant archivedInstant : archivedInstants) { for (HoodieInstant archivedInstant : archivedInstants) {
Path commitFile = new Path(metaClient.getMetaPath(), archivedInstant.getFileName()); Path commitFile = new Path(metaClient.getMetaPath(), archivedInstant.getFileName());
try { try {
if (metaClient.getFs().exists(commitFile)) { if (metaClient.getFs().exists(commitFile)) {
success &= metaClient.getFs().delete(commitFile, false); success &= metaClient.getFs().delete(commitFile, false);
LOG.info("Archived and deleted instant file " + commitFile); LOG.info("Archived and deleted instant file {}", commitFile);
} }
} catch (IOException e) { } catch (IOException e) {
throw new HoodieIOException("Failed to delete archived instant " + archivedInstant, e); throw new HoodieIOException("Failed to delete archived instant " + archivedInstant, e);
@@ -208,7 +208,7 @@ public class HoodieCommitArchiveLog {
return i.isCompleted() && (i.getAction().equals(HoodieTimeline.COMMIT_ACTION) return i.isCompleted() && (i.getAction().equals(HoodieTimeline.COMMIT_ACTION)
|| (i.getAction().equals(HoodieTimeline.DELTA_COMMIT_ACTION))); || (i.getAction().equals(HoodieTimeline.DELTA_COMMIT_ACTION)));
}).max(Comparator.comparing(HoodieInstant::getTimestamp))); }).max(Comparator.comparing(HoodieInstant::getTimestamp)));
LOG.info("Latest Committed Instant=" + latestCommitted); LOG.info("Latest Committed Instant={}", latestCommitted);
if (latestCommitted.isPresent()) { if (latestCommitted.isPresent()) {
success &= deleteAllInstantsOlderorEqualsInAuxMetaFolder(latestCommitted.get()); success &= deleteAllInstantsOlderorEqualsInAuxMetaFolder(latestCommitted.get());
} }
@@ -236,7 +236,7 @@ public class HoodieCommitArchiveLog {
Path metaFile = new Path(metaClient.getMetaAuxiliaryPath(), deleteInstant.getFileName()); Path metaFile = new Path(metaClient.getMetaAuxiliaryPath(), deleteInstant.getFileName());
if (metaClient.getFs().exists(metaFile)) { if (metaClient.getFs().exists(metaFile)) {
success &= metaClient.getFs().delete(metaFile, false); success &= metaClient.getFs().delete(metaFile, false);
LOG.info("Deleted instant file in auxiliary metapath : " + metaFile); LOG.info("Deleted instant file in auxiliary metapath : {}", metaFile);
} }
} }
return success; return success;
@@ -246,7 +246,7 @@ public class HoodieCommitArchiveLog {
try { try {
HoodieTimeline commitTimeline = metaClient.getActiveTimeline().getAllCommitsTimeline().filterCompletedInstants(); HoodieTimeline commitTimeline = metaClient.getActiveTimeline().getAllCommitsTimeline().filterCompletedInstants();
Schema wrapperSchema = HoodieArchivedMetaEntry.getClassSchema(); Schema wrapperSchema = HoodieArchivedMetaEntry.getClassSchema();
LOG.info("Wrapper schema " + wrapperSchema.toString()); LOG.info("Wrapper schema {}", wrapperSchema.toString());
List<IndexedRecord> records = new ArrayList<>(); List<IndexedRecord> records = new ArrayList<>();
for (HoodieInstant hoodieInstant : instants) { for (HoodieInstant hoodieInstant : instants) {
try { try {
@@ -255,7 +255,7 @@ public class HoodieCommitArchiveLog {
writeToFile(wrapperSchema, records); writeToFile(wrapperSchema, records);
} }
} catch (Exception e) { } catch (Exception e) {
LOG.error("Failed to archive commits, .commit file: " + hoodieInstant.getFileName(), e); LOG.error("Failed to archive commits, commit file: {}", hoodieInstant.getFileName(), e);
if (this.config.isFailOnTimelineArchivingEnabled()) { if (this.config.isFailOnTimelineArchivingEnabled()) {
throw e; throw e;
} }
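Across this file the migration follows one pattern: the logger field is obtained from SLF4J's LoggerFactory instead of log4j's LogManager, and string concatenation in messages is replaced by {} placeholders that are only rendered when the target level is enabled. A minimal, self-contained sketch of that pattern (class name and sample values are invented for illustration; it needs slf4j-api plus a binding such as slf4j-log4j12 or slf4j-simple on the classpath to print anything):

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.Arrays;
import java.util.List;

public class ArchiveLoggingSketch {

  // SLF4J declaration style used throughout this commit.
  private static final Logger LOG = LoggerFactory.getLogger(ArchiveLoggingSketch.class);

  public static void main(String[] args) {
    List<String> instantsToArchive = Arrays.asList("20191231.commit", "20191231.deltacommit");

    // log4j style: the message string is concatenated even when INFO is disabled.
    // LOG.info("Archiving instants " + instantsToArchive);

    // SLF4J style: formatting is deferred until the logger decides the message will be written.
    LOG.info("Archiving instants {}", instantsToArchive);
    LOG.info("Deleting archived instants {}", instantsToArchive);
  }
}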

View File

@@ -36,16 +36,16 @@ import org.apache.hudi.table.HoodieTable;
import org.apache.avro.generic.GenericRecord; import org.apache.avro.generic.GenericRecord;
import org.apache.avro.generic.IndexedRecord; import org.apache.avro.generic.IndexedRecord;
import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.Path;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.spark.TaskContext; import org.apache.spark.TaskContext;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException; import java.io.IOException;
import java.util.Iterator; import java.util.Iterator;
public class HoodieCreateHandle<T extends HoodieRecordPayload> extends HoodieWriteHandle<T> { public class HoodieCreateHandle<T extends HoodieRecordPayload> extends HoodieWriteHandle<T> {
private static final Logger LOG = LogManager.getLogger(HoodieCreateHandle.class); private static final Logger LOG = LoggerFactory.getLogger(HoodieCreateHandle.class);
private final HoodieStorageWriter<IndexedRecord> storageWriter; private final HoodieStorageWriter<IndexedRecord> storageWriter;
private final Path path; private final Path path;
@@ -73,7 +73,7 @@ public class HoodieCreateHandle<T extends HoodieRecordPayload> extends HoodieWri
} catch (IOException e) { } catch (IOException e) {
throw new HoodieInsertException("Failed to initialize HoodieStorageWriter for path " + path, e); throw new HoodieInsertException("Failed to initialize HoodieStorageWriter for path " + path, e);
} }
LOG.info("New CreateHandle for partition :" + partitionPath + " with fileId " + fileId); LOG.info("New CreateHandle for partition : {} with fileId {}", partitionPath, fileId);
} }
/** /**
@@ -120,7 +120,7 @@ public class HoodieCreateHandle<T extends HoodieRecordPayload> extends HoodieWri
// Not throwing exception from here, since we don't want to fail the entire job // Not throwing exception from here, since we don't want to fail the entire job
// for a single record // for a single record
writeStatus.markFailure(record, t, recordMetadata); writeStatus.markFailure(record, t, recordMetadata);
LOG.error("Error writing record " + record, t); LOG.error("Error writing record {}", record, t);
} }
} }
@@ -152,8 +152,7 @@ public class HoodieCreateHandle<T extends HoodieRecordPayload> extends HoodieWri
*/ */
@Override @Override
public WriteStatus close() { public WriteStatus close() {
LOG LOG.info("Closing the file {} as we are done with all the records {}", writeStatus.getFileId(), recordsWritten);
.info("Closing the file " + writeStatus.getFileId() + " as we are done with all the records " + recordsWritten);
try { try {
storageWriter.close(); storageWriter.close();
@@ -175,8 +174,8 @@ public class HoodieCreateHandle<T extends HoodieRecordPayload> extends HoodieWri
stat.setRuntimeStats(runtimeStats); stat.setRuntimeStats(runtimeStats);
writeStatus.setStat(stat); writeStatus.setStat(stat);
LOG.info(String.format("CreateHandle for partitionPath %s fileID %s, took %d ms.", stat.getPartitionPath(), LOG.info("CreateHandle for partitionPath {} fileID {}, took {} ms.", stat.getPartitionPath(),
stat.getFileId(), runtimeStats.getTotalCreateTime())); stat.getFileId(), runtimeStats.getTotalCreateTime());
return writeStatus; return writeStatus;
} catch (IOException e) { } catch (IOException e) {
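One point worth calling out in the error paths above: when the last argument is a Throwable and has no matching placeholder, SLF4J logs it as the exception, stack trace included, just as log4j did with error(message, throwable). A small illustrative sketch (names are invented, not Hudi classes):

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class WriteErrorLoggingSketch {

  private static final Logger LOG = LoggerFactory.getLogger(WriteErrorLoggingSketch.class);

  public static void main(String[] args) {
    String record = "HoodieRecord(key=uuid-1)";
    try {
      throw new IllegalStateException("simulated write failure");
    } catch (RuntimeException t) {
      // The single {} consumes 'record'; the trailing Throwable is not matched to a placeholder,
      // so it is treated as the exception and its stack trace is printed with the message.
      LOG.error("Error writing record {}", record, t);
    }
  }
}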

View File

@@ -31,8 +31,8 @@ import org.apache.hudi.table.HoodieTable;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.Path;
import org.apache.log4j.LogManager; import org.slf4j.Logger;
import org.apache.log4j.Logger; import org.slf4j.LoggerFactory;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.HashSet; import java.util.HashSet;
@@ -44,7 +44,7 @@ import java.util.Set;
*/ */
public class HoodieKeyLookupHandle<T extends HoodieRecordPayload> extends HoodieReadHandle<T> { public class HoodieKeyLookupHandle<T extends HoodieRecordPayload> extends HoodieReadHandle<T> {
private static final Logger LOG = LogManager.getLogger(HoodieKeyLookupHandle.class); private static final Logger LOG = LoggerFactory.getLogger(HoodieKeyLookupHandle.class);
private final HoodieTableType tableType; private final HoodieTableType tableType;
@@ -63,7 +63,7 @@ public class HoodieKeyLookupHandle<T extends HoodieRecordPayload> extends Hoodie
HoodieTimer timer = new HoodieTimer().startTimer(); HoodieTimer timer = new HoodieTimer().startTimer();
this.bloomFilter = ParquetUtils.readBloomFilterFromParquetMetadata(hoodieTable.getHadoopConf(), this.bloomFilter = ParquetUtils.readBloomFilterFromParquetMetadata(hoodieTable.getHadoopConf(),
new Path(getLatestDataFile().getPath())); new Path(getLatestDataFile().getPath()));
LOG.info(String.format("Read bloom filter from %s in %d ms", partitionPathFilePair, timer.endTimer())); LOG.info("Read bloom filter from {} in {} ms", partitionPathFilePair, timer.endTimer());
} }
/** /**
@@ -82,7 +82,7 @@ public class HoodieKeyLookupHandle<T extends HoodieRecordPayload> extends Hoodie
LOG.info(String.format("Checked keys against file %s, in %d ms. #candidates (%d) #found (%d)", filePath, LOG.info(String.format("Checked keys against file %s, in %d ms. #candidates (%d) #found (%d)", filePath,
timer.endTimer(), candidateRecordKeys.size(), foundRecordKeys.size())); timer.endTimer(), candidateRecordKeys.size(), foundRecordKeys.size()));
if (LOG.isDebugEnabled()) { if (LOG.isDebugEnabled()) {
LOG.debug("Keys matching for file " + filePath + " => " + foundRecordKeys); LOG.debug("Keys matching for file {} => {}", filePath, foundRecordKeys);
} }
} }
} catch (Exception e) { } catch (Exception e) {
@@ -98,7 +98,7 @@ public class HoodieKeyLookupHandle<T extends HoodieRecordPayload> extends Hoodie
// check record key against bloom filter of current file & add to possible keys if needed // check record key against bloom filter of current file & add to possible keys if needed
if (bloomFilter.mightContain(recordKey)) { if (bloomFilter.mightContain(recordKey)) {
if (LOG.isDebugEnabled()) { if (LOG.isDebugEnabled()) {
LOG.debug("Record key " + recordKey + " matches bloom filter in " + partitionPathFilePair); LOG.debug("Record key {} matches bloom filter in {}", recordKey, partitionPathFilePair);
} }
candidateRecordKeys.add(recordKey); candidateRecordKeys.add(recordKey);
} }
@@ -110,15 +110,14 @@ public class HoodieKeyLookupHandle<T extends HoodieRecordPayload> extends Hoodie
*/ */
public KeyLookupResult getLookupResult() { public KeyLookupResult getLookupResult() {
if (LOG.isDebugEnabled()) { if (LOG.isDebugEnabled()) {
LOG.debug("#The candidate row keys for " + partitionPathFilePair + " => " + candidateRecordKeys); LOG.debug("#The candidate row keys for {} => {}", partitionPathFilePair, candidateRecordKeys);
} }
HoodieDataFile dataFile = getLatestDataFile(); HoodieDataFile dataFile = getLatestDataFile();
List<String> matchingKeys = List<String> matchingKeys =
checkCandidatesAgainstFile(hoodieTable.getHadoopConf(), candidateRecordKeys, new Path(dataFile.getPath())); checkCandidatesAgainstFile(hoodieTable.getHadoopConf(), candidateRecordKeys, new Path(dataFile.getPath()));
LOG.info( LOG.info("Total records ({}), bloom filter candidates ({})/fp({}), actual matches ({})", totalKeysChecked,
String.format("Total records (%d), bloom filter candidates (%d)/fp(%d), actual matches (%d)", totalKeysChecked, candidateRecordKeys.size(), candidateRecordKeys.size() - matchingKeys.size(), matchingKeys.size());
candidateRecordKeys.size(), candidateRecordKeys.size() - matchingKeys.size(), matchingKeys.size()));
return new KeyLookupResult(partitionPathFilePair.getRight(), partitionPathFilePair.getLeft(), return new KeyLookupResult(partitionPathFilePair.getRight(), partitionPathFilePair.getLeft(),
dataFile.getCommitTime(), matchingKeys); dataFile.getCommitTime(), matchingKeys);
} }
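The isDebugEnabled() guards above survive the migration on purpose: {} placeholders defer message formatting, but the arguments themselves are still evaluated before the call, so guarding remains useful when building an argument is expensive. A short illustrative sketch (made-up class and data):

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.Arrays;
import java.util.List;

public class DebugGuardSketch {

  private static final Logger LOG = LoggerFactory.getLogger(DebugGuardSketch.class);

  public static void main(String[] args) {
    List<String> candidateRecordKeys = Arrays.asList("uuid-1", "uuid-2", "uuid-3");

    // Cheap argument: no guard needed, the message is only built if DEBUG is enabled.
    LOG.debug("Record key {} matches bloom filter", candidateRecordKeys.get(0));

    // Costly argument (imagine a very large collection or a computed summary):
    // the guard skips that work entirely when DEBUG is off.
    if (LOG.isDebugEnabled()) {
      LOG.debug("Candidate row keys => {}", String.join(",", candidateRecordKeys));
    }
  }
}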

View File

@@ -43,9 +43,9 @@ import org.apache.avro.Schema;
import org.apache.avro.generic.GenericRecord; import org.apache.avro.generic.GenericRecord;
import org.apache.avro.generic.IndexedRecord; import org.apache.avro.generic.IndexedRecord;
import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.Path;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.spark.TaskContext; import org.apache.spark.TaskContext;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException; import java.io.IOException;
import java.util.HashSet; import java.util.HashSet;
@@ -56,7 +56,7 @@ import java.util.Set;
@SuppressWarnings("Duplicates") @SuppressWarnings("Duplicates")
public class HoodieMergeHandle<T extends HoodieRecordPayload> extends HoodieWriteHandle<T> { public class HoodieMergeHandle<T extends HoodieRecordPayload> extends HoodieWriteHandle<T> {
private static final Logger LOG = LogManager.getLogger(HoodieMergeHandle.class); private static final Logger LOG = LoggerFactory.getLogger(HoodieMergeHandle.class);
private Map<String, HoodieRecord<T>> keyToNewRecords; private Map<String, HoodieRecord<T>> keyToNewRecords;
private Set<String> writtenRecordKeys; private Set<String> writtenRecordKeys;
@@ -137,7 +137,7 @@ public class HoodieMergeHandle<T extends HoodieRecordPayload> extends HoodieWrit
if (exception.isPresent() && exception.get() instanceof Throwable) { if (exception.isPresent() && exception.get() instanceof Throwable) {
// Not throwing exception from here, since we don't want to fail the entire job for a single record // Not throwing exception from here, since we don't want to fail the entire job for a single record
writeStatus.markFailure(record, exception.get(), recordMetadata); writeStatus.markFailure(record, exception.get(), recordMetadata);
LOG.error("Error writing record " + record, exception.get()); LOG.error("Error writing record {}", record, exception.get());
} else { } else {
write(record, avroRecord); write(record, avroRecord);
} }
@@ -155,7 +155,7 @@ public class HoodieMergeHandle<T extends HoodieRecordPayload> extends HoodieWrit
* Extract old file path, initialize StorageWriter and WriteStatus. * Extract old file path, initialize StorageWriter and WriteStatus.
*/ */
private void init(String fileId, String partitionPath, HoodieDataFile dataFileToBeMerged) { private void init(String fileId, String partitionPath, HoodieDataFile dataFileToBeMerged) {
LOG.info("partitionPath:" + partitionPath + ", fileId to be merged:" + fileId); LOG.info("partitionPath: {}, fileId to be merged: {}", partitionPath, fileId);
this.writtenRecordKeys = new HashSet<>(); this.writtenRecordKeys = new HashSet<>();
writeStatus.setStat(new HoodieWriteStat()); writeStatus.setStat(new HoodieWriteStat());
try { try {
@@ -171,8 +171,7 @@ public class HoodieMergeHandle<T extends HoodieRecordPayload> extends HoodieWrit
+ FSUtils.makeDataFileName(instantTime, writeToken, fileId)).toString(); + FSUtils.makeDataFileName(instantTime, writeToken, fileId)).toString();
newFilePath = new Path(config.getBasePath(), relativePath); newFilePath = new Path(config.getBasePath(), relativePath);
LOG.info(String.format("Merging new data into oldPath %s, as newPath %s", oldFilePath.toString(), LOG.info("Merging new data into oldPath {}, as newPath {}", oldFilePath.toString(), newFilePath.toString());
newFilePath.toString()));
// file name is same for all records, in this bunch // file name is same for all records, in this bunch
writeStatus.setFileId(fileId); writeStatus.setFileId(fileId);
writeStatus.setPartitionPath(partitionPath); writeStatus.setPartitionPath(partitionPath);
@@ -187,7 +186,7 @@ public class HoodieMergeHandle<T extends HoodieRecordPayload> extends HoodieWrit
storageWriter = storageWriter =
HoodieStorageWriterFactory.getStorageWriter(instantTime, newFilePath, hoodieTable, config, writerSchema); HoodieStorageWriterFactory.getStorageWriter(instantTime, newFilePath, hoodieTable, config, writerSchema);
} catch (IOException io) { } catch (IOException io) {
LOG.error("Error in update task at commit " + instantTime, io); LOG.error("Error in update task at commit {}", instantTime, io);
writeStatus.setGlobalError(io); writeStatus.setGlobalError(io);
throw new HoodieUpsertException("Failed to initialize HoodieUpdateHandle for FileId: " + fileId + " on commit " throw new HoodieUpsertException("Failed to initialize HoodieUpdateHandle for FileId: " + fileId + " on commit "
+ instantTime + " on path " + hoodieTable.getMetaClient().getBasePath(), io); + instantTime + " on path " + hoodieTable.getMetaClient().getBasePath(), io);
@@ -201,7 +200,7 @@ public class HoodieMergeHandle<T extends HoodieRecordPayload> extends HoodieWrit
try { try {
// Load the new records in a map // Load the new records in a map
long memoryForMerge = config.getMaxMemoryPerPartitionMerge(); long memoryForMerge = config.getMaxMemoryPerPartitionMerge();
LOG.info("MaxMemoryPerPartitionMerge => " + memoryForMerge); LOG.info("MaxMemoryPerPartitionMerge => {}", memoryForMerge);
this.keyToNewRecords = new ExternalSpillableMap<>(memoryForMerge, config.getSpillableMapBasePath(), this.keyToNewRecords = new ExternalSpillableMap<>(memoryForMerge, config.getSpillableMapBasePath(),
new DefaultSizeEstimator(), new HoodieRecordSizeEstimator(originalSchema)); new DefaultSizeEstimator(), new HoodieRecordSizeEstimator(originalSchema));
} catch (IOException io) { } catch (IOException io) {
@@ -218,12 +217,10 @@ public class HoodieMergeHandle<T extends HoodieRecordPayload> extends HoodieWrit
// NOTE: Once Records are added to map (spillable-map), DO NOT change it as they won't persist // NOTE: Once Records are added to map (spillable-map), DO NOT change it as they won't persist
keyToNewRecords.put(record.getRecordKey(), record); keyToNewRecords.put(record.getRecordKey(), record);
} }
LOG.info("Number of entries in MemoryBasedMap => " LOG.info("Number of entries in MemoryBasedMap => {}. Total size in bytes of MemoryBasedMap => {}. "
+ ((ExternalSpillableMap) keyToNewRecords).getInMemoryMapNumEntries() + "Number of entries in DiskBasedMap => {}. Size of file spilled to disk => {}",
+ "Total size in bytes of MemoryBasedMap => " ((ExternalSpillableMap) keyToNewRecords).getInMemoryMapNumEntries(), ((ExternalSpillableMap) keyToNewRecords).getCurrentInMemoryMapSize(),
+ ((ExternalSpillableMap) keyToNewRecords).getCurrentInMemoryMapSize() + "Number of entries in DiskBasedMap => " ((ExternalSpillableMap) keyToNewRecords).getDiskBasedMapNumEntries(), ((ExternalSpillableMap) keyToNewRecords).getSizeOfFileOnDiskInBytes());
+ ((ExternalSpillableMap) keyToNewRecords).getDiskBasedMapNumEntries() + "Size of file spilled to disk => "
+ ((ExternalSpillableMap) keyToNewRecords).getSizeOfFileOnDiskInBytes());
return partitionPath; return partitionPath;
} }
@@ -253,7 +250,7 @@ public class HoodieMergeHandle<T extends HoodieRecordPayload> extends HoodieWrit
hoodieRecord.deflate(); hoodieRecord.deflate();
return true; return true;
} catch (Exception e) { } catch (Exception e) {
LOG.error("Error writing record " + hoodieRecord, e); LOG.error("Error writing record {}", hoodieRecord, e);
writeStatus.markFailure(hoodieRecord, e, recordMetadata); writeStatus.markFailure(hoodieRecord, e, recordMetadata);
} }
return false; return false;
@@ -295,12 +292,12 @@ public class HoodieMergeHandle<T extends HoodieRecordPayload> extends HoodieWrit
try { try {
storageWriter.writeAvro(key, oldRecord); storageWriter.writeAvro(key, oldRecord);
} catch (ClassCastException e) { } catch (ClassCastException e) {
LOG.error("Schema mismatch when rewriting old record " + oldRecord + " from file " + getOldFilePath() LOG.error("Schema mismatch when rewriting old record {} from file {} to file {} with writerSchema {}",
+ " to file " + newFilePath + " with writerSchema " + writerSchema.toString(true)); oldRecord, getOldFilePath(), newFilePath, writerSchema.toString(true));
throw new HoodieUpsertException(errMsg, e); throw new HoodieUpsertException(errMsg, e);
} catch (IOException e) { } catch (IOException e) {
LOG.error("Failed to merge old record into new file for key " + key + " from old file " + getOldFilePath() LOG.error("Failed to merge old record into new file for key {} from old file {} to new file {}",
+ " to new file " + newFilePath, e); key, getOldFilePath(), newFilePath, e);
throw new HoodieUpsertException(errMsg, e); throw new HoodieUpsertException(errMsg, e);
} }
recordsWritten++; recordsWritten++;
@@ -345,6 +342,6 @@ public class HoodieMergeHandle<T extends HoodieRecordPayload> extends HoodieWrit
LOG.info(String.format("MergeHandle for partitionPath %s fileID %s, took %d ms.", stat.getPartitionPath(), LOG.info("MergeHandle for partitionPath {} fileID {}, took {} ms.", stat.getPartitionPath(),
stat.getFileId(), runtimeStats.getTotalUpsertTime())); stat.getFileId(), runtimeStats.getTotalUpsertTime());
return writeStatus; return writeStatus;
} catch (IOException e) { } catch (IOException e) {
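For the longer messages in this handle, each %s/%d or concatenated fragment becomes one untyped {} placeholder, and the arguments are matched purely by position. A mechanical conversion like this is easy to get subtly wrong, so here is a quick sketch of the matching rules (illustrative names and values only):

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class MergeLoggingSketch {

  private static final Logger LOG = LoggerFactory.getLogger(MergeLoggingSketch.class);

  public static void main(String[] args) {
    String partitionPath = "2019/12/31";
    String fileId = "file-0001";
    long totalUpsertTimeMs = 42L;

    // Three placeholders consume the three arguments in order; SLF4J has no typed specifiers.
    LOG.info("MergeHandle for partitionPath {} fileID {}, took {} ms.", partitionPath, fileId, totalUpsertTimeMs);

    // Pitfalls to watch for while converting: a missing argument leaves a literal "{}" in the
    // output, and an extra argument is silently dropped (unless it is a trailing Throwable).
    LOG.info("MergeHandle for partitionPath {} fileID {}, took {} ms.", partitionPath, fileId);
  }
}

Counting placeholders against arguments is the main review step that keeps this kind of rewrite safe.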

View File

@@ -36,9 +36,9 @@ import org.apache.avro.generic.GenericRecord;
import org.apache.avro.generic.IndexedRecord; import org.apache.avro.generic.IndexedRecord;
import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.Path;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.spark.TaskContext; import org.apache.spark.TaskContext;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException; import java.io.IOException;
@@ -47,7 +47,7 @@ import java.io.IOException;
*/ */
public abstract class HoodieWriteHandle<T extends HoodieRecordPayload> extends HoodieIOHandle { public abstract class HoodieWriteHandle<T extends HoodieRecordPayload> extends HoodieIOHandle {
private static final Logger LOG = LogManager.getLogger(HoodieWriteHandle.class); private static final Logger LOG = LoggerFactory.getLogger(HoodieWriteHandle.class);
protected final Schema originalSchema; protected final Schema originalSchema;
protected final Schema writerSchema; protected final Schema writerSchema;
protected HoodieTimer timer; protected HoodieTimer timer;
@@ -97,7 +97,7 @@ public abstract class HoodieWriteHandle<T extends HoodieRecordPayload> extends H
protected void createMarkerFile(String partitionPath) { protected void createMarkerFile(String partitionPath) {
Path markerPath = makeNewMarkerPath(partitionPath); Path markerPath = makeNewMarkerPath(partitionPath);
try { try {
LOG.info("Creating Marker Path=" + markerPath); LOG.info("Creating Marker Path={}", markerPath);
fs.create(markerPath, false).close(); fs.create(markerPath, false).close();
} catch (IOException e) { } catch (IOException e) {
throw new HoodieException("Failed to create marker file " + markerPath, e); throw new HoodieException("Failed to create marker file " + markerPath, e);
@@ -147,7 +147,7 @@ public abstract class HoodieWriteHandle<T extends HoodieRecordPayload> extends H
if (exception.isPresent() && exception.get() instanceof Throwable) { if (exception.isPresent() && exception.get() instanceof Throwable) {
// Not throwing exception from here, since we don't want to fail the entire job for a single record // Not throwing exception from here, since we don't want to fail the entire job for a single record
writeStatus.markFailure(record, exception.get(), recordMetadata); writeStatus.markFailure(record, exception.get(), recordMetadata);
LOG.error("Error writing record " + record, exception.get()); LOG.error("Error writing record {}", record, exception.get());
} else { } else {
write(record, avroRecord); write(record, avroRecord);
} }

View File

@@ -47,13 +47,13 @@ import com.google.common.collect.Sets;
import org.apache.avro.Schema; import org.apache.avro.Schema;
import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.Path;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.FlatMapFunction; import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.util.AccumulatorV2; import org.apache.spark.util.AccumulatorV2;
import org.apache.spark.util.LongAccumulator; import org.apache.spark.util.LongAccumulator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException; import java.io.IOException;
import java.util.Collection; import java.util.Collection;
@@ -74,7 +74,7 @@ import static java.util.stream.Collectors.toList;
*/ */
public class HoodieRealtimeTableCompactor implements HoodieCompactor { public class HoodieRealtimeTableCompactor implements HoodieCompactor {
private static final Logger LOG = LogManager.getLogger(HoodieRealtimeTableCompactor.class); private static final Logger LOG = LoggerFactory.getLogger(HoodieRealtimeTableCompactor.class);
// Accumulator to keep track of total log files for a dataset // Accumulator to keep track of total log files for a dataset
private AccumulatorV2<Long, Long> totalLogFiles; private AccumulatorV2<Long, Long> totalLogFiles;
// Accumulator to keep track of total log file slices for a dataset // Accumulator to keep track of total log file slices for a dataset
@@ -92,7 +92,7 @@ public class HoodieRealtimeTableCompactor implements HoodieCompactor {
HoodieCopyOnWriteTable table = new HoodieCopyOnWriteTable(config, jsc); HoodieCopyOnWriteTable table = new HoodieCopyOnWriteTable(config, jsc);
List<CompactionOperation> operations = compactionPlan.getOperations().stream() List<CompactionOperation> operations = compactionPlan.getOperations().stream()
.map(CompactionOperation::convertFromAvroRecordInstance).collect(toList()); .map(CompactionOperation::convertFromAvroRecordInstance).collect(toList());
LOG.info("Compactor compacting " + operations + " files"); LOG.info("Compactor compacting {} files", operations);
return jsc.parallelize(operations, operations.size()) return jsc.parallelize(operations, operations.size())
.map(s -> compact(table, metaClient, config, s, compactionInstantTime)).flatMap(List::iterator); .map(s -> compact(table, metaClient, config, s, compactionInstantTime)).flatMap(List::iterator);
@@ -103,8 +103,8 @@ public class HoodieRealtimeTableCompactor implements HoodieCompactor {
FileSystem fs = metaClient.getFs(); FileSystem fs = metaClient.getFs();
Schema readerSchema = HoodieAvroUtils.addMetadataFields(new Schema.Parser().parse(config.getSchema())); Schema readerSchema = HoodieAvroUtils.addMetadataFields(new Schema.Parser().parse(config.getSchema()));
LOG.info("Compacting base " + operation.getDataFileName() + " with delta files " + operation.getDeltaFileNames() LOG.info("Compacting base {} with delta files {} for commit {}",
+ " for commit " + commitTime); operation.getDataFileName(), operation.getDeltaFileNames(), commitTime);
// TODO - FIX THIS // TODO - FIX THIS
// Reads the entire avro file. Always only specific blocks should be read from the avro file // Reads the entire avro file. Always only specific blocks should be read from the avro file
// (failure recover). // (failure recover).
@@ -115,7 +115,7 @@ public class HoodieRealtimeTableCompactor implements HoodieCompactor {
.getActiveTimeline().getTimelineOfActions(Sets.newHashSet(HoodieTimeline.COMMIT_ACTION, .getActiveTimeline().getTimelineOfActions(Sets.newHashSet(HoodieTimeline.COMMIT_ACTION,
HoodieTimeline.ROLLBACK_ACTION, HoodieTimeline.DELTA_COMMIT_ACTION)) HoodieTimeline.ROLLBACK_ACTION, HoodieTimeline.DELTA_COMMIT_ACTION))
.filterCompletedInstants().lastInstant().get().getTimestamp(); .filterCompletedInstants().lastInstant().get().getTimestamp();
LOG.info("MaxMemoryPerCompaction => " + config.getMaxMemoryPerCompaction()); LOG.info("MaxMemoryPerCompaction => {}", config.getMaxMemoryPerCompaction());
List<String> logFiles = operation.getDeltaFileNames().stream().map( List<String> logFiles = operation.getDeltaFileNames().stream().map(
p -> new Path(FSUtils.getPartitionPath(metaClient.getBasePath(), operation.getPartitionPath()), p).toString()) p -> new Path(FSUtils.getPartitionPath(metaClient.getBasePath(), operation.getPartitionPath()), p).toString())
@@ -176,7 +176,7 @@ public class HoodieRealtimeTableCompactor implements HoodieCompactor {
// TODO : check if maxMemory is not greater than JVM or spark.executor memory // TODO : check if maxMemory is not greater than JVM or spark.executor memory
// TODO - rollback any compactions in flight // TODO - rollback any compactions in flight
HoodieTableMetaClient metaClient = hoodieTable.getMetaClient(); HoodieTableMetaClient metaClient = hoodieTable.getMetaClient();
LOG.info("Compacting " + metaClient.getBasePath() + " with commit " + compactionCommitTime); LOG.info("Compacting {} with commit {}", metaClient.getBasePath(), compactionCommitTime);
List<String> partitionPaths = FSUtils.getAllPartitionPaths(metaClient.getFs(), metaClient.getBasePath(), List<String> partitionPaths = FSUtils.getAllPartitionPaths(metaClient.getFs(), metaClient.getBasePath(),
config.shouldAssumeDatePartitioning()); config.shouldAssumeDatePartitioning());
@@ -189,7 +189,7 @@ public class HoodieRealtimeTableCompactor implements HoodieCompactor {
} }
RealtimeView fileSystemView = hoodieTable.getRTFileSystemView(); RealtimeView fileSystemView = hoodieTable.getRTFileSystemView();
LOG.info("Compaction looking for files to compact in " + partitionPaths + " partitions"); LOG.info("Compaction looking for files to compact in {} partitions", partitionPaths);
List<HoodieCompactionOperation> operations = jsc.parallelize(partitionPaths, partitionPaths.size()) List<HoodieCompactionOperation> operations = jsc.parallelize(partitionPaths, partitionPaths.size())
.flatMap((FlatMapFunction<String, CompactionOperation>) partitionPath -> fileSystemView .flatMap((FlatMapFunction<String, CompactionOperation>) partitionPath -> fileSystemView
.getLatestFileSlices(partitionPath) .getLatestFileSlices(partitionPath)
@@ -206,10 +206,10 @@ public class HoodieRealtimeTableCompactor implements HoodieCompactor {
config.getCompactionStrategy().captureMetrics(config, dataFile, partitionPath, logFiles)); config.getCompactionStrategy().captureMetrics(config, dataFile, partitionPath, logFiles));
}).filter(c -> !c.getDeltaFileNames().isEmpty()).collect(toList()).iterator()) }).filter(c -> !c.getDeltaFileNames().isEmpty()).collect(toList()).iterator())
.collect().stream().map(CompactionUtils::buildHoodieCompactionOperation).collect(toList()); .collect().stream().map(CompactionUtils::buildHoodieCompactionOperation).collect(toList());
LOG.info("Total of " + operations.size() + " compactions are retrieved"); LOG.info("Total of {} compactions are retrieved", operations.size());
LOG.info("Total number of latest files slices " + totalFileSlices.value()); LOG.info("Total number of latest files slices {}", totalFileSlices.value());
LOG.info("Total number of log files " + totalLogFiles.value()); LOG.info("Total number of log files {}", totalLogFiles.value());
LOG.info("Total number of file slices " + totalFileSlices.value()); LOG.info("Total number of file slices {}", totalFileSlices.value());
// Filter the compactions with the passed in filter. This lets us choose most effective // Filter the compactions with the passed in filter. This lets us choose most effective
// compactions only // compactions only
HoodieCompactionPlan compactionPlan = config.getCompactionStrategy().generateCompactionPlan(config, operations, HoodieCompactionPlan compactionPlan = config.getCompactionStrategy().generateCompactionPlan(config, operations,
@@ -221,7 +221,7 @@ public class HoodieRealtimeTableCompactor implements HoodieCompactor {
+ "Please fix your strategy implementation. FileIdsWithPendingCompactions :" + fgIdsInPendingCompactions + "Please fix your strategy implementation. FileIdsWithPendingCompactions :" + fgIdsInPendingCompactions
+ ", Selected workload :" + compactionPlan); + ", Selected workload :" + compactionPlan);
if (compactionPlan.getOperations().isEmpty()) { if (compactionPlan.getOperations().isEmpty()) {
LOG.warn("After filtering, Nothing to compact for " + metaClient.getBasePath()); LOG.warn("After filtering, Nothing to compact for {}", metaClient.getBasePath());
} }
return compactionPlan; return compactionPlan;
} }
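The compactor hands its per-operation work to Spark via jsc.parallelize(...).map(...), so the functions above are serialized and shipped to executors. Keeping the logger in a private static final field means it is resolved from the class on whichever JVM runs the task and never travels with the serialized state. A rough illustrative sketch of that idea (class and field names are invented, not taken from Hudi):

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.ObjectOutputStream;
import java.io.Serializable;

public class SerializableTaskSketch {

  private static final Logger LOG = LoggerFactory.getLogger(SerializableTaskSketch.class);

  // Stand-in for a function object that a cluster framework would serialize and ship around.
  static class CompactOneFileTask implements Serializable {
    private static final long serialVersionUID = 1L;
    private final String dataFileName;

    CompactOneFileTask(String dataFileName) {
      this.dataFileName = dataFileName;
    }

    void run() {
      // The static LOG of the enclosing class is looked up locally wherever this runs;
      // it is never part of the serialized task state.
      LOG.info("Compacting base {}", dataFileName);
    }
  }

  public static void main(String[] args) throws IOException {
    CompactOneFileTask task = new CompactOneFileTask("file-0001.parquet");

    // Serialize the task the way a cluster framework would; only dataFileName is written.
    try (ObjectOutputStream out = new ObjectOutputStream(new ByteArrayOutputStream())) {
      out.writeObject(task);
    }

    task.run();
  }
}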

View File

@@ -24,15 +24,15 @@ import org.apache.hudi.config.HoodieWriteConfig;
import com.codahale.metrics.Timer; import com.codahale.metrics.Timer;
import com.google.common.annotations.VisibleForTesting; import com.google.common.annotations.VisibleForTesting;
import org.apache.log4j.LogManager; import org.slf4j.Logger;
import org.apache.log4j.Logger; import org.slf4j.LoggerFactory;
/** /**
* Wrapper for metrics-related operations. * Wrapper for metrics-related operations.
*/ */
public class HoodieMetrics { public class HoodieMetrics {
private static final Logger LOG = LogManager.getLogger(HoodieMetrics.class); private static final Logger LOG = LoggerFactory.getLogger(HoodieMetrics.class);
// Some timers // Some timers
public String rollbackTimerName = null; public String rollbackTimerName = null;
public String cleanTimerName = null; public String cleanTimerName = null;
@@ -155,8 +155,7 @@ public class HoodieMetrics {
public void updateRollbackMetrics(long durationInMs, long numFilesDeleted) { public void updateRollbackMetrics(long durationInMs, long numFilesDeleted) {
if (config.isMetricsOn()) { if (config.isMetricsOn()) {
LOG.info( LOG.info("Sending rollback metrics (duration={}, numFilesDeleted={})", durationInMs, numFilesDeleted);
String.format("Sending rollback metrics (duration=%d, numFilesDeleted=%d)", durationInMs, numFilesDeleted));
Metrics.registerGauge(getMetricsName("rollback", "duration"), durationInMs); Metrics.registerGauge(getMetricsName("rollback", "duration"), durationInMs);
Metrics.registerGauge(getMetricsName("rollback", "numFilesDeleted"), numFilesDeleted); Metrics.registerGauge(getMetricsName("rollback", "numFilesDeleted"), numFilesDeleted);
} }
@@ -164,8 +163,7 @@ public class HoodieMetrics {
public void updateCleanMetrics(long durationInMs, int numFilesDeleted) { public void updateCleanMetrics(long durationInMs, int numFilesDeleted) {
if (config.isMetricsOn()) { if (config.isMetricsOn()) {
LOG.info( LOG.info("Sending clean metrics (duration={}, numFilesDeleted={})", durationInMs, numFilesDeleted);
String.format("Sending clean metrics (duration=%d, numFilesDeleted=%d)", durationInMs, numFilesDeleted));
Metrics.registerGauge(getMetricsName("clean", "duration"), durationInMs); Metrics.registerGauge(getMetricsName("clean", "duration"), durationInMs);
Metrics.registerGauge(getMetricsName("clean", "numFilesDeleted"), numFilesDeleted); Metrics.registerGauge(getMetricsName("clean", "numFilesDeleted"), numFilesDeleted);
} }
@@ -173,8 +171,7 @@ public class HoodieMetrics {
public void updateFinalizeWriteMetrics(long durationInMs, long numFilesFinalized) { public void updateFinalizeWriteMetrics(long durationInMs, long numFilesFinalized) {
if (config.isMetricsOn()) { if (config.isMetricsOn()) {
LOG.info(String.format("Sending finalize write metrics (duration=%d, numFilesFinalized=%d)", durationInMs, LOG.info("Sending finalize write metrics (duration={}, numFilesFinalized={})", durationInMs, numFilesFinalized);
numFilesFinalized));
Metrics.registerGauge(getMetricsName("finalize", "duration"), durationInMs); Metrics.registerGauge(getMetricsName("finalize", "duration"), durationInMs);
Metrics.registerGauge(getMetricsName("finalize", "numFilesFinalized"), numFilesFinalized); Metrics.registerGauge(getMetricsName("finalize", "numFilesFinalized"), numFilesFinalized);
} }
@@ -182,7 +179,7 @@ public class HoodieMetrics {
public void updateIndexMetrics(final String action, final long durationInMs) { public void updateIndexMetrics(final String action, final long durationInMs) {
if (config.isMetricsOn()) { if (config.isMetricsOn()) {
LOG.info(String.format("Sending index metrics (%s.duration, %d)", action, durationInMs)); LOG.info("Sending index metrics ({}.duration, {})", action, durationInMs);
Metrics.registerGauge(getMetricsName("index", String.format("%s.duration", action)), durationInMs); Metrics.registerGauge(getMetricsName("index", String.format("%s.duration", action)), durationInMs);
} }
} }

View File

@@ -22,8 +22,8 @@ import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.exception.HoodieException; import org.apache.hudi.exception.HoodieException;
import com.google.common.base.Preconditions; import com.google.common.base.Preconditions;
import org.apache.log4j.LogManager; import org.slf4j.Logger;
import org.apache.log4j.Logger; import org.slf4j.LoggerFactory;
import javax.management.remote.JMXConnectorServer; import javax.management.remote.JMXConnectorServer;
import javax.management.remote.JMXConnectorServerFactory; import javax.management.remote.JMXConnectorServerFactory;
@@ -38,7 +38,7 @@ import java.rmi.registry.LocateRegistry;
*/ */
public class JmxMetricsReporter extends MetricsReporter { public class JmxMetricsReporter extends MetricsReporter {
private static final Logger LOG = LogManager.getLogger(JmxMetricsReporter.class); private static final Logger LOG = LoggerFactory.getLogger(JmxMetricsReporter.class);
private final JMXConnectorServer connector; private final JMXConnectorServer connector;
private String host; private String host;
private int port; private int port;

View File

@@ -24,8 +24,8 @@ import org.apache.hudi.exception.HoodieException;
import com.codahale.metrics.Gauge; import com.codahale.metrics.Gauge;
import com.codahale.metrics.MetricRegistry; import com.codahale.metrics.MetricRegistry;
import com.google.common.io.Closeables; import com.google.common.io.Closeables;
import org.apache.log4j.LogManager; import org.slf4j.Logger;
import org.apache.log4j.Logger; import org.slf4j.LoggerFactory;
import java.io.Closeable; import java.io.Closeable;
@@ -33,7 +33,7 @@ import java.io.Closeable;
* This is the main class of the metrics system. * This is the main class of the metrics system.
*/ */
public class Metrics { public class Metrics {
private static final Logger LOG = LogManager.getLogger(Metrics.class); private static final Logger LOG = LoggerFactory.getLogger(Metrics.class);
private static volatile boolean initialized = false; private static volatile boolean initialized = false;
private static Metrics metrics = null; private static Metrics metrics = null;

View File

@@ -24,8 +24,8 @@ import com.codahale.metrics.MetricFilter;
import com.codahale.metrics.MetricRegistry; import com.codahale.metrics.MetricRegistry;
import com.codahale.metrics.graphite.Graphite; import com.codahale.metrics.graphite.Graphite;
import com.codahale.metrics.graphite.GraphiteReporter; import com.codahale.metrics.graphite.GraphiteReporter;
import org.apache.log4j.LogManager; import org.slf4j.Logger;
import org.apache.log4j.Logger; import org.slf4j.LoggerFactory;
import java.io.Closeable; import java.io.Closeable;
import java.net.InetSocketAddress; import java.net.InetSocketAddress;
@@ -36,7 +36,7 @@ import java.util.concurrent.TimeUnit;
*/ */
public class MetricsGraphiteReporter extends MetricsReporter { public class MetricsGraphiteReporter extends MetricsReporter {
private static final Logger LOG = LogManager.getLogger(MetricsGraphiteReporter.class); private static final Logger LOG = LoggerFactory.getLogger(MetricsGraphiteReporter.class);
private final MetricRegistry registry; private final MetricRegistry registry;
private final GraphiteReporter graphiteReporter; private final GraphiteReporter graphiteReporter;
private final HoodieWriteConfig config; private final HoodieWriteConfig config;

View File

@@ -21,15 +21,15 @@ package org.apache.hudi.metrics;
import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.config.HoodieWriteConfig;
import com.codahale.metrics.MetricRegistry; import com.codahale.metrics.MetricRegistry;
import org.apache.log4j.LogManager; import org.slf4j.Logger;
import org.apache.log4j.Logger; import org.slf4j.LoggerFactory;
/** /**
* Factory class for creating MetricsReporter. * Factory class for creating MetricsReporter.
*/ */
public class MetricsReporterFactory { public class MetricsReporterFactory {
private static final Logger LOG = LogManager.getLogger(MetricsReporterFactory.class); private static final Logger LOG = LoggerFactory.getLogger(MetricsReporterFactory.class);
public static MetricsReporter createReporter(HoodieWriteConfig config, MetricRegistry registry) { public static MetricsReporter createReporter(HoodieWriteConfig config, MetricRegistry registry) {
MetricsReporterType type = config.getMetricsReporterType(); MetricsReporterType type = config.getMetricsReporterType();
@@ -45,7 +45,7 @@ public class MetricsReporterFactory {
reporter = new JmxMetricsReporter(config); reporter = new JmxMetricsReporter(config);
break; break;
default: default:
LOG.error("Reporter type[" + type + "] is not supported."); LOG.error("Reporter type[{}] is not supported.", type);
break; break;
} }
return reporter; return reporter;

View File

@@ -58,8 +58,6 @@ import org.apache.avro.generic.GenericRecord;
import org.apache.avro.generic.IndexedRecord; import org.apache.avro.generic.IndexedRecord;
import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.Path;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.parquet.avro.AvroParquetReader; import org.apache.parquet.avro.AvroParquetReader;
import org.apache.parquet.avro.AvroReadSupport; import org.apache.parquet.avro.AvroReadSupport;
import org.apache.parquet.hadoop.ParquetReader; import org.apache.parquet.hadoop.ParquetReader;
@@ -81,6 +79,8 @@ import java.util.Map;
import java.util.Set; import java.util.Set;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import scala.Tuple2; import scala.Tuple2;
/** /**
@@ -92,7 +92,7 @@ import scala.Tuple2;
*/ */
public class HoodieCopyOnWriteTable<T extends HoodieRecordPayload> extends HoodieTable<T> { public class HoodieCopyOnWriteTable<T extends HoodieRecordPayload> extends HoodieTable<T> {
private static final Logger LOG = LogManager.getLogger(HoodieCopyOnWriteTable.class); private static final Logger LOG = LoggerFactory.getLogger(HoodieCopyOnWriteTable.class);
public HoodieCopyOnWriteTable(HoodieWriteConfig config, JavaSparkContext jsc) { public HoodieCopyOnWriteTable(HoodieWriteConfig config, JavaSparkContext jsc) {
super(config, jsc); super(config, jsc);
@@ -130,7 +130,7 @@ public class HoodieCopyOnWriteTable<T extends HoodieRecordPayload> extends Hoodi
try { try {
boolean deleteResult = fs.delete(deletePath, false); boolean deleteResult = fs.delete(deletePath, false);
if (deleteResult) { if (deleteResult) {
LOG.debug("Cleaned file at path :" + deletePath); LOG.debug("Cleaned file at path : {}", deletePath);
} }
return deleteResult; return deleteResult;
} catch (FileNotFoundException fio) { } catch (FileNotFoundException fio) {
@@ -172,7 +172,7 @@ public class HoodieCopyOnWriteTable<T extends HoodieRecordPayload> extends Hoodi
throws IOException { throws IOException {
// This is needed since sometimes some buckets are never picked in getPartition() and end up with 0 records // This is needed since sometimes some buckets are never picked in getPartition() and end up with 0 records
if (!recordItr.hasNext()) { if (!recordItr.hasNext()) {
LOG.info("Empty partition with fileId => " + fileId); LOG.info("Empty partition with fileId => {}", fileId);
return Collections.singletonList((List<WriteStatus>) Collections.EMPTY_LIST).iterator(); return Collections.singletonList((List<WriteStatus>) Collections.EMPTY_LIST).iterator();
} }
// these are updates // these are updates
@@ -212,8 +212,8 @@ public class HoodieCopyOnWriteTable<T extends HoodieRecordPayload> extends Hoodi
// TODO(vc): This needs to be revisited // TODO(vc): This needs to be revisited
if (upsertHandle.getWriteStatus().getPartitionPath() == null) { if (upsertHandle.getWriteStatus().getPartitionPath() == null) {
LOG.info("Upsert Handle has partition path as null " + upsertHandle.getOldFilePath() + ", " LOG.info("Upsert Handle has partition path as null {}, {}", upsertHandle.getOldFilePath(),
+ upsertHandle.getWriteStatus()); upsertHandle.getWriteStatus());
} }
return Collections.singletonList(Collections.singletonList(upsertHandle.getWriteStatus())).iterator(); return Collections.singletonList(Collections.singletonList(upsertHandle.getWriteStatus())).iterator();
} }
@@ -291,8 +291,7 @@ public class HoodieCopyOnWriteTable<T extends HoodieRecordPayload> extends Hoodi
LOG.info("Nothing to clean here. It is already clean"); LOG.info("Nothing to clean here. It is already clean");
return HoodieCleanerPlan.newBuilder().setPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS.name()).build(); return HoodieCleanerPlan.newBuilder().setPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS.name()).build();
} }
LOG.info( LOG.info("Total Partitions to clean : {}, with policy {}", partitionsToClean.size(), config.getCleanerPolicy());
"Total Partitions to clean : " + partitionsToClean.size() + ", with policy " + config.getCleanerPolicy());
int cleanerParallelism = Math.min(partitionsToClean.size(), config.getCleanerParallelism()); int cleanerParallelism = Math.min(partitionsToClean.size(), config.getCleanerParallelism());
LOG.info("Using cleanerParallelism: " + cleanerParallelism); LOG.info("Using cleanerParallelism: " + cleanerParallelism);
@@ -318,7 +317,7 @@ public class HoodieCopyOnWriteTable<T extends HoodieRecordPayload> extends Hoodi
int cleanerParallelism = Math.min( int cleanerParallelism = Math.min(
(int) (cleanerPlan.getFilesToBeDeletedPerPartition().values().stream().mapToInt(x -> x.size()).count()), (int) (cleanerPlan.getFilesToBeDeletedPerPartition().values().stream().mapToInt(x -> x.size()).count()),
config.getCleanerParallelism()); config.getCleanerParallelism());
LOG.info("Using cleanerParallelism: " + cleanerParallelism); LOG.info("Using cleanerParallelism: {}", cleanerParallelism);
List<Tuple2<String, PartitionCleanStat>> partitionCleanStats = jsc List<Tuple2<String, PartitionCleanStat>> partitionCleanStats = jsc
.parallelize(cleanerPlan.getFilesToBeDeletedPerPartition().entrySet().stream() .parallelize(cleanerPlan.getFilesToBeDeletedPerPartition().entrySet().stream()
.flatMap(x -> x.getValue().stream().map(y -> new Tuple2<String, String>(x.getKey(), y))) .flatMap(x -> x.getValue().stream().map(y -> new Tuple2<String, String>(x.getKey(), y)))
@@ -355,7 +354,7 @@ public class HoodieCopyOnWriteTable<T extends HoodieRecordPayload> extends Hoodi
HoodieActiveTimeline activeTimeline = this.getActiveTimeline(); HoodieActiveTimeline activeTimeline = this.getActiveTimeline();
if (instant.isCompleted()) { if (instant.isCompleted()) {
LOG.info("Unpublishing instant " + instant); LOG.info("Unpublishing instant {}", instant);
instant = activeTimeline.revertToInflight(instant); instant = activeTimeline.revertToInflight(instant);
} }
@@ -365,7 +364,7 @@ public class HoodieCopyOnWriteTable<T extends HoodieRecordPayload> extends Hoodi
String commit = instant.getTimestamp(); String commit = instant.getTimestamp();
// delete all the data files for this commit // delete all the data files for this commit
LOG.info("Clean out all parquet files generated for commit: " + commit); LOG.info("Clean out all parquet files generated for commit: {}", commit);
List<RollbackRequest> rollbackRequests = generateRollbackRequests(instant); List<RollbackRequest> rollbackRequests = generateRollbackRequests(instant);
//TODO: We need to persist this as rollback workload and use it in case of partial failures //TODO: We need to persist this as rollback workload and use it in case of partial failures
@@ -373,7 +372,7 @@ public class HoodieCopyOnWriteTable<T extends HoodieRecordPayload> extends Hoodi
} }
// Delete Inflight instant if enabled // Delete Inflight instant if enabled
deleteInflightAndRequestedInstant(deleteInstants, activeTimeline, instant); deleteInflightAndRequestedInstant(deleteInstants, activeTimeline, instant);
LOG.info("Time(in ms) taken to finish rollback " + (System.currentTimeMillis() - startTime)); LOG.info("Time(in ms) taken to finish rollback {}", (System.currentTimeMillis() - startTime));
return stats; return stats;
} }
@@ -400,7 +399,7 @@ public class HoodieCopyOnWriteTable<T extends HoodieRecordPayload> extends Hoodi
// Remove the rolled back inflight commits // Remove the rolled back inflight commits
if (deleteInstant) { if (deleteInstant) {
LOG.info("Deleting instant=" + instantToBeDeleted); LOG.info("Deleting instant={}", instantToBeDeleted);
activeTimeline.deletePending(instantToBeDeleted); activeTimeline.deletePending(instantToBeDeleted);
if (instantToBeDeleted.isInflight() && !metaClient.getTimelineLayoutVersion().isNullVersion()) { if (instantToBeDeleted.isInflight() && !metaClient.getTimelineLayoutVersion().isNullVersion()) {
// Delete corresponding requested instant // Delete corresponding requested instant
@@ -408,9 +407,9 @@ public class HoodieCopyOnWriteTable<T extends HoodieRecordPayload> extends Hoodi
instantToBeDeleted.getTimestamp()); instantToBeDeleted.getTimestamp());
activeTimeline.deletePending(instantToBeDeleted); activeTimeline.deletePending(instantToBeDeleted);
} }
LOG.info("Deleted pending commit " + instantToBeDeleted); LOG.info("Deleted pending commit {}", instantToBeDeleted);
} else { } else {
LOG.warn("Rollback finished without deleting inflight instant file. Instant=" + instantToBeDeleted); LOG.warn("Rollback finished without deleting inflight instant file. Instant={}", instantToBeDeleted);
} }
} }
@@ -579,9 +578,10 @@ public class HoodieCopyOnWriteTable<T extends HoodieRecordPayload> extends Hoodi
assignUpdates(profile); assignUpdates(profile);
assignInserts(profile); assignInserts(profile);
LOG.info("Total Buckets :" + totalBuckets + ", buckets info => " + bucketInfoMap + ", \n" LOG.info("Total Buckets :{}, buckets info => {}, \n"
+ "Partition to insert buckets => " + partitionPathToInsertBuckets + ", \n" + "Partition to insert buckets => {}, \n"
+ "UpdateLocations mapped to buckets =>" + updateLocationToBucket); + "UpdateLocations mapped to buckets =>{}",
totalBuckets, bucketInfoMap, partitionPathToInsertBuckets, updateLocationToBucket);
} }
private void assignUpdates(WorkloadProfile profile) { private void assignUpdates(WorkloadProfile profile) {
@@ -609,13 +609,13 @@ public class HoodieCopyOnWriteTable<T extends HoodieRecordPayload> extends Hoodi
long averageRecordSize = long averageRecordSize =
averageBytesPerRecord(metaClient.getActiveTimeline().getCommitTimeline().filterCompletedInstants(), averageBytesPerRecord(metaClient.getActiveTimeline().getCommitTimeline().filterCompletedInstants(),
config.getCopyOnWriteRecordSizeEstimate()); config.getCopyOnWriteRecordSizeEstimate());
LOG.info("AvgRecordSize => " + averageRecordSize); LOG.info("AvgRecordSize => {}", averageRecordSize);
for (String partitionPath : partitionPaths) { for (String partitionPath : partitionPaths) {
WorkloadStat pStat = profile.getWorkloadStat(partitionPath); WorkloadStat pStat = profile.getWorkloadStat(partitionPath);
if (pStat.getNumInserts() > 0) { if (pStat.getNumInserts() > 0) {
List<SmallFile> smallFiles = getSmallFiles(partitionPath); List<SmallFile> smallFiles = getSmallFiles(partitionPath);
LOG.info("For partitionPath : " + partitionPath + " Small Files => " + smallFiles); LOG.info("For partitionPath : {} Small Files => {}", partitionPath, smallFiles);
long totalUnassignedInserts = pStat.getNumInserts(); long totalUnassignedInserts = pStat.getNumInserts();
List<Integer> bucketNumbers = new ArrayList<>(); List<Integer> bucketNumbers = new ArrayList<>();
@@ -630,10 +630,10 @@ public class HoodieCopyOnWriteTable<T extends HoodieRecordPayload> extends Hoodi
int bucket; int bucket;
if (updateLocationToBucket.containsKey(smallFile.location.getFileId())) { if (updateLocationToBucket.containsKey(smallFile.location.getFileId())) {
bucket = updateLocationToBucket.get(smallFile.location.getFileId()); bucket = updateLocationToBucket.get(smallFile.location.getFileId());
LOG.info("Assigning " + recordsToAppend + " inserts to existing update bucket " + bucket); LOG.info("Assigning {} inserts to existing update bucket {}", recordsToAppend, bucket);
} else { } else {
bucket = addUpdateBucket(smallFile.location.getFileId()); bucket = addUpdateBucket(smallFile.location.getFileId());
LOG.info("Assigning " + recordsToAppend + " inserts to new update bucket " + bucket); LOG.info("Assigning {} inserts to new update bucket {}", recordsToAppend, bucket);
} }
bucketNumbers.add(bucket); bucketNumbers.add(bucket);
recordsPerBucket.add(recordsToAppend); recordsPerBucket.add(recordsToAppend);
@@ -649,8 +649,8 @@ public class HoodieCopyOnWriteTable<T extends HoodieRecordPayload> extends Hoodi
} }
int insertBuckets = (int) Math.ceil((1.0 * totalUnassignedInserts) / insertRecordsPerBucket); int insertBuckets = (int) Math.ceil((1.0 * totalUnassignedInserts) / insertRecordsPerBucket);
LOG.info("After small file assignment: unassignedInserts => " + totalUnassignedInserts LOG.info("After small file assignment: unassignedInserts => {}, totalInsertBuckets => {}, "
+ ", totalInsertBuckets => " + insertBuckets + ", recordsPerBucket => " + insertRecordsPerBucket); + "recordsPerBucket => {}", totalUnassignedInserts, insertBuckets, insertRecordsPerBucket);
for (int b = 0; b < insertBuckets; b++) { for (int b = 0; b < insertBuckets; b++) {
bucketNumbers.add(totalBuckets); bucketNumbers.add(totalBuckets);
recordsPerBucket.add(totalUnassignedInserts / insertBuckets); recordsPerBucket.add(totalUnassignedInserts / insertBuckets);
@@ -670,7 +670,7 @@ public class HoodieCopyOnWriteTable<T extends HoodieRecordPayload> extends Hoodi
bkt.weight = (1.0 * recordsPerBucket.get(i)) / pStat.getNumInserts(); bkt.weight = (1.0 * recordsPerBucket.get(i)) / pStat.getNumInserts();
insertBuckets.add(bkt); insertBuckets.add(bkt);
} }
LOG.info("Total insert buckets for partition path " + partitionPath + " => " + insertBuckets); LOG.info("Total insert buckets for partition path {} => {}", partitionPath, insertBuckets);
partitionPathToInsertBuckets.put(partitionPath, insertBuckets); partitionPathToInsertBuckets.put(partitionPath, insertBuckets);
} }
} }

View File

@@ -44,11 +44,11 @@ import org.apache.hudi.io.compact.HoodieRealtimeTableCompactor;
import com.google.common.annotations.VisibleForTesting; import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions; import com.google.common.base.Preconditions;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.spark.Partitioner; import org.apache.spark.Partitioner;
import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.JavaSparkContext;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException; import java.io.IOException;
import java.io.UncheckedIOException; import java.io.UncheckedIOException;
@@ -77,7 +77,7 @@ import java.util.stream.Collectors;
*/ */
public class HoodieMergeOnReadTable<T extends HoodieRecordPayload> extends HoodieCopyOnWriteTable<T> { public class HoodieMergeOnReadTable<T extends HoodieRecordPayload> extends HoodieCopyOnWriteTable<T> {
private static final Logger LOG = LogManager.getLogger(HoodieMergeOnReadTable.class); private static final Logger LOG = LoggerFactory.getLogger(HoodieMergeOnReadTable.class);
// UpsertPartitioner for MergeOnRead table type // UpsertPartitioner for MergeOnRead table type
private MergeOnReadUpsertPartitioner mergeOnReadUpsertPartitioner; private MergeOnReadUpsertPartitioner mergeOnReadUpsertPartitioner;
@@ -98,10 +98,10 @@ public class HoodieMergeOnReadTable<T extends HoodieRecordPayload> extends Hoodi
@Override @Override
public Iterator<List<WriteStatus>> handleUpdate(String commitTime, String fileId, Iterator<HoodieRecord<T>> recordItr) public Iterator<List<WriteStatus>> handleUpdate(String commitTime, String fileId, Iterator<HoodieRecord<T>> recordItr)
throws IOException { throws IOException {
LOG.info("Merging updates for commit " + commitTime + " for file " + fileId); LOG.info("Merging updates for commit {} for file {}", commitTime, fileId);
if (!index.canIndexLogFiles() && mergeOnReadUpsertPartitioner.getSmallFileIds().contains(fileId)) { if (!index.canIndexLogFiles() && mergeOnReadUpsertPartitioner.getSmallFileIds().contains(fileId)) {
LOG.info("Small file corrections for updates for commit " + commitTime + " for file " + fileId); LOG.info("Small file corrections for updates for commit {} for file {}", commitTime, fileId);
return super.handleUpdate(commitTime, fileId, recordItr); return super.handleUpdate(commitTime, fileId, recordItr);
} else { } else {
HoodieAppendHandle<T> appendHandle = new HoodieAppendHandle<>(config, commitTime, this, fileId, recordItr); HoodieAppendHandle<T> appendHandle = new HoodieAppendHandle<>(config, commitTime, this, fileId, recordItr);
@@ -124,7 +124,7 @@ public class HoodieMergeOnReadTable<T extends HoodieRecordPayload> extends Hoodi
@Override @Override
public HoodieCompactionPlan scheduleCompaction(JavaSparkContext jsc, String instantTime) { public HoodieCompactionPlan scheduleCompaction(JavaSparkContext jsc, String instantTime) {
LOG.info("Checking if compaction needs to be run on " + config.getBasePath()); LOG.info("Checking if compaction needs to be run on {}", config.getBasePath());
Option<HoodieInstant> lastCompaction = Option<HoodieInstant> lastCompaction =
getActiveTimeline().getCommitTimeline().filterCompletedInstants().lastInstant(); getActiveTimeline().getCommitTimeline().filterCompletedInstants().lastInstant();
String deltaCommitsSinceTs = "0"; String deltaCommitsSinceTs = "0";
@@ -135,13 +135,12 @@ public class HoodieMergeOnReadTable<T extends HoodieRecordPayload> extends Hoodi
int deltaCommitsSinceLastCompaction = getActiveTimeline().getDeltaCommitTimeline() int deltaCommitsSinceLastCompaction = getActiveTimeline().getDeltaCommitTimeline()
.findInstantsAfter(deltaCommitsSinceTs, Integer.MAX_VALUE).countInstants(); .findInstantsAfter(deltaCommitsSinceTs, Integer.MAX_VALUE).countInstants();
if (config.getInlineCompactDeltaCommitMax() > deltaCommitsSinceLastCompaction) { if (config.getInlineCompactDeltaCommitMax() > deltaCommitsSinceLastCompaction) {
LOG.info("Not running compaction as only " + deltaCommitsSinceLastCompaction LOG.info("Not running compaction as only {} delta commits was found since last compaction {}. Waiting for {}",
+ " delta commits was found since last compaction " + deltaCommitsSinceTs + ". Waiting for " deltaCommitsSinceLastCompaction, deltaCommitsSinceTs, config.getInlineCompactDeltaCommitMax());
+ config.getInlineCompactDeltaCommitMax());
return new HoodieCompactionPlan(); return new HoodieCompactionPlan();
} }
LOG.info("Compacting merge on read table " + config.getBasePath()); LOG.info("Compacting merge on read table {}", config.getBasePath());
HoodieRealtimeTableCompactor compactor = new HoodieRealtimeTableCompactor(); HoodieRealtimeTableCompactor compactor = new HoodieRealtimeTableCompactor();
try { try {
return compactor.generateCompactionPlan(jsc, this, config, instantTime, return compactor.generateCompactionPlan(jsc, this, config, instantTime,
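The scheduleCompaction hunk folds a three-part concatenation into one template with three placeholders. Two points worth noting as a hedged aside: with more than two arguments SLF4J resolves to its varargs overload, and the argument expressions are still evaluated eagerly, only the string assembly is deferred. A sketch under those assumptions (names are hypothetical):

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

// Illustrative sketch of multi-argument placeholders and explicit level guards.
class CompactionLoggingSketch {
  private static final Logger LOG = LoggerFactory.getLogger(CompactionLoggingSketch.class);

  void logCompactionDecision(int deltaCommits, String sinceTs, int threshold) {
    // Three placeholders map positionally onto the varargs overload.
    LOG.info("Not running compaction as only {} delta commits were found since last compaction {}. Waiting for {}",
        deltaCommits, sinceTs, threshold);

    // Arguments are evaluated before the call; guard explicitly if computing one is expensive.
    if (LOG.isDebugEnabled()) {
      LOG.debug("Compaction state: {}", expensiveStateDump());
    }
  }

  private String expensiveStateDump() {
    return "state"; // stand-in for an expensive computation
  }
}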
@@ -171,11 +170,11 @@ public class HoodieMergeOnReadTable<T extends HoodieRecordPayload> extends Hoodi
Long startTime = System.currentTimeMillis(); Long startTime = System.currentTimeMillis();
String commit = instant.getTimestamp(); String commit = instant.getTimestamp();
LOG.error("Rolling back instant " + instant); LOG.error("Rolling back instant {}", instant);
// Atomically un-publish all non-inflight commits // Atomically un-publish all non-inflight commits
if (instant.isCompleted()) { if (instant.isCompleted()) {
LOG.error("Un-publishing instant " + instant + ", deleteInstants=" + deleteInstants); LOG.error("Un-publishing instant {}, deleteInstants={}", instant, deleteInstants);
instant = this.getActiveTimeline().revertToInflight(instant); instant = this.getActiveTimeline().revertToInflight(instant);
} }
@@ -191,7 +190,7 @@ public class HoodieMergeOnReadTable<T extends HoodieRecordPayload> extends Hoodi
// For Requested State (like failure during index lookup), there is nothing to do rollback other than // For Requested State (like failure during index lookup), there is nothing to do rollback other than
// deleting the timeline file // deleting the timeline file
if (!instant.isRequested()) { if (!instant.isRequested()) {
LOG.info("Unpublished " + commit); LOG.info("Unpublished {}", commit);
List<RollbackRequest> rollbackRequests = generateRollbackRequests(jsc, instant); List<RollbackRequest> rollbackRequests = generateRollbackRequests(jsc, instant);
// TODO: We need to persist this as rollback workload and use it in case of partial failures // TODO: We need to persist this as rollback workload and use it in case of partial failures
allRollbackStats = new RollbackExecutor(metaClient, config).performRollback(jsc, instant, rollbackRequests); allRollbackStats = new RollbackExecutor(metaClient, config).performRollback(jsc, instant, rollbackRequests);
@@ -200,7 +199,7 @@ public class HoodieMergeOnReadTable<T extends HoodieRecordPayload> extends Hoodi
// Delete Inflight instants if enabled // Delete Inflight instants if enabled
deleteInflightAndRequestedInstant(deleteInstants, this.getActiveTimeline(), instant); deleteInflightAndRequestedInstant(deleteInstants, this.getActiveTimeline(), instant);
LOG.info("Time(in ms) taken to finish rollback " + (System.currentTimeMillis() - startTime)); LOG.info("Time(in ms) taken to finish rollback {}", (System.currentTimeMillis() - startTime));
return allRollbackStats; return allRollbackStats;
} }
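The rollback hunk only interpolates values, but since this migration also touches error logging, one SLF4J convention is worth recording: a Throwable passed as the final argument does not consume a {} placeholder and is printed with its stack trace. A small illustrative sketch, not Hudi code:

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

// Illustrative: a trailing Throwable is logged with its stack trace.
class RollbackLoggingSketch {
  private static final Logger LOG = LoggerFactory.getLogger(RollbackLoggingSketch.class);

  void logRollbackFailure(String instant, Exception e) {
    // "instant" fills the placeholder; "e" is treated as the attached exception.
    LOG.error("Rollback failed for instant {}", instant, e);
  }
}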

View File

@@ -52,11 +52,11 @@ import org.apache.hudi.index.HoodieIndex;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.Path;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.spark.Partitioner; import org.apache.spark.Partitioner;
import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.JavaSparkContext;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException; import java.io.IOException;
import java.io.Serializable; import java.io.Serializable;
@@ -73,7 +73,7 @@ import java.util.stream.Stream;
*/ */
public abstract class HoodieTable<T extends HoodieRecordPayload> implements Serializable { public abstract class HoodieTable<T extends HoodieRecordPayload> implements Serializable {
private static final Logger LOG = LogManager.getLogger(HoodieTable.class); private static final Logger LOG = LoggerFactory.getLogger(HoodieTable.class);
protected final HoodieWriteConfig config; protected final HoodieWriteConfig config;
protected final HoodieTableMetaClient metaClient; protected final HoodieTableMetaClient metaClient;
@@ -324,7 +324,7 @@ public abstract class HoodieTable<T extends HoodieRecordPayload> implements Seri
Path markerDir = new Path(metaClient.getMarkerFolderPath(instantTs)); Path markerDir = new Path(metaClient.getMarkerFolderPath(instantTs));
if (fs.exists(markerDir)) { if (fs.exists(markerDir)) {
// For append only case, we do not write to marker dir. Hence, the above check // For append only case, we do not write to marker dir. Hence, the above check
LOG.info("Removing marker directory=" + markerDir); LOG.info("Removing marker directory={}", markerDir);
fs.delete(markerDir, true); fs.delete(markerDir, true);
} }
} catch (IOException ioe) { } catch (IOException ioe) {
@@ -363,7 +363,7 @@ public abstract class HoodieTable<T extends HoodieRecordPayload> implements Seri
invalidDataPaths.removeAll(validDataPaths); invalidDataPaths.removeAll(validDataPaths);
if (!invalidDataPaths.isEmpty()) { if (!invalidDataPaths.isEmpty()) {
LOG.info( LOG.info(
"Removing duplicate data files created due to spark retries before committing. Paths=" + invalidDataPaths); "Removing duplicate data files created due to spark retries before committing. Paths={}", invalidDataPaths);
} }
Map<String, List<Pair<String, String>>> groupByPartition = invalidDataPaths.stream() Map<String, List<Pair<String, String>>> groupByPartition = invalidDataPaths.stream()
@@ -381,7 +381,7 @@ public abstract class HoodieTable<T extends HoodieRecordPayload> implements Seri
jsc.parallelize(new ArrayList<>(groupByPartition.values()), config.getFinalizeWriteParallelism()) jsc.parallelize(new ArrayList<>(groupByPartition.values()), config.getFinalizeWriteParallelism())
.map(partitionWithFileList -> { .map(partitionWithFileList -> {
final FileSystem fileSystem = metaClient.getFs(); final FileSystem fileSystem = metaClient.getFs();
LOG.info("Deleting invalid data files=" + partitionWithFileList); LOG.info("Deleting invalid data files={}", partitionWithFileList);
if (partitionWithFileList.isEmpty()) { if (partitionWithFileList.isEmpty()) {
return true; return true;
} }
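The import and field changes in this file capture the whole mechanical recipe of the migration: drop the org.apache.log4j imports, pull in org.slf4j, and switch LogManager.getLogger to LoggerFactory.getLogger. A sketch of that recipe follows (the class name is illustrative); note that at runtime these calls still need an SLF4J binding on the classpath, for example one that routes back to Log4j, but the binding choice is not visible in this section and is only an assumption here.

// Before (Log4j 1.x API):
// import org.apache.log4j.LogManager;
// import org.apache.log4j.Logger;
// private static final Logger LOG = LogManager.getLogger(SomeTable.class);

// After (SLF4J facade):
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

class SomeTable {
  private static final Logger LOG = LoggerFactory.getLogger(SomeTable.class);
}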

View File

@@ -36,8 +36,6 @@ import com.google.common.collect.Maps;
import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.PathFilter; import org.apache.hadoop.fs.PathFilter;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.JavaSparkContext;
import java.io.IOException; import java.io.IOException;
@@ -48,6 +46,8 @@ import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import scala.Tuple2; import scala.Tuple2;
/** /**
@@ -55,7 +55,7 @@ import scala.Tuple2;
*/ */
public class RollbackExecutor implements Serializable { public class RollbackExecutor implements Serializable {
private static final Logger LOG = LogManager.getLogger(RollbackExecutor.class); private static final Logger LOG = LoggerFactory.getLogger(RollbackExecutor.class);
private final HoodieTableMetaClient metaClient; private final HoodieTableMetaClient metaClient;
private final HoodieWriteConfig config; private final HoodieWriteConfig config;
@@ -181,13 +181,13 @@ public class RollbackExecutor implements Serializable {
*/ */
private Map<FileStatus, Boolean> deleteCleanedFiles(HoodieTableMetaClient metaClient, HoodieWriteConfig config, private Map<FileStatus, Boolean> deleteCleanedFiles(HoodieTableMetaClient metaClient, HoodieWriteConfig config,
Map<FileStatus, Boolean> results, String partitionPath, PathFilter filter) throws IOException { Map<FileStatus, Boolean> results, String partitionPath, PathFilter filter) throws IOException {
LOG.info("Cleaning path " + partitionPath); LOG.info("Cleaning path {}", partitionPath);
FileSystem fs = metaClient.getFs(); FileSystem fs = metaClient.getFs();
FileStatus[] toBeDeleted = fs.listStatus(FSUtils.getPartitionPath(config.getBasePath(), partitionPath), filter); FileStatus[] toBeDeleted = fs.listStatus(FSUtils.getPartitionPath(config.getBasePath(), partitionPath), filter);
for (FileStatus file : toBeDeleted) { for (FileStatus file : toBeDeleted) {
boolean success = fs.delete(file.getPath(), false); boolean success = fs.delete(file.getPath(), false);
results.put(file, success); results.put(file, success);
LOG.info("Delete file " + file.getPath() + "\t" + success); LOG.info("Delete file {} \t {}", file.getPath(), success);
} }
return results; return results;
} }
@@ -197,7 +197,7 @@ public class RollbackExecutor implements Serializable {
*/ */
private Map<FileStatus, Boolean> deleteCleanedFiles(HoodieTableMetaClient metaClient, HoodieWriteConfig config, private Map<FileStatus, Boolean> deleteCleanedFiles(HoodieTableMetaClient metaClient, HoodieWriteConfig config,
Map<FileStatus, Boolean> results, String commit, String partitionPath) throws IOException { Map<FileStatus, Boolean> results, String commit, String partitionPath) throws IOException {
LOG.info("Cleaning path " + partitionPath); LOG.info("Cleaning path {}", partitionPath);
FileSystem fs = metaClient.getFs(); FileSystem fs = metaClient.getFs();
PathFilter filter = (path) -> { PathFilter filter = (path) -> {
if (path.toString().contains(".parquet")) { if (path.toString().contains(".parquet")) {
@@ -210,7 +210,7 @@ public class RollbackExecutor implements Serializable {
for (FileStatus file : toBeDeleted) { for (FileStatus file : toBeDeleted) {
boolean success = fs.delete(file.getPath(), false); boolean success = fs.delete(file.getPath(), false);
results.put(file, success); results.put(file, success);
LOG.info("Delete file " + file.getPath() + "\t" + success); LOG.info("Delete file {} \t {}", file.getPath(), success);
} }
return results; return results;
} }
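The deleteCleanedFiles hunks log one line per deleted file inside a loop; with placeholders the per-iteration cost of building the message disappears when INFO is off. A minimal sketch of that loop shape, with the file-system call stubbed out and all names hypothetical:

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.LinkedHashMap;
import java.util.Map;

// Illustrative loop: one placeholder-formatted line per processed path.
class CleanupLoggingSketch {
  private static final Logger LOG = LoggerFactory.getLogger(CleanupLoggingSketch.class);

  Map<String, Boolean> deleteAll(Iterable<String> paths) {
    Map<String, Boolean> results = new LinkedHashMap<>();
    for (String path : paths) {
      boolean success = true; // stand-in for fs.delete(path, false)
      results.put(path, success);
      LOG.info("Delete file {}\t{}", path, success);
    }
    return results;
  }
}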