diff --git a/hoodie-client/src/main/java/com/uber/hoodie/table/HoodieTable.java b/hoodie-client/src/main/java/com/uber/hoodie/table/HoodieTable.java index 8e8ded9c4..11f545afc 100644 --- a/hoodie-client/src/main/java/com/uber/hoodie/table/HoodieTable.java +++ b/hoodie-client/src/main/java/com/uber/hoodie/table/HoodieTable.java @@ -341,7 +341,10 @@ public abstract class HoodieTable implements Seri .filter(p -> p.endsWith(".parquet")).collect(Collectors.toList()); // Contains list of partially created files. These needs to be cleaned up. invalidDataPaths.removeAll(validDataPaths); - logger.warn("InValid data paths=" + invalidDataPaths); + if (!invalidDataPaths.isEmpty()) { + logger.info("Removing duplicate data files created due to spark retries before committing. Paths=" + + invalidDataPaths); + } Map>> groupByPartition = invalidDataPaths.stream() .map(dp -> Pair.of(new Path(dp).getParent().toString(), dp))