From 11c4121f739d1d00a4ec66b4e243e47602d6ffb4 Mon Sep 17 00:00:00 2001 From: Ho Tien Vu Date: Sat, 13 Jul 2019 00:17:16 +0800 Subject: [PATCH] Fixed TableNotFoundException when write with structured streaming (#778) - When write to a new hoodie table, if checkpoint dir is under target path, Spark will create the base path and thus skip initializing .hoodie which result in error - apply .hoodie existent check for all save mode --- .../scala/com/uber/hoodie/HoodieSparkSqlWriter.scala | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/hoodie-spark/src/main/scala/com/uber/hoodie/HoodieSparkSqlWriter.scala b/hoodie-spark/src/main/scala/com/uber/hoodie/HoodieSparkSqlWriter.scala index 35c19aaed..cf44e091a 100644 --- a/hoodie-spark/src/main/scala/com/uber/hoodie/HoodieSparkSqlWriter.scala +++ b/hoodie-spark/src/main/scala/com/uber/hoodie/HoodieSparkSqlWriter.scala @@ -100,23 +100,23 @@ private[hoodie] object HoodieSparkSqlWriter { val basePath = new Path(parameters("path")) val fs = basePath.getFileSystem(sparkContext.hadoopConfiguration) - var exists = fs.exists(basePath) + var exists = fs.exists(new Path(basePath, HoodieTableMetaClient.METAFOLDER_NAME)) // Handle various save modes if (mode == SaveMode.ErrorIfExists && exists) { - throw new HoodieException(s"basePath ${basePath} already exists.") + throw new HoodieException(s"hoodie dataset at $basePath already exists.") } if (mode == SaveMode.Ignore && exists) { - log.warn(s" basePath ${basePath} already exists. Ignoring & not performing actual writes.") + log.warn(s"hoodie dataset at $basePath already exists. Ignoring & not performing actual writes.") return (true, None) } if (mode == SaveMode.Overwrite && exists) { - log.warn(s" basePath ${basePath} already exists. Deleting existing data & overwriting with new data.") + log.warn(s"hoodie dataset at $basePath already exists. Deleting existing data & overwriting with new data.") fs.delete(basePath, true) exists = false } - // Create the dataset if not present (APPEND mode) + // Create the dataset if not present if (!exists) { HoodieTableMetaClient.initTableType(sparkContext.hadoopConfiguration, path.get, storageType, tblName.get, "archived")