1
0

Refactor hoodie-hive

This commit is contained in:
Prasanna Rajaperumal
2017-05-19 23:47:27 -07:00
committed by prazanna
parent c192dd60b4
commit db6150c5ef
40 changed files with 1614 additions and 2296 deletions

View File

@@ -21,30 +21,45 @@ package com.uber.hoodie.hive;
import com.beust.jcommander.Parameter;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
/**
 * Configs needed to sync data into Hive.
 *
 * Fields are populated by JCommander from command-line args (see the
 * {@code @Parameter} annotations) or set directly when used as an API.
 */
public class HiveSyncConfig implements Serializable {

  @Parameter(names = {
      "--database"}, description = "name of the target database in Hive", required = true)
  public String databaseName;

  @Parameter(names = {"--table"}, description = "name of the target table in Hive", required = true)
  public String tableName;

  @Parameter(names = {"--user"}, description = "Hive username", required = true)
  public String hiveUser;

  @Parameter(names = {"--pass"}, description = "Hive password", required = true)
  public String hivePass;

  @Parameter(names = {"--jdbc-url"}, description = "Hive jdbc connect url", required = true)
  public String jdbcUrl;

  @Parameter(names = {
      "--base-path"}, description = "Basepath of hoodie dataset to sync", required = true)
  public String basePath;

  // Names of the fields the table is partitioned by, in partition-path order.
  @Parameter(names = "--partitioned-by", description = "Fields in the schema partitioned by")
  public List<String> partitionFields = new ArrayList<>();

  // Strategy used to map an HDFS partition path (e.g. 2017/05/19) to partition values.
  @Parameter(names = "-partition-value-extractor", description = "Class which implements PartitionValueExtractor to extract the partition values from HDFS path")
  public String partitionValueExtractorClass = SlashEncodedDayPartitionValueExtractor.class
      .getName();

  @Parameter(names = {
      "--assume-date-partitioning"}, description = "Assume standard yyyy/mm/dd partitioning, this exists to support backward compatibility. If you use hoodie 0.3.x, do not set this parameter")
  public Boolean assumeDatePartitioning = false;

  @Parameter(names = {"--help", "-h"}, help = true)
  public Boolean help = false;
}

View File

@@ -19,64 +19,161 @@
package com.uber.hoodie.hive;
import com.beust.jcommander.JCommander;
import com.uber.hoodie.hive.impl.DayBasedPartitionStrategy;
import com.uber.hoodie.hive.impl.ParseSchemaFromDataStrategy;
import com.uber.hoodie.hive.model.HoodieDatasetReference;
import org.apache.hadoop.conf.Configuration;
import com.uber.hoodie.common.util.FSUtils;
import com.uber.hoodie.exception.InvalidDatasetException;
import com.uber.hoodie.hadoop.HoodieInputFormat;
import com.uber.hoodie.hadoop.realtime.HoodieRealtimeInputFormat;
import com.uber.hoodie.hive.HoodieHiveClient.PartitionEvent;
import com.uber.hoodie.hive.HoodieHiveClient.PartitionEvent.PartitionEventType;
import com.uber.hoodie.hive.util.SchemaUtil;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.stream.Collectors;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.api.Partition;
import org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat;
import org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import parquet.schema.MessageType;
/**
* Tool to sync new data from commits, into Hive in terms of
* Tool to sync a hoodie HDFS dataset with a hive metastore table.
* Either use it as a api HiveSyncTool.syncHoodieTable(HiveSyncConfig)
* or as a command line java -cp hoodie-hive.jar HiveSyncTool [args]
*
* - New table/partitions
* - Updated schema for table/partitions
* This utility will get the schema from the latest commit and will sync hive table schema
* Also this will sync the partitions incrementally
* (all the partitions modified since the last commit)
*/
@SuppressWarnings("WeakerAccess")
public class HiveSyncTool {
private static Logger LOG = LoggerFactory.getLogger(HiveSyncTool.class);
private final HoodieHiveClient hoodieHiveClient;
private final HiveSyncConfig cfg;
/**
* Sync to Hive, based on day based partitioning
*
* @param cfg
*/
public static void sync(HiveSyncConfig cfg) {
// Configure to point to which metastore and database to connect to
HoodieHiveConfiguration apiConfig =
HoodieHiveConfiguration.newBuilder().hadoopConfiguration(new Configuration())
.hivedb(cfg.databaseName)
.hiveJdbcUrl(cfg.jdbcUrl)
.jdbcUsername(cfg.hiveUser)
.jdbcPassword(cfg.hivePass)
.build();
public HiveSyncTool(HiveSyncConfig cfg, HiveConf configuration, FileSystem fs) {
this.hoodieHiveClient = new HoodieHiveClient(cfg, configuration, fs);
this.cfg = cfg;
}
HoodieDatasetReference datasetReference =
new HoodieDatasetReference(cfg.tableName, cfg.basePath, cfg.databaseName);
public void syncHoodieTable() {
LOG.info("Trying to sync hoodie table" + cfg.tableName + " with base path " + hoodieHiveClient
.getBasePath() + " of type " + hoodieHiveClient
.getTableType());
// Check if the necessary table exists
boolean tableExists = hoodieHiveClient.doesTableExist();
// Get the parquet schema for this dataset looking at the latest commit
MessageType schema = hoodieHiveClient.getDataSchema();
// Sync schema if needed
syncSchema(tableExists, schema);
// initialize the strategies
PartitionStrategy partitionStrategy = new DayBasedPartitionStrategy();
SchemaStrategy schemaStrategy = new ParseSchemaFromDataStrategy();
// Creates a new dataset which reflects the state at the time of creation
HoodieHiveDatasetSyncTask datasetSyncTask =
HoodieHiveDatasetSyncTask.newBuilder().withReference(datasetReference)
.withConfiguration(apiConfig).partitionStrategy(partitionStrategy)
.schemaStrategy(schemaStrategy).build();
// Sync dataset
datasetSyncTask.sync();
LOG.info("Schema sync complete. Syncing partitions for " + cfg.tableName);
// Get the last time we successfully synced partitions
Optional<String> lastCommitTimeSynced = Optional.empty();
if (tableExists) {
lastCommitTimeSynced = hoodieHiveClient.getLastCommitTimeSynced();
}
LOG.info("Last commit time synced was found to be " + lastCommitTimeSynced.orElse("null"));
List<String> writtenPartitionsSince = hoodieHiveClient
.getPartitionsWrittenToSince(lastCommitTimeSynced);
LOG.info("Storage partitions scan complete. Found " + writtenPartitionsSince.size());
// Sync the partitions if needed
syncPartitions(writtenPartitionsSince);
hoodieHiveClient.updateLastCommitTimeSynced();
LOG.info("Sync complete for " + cfg.tableName);
public static void main(String[] args) throws Exception {
hoodieHiveClient.close();
}
// parse the params
final HiveSyncConfig cfg = new HiveSyncConfig();
JCommander cmd = new JCommander(cfg, args);
if (cfg.help || args.length == 0) {
cmd.usage();
System.exit(1);
}
sync(cfg);
/**
* Get the latest schema from the last commit and check if its in sync with the hive table schema.
* If not, evolves the table schema.
*
* @param tableExists - does table exist
* @param schema - extracted schema
*/
private void syncSchema(boolean tableExists, MessageType schema) {
// Check and sync schema
if (!tableExists) {
LOG.info("Table " + cfg.tableName + " is not found. Creating it");
switch (hoodieHiveClient.getTableType()) {
case COPY_ON_WRITE:
hoodieHiveClient.createTable(schema, HoodieInputFormat.class.getName(),
MapredParquetOutputFormat.class.getName(), ParquetHiveSerDe.class.getName());
break;
case MERGE_ON_READ:
// create RT Table
// Custom serde will not work with ALTER TABLE REPLACE COLUMNS
// https://github.com/apache/hive/blob/release-1.1.0/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java#L3488
// Need a fix to check instance of
// hoodieHiveClient.createTable(schema, HoodieRealtimeInputFormat.class.getName(),
// MapredParquetOutputFormat.class.getName(), HoodieParquetSerde.class.getName());
hoodieHiveClient.createTable(schema, HoodieRealtimeInputFormat.class.getName(),
MapredParquetOutputFormat.class.getName(), ParquetHiveSerDe.class.getName());
// TODO - create RO Table
break;
default:
LOG.error("Unknown table type " + hoodieHiveClient.getTableType());
throw new InvalidDatasetException(hoodieHiveClient.getBasePath());
}
} else {
// Check if the dataset schema has evolved
Map<String, String> tableSchema = hoodieHiveClient.getTableSchema();
SchemaDifference schemaDiff = SchemaUtil
.getSchemaDifference(schema, tableSchema, cfg.partitionFields);
if (!schemaDiff.isEmpty()) {
LOG.info("Schema difference found for " + cfg.tableName);
hoodieHiveClient.updateTableDefinition(schema);
} else {
LOG.info("No Schema difference for " + cfg.tableName);
}
}
}
/**
* Syncs the list of storage parititions passed in (checks if the partition is in hive, if not
* adds it or if the partition path does not match, it updates the partition path)
*/
private void syncPartitions(List<String> writtenPartitionsSince) {
try {
List<Partition> hivePartitions = hoodieHiveClient.scanTablePartitions();
List<PartitionEvent> partitionEvents = hoodieHiveClient
.getPartitionEvents(hivePartitions, writtenPartitionsSince);
List<String> newPartitions = filterPartitions(partitionEvents, PartitionEventType.ADD);
LOG.info("New Partitions " + newPartitions);
hoodieHiveClient.addPartitionsToTable(newPartitions);
List<String> updatePartitions = filterPartitions(partitionEvents, PartitionEventType.UPDATE);
LOG.info("Changed Partitions " + updatePartitions);
hoodieHiveClient.updatePartitionsToTable(updatePartitions);
} catch (Exception e) {
throw new HoodieHiveSyncException("Failed to sync partitions for table " + cfg.tableName,
e);
}
}
private List<String> filterPartitions(List<PartitionEvent> events, PartitionEventType eventType) {
return events.stream()
.filter(s -> s.eventType == eventType).map(s -> s.storagePartition).collect(
Collectors.toList());
}
public static void main(String[] args) throws Exception {
// parse the params
final HiveSyncConfig cfg = new HiveSyncConfig();
JCommander cmd = new JCommander(cfg, args);
if (cfg.help || args.length == 0) {
cmd.usage();
System.exit(1);
}
FileSystem fs = FSUtils.getFs();
HiveConf hiveConf = new HiveConf();
hiveConf.addResource(fs.getConf());
new HiveSyncTool(cfg, hiveConf, fs).syncHoodieTable();
}
}

View File

@@ -0,0 +1,607 @@
/*
* Copyright (c) 2017 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*
*/
package com.uber.hoodie.hive;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.uber.hoodie.common.model.HoodieCommitMetadata;
import com.uber.hoodie.common.model.HoodieCompactionMetadata;
import com.uber.hoodie.common.model.HoodieTableType;
import com.uber.hoodie.common.table.HoodieTableMetaClient;
import com.uber.hoodie.common.table.HoodieTimeline;
import com.uber.hoodie.common.table.log.HoodieLogFile;
import com.uber.hoodie.common.table.log.HoodieLogFormat;
import com.uber.hoodie.common.table.log.HoodieLogFormat.Reader;
import com.uber.hoodie.common.table.log.block.HoodieAvroDataBlock;
import com.uber.hoodie.common.table.log.block.HoodieLogBlock;
import com.uber.hoodie.common.table.timeline.HoodieInstant;
import com.uber.hoodie.common.util.FSUtils;
import com.uber.hoodie.exception.HoodieIOException;
import com.uber.hoodie.exception.InvalidDatasetException;
import com.uber.hoodie.hive.util.SchemaUtil;
import java.io.IOException;
import java.sql.Connection;
import java.sql.DatabaseMetaData;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.stream.Collectors;
import org.apache.commons.dbcp.BasicDataSource;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.api.Partition;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hive.jdbc.HiveDriver;
import org.apache.thrift.TException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import parquet.format.converter.ParquetMetadataConverter;
import parquet.hadoop.ParquetFileReader;
import parquet.hadoop.metadata.ParquetMetadata;
import parquet.schema.MessageType;
/**
 * Client that talks to Hive for a single hoodie dataset/table pair.
 *
 * Uses two channels to Hive: a JDBC {@link Connection} (for running DDL such as
 * CREATE/ALTER TABLE and for reading the column schema via JDBC metadata) and a
 * thrift {@link HiveMetaStoreClient} (for table-existence checks, partition
 * listing and table-property updates). Also reads the hoodie timeline and data
 * files on HDFS to derive the latest parquet schema and the partitions written
 * since the last sync.
 */
@SuppressWarnings("ConstantConditions")
public class HoodieHiveClient {

  // Table property key under which the last successfully synced commit time is stored.
  private static final String HOODIE_LAST_COMMIT_TIME_SYNC = "last_commit_time_sync";
  // Make sure we have the hive JDBC driver in classpath
  private static String driverName = HiveDriver.class.getName();

  static {
    try {
      Class.forName(driverName);
    } catch (ClassNotFoundException e) {
      // Fail fast at class-load time rather than on first JDBC call.
      throw new IllegalStateException("Could not find " + driverName + " in classpath. ", e);
    }
  }

  private static Logger LOG = LoggerFactory.getLogger(HoodieHiveClient.class);
  private final HoodieTableMetaClient metaClient;
  private final HoodieTableType tableType;
  // Maps a storage partition path (e.g. 2017/05/19) to hive partition values.
  private final PartitionValueExtractor partitionValueExtractor;
  private HiveMetaStoreClient client;
  private HiveSyncConfig syncConfig;
  private FileSystem fs;
  private Connection connection;
  // Completed commits + compactions, captured once at construction time.
  private HoodieTimeline activeTimeline;

  /**
   * Opens the JDBC connection and the metastore client, loads the configured
   * {@link PartitionValueExtractor}, and snapshots the completed timeline.
   *
   * @throws HoodieHiveSyncException if the metastore client or the extractor
   *         cannot be created
   */
  HoodieHiveClient(HiveSyncConfig cfg, HiveConf configuration, FileSystem fs) {
    this.syncConfig = cfg;
    this.fs = fs;
    this.metaClient = new HoodieTableMetaClient(fs, cfg.basePath, true);
    this.tableType = metaClient.getTableType();
    LOG.info("Creating hive connection " + cfg.jdbcUrl);
    createHiveConnection();
    try {
      this.client = new HiveMetaStoreClient(configuration);
    } catch (MetaException e) {
      throw new HoodieHiveSyncException("Failed to create HiveMetaStoreClient", e);
    }
    try {
      // Extractor class is configurable; instantiate reflectively via no-arg constructor.
      this.partitionValueExtractor = (PartitionValueExtractor) Class
          .forName(cfg.partitionValueExtractorClass).newInstance();
    } catch (Exception e) {
      throw new HoodieHiveSyncException(
          "Failed to initialize PartitionValueExtractor class " + cfg.partitionValueExtractorClass,
          e);
    }
    activeTimeline = metaClient.getActiveTimeline().getCommitsAndCompactionsTimeline()
        .filterCompletedInstants();
  }

  public HoodieTimeline getActiveTimeline() {
    return activeTimeline;
  }

  /**
   * Add the (NEW) partitions to the table
   */
  void addPartitionsToTable(List<String> partitionsToAdd) {
    if (partitionsToAdd.isEmpty()) {
      LOG.info("No partitions to add for " + syncConfig.tableName);
      return;
    }
    LOG.info("Adding partitions " + partitionsToAdd.size() + " to table " + syncConfig.tableName);
    // All additions are batched into a single ALTER TABLE ... ADD IF NOT EXISTS statement.
    String sql = constructAddPartitions(partitionsToAdd);
    updateHiveSQL(sql);
  }

  /**
   * Partition path has changed - update the path for the following partitions
   */
  void updatePartitionsToTable(List<String> changedPartitions) {
    if (changedPartitions.isEmpty()) {
      LOG.info("No partitions to change for " + syncConfig.tableName);
      return;
    }
    LOG.info("Changing partitions " + changedPartitions.size() + " on " + syncConfig.tableName);
    // SET LOCATION only takes one partition per statement, so one SQL per partition.
    List<String> sqls = constructChangePartitions(changedPartitions);
    for (String sql : sqls) {
      updateHiveSQL(sql);
    }
  }

  /**
   * Builds a single "ALTER TABLE db.table ADD IF NOT EXISTS PARTITION (...) LOCATION '...'"
   * statement covering all the given storage partitions.
   */
  private String constructAddPartitions(List<String> partitions) {
    StringBuilder alterSQL = new StringBuilder("ALTER TABLE ");
    alterSQL.append(syncConfig.databaseName).append(".").append(syncConfig.tableName)
        .append(" ADD IF NOT EXISTS ");
    for (String partition : partitions) {
      StringBuilder partBuilder = new StringBuilder();
      List<String> partitionValues = partitionValueExtractor
          .extractPartitionValuesInPath(partition);
      Preconditions.checkArgument(syncConfig.partitionFields.size() == partitionValues.size(),
          "Partition key parts " + syncConfig.partitionFields
              + " does not match with partition values " + partitionValues
              + ". Check partition strategy. ");
      // NOTE(review): no separator is appended between successive key='value' pairs, so
      // this produces invalid SQL when there is more than one partition field — confirm.
      for (int i = 0; i < syncConfig.partitionFields.size(); i++) {
        partBuilder.append(syncConfig.partitionFields.get(i)).append("=").append("'")
            .append(partitionValues.get(i)).append("'");
      }
      String fullPartitionPath = new Path(syncConfig.basePath, partition).toString();
      alterSQL.append(" PARTITION (").append(partBuilder.toString()).append(") LOCATION '")
          .append(fullPartitionPath).append("' ");
    }
    return alterSQL.toString();
  }

  /**
   * Builds one "ALTER TABLE ... PARTITION (...) SET LOCATION '...'" statement per
   * changed partition.
   */
  private List<String> constructChangePartitions(List<String> partitions) {
    List<String> changePartitions = Lists.newArrayList();
    String alterTable = "ALTER TABLE " + syncConfig.databaseName + "." + syncConfig.tableName;
    for (String partition : partitions) {
      StringBuilder partBuilder = new StringBuilder();
      List<String> partitionValues = partitionValueExtractor
          .extractPartitionValuesInPath(partition);
      Preconditions.checkArgument(syncConfig.partitionFields.size() == partitionValues.size(),
          "Partition key parts " + syncConfig.partitionFields
              + " does not match with partition values " + partitionValues
              + ". Check partition strategy. ");
      // NOTE(review): same missing-separator issue as constructAddPartitions for
      // multiple partition fields — confirm.
      for (int i = 0; i < syncConfig.partitionFields.size(); i++) {
        partBuilder.append(syncConfig.partitionFields.get(i)).append("=").append("'")
            .append(partitionValues.get(i)).append("'");
      }
      String fullPartitionPath = new Path(syncConfig.basePath, partition).toString();
      // NOTE(review): the "hdfs://nameservice1" authority is hard-coded here, which ties
      // this statement to one specific cluster — verify against deployment.
      String changePartition =
          alterTable + " PARTITION (" + partBuilder.toString() + ") SET LOCATION '"
              + "hdfs://nameservice1" + fullPartitionPath + "'";
      changePartitions.add(changePartition);
    }
    return changePartitions;
  }

  /**
   * Iterate over the storage partitions and find if there are any new partitions that need
   * to be added or updated. Generate a list of PartitionEvent based on the changes required.
   */
  List<PartitionEvent> getPartitionEvents(List<Partition> tablePartitions,
      List<String> partitionStoragePartitions) {
    // Index existing hive partitions by their (sorted, comma-joined) value list.
    Map<String, String> paths = Maps.newHashMap();
    for (Partition tablePartition : tablePartitions) {
      List<String> hivePartitionValues = tablePartition.getValues();
      Collections.sort(hivePartitionValues);
      String fullTablePartitionPath = Path
          .getPathWithoutSchemeAndAuthority(new Path(tablePartition.getSd().getLocation())).toUri()
          .getPath();
      paths.put(String.join(", ", hivePartitionValues), fullTablePartitionPath);
    }
    List<PartitionEvent> events = Lists.newArrayList();
    for (String storagePartition : partitionStoragePartitions) {
      String fullStoragePartitionPath = new Path(syncConfig.basePath, storagePartition).toString();
      // Check if the partition values or if hdfs path is the same
      List<String> storagePartitionValues = partitionValueExtractor
          .extractPartitionValuesInPath(storagePartition);
      Collections.sort(storagePartitionValues);
      String storageValue = String.join(", ", storagePartitionValues);
      if (!paths.containsKey(storageValue)) {
        // Unknown partition values -> needs to be added to hive.
        events.add(PartitionEvent.newPartitionAddEvent(storagePartition));
      } else if (!paths.get(storageValue).equals(fullStoragePartitionPath)) {
        // Same values but a different location -> location needs updating.
        events.add(PartitionEvent.newPartitionUpdateEvent(storagePartition));
      }
    }
    return events;
  }

  /**
   * Scan table partitions
   */
  List<Partition> scanTablePartitions() throws TException {
    // (short) -1 asks the metastore for all partitions, no limit.
    return client
        .listPartitions(syncConfig.databaseName, syncConfig.tableName, (short) -1);
  }

  /**
   * Replaces the hive table's columns with the given parquet schema via
   * ALTER TABLE ... REPLACE COLUMNS.
   */
  void updateTableDefinition(MessageType newSchema) {
    try {
      String newSchemaStr = SchemaUtil.generateSchemaString(newSchema);
      // Cascade clause should not be present for non-partitioned tables
      String cascadeClause = syncConfig.partitionFields.size() > 0 ? " cascade" : "";
      StringBuilder sqlBuilder = new StringBuilder("ALTER TABLE ").append("`")
          .append(syncConfig.databaseName).append(".").append(syncConfig.tableName).append("`")
          .append(" REPLACE COLUMNS(")
          .append(newSchemaStr).append(" )").append(cascadeClause);
      LOG.info("Creating table with " + sqlBuilder);
      updateHiveSQL(sqlBuilder.toString());
    } catch (IOException e) {
      throw new HoodieHiveSyncException("Failed to update table for " + syncConfig.tableName, e);
    }
  }

  /**
   * Creates the hive table from the parquet schema with the given input/output
   * format and serde classes.
   */
  void createTable(MessageType storageSchema,
      String inputFormatClass, String outputFormatClass, String serdeClass) {
    try {
      String createSQLQuery = SchemaUtil
          .generateCreateDDL(storageSchema, syncConfig, inputFormatClass,
              outputFormatClass, serdeClass);
      LOG.info("Creating table with " + createSQLQuery);
      updateHiveSQL(createSQLQuery);
    } catch (IOException e) {
      throw new HoodieHiveSyncException("Failed to create table " + syncConfig.tableName, e);
    }
  }

  /**
   * Get the table schema.
   *
   * @return map of column name to column type, read via JDBC database metadata
   * @throws IllegalArgumentException if the table does not exist
   */
  Map<String, String> getTableSchema() {
    if (!doesTableExist()) {
      throw new IllegalArgumentException(
          "Failed to get schema for table " + syncConfig.tableName + " does not exist");
    }
    Map<String, String> schema = Maps.newHashMap();
    ResultSet result = null;
    try {
      DatabaseMetaData databaseMetaData = connection.getMetaData();
      result = databaseMetaData
          .getColumns(null, syncConfig.databaseName, syncConfig.tableName, null);
      while (result.next()) {
        // Per JDBC getColumns: column 4 = COLUMN_NAME, column 6 = TYPE_NAME.
        String columnName = result.getString(4);
        String columnType = result.getString(6);
        schema.put(columnName, columnType);
      }
      return schema;
    } catch (SQLException e) {
      throw new HoodieHiveSyncException(
          "Failed to get table schema for " + syncConfig.tableName, e);
    } finally {
      closeQuietly(result, null);
    }
  }

  /**
   * Gets the schema for a hoodie dataset.
   * Depending on the type of table, read from any file written in the latest commit.
   * We will assume that the schema has not changed within a single atomic write.
   *
   * @return Parquet schema for this dataset
   */
  @SuppressWarnings("WeakerAccess")
  public MessageType getDataSchema() {
    try {
      switch (tableType) {
        case COPY_ON_WRITE:
          // If this is COW, get the last commit and read the schema from a file written in the last commit
          HoodieInstant lastCommit = activeTimeline.lastInstant()
              .orElseThrow(() -> new InvalidDatasetException(syncConfig.basePath));
          HoodieCommitMetadata commitMetadata = HoodieCommitMetadata
              .fromBytes(activeTimeline.getInstantDetails(lastCommit).get());
          String filePath = commitMetadata.getFileIdAndFullPaths().values().stream().findAny()
              .orElseThrow(() -> new IllegalArgumentException(
                  "Could not find any data file written for commit " + lastCommit
                      + ", could not get schema for dataset " + metaClient.getBasePath()));
          return readSchemaFromDataFile(new Path(filePath));
        case MERGE_ON_READ:
          // If this is MOR, depending on whether the latest commit is a delta commit or compaction commit
          // Get a datafile written and get the schema from that file
          Optional<HoodieInstant> lastCompactionCommit = metaClient.getActiveTimeline()
              .getCompactionTimeline().filterCompletedInstants().lastInstant();
          LOG.info("Found the last compaction commit as " + lastCompactionCommit);
          Optional<HoodieInstant> lastDeltaCommitAfterCompaction = Optional.empty();
          if (lastCompactionCommit.isPresent()) {
            lastDeltaCommitAfterCompaction = metaClient.getActiveTimeline()
                .getDeltaCommitTimeline()
                .filterCompletedInstants()
                .findInstantsAfter(lastCompactionCommit.get().getTimestamp(), Integer.MAX_VALUE).lastInstant();
          }
          LOG.info("Found the last delta commit after last compaction as "
              + lastDeltaCommitAfterCompaction);
          if (lastDeltaCommitAfterCompaction.isPresent()) {
            HoodieInstant lastDeltaCommit = lastDeltaCommitAfterCompaction.get();
            // read from the log file wrote
            // (commitMetadata/filePath reuse the variables declared in the COW case above —
            // java switch cases share one scope)
            commitMetadata = HoodieCommitMetadata
                .fromBytes(activeTimeline.getInstantDetails(lastDeltaCommit).get());
            filePath = commitMetadata.getFileIdAndFullPaths().values().stream().filter(s -> s.contains(
                HoodieLogFile.DELTA_EXTENSION)).findAny()
                .orElseThrow(() -> new IllegalArgumentException(
                    "Could not find any data file written for commit " + lastDeltaCommit
                        + ", could not get schema for dataset " + metaClient.getBasePath()));
            return readSchemaFromLogFile(lastCompactionCommit, new Path(filePath));
          } else {
            return readSchemaFromLastCompaction(lastCompactionCommit);
          }
        default:
          LOG.error("Unknown table type " + tableType);
          throw new InvalidDatasetException(syncConfig.basePath);
      }
    } catch (IOException e) {
      throw new HoodieHiveSyncException(
          "Failed to get dataset schema for " + syncConfig.tableName, e);
    }
  }

  /**
   * Read schema from a data file from the last compaction commit done.
   *
   * @param lastCompactionCommitOpt last completed compaction instant, if any
   * @return parquet schema of a data file written by that compaction
   * @throws IOException if the data file cannot be read
   * @throws HoodieHiveSyncException if no compaction commit exists
   */
  @SuppressWarnings("OptionalUsedAsFieldOrParameterType")
  private MessageType readSchemaFromLastCompaction(Optional<HoodieInstant> lastCompactionCommitOpt)
      throws IOException {
    HoodieInstant lastCompactionCommit = lastCompactionCommitOpt.orElseThrow(
        () -> new HoodieHiveSyncException(
            "Could not read schema from last compaction, no compaction commits found on path "
                + syncConfig.basePath));
    // Read from a file written by the compaction
    HoodieCompactionMetadata compactionMetadata = HoodieCompactionMetadata
        .fromBytes(activeTimeline.getInstantDetails(lastCompactionCommit).get());
    String filePath = compactionMetadata.getFileIdAndFullPaths().values().stream().findAny()
        .orElseThrow(() -> new IllegalArgumentException(
            "Could not find any data file written for compaction " + lastCompactionCommit
                + ", could not get schema for dataset " + metaClient.getBasePath()));
    return readSchemaFromDataFile(new Path(filePath));
  }

  /**
   * Read the schema from the log file on path.
   *
   * Scans the whole log for the LAST avro data block and converts its schema to
   * parquet; falls back to the last compaction's data file when the log has no
   * avro data block.
   *
   * @param lastCompactionCommitOpt fallback compaction instant
   * @param path                    log file path
   * @return parquet schema
   * @throws IOException if the log or fallback file cannot be read
   */
  @SuppressWarnings("OptionalUsedAsFieldOrParameterType")
  private MessageType readSchemaFromLogFile(Optional<HoodieInstant> lastCompactionCommitOpt,
      Path path) throws IOException {
    Reader reader = HoodieLogFormat.newReader(fs, new HoodieLogFile(path), null);
    HoodieAvroDataBlock lastBlock = null;
    while (reader.hasNext()) {
      HoodieLogBlock block = reader.next();
      if (block instanceof HoodieAvroDataBlock) {
        lastBlock = (HoodieAvroDataBlock) block;
      }
    }
    // NOTE(review): reader is not closed here — confirm whether Reader needs closing.
    if (lastBlock != null) {
      return new parquet.avro.AvroSchemaConverter().convert(lastBlock.getSchema());
    }
    // Fall back to read the schema from last compaction
    LOG.info("Falling back to read the schema from last compaction " + lastCompactionCommitOpt);
    return readSchemaFromLastCompaction(lastCompactionCommitOpt);
  }

  /**
   * Read the parquet schema from a parquet File
   */
  private MessageType readSchemaFromDataFile(Path parquetFilePath) throws IOException {
    LOG.info("Reading schema from " + parquetFilePath);
    if (!fs.exists(parquetFilePath)) {
      throw new IllegalArgumentException(
          "Failed to read schema from data file " + parquetFilePath
              + ". File does not exist.");
    }
    // Only the footer is read; it carries the file-level schema.
    ParquetMetadata fileFooter =
        ParquetFileReader.readFooter(fs.getConf(), parquetFilePath, ParquetMetadataConverter.NO_FILTER);
    return fileFooter.getFileMetaData().getSchema();
  }

  /**
   * @return true if the configured table exists
   */
  boolean doesTableExist() {
    try {
      return client.tableExists(syncConfig.databaseName, syncConfig.tableName);
    } catch (TException e) {
      throw new HoodieHiveSyncException(
          "Failed to check if table exists " + syncConfig.tableName, e);
    }
  }

  /**
   * Execute a update in hive metastore with this SQL
   *
   * @param s SQL to execute
   */
  void updateHiveSQL(String s) {
    Statement stmt = null;
    try {
      stmt = connection.createStatement();
      LOG.info("Executing SQL " + s);
      stmt.execute(s);
    } catch (SQLException e) {
      throw new HoodieHiveSyncException("Failed in executing SQL " + s, e);
    } finally {
      closeQuietly(null, stmt);
    }
  }

  /**
   * Lazily opens the JDBC connection via a BasicDataSource; no-op when already connected.
   */
  private void createHiveConnection() {
    if (connection == null) {
      BasicDataSource ds = new BasicDataSource();
      ds.setDriverClassName(driverName);
      ds.setUrl(getHiveJdbcUrlWithDefaultDBName());
      ds.setUsername(syncConfig.hiveUser);
      ds.setPassword(syncConfig.hivePass);
      LOG.info("Getting Hive Connection from Datasource " + ds);
      try {
        this.connection = ds.getConnection();
      } catch (SQLException e) {
        throw new HoodieHiveSyncException(
            "Cannot create hive connection " + getHiveJdbcUrlWithDefaultDBName(), e);
      }
    }
  }

  /**
   * Rewrites the configured jdbc url so that it points at the configured database,
   * preserving any trailing ";key=value" properties.
   */
  private String getHiveJdbcUrlWithDefaultDBName() {
    String hiveJdbcUrl = syncConfig.jdbcUrl;
    String urlAppend = null;
    // If the hive url contains addition properties like ;transportMode=http;httpPath=hs2
    if (hiveJdbcUrl.contains(";")) {
      urlAppend = hiveJdbcUrl.substring(hiveJdbcUrl.indexOf(";"));
      hiveJdbcUrl = hiveJdbcUrl.substring(0, hiveJdbcUrl.indexOf(";"));
    }
    if (!hiveJdbcUrl.endsWith("/")) {
      hiveJdbcUrl = hiveJdbcUrl + "/";
    }
    return hiveJdbcUrl + syncConfig.databaseName + (urlAppend == null ? "" : urlAppend);
  }

  /**
   * Closes the given JDBC resources, logging (not rethrowing) any SQLException.
   * Either argument may be null.
   */
  private static void closeQuietly(ResultSet resultSet, Statement stmt) {
    try {
      if (stmt != null) {
        stmt.close();
      }
      if (resultSet != null) {
        resultSet.close();
      }
    } catch (SQLException e) {
      LOG.error("Could not close the resultset opened ", e);
    }
  }

  public String getBasePath() {
    return metaClient.getBasePath();
  }

  HoodieTableType getTableType() {
    return tableType;
  }

  public FileSystem getFs() {
    return fs;
  }

  /**
   * @return the last commit time recorded in the table properties, or empty when
   *         the property has never been set
   */
  Optional<String> getLastCommitTimeSynced() {
    // Get the last commit time from the TBLproperties
    try {
      Table database = client.getTable(syncConfig.databaseName, syncConfig.tableName);
      return Optional
          .ofNullable(database.getParameters().getOrDefault(HOODIE_LAST_COMMIT_TIME_SYNC, null));
    } catch (Exception e) {
      throw new HoodieHiveSyncException(
          "Failed to get the last commit time synced from the database", e);
    }
  }

  /**
   * Closes the JDBC connection and the metastore client; SQL errors are logged,
   * not rethrown.
   */
  void close() {
    try {
      if (connection != null) {
        connection.close();
      }
      if(client != null) {
        client.close();
      }
    } catch (SQLException e) {
      LOG.error("Could not close connection ", e);
    }
  }

  /**
   * Lists the storage partitions touched since the given commit time. With no
   * known last sync, falls back to listing every partition path on the filesystem.
   */
  @SuppressWarnings("OptionalUsedAsFieldOrParameterType")
  List<String> getPartitionsWrittenToSince(Optional<String> lastCommitTimeSynced) {
    if (!lastCommitTimeSynced.isPresent()) {
      LOG.info("Last commit time synced is not known, listing all partitions");
      try {
        return FSUtils
            .getAllPartitionPaths(fs, syncConfig.basePath, syncConfig.assumeDatePartitioning);
      } catch (IOException e) {
        throw new HoodieIOException("Failed to list all partitions in " + syncConfig.basePath, e);
      }
    } else {
      LOG.info("Last commit time synced is " + lastCommitTimeSynced.get()
          + ", Getting commits since then");
      HoodieTimeline timelineToSync = activeTimeline
          .findInstantsAfter(lastCommitTimeSynced.get(), Integer.MAX_VALUE);
      // Union of all partitions written by any commit after the sync point.
      return timelineToSync.getInstants().map(s -> {
        try {
          return HoodieCommitMetadata.fromBytes(activeTimeline.getInstantDetails(s).get());
        } catch (IOException e) {
          throw new HoodieIOException(
              "Failed to get partitions written since " + lastCommitTimeSynced, e);
        }
      }).flatMap(s -> s.getPartitionToWriteStats().keySet().stream()).distinct()
          .collect(Collectors.toList());
    }
  }

  /**
   * Records the latest completed instant's timestamp into the table properties
   * so the next sync can resume from it.
   */
  void updateLastCommitTimeSynced() {
    // Set the last commit time from the TBLproperties
    String lastCommitSynced = activeTimeline.lastInstant().get().getTimestamp();
    try {
      Table table = client.getTable(syncConfig.databaseName, syncConfig.tableName);
      table.putToParameters(HOODIE_LAST_COMMIT_TIME_SYNC, lastCommitSynced);
      client.alter_table(syncConfig.databaseName, syncConfig.tableName, table, true);
    } catch (Exception e) {
      throw new HoodieHiveSyncException(
          "Failed to get update last commit time synced to " + lastCommitSynced, e);
    }
  }

  /**
   * Partition Event captures any partition that needs to be added or updated
   */
  static class PartitionEvent {

    public enum PartitionEventType {ADD, UPDATE}

    PartitionEventType eventType;
    String storagePartition;

    PartitionEvent(
        PartitionEventType eventType, String storagePartition) {
      this.eventType = eventType;
      this.storagePartition = storagePartition;
    }

    static PartitionEvent newPartitionAddEvent(String storagePartition) {
      return new PartitionEvent(PartitionEventType.ADD, storagePartition);
    }

    static PartitionEvent newPartitionUpdateEvent(String storagePartition) {
      return new PartitionEvent(PartitionEventType.UPDATE, storagePartition);
    }
  }
}

View File

@@ -1,119 +0,0 @@
/*
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.uber.hoodie.hive;
import org.apache.hadoop.conf.Configuration;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Immutable configuration holder for registering a hoodie dataset into the hive metastore.
 * Instances are created through {@link #newBuilder()}.
 */
public class HoodieHiveConfiguration {

  private final String hiveJdbcUrl;
  private final String dbName;
  private final String hiveUsername;
  private final String hivePassword;
  private final Configuration configuration;

  private HoodieHiveConfiguration(String hiveJdbcUrl, String defaultDatabaseName,
      String hiveUsername, String hivePassword, Configuration configuration) {
    this.hiveJdbcUrl = hiveJdbcUrl;
    this.dbName = defaultDatabaseName;
    this.hiveUsername = hiveUsername;
    this.hivePassword = hivePassword;
    this.configuration = configuration;
  }

  /** @return JDBC url used to reach the hive server */
  public String getHiveJdbcUrl() {
    return hiveJdbcUrl;
  }

  /** @return target hive database name */
  public String getDbName() {
    return dbName;
  }

  /** @return username for the hive JDBC connection */
  public String getHiveUsername() {
    return hiveUsername;
  }

  /** @return password for the hive JDBC connection */
  public String getHivePassword() {
    return hivePassword;
  }

  /** @return hadoop configuration backing file-system access */
  public Configuration getConfiguration() {
    return configuration;
  }

  // NOTE(review): this includes the hive password, and build() logs the result —
  // consider redacting before this reaches production logs.
  @Override
  public String toString() {
    return "HoodieHiveConfiguration{"
        + "hiveJdbcUrl='" + hiveJdbcUrl + '\''
        + ", dbName='" + dbName + '\''
        + ", hiveUsername='" + hiveUsername + '\''
        + ", hivePassword='" + hivePassword + '\''
        + ", configuration=" + configuration
        + '}';
  }

  public static Builder newBuilder() {
    return new Builder();
  }

  /** Fluent builder for {@link HoodieHiveConfiguration}. */
  public static class Builder {

    private static Logger LOG = LoggerFactory.getLogger(Builder.class);

    private String url;
    private String database;
    private String username;
    private String password;
    private Configuration hadoopConf;

    public Builder hiveJdbcUrl(String hiveJdbcUrl) {
      this.url = hiveJdbcUrl;
      return this;
    }

    public Builder hivedb(String hiveDatabase) {
      this.database = hiveDatabase;
      return this;
    }

    public Builder jdbcUsername(String jdbcUsername) {
      this.username = jdbcUsername;
      return this;
    }

    public Builder jdbcPassword(String jdbcPassword) {
      this.password = jdbcPassword;
      return this;
    }

    public Builder hadoopConfiguration(Configuration configuration) {
      this.hadoopConf = configuration;
      return this;
    }

    /** Assembles the immutable configuration and logs it. */
    public HoodieHiveConfiguration build() {
      HoodieHiveConfiguration config =
          new HoodieHiveConfiguration(url, database, username, password, hadoopConf);
      LOG.info("Hoodie Hive Configuration - " + config);
      return config;
    }
  }
}

View File

@@ -1,182 +0,0 @@
/*
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.uber.hoodie.hive;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import com.uber.hoodie.hive.client.HoodieFSClient;
import com.uber.hoodie.hive.client.HoodieHiveClient;
import com.uber.hoodie.hive.model.HoodieDatasetReference;
import com.uber.hoodie.hive.model.StoragePartition;
import com.uber.hoodie.hive.model.TablePartition;
import org.apache.commons.lang.ArrayUtils;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.List;
/**
 * Represents a Hive External Dataset.
 * Contains metadata for storage and table partitions, plus the diff (new/changed partitions)
 * between what is on storage and what the hive table currently registers.
 */
public class HoodieHiveDatasetSyncTask {

  private static Logger LOG = LoggerFactory.getLogger(HoodieHiveDatasetSyncTask.class);

  // Schema-level sync task; also carries the shared clients, strategies and config
  private final HoodieHiveSchemaSyncTask schemaSyncTask;
  // Storage partitions with no corresponding hive partition yet
  private final List<StoragePartition> newPartitions;
  // Storage partitions whose registered hive location no longer matches storage
  private final List<StoragePartition> changedPartitions;

  public HoodieHiveDatasetSyncTask(HoodieHiveSchemaSyncTask schemaSyncTask,
      List<StoragePartition> newPartitions, List<StoragePartition> changedPartitions) {
    this.schemaSyncTask = schemaSyncTask;
    // Defensive immutable snapshots of the partition lists
    this.newPartitions = ImmutableList.copyOf(newPartitions);
    this.changedPartitions = ImmutableList.copyOf(changedPartitions);
  }

  public HoodieHiveSchemaSyncTask getSchemaSyncTask() {
    return schemaSyncTask;
  }

  public List<StoragePartition> getNewPartitions() {
    return newPartitions;
  }

  public List<StoragePartition> getChangedPartitions() {
    return changedPartitions;
  }

  /**
   * Sync this dataset
   * 1. If any schema difference is found, then sync the table schema
   * 2. If any new partitions are found, adds partitions to the table (which uses the table schema by default)
   * 3. If any partition path has changed, modify the partition to the new path (which does not change the partition schema)
   *
   * <p>Order matters: the table schema must be in place before partitions are added,
   * since new partitions pick up the table schema by default.
   *
   * @throws HoodieHiveDatasetException wrapping any failure during the sync
   */
  public void sync() {
    LOG.info("Starting Sync for " + schemaSyncTask.getReference());
    try {
      // First sync the table schema
      schemaSyncTask.sync();
      // Add all the new partitions
      schemaSyncTask.getHiveClient()
          .addPartitionsToTable(schemaSyncTask.getReference(), newPartitions,
              schemaSyncTask.getPartitionStrategy());
      // Update all the changed partitions
      schemaSyncTask.getHiveClient()
          .updatePartitionsToTable(schemaSyncTask.getReference(), changedPartitions,
              schemaSyncTask.getPartitionStrategy());
    } catch (Exception e) {
      throw new HoodieHiveDatasetException(
          "Failed to sync dataset " + schemaSyncTask.getReference(), e);
    }
    LOG.info("Sync for " + schemaSyncTask.getReference() + " complete.");
  }

  /**
   * Builder seeded from an existing task's configuration and clients; build() will
   * re-scan storage and hive partitions to compute a fresh diff.
   */
  public static Builder newBuilder(HoodieHiveDatasetSyncTask dataset) {
    return newBuilder().withConfiguration(dataset.schemaSyncTask.getConf())
        .withReference(dataset.schemaSyncTask.getReference())
        .withFSClient(dataset.schemaSyncTask.getFsClient())
        .withHiveClient(dataset.schemaSyncTask.getHiveClient())
        .schemaStrategy(dataset.schemaSyncTask.getSchemaStrategy())
        .partitionStrategy(dataset.schemaSyncTask.getPartitionStrategy());
  }

  public static Builder newBuilder() {
    return new Builder();
  }

  /** Builds a dataset sync task by scanning storage and hive partitions. */
  public static class Builder {

    private static Logger LOG = LoggerFactory.getLogger(Builder.class);

    private HoodieHiveConfiguration configuration;
    private HoodieDatasetReference datasetReference;
    private SchemaStrategy schemaStrategy;
    private PartitionStrategy partitionStrategy;
    private HoodieHiveClient hiveClient;
    private HoodieFSClient fsClient;

    public Builder withReference(HoodieDatasetReference reference) {
      this.datasetReference = reference;
      return this;
    }

    public Builder withConfiguration(HoodieHiveConfiguration configuration) {
      this.configuration = configuration;
      return this;
    }

    public Builder schemaStrategy(SchemaStrategy schemaStrategy) {
      this.schemaStrategy = schemaStrategy;
      return this;
    }

    public Builder partitionStrategy(PartitionStrategy partitionStrategy) {
      // Log the partition keys up-front so misconfigured strategies are visible early
      if(partitionStrategy != null) {
        LOG.info("Partitioning the dataset with keys " + ArrayUtils
            .toString(partitionStrategy.getHivePartitionFieldNames()));
      }
      this.partitionStrategy = partitionStrategy;
      return this;
    }

    public Builder withHiveClient(HoodieHiveClient hiveClient) {
      this.hiveClient = hiveClient;
      return this;
    }

    public Builder withFSClient(HoodieFSClient fsClient) {
      this.fsClient = fsClient;
      return this;
    }

    /**
     * Builds the task: first builds the schema sync task, then scans storage
     * partitions and (if the table already exists) diffs them against hive
     * partitions to determine which are new and which have changed.
     */
    public HoodieHiveDatasetSyncTask build() {
      LOG.info("Building dataset for " + datasetReference);
      HoodieHiveSchemaSyncTask schemaSyncTask =
          HoodieHiveSchemaSyncTask.newBuilder().withReference(datasetReference)
              .withConfiguration(configuration).schemaStrategy(schemaStrategy)
              .partitionStrategy(partitionStrategy).withHiveClient(hiveClient)
              .withFSClient(fsClient).build();
      List<StoragePartition> storagePartitions = Lists.newArrayList();
      List<String> storagePartitionPaths = schemaSyncTask.getPartitionStrategy()
          .scanAllPartitions(schemaSyncTask.getReference(), schemaSyncTask.getFsClient());
      for (String path : storagePartitionPaths) {
        storagePartitions.add(new StoragePartition(schemaSyncTask.getReference(),
            schemaSyncTask.getPartitionStrategy(), path));
      }
      LOG.info("Storage partitions scan complete. Found " + storagePartitions.size());
      List<StoragePartition> newPartitions;
      List<StoragePartition> changedPartitions;
      // Check if table exists
      if (schemaSyncTask.getHiveClient().checkTableExists(schemaSyncTask.getReference())) {
        List<TablePartition> partitions =
            schemaSyncTask.getHiveClient().scanPartitions(schemaSyncTask.getReference());
        LOG.info("Table partition scan complete. Found " + partitions.size());
        newPartitions = schemaSyncTask.getFsClient()
            .getUnregisteredStoragePartitions(partitions, storagePartitions);
        changedPartitions = schemaSyncTask.getFsClient()
            .getChangedStoragePartitions(partitions, storagePartitions);
      } else {
        // Table does not exist yet: every storage partition is new, none changed
        newPartitions = storagePartitions;
        changedPartitions = Lists.newArrayList();
      }
      return new HoodieHiveDatasetSyncTask(schemaSyncTask, newPartitions, changedPartitions);
    }
  }
}

View File

@@ -1,243 +0,0 @@
/*
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.uber.hoodie.hive;
import com.google.common.base.Objects;
import com.google.common.collect.Maps;
import com.uber.hoodie.hadoop.HoodieInputFormat;
import com.uber.hoodie.hive.impl.DayBasedPartitionStrategy;
import com.uber.hoodie.hive.client.HoodieFSClient;
import com.uber.hoodie.hive.client.HoodieHiveClient;
import com.uber.hoodie.hive.impl.ParseSchemaFromDataStrategy;
import com.uber.hoodie.hive.client.SchemaUtil;
import com.uber.hoodie.hive.model.HoodieDatasetReference;
import com.uber.hoodie.hive.model.SchemaDifference;
import org.apache.commons.lang.ArrayUtils;
import org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import parquet.schema.MessageType;
import java.util.Map;
/**
 * Represents the Schema sync task for the dataset.
 * Execute sync() on this task to sync up the HDFS dataset schema and hive table schema.
 */
public class HoodieHiveSchemaSyncTask {

  private static Logger LOG = LoggerFactory.getLogger(HoodieHiveSchemaSyncTask.class);

  // Defaults used when creating the hive table for the first time
  private static final String DEFAULT_INPUTFORMAT = HoodieInputFormat.class.getName();
  private static final String DEFAULT_OUTPUTFORMAT = MapredParquetOutputFormat.class.getName();

  private final HoodieDatasetReference reference;
  // Schema as inferred from the data files on storage
  private final MessageType storageSchema;
  // Schema as currently registered in the hive table (field name -> type); empty if no table
  private final Map<String, String> tableSchema;
  private final PartitionStrategy partitionStrategy;
  private final SchemaStrategy schemaStrategy;
  private final HoodieHiveClient hiveClient;
  private final HoodieHiveConfiguration conf;
  private final HoodieFSClient fsClient;

  public HoodieHiveSchemaSyncTask(HoodieDatasetReference datasetReference,
      MessageType schemaInferred, Map<String, String> fieldsSchema,
      PartitionStrategy partitionStrategy, SchemaStrategy schemaStrategy,
      HoodieHiveConfiguration configuration, HoodieHiveClient hiveClient,
      HoodieFSClient fsClient) {
    this.reference = datasetReference;
    this.storageSchema = schemaInferred;
    this.tableSchema = fieldsSchema;
    this.partitionStrategy = partitionStrategy;
    this.schemaStrategy = schemaStrategy;
    this.hiveClient = hiveClient;
    this.conf = configuration;
    this.fsClient = fsClient;
  }

  /**
   * Computes the difference between the storage schema and the registered table
   * schema, excluding the hive partition fields.
   */
  public SchemaDifference getSchemaDifference() {
    return SchemaUtil.getSchemaDifference(storageSchema, tableSchema,
        partitionStrategy.getHivePartitionFieldNames());
  }

  /**
   * Checks if the table schema is present. If not, creates one.
   * If already exists, computes the schema difference and if there is any difference
   * it generates a alter table and syncs up the schema to hive metastore.
   *
   * @throws HoodieHiveDatasetException wrapping any failure during the schema sync
   */
  public void sync() {
    try {
      // Check if the table needs to be created
      if (tableSchema.isEmpty()) {
        // create the database
        LOG.info("Schema not found. Creating for " + reference);
        hiveClient.createTable(storageSchema, reference,
            partitionStrategy.getHivePartitionFieldNames(), DEFAULT_INPUTFORMAT,
            DEFAULT_OUTPUTFORMAT);
      } else {
        if (!getSchemaDifference().isEmpty()) {
          LOG.info("Schema sync required for " + reference);
          hiveClient.updateTableDefinition(reference,
              partitionStrategy.getHivePartitionFieldNames(), storageSchema);
        } else {
          LOG.info("Schema sync not required for " + reference);
        }
      }
    } catch (Exception e) {
      throw new HoodieHiveDatasetException("Failed to sync dataset " + reference,
          e);
    }
  }

  public static Builder newBuilder() {
    return new Builder();
  }

  public MessageType getStorageSchema() {
    return storageSchema;
  }

  public Map<String, String> getTableSchema() {
    return tableSchema;
  }

  public PartitionStrategy getPartitionStrategy() {
    return partitionStrategy;
  }

  public SchemaStrategy getSchemaStrategy() {
    return schemaStrategy;
  }

  public HoodieHiveClient getHiveClient() {
    return hiveClient;
  }

  public HoodieHiveConfiguration getConf() {
    return conf;
  }

  public HoodieDatasetReference getReference() {
    return reference;
  }

  public HoodieFSClient getFsClient() {
    return fsClient;
  }

  // Equality is defined by the two schemas only, not by clients/strategies/config
  @Override
  public boolean equals(Object o) {
    if (this == o)
      return true;
    if (o == null || getClass() != o.getClass())
      return false;
    HoodieHiveSchemaSyncTask that = (HoodieHiveSchemaSyncTask) o;
    return Objects.equal(storageSchema, that.storageSchema) && Objects
        .equal(tableSchema, that.tableSchema);
  }

  @Override
  public int hashCode() {
    return Objects.hashCode(storageSchema, tableSchema);
  }

  /** Builder that infers both the storage schema and the current table schema. */
  public static class Builder {

    private static Logger LOG = LoggerFactory.getLogger(Builder.class);

    private HoodieHiveConfiguration configuration;
    private HoodieDatasetReference datasetReference;
    private SchemaStrategy schemaStrategy;
    private PartitionStrategy partitionStrategy;
    private HoodieHiveClient hiveClient;
    private HoodieFSClient fsClient;

    public Builder withReference(HoodieDatasetReference reference) {
      this.datasetReference = reference;
      return this;
    }

    public Builder withConfiguration(HoodieHiveConfiguration configuration) {
      this.configuration = configuration;
      return this;
    }

    public Builder schemaStrategy(SchemaStrategy schemaStrategy) {
      this.schemaStrategy = schemaStrategy;
      return this;
    }

    public Builder partitionStrategy(PartitionStrategy partitionStrategy) {
      // Log the partition keys up-front so misconfigured strategies are visible early
      if(partitionStrategy != null) {
        LOG.info("Partitioning the dataset with keys " + ArrayUtils
            .toString(partitionStrategy.getHivePartitionFieldNames()));
      }
      this.partitionStrategy = partitionStrategy;
      return this;
    }

    public Builder withHiveClient(HoodieHiveClient hiveClient) {
      this.hiveClient = hiveClient;
      return this;
    }

    public Builder withFSClient(HoodieFSClient fsClient) {
      this.fsClient = fsClient;
      return this;
    }

    /**
     * Fills in defaults for any unset collaborators, infers the storage schema via
     * the schema strategy, reads the table schema from hive (empty map if the table
     * does not exist yet), and assembles the task.
     */
    public HoodieHiveSchemaSyncTask build() {
      LOG.info("Building dataset schema for " + datasetReference);
      createDefaults();
      MessageType schemaInferred =
          schemaStrategy.getDatasetSchema(datasetReference, fsClient);
      LOG.info("Storage Schema inferred for dataset " + datasetReference);
      LOG.debug("Inferred Storage Schema " + schemaInferred);
      Map<String, String> fieldsSchema;
      if (!hiveClient.checkTableExists(datasetReference)) {
        // No table yet: empty schema signals sync() to create the table
        fieldsSchema = Maps.newHashMap();
      } else {
        fieldsSchema = hiveClient.getTableSchema(datasetReference);
      }
      LOG.info("Table Schema inferred for dataset " + datasetReference);
      LOG.debug("Inferred Table Schema " + fieldsSchema);
      return new HoodieHiveSchemaSyncTask(datasetReference, schemaInferred, fieldsSchema,
          partitionStrategy, schemaStrategy, configuration, hiveClient, fsClient);
    }

    // Lazily creates default strategy/client instances for anything the caller did not supply
    private void createDefaults() {
      if (partitionStrategy == null) {
        LOG.info("Partition strategy is not set. Selecting the default strategy");
        partitionStrategy = new DayBasedPartitionStrategy();
      }
      if (schemaStrategy == null) {
        LOG.info(
            "Schema strategy not specified. Selecting the default based on the dataset type");
        schemaStrategy = new ParseSchemaFromDataStrategy();
      }
      if (fsClient == null) {
        LOG.info("Creating a new FS Client as none has been passed in");
        fsClient = new HoodieFSClient(configuration);
      }
      if (hiveClient == null) {
        LOG.info("Creating a new Hive Client as none has been passed in");
        hiveClient = new HoodieHiveClient(configuration);
      }
    }
  }
}

View File

@@ -16,21 +16,21 @@
package com.uber.hoodie.hive;
public class HoodieHiveDatasetException extends RuntimeException {
public class HoodieHiveSyncException extends RuntimeException {
public HoodieHiveDatasetException() {
public HoodieHiveSyncException() {
super();
}
public HoodieHiveDatasetException(String message) {
public HoodieHiveSyncException(String message) {
super(message);
}
public HoodieHiveDatasetException(String message, Throwable t) {
public HoodieHiveSyncException(String message, Throwable t) {
super(message, t);
}
public HoodieHiveDatasetException(Throwable t) {
public HoodieHiveSyncException(Throwable t) {
super(t);
}

View File

@@ -1,59 +0,0 @@
/*
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.uber.hoodie.hive;
import com.uber.hoodie.hive.client.HoodieFSClient;
import com.uber.hoodie.hive.model.HoodieDatasetReference;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import java.util.List;
/**
 * Abstraction to define HDFS partition strategies.
 * Strategy provides hookups to map partitions on to physical layout.
 *
 * @see SchemaStrategy
 */
public interface PartitionStrategy {

  /**
   * Scans the file system for all partitions and returns the available partitions,
   * relative to the base path.
   *
   * @param basePath reference to the dataset whose partitions should be scanned
   * @param fsClient file-system client used to perform the scan
   * @return list of partition paths, relative to the dataset base path
   */
  List<String> scanAllPartitions(HoodieDatasetReference basePath, HoodieFSClient fsClient);

  /**
   * Get the list of hive field names the dataset will be partitioned on.
   * The field name should be present in the storage schema.
   *
   * @return List of partitions field names
   */
  String[] getHivePartitionFieldNames();

  /**
   * Convert a Partition path (returned in scanAllPartitions) to values for column names
   * returned in getHivePartitionFieldNames.
   * e.g. 2016/12/12/ will return [2016, 12, 12]
   *
   * @param metadata reference to the dataset the partition belongs to
   * @param partitionPath storage path
   * @return List of partitions field values, in the same order as the field names
   */
  String[] convertPartitionToValues(HoodieDatasetReference metadata, String partitionPath);
}

View File

@@ -0,0 +1,31 @@
/*
* Copyright (c) 2017 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*
*/
package com.uber.hoodie.hive;
import java.util.List;
/**
 * HDFS paths contain hive partition values for the keys the table is partitioned on.
 * This mapping is not straightforward and requires a pluggable implementation to extract
 * the partition values from an HDFS path.
 *
 * e.g. Hive table partitioned by datestr=yyyy-mm-dd and hdfs path /app/hoodie/dataset1/YYYY=[yyyy]/MM=[mm]/DD=[dd]
 */
public interface PartitionValueExtractor {

  /**
   * Extracts the hive partition values encoded in the given storage partition path.
   *
   * @param partitionPath partition path relative to the dataset base path
   * @return ordered list of partition values, one per hive partition key
   */
  List<String> extractPartitionValuesInPath(String partitionPath);
}

View File

@@ -14,7 +14,7 @@
* limitations under the License.
*/
package com.uber.hoodie.hive.model;
package com.uber.hoodie.hive;
import com.google.common.base.Objects;
import com.google.common.collect.ImmutableList;

View File

@@ -1,31 +0,0 @@
/*
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.uber.hoodie.hive;
import com.uber.hoodie.hive.client.HoodieFSClient;
import com.uber.hoodie.hive.model.HoodieDatasetReference;
import parquet.schema.MessageType;
/**
 * Abstraction to get the Parquet schema for a {@link HoodieDatasetReference}.
 * If you are managing the schemas externally, connect to the system and get the schema.
 *
 * @see PartitionStrategy
 */
public interface SchemaStrategy {

  /**
   * Resolves the Parquet schema for the given dataset.
   *
   * @param metadata reference identifying the dataset
   * @param fsClient file-system client that can be used to read data files
   * @return the Parquet {@link MessageType} schema of the dataset
   */
  MessageType getDatasetSchema(HoodieDatasetReference metadata, HoodieFSClient fsClient);
}

View File

@@ -0,0 +1,55 @@
/*
* Copyright (c) 2017 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*
*/
package com.uber.hoodie.hive;
import com.beust.jcommander.internal.Lists;
import java.util.List;
import org.joda.time.DateTime;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;
/**
 * Extracts a single hive partition value datestr=yyyy-mm-dd from a storage partition
 * path of the form yyyy/mm/dd.
 *
 * HDFS paths encode hive partition values for the keys the table is partitioned on;
 * this implementation handles the slash-encoded day layout.
 */
public class SlashEncodedDayPartitionValueExtractor implements PartitionValueExtractor {

  // Formatter producing the zero-padded yyyy-MM-dd partition value
  private final DateTimeFormatter dtfOut;

  public SlashEncodedDayPartitionValueExtractor() {
    this.dtfOut = DateTimeFormat.forPattern("yyyy-MM-dd");
  }

  @Override
  public List<String> extractPartitionValuesInPath(String partitionPath) {
    // Expect exactly three slash-separated components: yyyy/mm/dd
    String[] parts = partitionPath.split("/");
    if (parts.length != 3) {
      throw new IllegalArgumentException(
          "Partition path " + partitionPath + " is not in the form yyyy/mm/dd ");
    }
    // Joda validates the field ranges here (e.g. month 1-12), rejecting bogus paths
    DateTime day = new DateTime(
        Integer.parseInt(parts[0]),  // year
        Integer.parseInt(parts[1]),  // month
        Integer.parseInt(parts[2]),  // day
        0, 0);
    return Lists.newArrayList(dtfOut.print(day));
  }
}

View File

@@ -1,186 +0,0 @@
/*
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.uber.hoodie.hive.client;
import com.google.common.base.Objects;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import com.uber.hoodie.hive.HoodieHiveConfiguration;
import com.uber.hoodie.hive.HoodieHiveDatasetException;
import com.uber.hoodie.hive.model.HoodieDatasetReference;
import com.uber.hoodie.hive.model.StoragePartition;
import com.uber.hoodie.hive.model.TablePartition;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import parquet.hadoop.ParquetFileReader;
import parquet.hadoop.metadata.ParquetMetadata;
import parquet.schema.MessageType;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Set;
/**
 * Client to access HDFS.
 * Wraps a hadoop FileSystem and provides schema reading and partition-diffing helpers.
 */
public class HoodieFSClient {

  // Data file extensions recognized when scanning for parquet files
  final public static String PARQUET_EXTENSION = ".parquet";
  final public static String PARQUET_EXTENSION_ZIPPED = ".parquet.gz";
  private final static Logger LOG = LoggerFactory.getLogger(HoodieFSClient.class);

  private final HoodieHiveConfiguration conf;
  private final FileSystem fs;

  public HoodieFSClient(HoodieHiveConfiguration configuration) {
    this.conf = configuration;
    try {
      this.fs = FileSystem.get(configuration.getConfiguration());
    } catch (IOException e) {
      throw new HoodieHiveDatasetException(
          "Could not initialize file system from configuration", e);
    }
  }

  /**
   * Read the parquet schema from a parquet File.
   *
   * @param parquetFilePath path to an existing parquet file
   * @return the parquet schema stored in the file footer
   * @throws IOException if the footer cannot be read
   * @throws IllegalArgumentException if the file does not exist
   */
  public MessageType readSchemaFromDataFile(Path parquetFilePath) throws IOException {
    LOG.info("Reading schema from " + parquetFilePath);
    if (!fs.exists(parquetFilePath)) {
      throw new IllegalArgumentException(
          "Failed to read schema from data file " + parquetFilePath
              + ". File does not exist.");
    }
    ParquetMetadata fileFooter =
        ParquetFileReader.readFooter(conf.getConfiguration(), parquetFilePath);
    return fileFooter.getFileMetaData().getSchema();
  }

  /**
   * Find the last data file under the partition path.
   *
   * @param metadata dataset the partition belongs to (used for error reporting)
   * @param partitionPathString partition directory to search (recursively)
   * @return path of the lexicographically-last parquet file under the partition
   * @throws HoodieHiveDatasetException if the path is missing or contains no data file
   */
  public Path lastDataFileForDataset(HoodieDatasetReference metadata,
      String partitionPathString) {
    try {
      Path partitionPath = new Path(partitionPathString);
      if (!fs.exists(partitionPath)) {
        throw new HoodieHiveDatasetException(
            "Partition path " + partitionPath + " not found in Dataset " + metadata);
      }
      RemoteIterator<LocatedFileStatus> files = fs.listFiles(partitionPath, true);
      // Iterate over the list. List is generally is listed in chronological order because of the date partitions
      // Get the latest schema
      // Track the lexicographically-largest parquet path seen so far
      Path returnPath = null;
      while (files.hasNext()) {
        Path path = files.next().getPath();
        if (path.getName().endsWith(PARQUET_EXTENSION) || path.getName()
            .endsWith(PARQUET_EXTENSION_ZIPPED)) {
          if(returnPath == null || path.toString().compareTo(returnPath.toString()) > 0) {
            returnPath = path;
          }
        }
      }
      if (returnPath != null) {
        return returnPath;
      }
      throw new HoodieHiveDatasetException(
          "No data file found in path " + partitionPath + " for dataset " + metadata);
    } catch (IOException e) {
      throw new HoodieHiveDatasetException(
          "Failed to get data file in path " + partitionPathString + " for dataset "
              + metadata, e);
    }
  }

  /**
   * Get the list of storage partitions which does not have its equivalent hive partitions.
   * Comparison is by HDFS location path.
   *
   * @param tablePartitions partitions currently registered in hive
   * @param storagePartitions partitions found on storage
   * @return storage partitions whose location is not registered in hive
   */
  public List<StoragePartition> getUnregisteredStoragePartitions(
      List<TablePartition> tablePartitions, List<StoragePartition> storagePartitions) {
    // Index registered hive partitions by their location path
    Set<String> paths = Sets.newHashSet();
    for (TablePartition tablePartition : tablePartitions) {
      paths.add(tablePartition.getLocation().toUri().getPath());
    }
    List<StoragePartition> missing = Lists.newArrayList();
    for (StoragePartition storagePartition : storagePartitions) {
      String hdfsPath = storagePartition.getPartitionPath().toUri().getPath();
      if (!paths.contains(hdfsPath)) {
        missing.add(storagePartition);
      }
    }
    return missing;
  }

  /**
   * Get the list of storage partitions whose hive partition exists (same partition
   * key values) but is registered at a different HDFS location, i.e. its path changed.
   *
   * @param tablePartitions partitions currently registered in hive
   * @param storagePartitions partitions found on storage
   * @return storage partitions whose registered hive location is stale
   */
  public List<StoragePartition> getChangedStoragePartitions(
      List<TablePartition> tablePartitions, List<StoragePartition> storagePartitions) {
    // Map sorted partition key-values -> registered hive location
    Map<String, String> paths = Maps.newHashMap();
    for (TablePartition tablePartition : tablePartitions) {
      String[] partitionKeyValueStr = tablePartition.getPartitionFieldValues();
      // Sort so the comparison key is order-independent
      Arrays.sort(partitionKeyValueStr);
      paths.put(Arrays.toString(partitionKeyValueStr), tablePartition.getLocation().toUri().getPath());
    }
    List<StoragePartition> changed = Lists.newArrayList();
    for (StoragePartition storagePartition : storagePartitions) {
      String[] partitionKeyValues = storagePartition.getPartitionFieldValues();
      Arrays.sort(partitionKeyValues);
      String partitionKeyValueStr = Arrays.toString(partitionKeyValues);
      String hdfsPath = storagePartition.getPartitionPath().toUri().getPath();
      // Same partition values but different location => the partition moved
      if (paths.containsKey(partitionKeyValueStr) && !paths.get(partitionKeyValueStr).equals(hdfsPath)) {
        changed.add(storagePartition);
      }
    }
    return changed;
  }

  // Hash over the file statuses; presumably used to detect storage changes cheaply
  // — NOTE(review): relies on FileStatus hashCode semantics, confirm against callers
  public int calculateStorageHash(FileStatus[] paths) {
    return Objects.hashCode(paths);
  }

  public FileSystem getFs() {
    return fs;
  }
}

View File

@@ -1,365 +0,0 @@
/*
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.uber.hoodie.hive.client;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.uber.hoodie.hive.HoodieHiveConfiguration;
import com.uber.hoodie.hive.HoodieHiveDatasetException;
import com.uber.hoodie.hive.PartitionStrategy;
import com.uber.hoodie.hive.model.HoodieDatasetReference;
import com.uber.hoodie.hive.model.SchemaDifference;
import com.uber.hoodie.hive.model.StoragePartition;
import com.uber.hoodie.hive.model.TablePartition;
import org.apache.commons.dbcp.BasicDataSource;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
import org.apache.hadoop.hive.metastore.api.Partition;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import parquet.schema.MessageType;
import javax.sql.DataSource;
import java.io.Closeable;
import java.io.IOException;
import java.sql.Connection;
import java.sql.DatabaseMetaData;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
/**
* Client to access Hive
*/
/**
 * Client to access Hive. Uses JDBC (via a pooled {@link DataSource}) for DDL statements and the
 * thrift {@link HiveMetaStoreClient} for partition listing. The JDBC connection is created lazily
 * and cached for the lifetime of this client; call {@link #close()} to release it.
 */
public class HoodieHiveClient implements Closeable {

  private static final Logger LOG = LoggerFactory.getLogger(HoodieHiveClient.class);
  private static final String driverName = "org.apache.hive.jdbc.HiveDriver";

  static {
    try {
      Class.forName(driverName);
    } catch (ClassNotFoundException e) {
      throw new IllegalStateException("Could not find " + driverName + " in classpath. ", e);
    }
  }

  private final HoodieHiveConfiguration configuration;
  private Connection connection;
  private HiveConf hiveConf;

  public HoodieHiveClient(HoodieHiveConfiguration configuration) {
    this.configuration = configuration;
    this.hiveConf = new HiveConf();
    this.hiveConf.addResource(configuration.getConfiguration());
    try {
      // Connect eagerly so a mis-configured JDBC url fails fast at construction time.
      this.connection = getConnection();
    } catch (SQLException e) {
      throw new HoodieHiveDatasetException("Failed to connect to hive metastore ", e);
    }
  }

  /**
   * Scan all the hive partitions registered for the given {@link HoodieDatasetReference}.
   *
   * @param metadata reference to the hoodie dataset
   * @return all partitions currently known to the metastore for this table
   * @throws IllegalArgumentException if the table does not exist
   */
  public List<TablePartition> scanPartitions(HoodieDatasetReference metadata) {
    if (!checkTableExists(metadata)) {
      throw new IllegalArgumentException(
          "Failed to scan partitions as table " + metadata.getDatabaseTableName()
              + " does not exist");
    }
    List<TablePartition> partitions = Lists.newArrayList();
    HiveMetaStoreClient client = null;
    try {
      client = new HiveMetaStoreClient(hiveConf);
      // max_parts = -1 means no limit: fetch every partition of the table
      List<Partition> hivePartitions = client
          .listPartitions(metadata.getDatabaseName(), metadata.getTableName(), (short) -1);
      for (Partition partition : hivePartitions) {
        partitions.add(new TablePartition(metadata, partition));
      }
      return partitions;
    } catch (Exception e) {
      throw new HoodieHiveDatasetException("Failed to scan partitions for " + metadata, e);
    } finally {
      if (client != null) {
        client.close();
      }
    }
  }

  /**
   * Check if the table referenced by the dataset exists in the metastore.
   *
   * @param metadata reference to the hoodie dataset
   * @return true if the table exists
   */
  public boolean checkTableExists(HoodieDatasetReference metadata) {
    ResultSet resultSet = null;
    try {
      Connection conn = getConnection();
      resultSet = conn.getMetaData()
          .getTables(null, metadata.getDatabaseName(), metadata.getTableName(), null);
      return resultSet.next();
    } catch (SQLException e) {
      throw new HoodieHiveDatasetException("Failed to check if table exists " + metadata, e);
    } finally {
      closeQuietly(resultSet, null);
    }
  }

  /**
   * Replace the table's column definitions with the given (evolved) storage schema.
   *
   * @param metadata reference to the hoodie dataset
   * @param hivePartitionFieldNames partition field names; only used to decide the cascade clause
   * @param newSchema new parquet schema to project onto the table
   * @return the result of executing the ALTER statement
   */
  public boolean updateTableDefinition(HoodieDatasetReference metadata,
      String[] hivePartitionFieldNames, MessageType newSchema) {
    try {
      String newSchemaStr = SchemaUtil.generateSchemaString(newSchema);
      // Cascade clause should not be present for non-partitioned tables
      String cascadeClause = hivePartitionFieldNames.length > 0 ? " cascade" : "";
      StringBuilder sqlBuilder = new StringBuilder("ALTER TABLE ").append("`")
          .append(metadata.getDatabaseTableName()).append("`").append(" REPLACE COLUMNS(")
          .append(newSchemaStr).append(" )").append(cascadeClause);
      LOG.info("Creating table with " + sqlBuilder);
      return updateHiveSQL(sqlBuilder.toString());
    } catch (IOException e) {
      throw new HoodieHiveDatasetException("Failed to update table for " + metadata, e);
    }
  }

  /**
   * Execute a DDL/update statement in hive over JDBC.
   *
   * @param s SQL to execute
   * @return the result of {@link Statement#execute(String)}
   */
  public boolean updateHiveSQL(String s) {
    Statement stmt = null;
    try {
      Connection conn = getConnection();
      stmt = conn.createStatement();
      LOG.info("Executing SQL " + s);
      return stmt.execute(s);
    } catch (SQLException e) {
      throw new HoodieHiveDatasetException("Failed in executing SQL " + s, e);
    } finally {
      closeQuietly(null, stmt);
    }
  }

  /**
   * Get the table schema as a map of column name to hive type name.
   *
   * @param datasetReference reference to the hoodie dataset
   * @return column name -&gt; type name for every column of the table
   * @throws IllegalArgumentException if the table does not exist
   */
  public Map<String, String> getTableSchema(HoodieDatasetReference datasetReference) {
    if (!checkTableExists(datasetReference)) {
      throw new IllegalArgumentException(
          "Failed to get schema as table " + datasetReference.getDatabaseTableName()
              + " does not exist");
    }
    Map<String, String> schema = Maps.newHashMap();
    ResultSet result = null;
    try {
      Connection connection = getConnection();
      DatabaseMetaData databaseMetaData = connection.getMetaData();
      result = databaseMetaData.getColumns(null, datasetReference.getDatabaseName(),
          datasetReference.getTableName(), null);
      while (result.next()) {
        // Per JDBC DatabaseMetaData.getColumns: column 4 = COLUMN_NAME, column 6 = TYPE_NAME
        String columnName = result.getString(4);
        String columnType = result.getString(6);
        schema.put(columnName, columnType);
      }
      return schema;
    } catch (SQLException e) {
      throw new HoodieHiveDatasetException(
          "Failed to get table schema for " + datasetReference, e);
    } finally {
      closeQuietly(result, null);
    }
  }

  /**
   * Register the given storage partitions with the hive table (ADD IF NOT EXISTS).
   */
  public void addPartitionsToTable(HoodieDatasetReference datasetReference,
      List<StoragePartition> partitionsToAdd, PartitionStrategy strategy) {
    if (partitionsToAdd.isEmpty()) {
      LOG.info("No partitions to add for " + datasetReference);
      return;
    }
    LOG.info("Adding partitions " + partitionsToAdd.size() + " to dataset " + datasetReference);
    String sql = constructAddPartitions(datasetReference, partitionsToAdd, strategy);
    updateHiveSQL(sql);
  }

  /**
   * Point already-registered partitions at their (changed) storage locations.
   */
  public void updatePartitionsToTable(HoodieDatasetReference datasetReference,
      List<StoragePartition> changedPartitions, PartitionStrategy partitionStrategy) {
    if (changedPartitions.isEmpty()) {
      LOG.info("No partitions to change for " + datasetReference);
      return;
    }
    LOG.info(
        "Changing partitions " + changedPartitions.size() + " on dataset " + datasetReference);
    List<String> sqls =
        constructChangePartitions(datasetReference, changedPartitions, partitionStrategy);
    for (String sql : sqls) {
      updateHiveSQL(sql);
    }
  }

  /**
   * Create the external hive table for the dataset if it does not exist.
   */
  public void createTable(MessageType storageSchema, HoodieDatasetReference metadata,
      String[] partitionKeys, String inputFormatClass, String outputFormatClass) {
    try {
      String createSQLQuery = SchemaUtil
          .generateCreateDDL(storageSchema, metadata, partitionKeys, inputFormatClass,
              outputFormatClass);
      LOG.info("Creating table with " + createSQLQuery);
      updateHiveSQL(createSQLQuery);
    } catch (IOException e) {
      throw new HoodieHiveDatasetException("Failed to create table for " + metadata, e);
    }
  }

  /**
   * Close each resource independently so a failure closing the statement does not leak the
   * result set (the original single try closed them sequentially).
   */
  private static void closeQuietly(ResultSet resultSet, Statement stmt) {
    if (stmt != null) {
      try {
        stmt.close();
      } catch (SQLException e) {
        LOG.error("Could not close the statement opened ", e);
      }
    }
    if (resultSet != null) {
      try {
        resultSet.close();
      } catch (SQLException e) {
        LOG.error("Could not close the resultset opened ", e);
      }
    }
  }

  /**
   * Lazily create (with bounded retries) and cache the JDBC connection.
   */
  private Connection getConnection() throws SQLException {
    if (connection == null) {
      DataSource ds = getDatasource();
      LOG.info("Getting Hive Connection from Datasource " + ds);
      int count = 0;
      int maxTries = 3;
      while (true) {
        try {
          this.connection = ds.getConnection();
          break;
        } catch (SQLException e) {
          // Retry transient connect failures; rethrow the last failure after maxTries attempts.
          if (++count == maxTries) {
            throw e;
          }
        }
      }
    }
    return connection;
  }

  private DataSource getDatasource() {
    BasicDataSource ds = new BasicDataSource();
    ds.setDriverClassName(driverName);
    ds.setUrl(getHiveJdbcUrlWithDefaultDBName());
    ds.setUsername(configuration.getHiveUsername());
    ds.setPassword(configuration.getHivePassword());
    return ds;
  }

  /**
   * Rewrites the configured JDBC url so it targets the configured database, preserving any
   * trailing session properties (e.g. {@code ;transportMode=http;httpPath=hs2}).
   */
  public String getHiveJdbcUrlWithDefaultDBName() {
    String hiveJdbcUrl = configuration.getHiveJdbcUrl();
    String urlAppend = null;
    // If the hive url contains addition properties like ;transportMode=http;httpPath=hs2
    if (hiveJdbcUrl.contains(";")) {
      urlAppend = hiveJdbcUrl.substring(hiveJdbcUrl.indexOf(";"));
      hiveJdbcUrl = hiveJdbcUrl.substring(0, hiveJdbcUrl.indexOf(";"));
    }
    if (!hiveJdbcUrl.endsWith("/")) {
      hiveJdbcUrl = hiveJdbcUrl + "/";
    }
    return hiveJdbcUrl + configuration.getDbName() + (urlAppend == null ? "" : urlAppend);
  }

  /**
   * Builds the partition spec clause, e.g. {@code datestr='2017-05-01'}. Multiple partition
   * fields are joined with commas, as hive DDL requires.
   */
  private static String getPartitionClause(StoragePartition partition,
      String[] partitionFieldNames) {
    String[] partitionValues = partition.getPartitionFieldValues();
    Preconditions.checkArgument(partitionFieldNames.length == partitionValues.length,
        "Partition key parts " + Arrays.toString(partitionFieldNames)
            + " does not match with partition values " + Arrays.toString(partitionValues)
            + ". Check partition strategy. ");
    StringBuilder partBuilder = new StringBuilder();
    for (int i = 0; i < partitionFieldNames.length; i++) {
      if (i > 0) {
        // Fix: the original omitted this comma, producing invalid DDL such as
        // PARTITION (a='x'b='y') whenever there is more than one partition field.
        partBuilder.append(",");
      }
      partBuilder.append(partitionFieldNames[i]).append("=").append("'")
          .append(partitionValues[i]).append("'");
    }
    return partBuilder.toString();
  }

  private static List<String> constructChangePartitions(HoodieDatasetReference metadata,
      List<StoragePartition> partitions, PartitionStrategy partitionStrategy) {
    String[] partitionFieldNames = partitionStrategy.getHivePartitionFieldNames();
    List<String> changePartitions = Lists.newArrayList();
    String alterTable = "ALTER TABLE " + metadata.getDatabaseTableName();
    for (StoragePartition partition : partitions) {
      String partitionClause = getPartitionClause(partition, partitionFieldNames);
      // TODO(review): namenode address is hard-coded; should come from configuration.
      String changePartition =
          alterTable + " PARTITION (" + partitionClause + ") SET LOCATION '"
              + "hdfs://nameservice1" + partition.getPartitionPath() + "'";
      changePartitions.add(changePartition);
    }
    return changePartitions;
  }

  private static String constructAddPartitions(HoodieDatasetReference metadata,
      List<StoragePartition> partitions, PartitionStrategy partitionStrategy) {
    return constructAddPartitions(metadata.getDatabaseTableName(), partitions,
        partitionStrategy);
  }

  /**
   * Builds a single {@code ALTER TABLE ... ADD IF NOT EXISTS PARTITION (...) LOCATION '...'}
   * statement covering all the given partitions.
   */
  private static String constructAddPartitions(String newDbTableName,
      List<StoragePartition> partitions, PartitionStrategy partitionStrategy) {
    String[] partitionFieldNames = partitionStrategy.getHivePartitionFieldNames();
    StringBuilder alterSQL = new StringBuilder("ALTER TABLE ");
    alterSQL.append(newDbTableName).append(" ADD IF NOT EXISTS ");
    for (StoragePartition partition : partitions) {
      String partitionClause = getPartitionClause(partition, partitionFieldNames);
      alterSQL.append(" PARTITION (").append(partitionClause).append(") LOCATION '")
          .append(partition.getPartitionPath()).append("' ");
    }
    return alterSQL.toString();
  }

  @Override
  public void close() throws IOException {
    if (connection != null) {
      try {
        connection.close();
      } catch (SQLException e) {
        LOG.error("Could not close the connection opened ", e);
      }
    }
  }
}

View File

@@ -1,39 +0,0 @@
/*
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.uber.hoodie.hive.example;
import com.uber.hoodie.hive.HiveSyncTool;
import com.uber.hoodie.hive.HiveSyncConfig;
/**
* Example showing how to sync the dataset, written by `HoodieClientExample`
*/
/**
 * Example showing how to sync the dataset, written by `HoodieClientExample`
 */
public class HoodieHiveSyncExample {

  public static void main(String[] args) {
    HiveSyncTool.sync(buildSyncConfig());
  }

  /** Assembles the sync configuration for the sample dataset. */
  private static HiveSyncConfig buildSyncConfig() {
    HiveSyncConfig cfg = new HiveSyncConfig();
    cfg.databaseName = "default";
    cfg.tableName = "uber_trips";
    cfg.basePath = "/tmp/hoodie/sample-table/";
    cfg.hiveUser = "hive";
    cfg.hivePass = "hive";
    cfg.jdbcUrl = "jdbc:hive2://localhost:10010/";
    return cfg;
  }
}

View File

@@ -1,76 +0,0 @@
/*
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.uber.hoodie.hive.impl;
import com.uber.hoodie.common.util.FSUtils;
import com.uber.hoodie.hive.HoodieHiveDatasetException;
import com.uber.hoodie.hive.PartitionStrategy;
import com.uber.hoodie.hive.client.HoodieFSClient;
import com.uber.hoodie.hive.model.HoodieDatasetReference;
import org.joda.time.DateTime;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.util.List;
/**
* Simple day based partitions.
* Storage is of this format yyyy/mm/dd
* Table is partitioned by dateStringFieldName=MM/dd/yyyy
*/
/**
 * Simple day based partitions.
 * Storage layout is yyyy/mm/dd; the hive table is partitioned by a single field
 * datestr=yyyy-MM-dd (matching the formatter used in {@link #convertPartitionToValues}).
 */
public class DayBasedPartitionStrategy implements PartitionStrategy {

  // Loggers are conventionally static final; the original held one per instance.
  private static final Logger LOG = LoggerFactory.getLogger(DayBasedPartitionStrategy.class);

  private final String dateStringFieldName;
  private final DateTimeFormatter dtfOut;

  public DayBasedPartitionStrategy() {
    this.dateStringFieldName = "datestr";
    this.dtfOut = DateTimeFormat.forPattern("yyyy-MM-dd");
  }

  /**
   * Lists every partition path of the dataset directly from the file system.
   */
  @Override public List<String> scanAllPartitions(HoodieDatasetReference ref, HoodieFSClient fsClient) {
    try {
      return FSUtils.getAllPartitionPaths(fsClient.getFs(), ref.getBaseDatasetPath(), true);
    } catch (IOException ioe) {
      throw new HoodieHiveDatasetException(
          "IOException when listing partitions under dataset " + ref , ioe);
    }
  }

  @Override public String[] getHivePartitionFieldNames() {
    return new String[] {dateStringFieldName};
  }

  /**
   * Converts a storage partition path of the form yyyy/mm/dd into the single hive
   * partition value formatted as yyyy-MM-dd.
   *
   * @throws IllegalArgumentException if the path does not have exactly three segments
   * @throws NumberFormatException if a segment is not numeric
   */
  @Override
  public String[] convertPartitionToValues(HoodieDatasetReference metadata, String partitionPath) {
    // Expected storage layout: yyyy/mm/dd
    String[] splits = partitionPath.split("/");
    if (splits.length != 3) {
      throw new IllegalArgumentException(
          "Partition path " + partitionPath + " is not in the form yyyy/mm/dd ");
    }
    int year = Integer.parseInt(splits[0]);
    int mm = Integer.parseInt(splits[1]);
    int dd = Integer.parseInt(splits[2]);
    DateTime dateTime = new DateTime(year, mm, dd, 0, 0);
    return new String[] {dtfOut.print(dateTime)};
  }
}

View File

@@ -1,43 +0,0 @@
/*
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.uber.hoodie.hive.impl;
import com.uber.hoodie.hive.HoodieHiveDatasetException;
import com.uber.hoodie.hive.SchemaStrategy;
import com.uber.hoodie.hive.client.HoodieFSClient;
import com.uber.hoodie.hive.model.HoodieDatasetReference;
import org.apache.hadoop.fs.Path;
import parquet.schema.MessageType;
import java.io.IOException;
/**
* Schema strategy to read the parquet schema from any of the data file
*/
/**
 * Schema strategy to read the parquet schema from any of the data file
 */
public class ParseSchemaFromDataStrategy implements SchemaStrategy {

  /**
   * Reads the dataset schema out of the last data file found under the base path.
   * Every data file carries the full parquet schema, so any one of them suffices.
   */
  @Override
  public MessageType getDatasetSchema(HoodieDatasetReference metadata, HoodieFSClient fsClient) {
    Path latestDataFile = fsClient.lastDataFileForDataset(metadata, metadata.getBaseDatasetPath());
    try {
      return fsClient.readSchemaFromDataFile(latestDataFile);
    } catch (IOException e) {
      throw new HoodieHiveDatasetException(
          "Could not read schema for " + metadata + ", tried to read schema from "
              + latestDataFile, e);
    }
  }
}

View File

@@ -1,79 +0,0 @@
/*
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.uber.hoodie.hive.model;
import java.util.Objects;
/**
* A reference to a Dataset. Each dataset will have a hadoop configuration, table name,
* base path in HDFS. {@link HoodieDatasetReference} is immutable.
*/
/**
 * A reference to a Dataset. Each dataset will have a hadoop configuration, table name,
 * base path in HDFS. {@link HoodieDatasetReference} is immutable.
 */
public class HoodieDatasetReference {

  // Fields are final to actually enforce the immutability the class javadoc promises
  // (the original left them mutable).
  private final String tableName;
  private final String baseDatasetPath;
  private final String databaseName;

  public HoodieDatasetReference(String tableName, String baseDatasetPath, String databaseName) {
    this.tableName = tableName;
    this.baseDatasetPath = baseDatasetPath;
    this.databaseName = databaseName;
  }

  /** @return the fully qualified name, i.e. {@code database.table} */
  public String getDatabaseTableName() {
    return databaseName + "." + tableName;
  }

  public String getTableName() {
    return tableName;
  }

  public String getBaseDatasetPath() {
    return baseDatasetPath;
  }

  public String getDatabaseName() {
    return databaseName;
  }

  @Override
  public boolean equals(Object o) {
    if (this == o) {
      return true;
    }
    if (o == null || getClass() != o.getClass()) {
      return false;
    }
    HoodieDatasetReference that = (HoodieDatasetReference) o;
    return Objects.equals(tableName, that.tableName)
        && Objects.equals(baseDatasetPath, that.baseDatasetPath)
        && Objects.equals(databaseName, that.databaseName);
  }

  @Override
  public int hashCode() {
    return Objects.hash(tableName, baseDatasetPath, databaseName);
  }

  @Override
  public String toString() {
    final StringBuilder sb = new StringBuilder("HoodieDatasetReference{");
    sb.append("tableName='").append(tableName).append('\'');
    sb.append(", baseDatasetPath='").append(baseDatasetPath).append('\'');
    sb.append(", databaseName='").append(databaseName).append('\'');
    sb.append('}');
    return sb.toString();
  }
}

View File

@@ -1,51 +0,0 @@
/*
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.uber.hoodie.hive.model;
import com.google.common.base.Objects;
import com.uber.hoodie.hive.PartitionStrategy;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * A partition present on storage for a hoodie dataset, identified by its path relative to
 * the dataset base path. Partition key values are derived via the {@link PartitionStrategy}.
 */
public class StoragePartition {

  private static final Logger LOG = LoggerFactory.getLogger(StoragePartition.class);

  private final PartitionStrategy partitionStrategy;
  private final String partitionPath;
  private final HoodieDatasetReference metadata;

  public StoragePartition(HoodieDatasetReference metadata, PartitionStrategy partitionStrategy, String partitionPath) {
    this.metadata = metadata;
    this.partitionPath = partitionPath;
    this.partitionStrategy = partitionStrategy;
  }

  /** @return partition key values derived from the relative path by the partition strategy */
  public String[] getPartitionFieldValues() {
    return partitionStrategy.convertPartitionToValues(metadata, partitionPath);
  }

  /** @return full path of this partition under the dataset base path (scheme/authority kept) */
  public Path getPartitionPath() {
    return new Path(metadata.getBaseDatasetPath(), partitionPath);
  }

  @Override public String toString() {
    return Objects.toStringHelper(this).add("partitionPath", partitionPath)
        .add("metadata", metadata).toString();
  }
}

View File

@@ -1,38 +0,0 @@
/*
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.uber.hoodie.hive.model;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.metastore.api.Partition;
/**
 * A partition of a hoodie dataset as registered in the Hive metastore.
 */
public class TablePartition {

  private final HoodieDatasetReference metadata;
  private final Partition partition;

  public TablePartition(HoodieDatasetReference metadata, Partition partition) {
    this.metadata = metadata;
    this.partition = partition;
  }

  /** @return location of this partition with the scheme/authority stripped off */
  public Path getLocation() {
    Path rawLocation = new Path(partition.getSd().getLocation());
    return Path.getPathWithoutSchemeAndAuthority(rawLocation);
  }

  /** @return values for each partition key, in metastore order */
  public String[] getPartitionFieldValues() {
    String[] values = new String[partition.getValuesSize()];
    return partition.getValues().toArray(values);
  }
}

View File

@@ -14,7 +14,7 @@
* limitations under the License.
*/
package com.uber.hoodie.hive.client;
package com.uber.hoodie.hive.util;
import com.google.common.collect.Maps;

View File

@@ -14,15 +14,13 @@
* limitations under the License.
*/
package com.uber.hoodie.hive.client;
package com.uber.hoodie.hive.util;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import com.uber.hoodie.hive.HoodieHiveDatasetException;
import com.uber.hoodie.hive.model.HoodieDatasetReference;
import com.uber.hoodie.hive.model.SchemaDifference;
import org.apache.commons.lang.ArrayUtils;
import org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe;
import com.uber.hoodie.hive.HiveSyncConfig;
import com.uber.hoodie.hive.HoodieHiveSyncException;
import com.uber.hoodie.hive.SchemaDifference;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import parquet.schema.DecimalMetadata;
@@ -52,12 +50,12 @@ public class SchemaUtil {
* @return
*/
public static SchemaDifference getSchemaDifference(MessageType storageSchema,
Map<String, String> tableSchema, String[] partitionKeys) {
Map<String, String> tableSchema, List<String> partitionKeys) {
Map<String, String> newTableSchema;
try {
newTableSchema = convertParquetSchemaToHiveSchema(storageSchema);
} catch (IOException e) {
throw new HoodieHiveDatasetException("Failed to convert parquet schema to hive schema",
throw new HoodieHiveSyncException("Failed to convert parquet schema to hive schema",
e);
}
LOG.info("Getting schema difference for " + tableSchema + "\r\n\r\n" + newTableSchema);
@@ -68,14 +66,13 @@ public class SchemaUtil {
for (Map.Entry<String, String> field : tableSchema.entrySet()) {
String fieldName = field.getKey().toLowerCase();
String tickSurroundedFieldName = tickSurround(fieldName);
if (!isFieldExistsInSchema(newTableSchema, tickSurroundedFieldName) && !ArrayUtils
.contains(partitionKeys, fieldName)) {
if (!isFieldExistsInSchema(newTableSchema, tickSurroundedFieldName) && !partitionKeys.contains(fieldName)) {
schemaDiffBuilder.deleteTableColumn(fieldName);
} else {
// check type
String tableColumnType = field.getValue();
if (!isFieldExistsInSchema(newTableSchema, tickSurroundedFieldName)) {
if (ArrayUtils.contains(partitionKeys, fieldName)) {
if (partitionKeys.contains(fieldName)) {
// Partition key does not have to be part of the storage schema
continue;
}
@@ -93,7 +90,7 @@ public class SchemaUtil {
if (!tableColumnType.equalsIgnoreCase(expectedType)) {
// check for incremental datasets, the schema type change is allowed as per evolution rules
if (!isSchemaTypeUpdateAllowed(tableColumnType, expectedType)) {
throw new HoodieHiveDatasetException(
throw new HoodieHiveSyncException(
"Could not convert field Type from " + tableColumnType + " to "
+ expectedType + " for field " + fieldName);
}
@@ -401,27 +398,27 @@ public class SchemaUtil {
}
public static String generateCreateDDL(MessageType storageSchema,
HoodieDatasetReference metadata, String[] partitionKeys, String inputFormatClass,
String outputFormatClass) throws IOException {
HiveSyncConfig config, String inputFormatClass,
String outputFormatClass, String serdeClass) throws IOException {
Map<String, String> hiveSchema = convertParquetSchemaToHiveSchema(storageSchema);
String columns = generateSchemaString(storageSchema);
StringBuilder partitionFields = new StringBuilder();
for (String partitionKey : partitionKeys) {
for (String partitionKey : config.partitionFields) {
partitionFields.append(partitionKey).append(" ")
.append(getPartitionKeyType(hiveSchema, partitionKey));
}
StringBuilder sb = new StringBuilder("CREATE EXTERNAL TABLE IF NOT EXISTS ");
sb = sb.append(metadata.getDatabaseTableName());
sb = sb.append(config.databaseName).append(".").append(config.tableName);
sb = sb.append("( ").append(columns).append(")");
if (partitionKeys.length > 0) {
if (!config.partitionFields.isEmpty()) {
sb = sb.append(" PARTITIONED BY (").append(partitionFields).append(")");
}
sb = sb.append(" ROW FORMAT SERDE '").append(ParquetHiveSerDe.class.getName()).append("'");
sb = sb.append(" ROW FORMAT SERDE '").append(serdeClass).append("'");
sb = sb.append(" STORED AS INPUTFORMAT '").append(inputFormatClass).append("'");
sb = sb.append(" OUTPUTFORMAT '").append(outputFormatClass).append("' LOCATION '")
.append(metadata.getBaseDatasetPath()).append("'");
.append(config.basePath).append("'");
return sb.toString();
}