1
0

Import from Hoodie private repo: Part 1

This commit is contained in:
Prasanna Rajaperumal
2016-12-16 14:03:59 -08:00
commit 0512da094b
56 changed files with 8868 additions and 0 deletions

View File

@@ -0,0 +1,54 @@
/*
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.uber.hoodie.cli;
import com.uber.hoodie.common.model.HoodieTableMetadata;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import java.io.IOException;
/**
 * Process-wide CLI context: the Hadoop configuration, the filesystem handle, and
 * the metadata of the currently connected dataset (plus an optional sync target).
 * All state is static because the shell is a single-user, single-session process.
 */
public class HoodieCLI {

  /** Where the shell currently is in its connect/sync lifecycle. */
  public enum CLIState {
    INIT, DATASET, SYNC
  }

  public static Configuration conf;
  public static FileSystem fs;
  public static CLIState state = CLIState.INIT;
  public static HoodieTableMetadata tableMetadata;
  public static HoodieTableMetadata syncTableMetadata;

  /**
   * Lazily creates the Hadoop configuration.
   *
   * @return true if a fresh configuration was created, false if one already existed
   */
  public static boolean initConf() {
    if (HoodieCLI.conf != null) {
      return false;
    }
    HoodieCLI.conf = new Configuration();
    return true;
  }

  /**
   * Initializes the filesystem handle from the current configuration.
   *
   * @param force re-create the handle even when one already exists
   * @throws IOException if the filesystem cannot be obtained
   */
  public static void initFS(boolean force) throws IOException {
    if (fs != null && !force) {
      return;
    }
    fs = FileSystem.get(conf);
  }

  /**
   * Points the CLI at the given dataset's metadata.
   */
  public static void setTableMetadata(HoodieTableMetadata tableMetadata) {
    HoodieCLI.tableMetadata = tableMetadata;
  }
}

View File

@@ -0,0 +1,37 @@
/*
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.uber.hoodie.cli;
import org.springframework.core.Ordered;
import org.springframework.core.annotation.Order;
import org.springframework.shell.plugin.support.DefaultHistoryFileNameProvider;
import org.springframework.stereotype.Component;
@Component
@Order(Ordered.HIGHEST_PRECEDENCE)
public class HoodieHistoryFileNameProvider extends DefaultHistoryFileNameProvider {

  /**
   * File where the interactive shell persists its command history.
   */
  @Override // was missing: this overrides DefaultHistoryFileNameProvider
  public String getHistoryFileName() {
    return "hoodie-cmd.log";
  }

  @Override
  public String getProviderName() {
    return "Hoodie file name provider";
  }
}

View File

@@ -0,0 +1,34 @@
/*
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.uber.hoodie.cli;
import dnl.utils.text.table.TextTable;

import java.io.ByteArrayOutputStream;
import java.io.PrintStream;
import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;
/**
 * Helper for rendering tabular CLI output as a string.
 */
public class HoodiePrintHelper {

  /**
   * Renders the given header and rows as an ASCII table.
   *
   * @param header column titles
   * @param rows   table cells, one array per row
   * @return the formatted table, decoded as UTF-8
   */
  public static String print(String[] header, String[][] rows) {
    TextTable textTable = new TextTable(header, rows);
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    PrintStream ps;
    try {
      // Encode as UTF-8 explicitly (with auto-flush): the no-charset PrintStream
      // constructor writes in the platform default encoding, which need not match
      // the UTF-8 decode below, and buffered output could be lost without a flush.
      ps = new PrintStream(baos, true, "utf-8");
    } catch (UnsupportedEncodingException e) {
      // Every JVM is required to support UTF-8, so this is unreachable in practice.
      throw new IllegalStateException("JVM does not support UTF-8", e);
    }
    textTable.printTable(ps, 4);
    ps.flush();
    return new String(baos.toByteArray(), Charset.forName("utf-8"));
  }
}

View File

@@ -0,0 +1,49 @@
/*
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.uber.hoodie.cli;
import org.springframework.core.Ordered;
import org.springframework.core.annotation.Order;
import org.springframework.shell.plugin.support.DefaultPromptProvider;
import org.springframework.stereotype.Component;
@Component
@Order(Ordered.HIGHEST_PRECEDENCE)
public class HoodiePrompt extends DefaultPromptProvider {

  /**
   * Builds the shell prompt from the CLI's current connection state:
   * "hoodie->" before connecting, "hoodie:&lt;table&gt;->" once connected, and
   * "hoodie:&lt;table&gt; &lt;==&gt; &lt;syncTable&gt;->" while in sync mode.
   */
  @Override
  public String getPrompt() {
    switch (HoodieCLI.state) {
      case INIT:
        return "hoodie->";
      case DATASET:
        return "hoodie:" + HoodieCLI.tableMetadata.getTableName() + "->";
      case SYNC:
        return "hoodie:" + HoodieCLI.tableMetadata.getTableName() + " <==> "
            + HoodieCLI.syncTableMetadata.getTableName() + "->";
      default:
        // The switch used to lack a default, leaving this fallback dangling after
        // it as dead-looking code; keep the same behavior inside the switch.
        if (HoodieCLI.tableMetadata != null) {
          return "hoodie:" + HoodieCLI.tableMetadata.getTableName() + "->";
        }
        return "hoodie->";
    }
  }

  @Override
  public String getProviderName() {
    return "Hoodie provider";
  }
}

View File

@@ -0,0 +1,55 @@
/*
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.uber.hoodie.cli;
import org.springframework.core.Ordered;
import org.springframework.core.annotation.Order;
import org.springframework.shell.plugin.support.DefaultBannerProvider;
import org.springframework.shell.support.util.OsUtils;
import org.springframework.stereotype.Component;
@Component
@Order(Ordered.HIGHEST_PRECEDENCE)
public class HoodieSplashScreen extends DefaultBannerProvider {

  // ASCII-art splash banner printed when the shell starts.
  // NOTE(review): the interior padding of the art lines looks collapsed (likely a
  // whitespace-mangling import) — verify the banner renders as intended.
  private static final String SCREEN =
      "============================================" + OsUtils.LINE_SEPARATOR
          + "* *" + OsUtils.LINE_SEPARATOR
          + "* _ _ _ _ *" + OsUtils.LINE_SEPARATOR
          + "* | | | | | (_) *" + OsUtils.LINE_SEPARATOR
          + "* | |__| | ___ ___ __| |_ ___ *" + OsUtils.LINE_SEPARATOR
          + "* | __ |/ _ \\ / _ \\ / _` | |/ _ \\ *" + OsUtils.LINE_SEPARATOR
          + "* | | | | (_) | (_) | (_| | | __/ *" + OsUtils.LINE_SEPARATOR
          + "* |_| |_|\\___/ \\___/ \\__,_|_|\\___| *" + OsUtils.LINE_SEPARATOR
          + "* *" + OsUtils.LINE_SEPARATOR
          + "============================================" + OsUtils.LINE_SEPARATOR;

  /** @return the banner shown at startup */
  @Override // was missing on all three provider methods below
  public String getBanner() {
    return SCREEN;
  }

  @Override
  public String getVersion() {
    return "1.0";
  }

  @Override
  public String getWelcomeMessage() {
    return "Welcome to Hoodie CLI. Please type help if you are looking for help. ";
  }

  @Override
  public String getProviderName() {
    return "Hoodie Banner";
  }
}

View File

@@ -0,0 +1,33 @@
/*
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.uber.hoodie.cli;
import org.springframework.shell.Bootstrap;
import java.io.IOException;
/**
 * Launcher for the Hoodie CLI shell.
 */
public class Main {

  /**
   * Delegates to Spring Shell's Bootstrap class; having a plain main method here
   * simplifies launching and debugging the shell from inside an IDE.
   *
   * @param args command-line arguments, forwarded verbatim to Spring Shell
   * @throws IOException if the underlying shell fails to start
   */
  public static void main(String[] args) throws IOException {
    Bootstrap.main(args);
  }
}

View File

@@ -0,0 +1,244 @@
/*
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.uber.hoodie.cli.commands;
import com.uber.hoodie.cli.HoodieCLI;
import com.uber.hoodie.cli.HoodiePrintHelper;
import com.uber.hoodie.cli.utils.InputStreamConsumer;
import com.uber.hoodie.cli.utils.SparkUtil;
import com.uber.hoodie.common.model.HoodieCommitMetadata;
import com.uber.hoodie.common.model.HoodieCommits;
import com.uber.hoodie.common.model.HoodieTableMetadata;
import com.uber.hoodie.common.model.HoodieWriteStat;
import com.uber.hoodie.common.util.NumericUtils;
import org.apache.spark.launcher.SparkLauncher;
import org.springframework.shell.core.CommandMarker;
import org.springframework.shell.core.annotation.CliAvailabilityIndicator;
import org.springframework.shell.core.annotation.CliCommand;
import org.springframework.shell.core.annotation.CliOption;
import org.springframework.stereotype.Component;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.SortedMap;
/**
 * Shell commands for inspecting, rolling back, comparing and syncing commits of
 * the currently connected dataset (held in {@link HoodieCLI}).
 */
@Component
public class CommitsCommand implements CommandMarker {

  // Availability indicators: each commit command is enabled only once a dataset
  // has been connected (see DatasetsCommand#connect).

  @CliAvailabilityIndicator({"commits show"})
  public boolean isShowAvailable() {
    return HoodieCLI.tableMetadata != null;
  }

  @CliAvailabilityIndicator({"commits refresh"})
  public boolean isRefreshAvailable() {
    return HoodieCLI.tableMetadata != null;
  }

  @CliAvailabilityIndicator({"commit rollback"})
  public boolean isRollbackAvailable() {
    return HoodieCLI.tableMetadata != null;
  }

  @CliAvailabilityIndicator({"commit show"})
  public boolean isCommitShowAvailable() {
    return HoodieCLI.tableMetadata != null;
  }

  /**
   * Tabulates the most recent commits (newest first) with per-commit write stats.
   *
   * @param limit maximum number of commits to display (defaults to 10)
   * @return rendered table of commit statistics
   * @throws IOException if commit metadata cannot be read
   */
  @CliCommand(value = "commits show", help = "Show the commits")
  public String showCommits(
      @CliOption(key = {
          "limit"}, mandatory = false, help = "Limit commits", unspecifiedDefaultValue = "10")
      final Integer limit) throws IOException {
    SortedMap<String, HoodieCommitMetadata> map =
        HoodieCLI.tableMetadata.getAllCommitMetadata();
    // Never emit more rows than there are commits.
    int arraySize =
        Math.min(limit, HoodieCLI.tableMetadata.getAllCommits().getCommitList().size());
    String[][] rows = new String[arraySize][];
    ArrayList<String> commitList =
        new ArrayList<String>(HoodieCLI.tableMetadata.getAllCommits().getCommitList());
    // Reverse so the most recent commits come first.
    Collections.reverse(commitList);
    for (int i = 0; i < arraySize; i++) {
      String commit = commitList.get(i);
      HoodieCommitMetadata commitMetadata = map.get(commit);
      rows[i] = new String[] {commit,
          NumericUtils.humanReadableByteCount(commitMetadata.fetchTotalBytesWritten()),
          String.valueOf(commitMetadata.fetchTotalFilesInsert()),
          String.valueOf(commitMetadata.fetchTotalFilesUpdated()),
          String.valueOf(commitMetadata.fetchTotalPartitionsWritten()),
          String.valueOf(commitMetadata.fetchTotalRecordsWritten()),
          String.valueOf(commitMetadata.fetchTotalUpdateRecordsWritten()),
          String.valueOf(commitMetadata.fetchTotalWriteErrors())};
    }
    return HoodiePrintHelper.print(
        new String[] {"CommitTime", "Total Written (B)", "Total Files Added",
            "Total Files Updated", "Total Partitions Written", "Total Records Written",
            "Total Update Records Written", "Total Errors"}, rows);
  }

  /**
   * Re-reads the table metadata from the filesystem and installs it in HoodieCLI.
   */
  @CliCommand(value = "commits refresh", help = "Refresh the commits")
  public String refreshCommits() throws IOException {
    HoodieTableMetadata metadata =
        new HoodieTableMetadata(HoodieCLI.fs, HoodieCLI.tableMetadata.getBasePath());
    HoodieCLI.setTableMetadata(metadata);
    return "Metadata for table " + metadata.getTableName() + " refreshed.";
  }

  /**
   * Rolls back a commit by launching a Spark job (SparkMain ROLLBACK) and
   * reports the outcome based on the job's process exit code.
   *
   * @param commitTime          commit to roll back; must exist in the timeline
   * @param sparkPropertiesPath path to a Spark properties file for the launcher
   *                            (NOTE(review): "Properites" typo in the help string)
   * @throws Exception if the Spark job cannot be launched or awaited
   */
  @CliCommand(value = "commit rollback", help = "Rollback a commit")
  public String rollbackCommit(
      @CliOption(key = {"commit"}, help = "Commit to rollback")
      final String commitTime,
      @CliOption(key = {"sparkProperties"}, help = "Spark Properites File Path")
      final String sparkPropertiesPath) throws Exception {
    if (!HoodieCLI.tableMetadata.getAllCommits().contains(commitTime)) {
      return "Commit " + commitTime + " not found in Commits " + HoodieCLI.tableMetadata
          .getAllCommits();
    }
    SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath);
    sparkLauncher.addAppArgs(SparkMain.SparkCommand.ROLLBACK.toString(),
        commitTime,
        HoodieCLI.tableMetadata.getBasePath());
    Process process = sparkLauncher.launch();
    InputStreamConsumer.captureOutput(process);
    int exitCode = process.waitFor();
    // Refresh the current metadata so later commands observe the (attempted) rollback.
    refreshCommits();
    if (exitCode != 0) {
      return "Commit " + commitTime + " failed to roll back";
    }
    return "Commit " + commitTime + " rolled back";
  }

  /**
   * Shows per-partition aggregates (files added/updated, records, bytes, errors)
   * for a single commit.
   *
   * @param commitTime commit to inspect; must exist in the timeline
   */
  @CliCommand(value = "commit showpartitions", help = "Show partition level details of a commit")
  public String showCommitPartitions(
      @CliOption(key = {"commit"}, help = "Commit to show")
      final String commitTime) throws Exception {
    if (!HoodieCLI.tableMetadata.getAllCommits().contains(commitTime)) {
      return "Commit " + commitTime + " not found in Commits " + HoodieCLI.tableMetadata
          .getAllCommits();
    }
    HoodieCommitMetadata meta = HoodieCLI.tableMetadata.getAllCommitMetadata().get(commitTime);
    List<String[]> rows = new ArrayList<String[]>();
    for (Map.Entry<String, List<HoodieWriteStat>> entry : meta.getPartitionToWriteStats()
        .entrySet()) {
      String path = entry.getKey();
      List<HoodieWriteStat> stats = entry.getValue();
      long totalFilesAdded = 0;
      long totalFilesUpdated = 0;
      long totalRecordsUpdated = 0;
      long totalRecordsInserted = 0;
      long totalBytesWritten = 0;
      long totalWriteErrors = 0;
      for (HoodieWriteStat stat : stats) {
        // A NULL previous commit marks a brand-new file (insert); otherwise an update.
        if (stat.getPrevCommit().equals(HoodieWriteStat.NULL_COMMIT)) {
          totalFilesAdded += 1;
          totalRecordsInserted += stat.getNumWrites();
        } else {
          totalFilesUpdated += 1;
          totalRecordsUpdated += stat.getNumUpdateWrites();
        }
        totalBytesWritten += stat.getTotalWriteBytes();
        totalWriteErrors += stat.getTotalWriteErrors();
      }
      rows.add(new String[] {path, String.valueOf(totalFilesAdded),
          String.valueOf(totalFilesUpdated), String.valueOf(totalRecordsInserted),
          String.valueOf(totalRecordsUpdated),
          NumericUtils.humanReadableByteCount(totalBytesWritten),
          String.valueOf(totalWriteErrors)});
    }
    return HoodiePrintHelper.print(
        new String[] {"Partition Path", "Total Files Added", "Total Files Updated",
            "Total Records Inserted", "Total Records Updated", "Total Bytes Written",
            "Total Errors"}, rows.toArray(new String[rows.size()][]));
  }

  /**
   * Shows per-file write statistics for a single commit.
   *
   * @param commitTime commit to inspect; must exist in the timeline
   */
  @CliCommand(value = "commit showfiles", help = "Show file level details of a commit")
  public String showCommitFiles(
      @CliOption(key = {"commit"}, help = "Commit to show")
      final String commitTime) throws Exception {
    if (!HoodieCLI.tableMetadata.getAllCommits().contains(commitTime)) {
      return "Commit " + commitTime + " not found in Commits " + HoodieCLI.tableMetadata
          .getAllCommits();
    }
    HoodieCommitMetadata meta = HoodieCLI.tableMetadata.getAllCommitMetadata().get(commitTime);
    List<String[]> rows = new ArrayList<String[]>();
    for (Map.Entry<String, List<HoodieWriteStat>> entry : meta.getPartitionToWriteStats()
        .entrySet()) {
      String path = entry.getKey();
      List<HoodieWriteStat> stats = entry.getValue();
      for (HoodieWriteStat stat : stats) {
        rows.add(new String[] {path, stat.getFileId(), stat.getPrevCommit(),
            String.valueOf(stat.getNumUpdateWrites()), String.valueOf(stat.getNumWrites()),
            String.valueOf(stat.getTotalWriteBytes()),
            String.valueOf(stat.getTotalWriteErrors())});
      }
    }
    return HoodiePrintHelper.print(
        new String[] {"Partition Path", "File ID", "Previous Commit", "Total Records Updated",
            "Total Records Written", "Total Bytes Written", "Total Errors"},
        rows.toArray(new String[rows.size()][]));
  }

  @CliAvailabilityIndicator({"commits compare"})
  public boolean isCompareCommitsAvailable() {
    return HoodieCLI.tableMetadata != null;
  }

  /**
   * Compares the connected dataset's commit timeline with another dataset's and
   * reports which side is ahead and by how many commits.
   *
   * @param path base path of the dataset to compare against
   */
  @CliCommand(value = "commits compare", help = "Compare commits with another Hoodie dataset")
  public String compareCommits(
      @CliOption(key = {"path"}, help = "Path of the dataset to compare to")
      final String path) throws Exception {
    HoodieTableMetadata target = new HoodieTableMetadata(HoodieCLI.fs, path);
    HoodieTableMetadata source = HoodieCLI.tableMetadata;
    // "0" stands in for "no commits yet" so timeline comparison still works.
    String targetLatestCommit =
        target.isCommitsEmpty() ? "0" : target.getAllCommits().lastCommit();
    String sourceLatestCommit =
        source.isCommitsEmpty() ? "0" : source.getAllCommits().lastCommit();
    if (sourceLatestCommit != null && HoodieCommits
        .isCommit1After(targetLatestCommit, sourceLatestCommit)) {
      // source is behind the target
      List<String> commitsToCatchup = target.findCommitsSinceTs(sourceLatestCommit);
      return "Source " + source.getTableName() + " is behind by " + commitsToCatchup.size()
          + " commits. Commits to catch up - " + commitsToCatchup;
    } else {
      List<String> commitsToCatchup = source.findCommitsSinceTs(targetLatestCommit);
      return "Source " + source.getTableName() + " is ahead by " + commitsToCatchup.size()
          + " commits. Commits to catch up - " + commitsToCatchup;
    }
  }

  @CliAvailabilityIndicator({"commits sync"})
  public boolean isSyncCommitsAvailable() {
    return HoodieCLI.tableMetadata != null;
  }

  /**
   * Loads another dataset as the sync target and switches the shell into SYNC
   * state (enabling HoodieSyncCommand's "sync validate").
   *
   * @param path base path of the dataset to sync against
   */
  @CliCommand(value = "commits sync", help = "Compare commits with another Hoodie dataset")
  public String syncCommits(
      @CliOption(key = {"path"}, help = "Path of the dataset to compare to")
      final String path) throws Exception {
    HoodieCLI.syncTableMetadata = new HoodieTableMetadata(HoodieCLI.fs, path);
    HoodieCLI.state = HoodieCLI.CLIState.SYNC;
    return "Load sync state between " + HoodieCLI.tableMetadata.getTableName() + " and "
        + HoodieCLI.syncTableMetadata.getTableName();
  }
}

View File

@@ -0,0 +1,42 @@
/*
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.uber.hoodie.cli.commands;
import com.uber.hoodie.cli.HoodieCLI;
import com.uber.hoodie.common.model.HoodieTableMetadata;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.springframework.shell.core.CommandMarker;
import org.springframework.shell.core.annotation.CliCommand;
import org.springframework.shell.core.annotation.CliOption;
import org.springframework.stereotype.Component;
import java.io.IOException;
/**
 * Shell command for connecting the CLI to a Hoodie dataset.
 */
@Component
public class DatasetsCommand implements CommandMarker {

  /**
   * Connects to the dataset at the given base path: ensures the Hadoop
   * configuration and filesystem are initialized, loads the table metadata,
   * and moves the shell into DATASET state.
   *
   * @param path base path of the dataset
   * @throws IOException if the filesystem or metadata cannot be loaded
   */
  @CliCommand(value = "connect", help = "Connect to a hoodie dataset")
  public String connect(
      @CliOption(key = {"path"}, mandatory = true, help = "Base Path of the dataset")
      final String path) throws IOException {
    // Re-initialize the filesystem handle only when a fresh configuration was created.
    final boolean freshConf = HoodieCLI.initConf();
    HoodieCLI.initFS(freshConf);
    final HoodieTableMetadata metadata = new HoodieTableMetadata(HoodieCLI.fs, path);
    HoodieCLI.setTableMetadata(metadata);
    HoodieCLI.state = HoodieCLI.CLIState.DATASET;
    return "Metadata for table " + HoodieCLI.tableMetadata.getTableName() + " loaded";
  }
}

View File

@@ -0,0 +1,106 @@
/*
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.uber.hoodie.cli.commands;
import com.uber.hoodie.cli.utils.CommitUtil;
import com.uber.hoodie.cli.utils.HiveUtil;
import com.uber.hoodie.cli.HoodieCLI;
import com.uber.hoodie.common.model.HoodieCommits;
import com.uber.hoodie.common.model.HoodieTableMetadata;
import org.springframework.shell.core.CommandMarker;
import org.springframework.shell.core.annotation.CliAvailabilityIndicator;
import org.springframework.shell.core.annotation.CliCommand;
import org.springframework.shell.core.annotation.CliOption;
import org.springframework.stereotype.Component;
import java.util.List;
/**
 * Shell command for validating that a sync-target dataset has caught up with the
 * source dataset, by comparing Hive record counts and commit timelines.
 */
@Component
public class HoodieSyncCommand implements CommandMarker {

  @CliAvailabilityIndicator({"sync validate"})
  public boolean isSyncVerificationAvailable() {
    // Requires both a connected dataset and a sync target (set via "commits sync").
    return HoodieCLI.tableMetadata != null && HoodieCLI.syncTableMetadata != null;
  }

  /**
   * Counts records on both sides via Hive, compares latest commits, and reports
   * the count difference plus how many records the lagging side must catch up.
   *
   * @param mode           "complete" counts all records; "latestPartitions" samples
   *                       the most recent {@code partitionCount} partitions
   * @param srcDb          Hive database holding the source table
   * @param tgtDb          Hive database holding the target table
   * @param partitionCount number of recent partitions to validate in sampled mode
   * @param hiveServerUrl  HiveServer URL to connect to
   * @param hiveUser       Hive username (may be empty)
   * @param hivePass       Hive password (may be empty)
   * @throws Exception if Hive queries or metadata reads fail
   */
  @CliCommand(value = "sync validate", help = "Validate the sync by counting the number of records")
  public String validateSync(
      @CliOption(key = {"mode"}, unspecifiedDefaultValue = "complete", help = "Check mode")
      final String mode,
      @CliOption(key = {
          "sourceDb"}, unspecifiedDefaultValue = "rawdata", help = "source database")
      final String srcDb,
      @CliOption(key = {
          "targetDb"}, unspecifiedDefaultValue = "dwh_hoodie", help = "target database")
      final String tgtDb,
      @CliOption(key = {
          "partitionCount"}, unspecifiedDefaultValue = "5", help = "total number of recent partitions to validate")
      final int partitionCount,
      @CliOption(key = {
          "hiveServerUrl"}, mandatory = true, help = "hiveServerURL to connect to")
      final String hiveServerUrl,
      @CliOption(key = {
          "hiveUser"}, mandatory = false, unspecifiedDefaultValue = "", help = "hive username to connect to")
      final String hiveUser,
      @CliOption(key = {
          "hivePass"}, mandatory = true, unspecifiedDefaultValue = "", help = "hive password to connect to")
      final String hivePass) throws Exception {
    HoodieTableMetadata target = HoodieCLI.syncTableMetadata;
    HoodieTableMetadata source = HoodieCLI.tableMetadata;
    long sourceCount = 0;
    long targetCount = 0;
    if ("complete".equals(mode)) {
      sourceCount = HiveUtil.countRecords(hiveServerUrl, source, srcDb, hiveUser, hivePass);
      targetCount = HiveUtil.countRecords(hiveServerUrl, target, tgtDb, hiveUser, hivePass);
    } else if ("latestPartitions".equals(mode)) {
      sourceCount = HiveUtil.countRecords(hiveServerUrl, source, srcDb, partitionCount, hiveUser, hivePass);
      targetCount = HiveUtil.countRecords(hiveServerUrl, target, tgtDb, partitionCount, hiveUser, hivePass);
    }
    // NOTE(review): an unrecognized mode silently leaves both counts at 0 —
    // consider rejecting unknown modes explicitly.
    // "0" stands in for "no commits yet" so the timeline comparison still works.
    String targetLatestCommit =
        target.isCommitsEmpty() ? "0" : target.getAllCommits().lastCommit();
    String sourceLatestCommit =
        source.isCommitsEmpty() ? "0" : source.getAllCommits().lastCommit();
    if (sourceLatestCommit != null && HoodieCommits
        .isCommit1After(targetLatestCommit, sourceLatestCommit)) {
      // source is behind the target
      List<String> commitsToCatchup = target.findCommitsSinceTs(sourceLatestCommit);
      if (commitsToCatchup.isEmpty()) {
        return "Count difference now is (count(" + target.getTableName() + ") - count("
            + source.getTableName() + ") == " + (targetCount - sourceCount);
      } else {
        long newInserts = CommitUtil.countNewRecords(target, commitsToCatchup);
        return "Count difference now is (count(" + target.getTableName() + ") - count("
            + source.getTableName() + ") == " + (targetCount - sourceCount)
            + ". Catch up count is " + newInserts;
      }
    } else {
      // target is behind (or equal) — report from the source's perspective
      List<String> commitsToCatchup = source.findCommitsSinceTs(targetLatestCommit);
      if (commitsToCatchup.isEmpty()) {
        return "Count difference now is (count(" + source.getTableName() + ") - count("
            + target.getTableName() + ") == " + (sourceCount - targetCount);
      } else {
        long newInserts = CommitUtil.countNewRecords(source, commitsToCatchup);
        return "Count difference now is (count(" + source.getTableName() + ") - count("
            + target.getTableName() + ") == " + (sourceCount - targetCount)
            + ". Catch up count is " + newInserts;
      }
    }
  }
}

View File

@@ -0,0 +1,78 @@
/*
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.uber.hoodie.cli.commands;
import com.uber.hoodie.cli.HoodieCLI;
import com.uber.hoodie.cli.utils.InputStreamConsumer;
import com.uber.hoodie.cli.utils.SparkUtil;
import org.apache.spark.launcher.SparkLauncher;
import org.springframework.shell.core.CommandMarker;
import org.springframework.shell.core.annotation.CliAvailabilityIndicator;
import org.springframework.shell.core.annotation.CliCommand;
import org.springframework.shell.core.annotation.CliOption;
import org.springframework.stereotype.Component;
/**
 * Shell command for repairing partitions that contain duplicate records.
 */
@Component
public class RecordsCommand implements CommandMarker {

  @CliAvailabilityIndicator({"records deduplicate"})
  public boolean isRecordsDeduplicateAvailable() {
    return HoodieCLI.tableMetadata != null;
  }

  /**
   * Launches a Spark job (SparkMain DEDUPLICATE) that writes repaired, duplicate-free
   * files for the given partition into the output path, and reports the outcome
   * from the job's process exit code.
   *
   * @param duplicatedPartitionPath partition path containing the duplicates
   * @param repairedOutputPath      location where the repaired files are written
   * @param sparkPropertiesPath     path to a Spark properties file for the launcher
   * @throws Exception if the Spark job cannot be launched or awaited
   */
  @CliCommand(value = "records deduplicate", help = "De-duplicate a partition path contains duplicates & produce repaired files to replace with")
  public String deduplicate(
      @CliOption(key = {
          "duplicatedPartitionPath"}, help = "Partition Path containing the duplicates")
      final String duplicatedPartitionPath,
      @CliOption(key = {"repairedOutputPath"}, help = "Location to place the repaired files")
      final String repairedOutputPath,
      @CliOption(key = {"sparkProperties"}, help = "Spark Properites File Path")
      final String sparkPropertiesPath) throws Exception {
    SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath);
    sparkLauncher
        .addAppArgs(SparkMain.SparkCommand.DEDUPLICATE.toString(), duplicatedPartitionPath,
            repairedOutputPath, HoodieCLI.tableMetadata.getBasePath());
    Process process = sparkLauncher.launch();
    InputStreamConsumer.captureOutput(process);
    int exitCode = process.waitFor();
    // A zero exit code means the Spark job succeeded. The original check was
    // inverted: it reported success when exitCode != 0 and failure otherwise.
    if (exitCode == 0) {
      return "Deduplicated files placed in: " + repairedOutputPath;
    }
    return "Deduplication failed ";
  }
}

View File

@@ -0,0 +1,94 @@
/*
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.uber.hoodie.cli.commands;
import com.uber.hoodie.HoodieWriteClient;
import com.uber.hoodie.cli.DedupeSparkJob;
import com.uber.hoodie.cli.utils.SparkUtil;
import com.uber.hoodie.common.util.FSUtils;
import com.uber.hoodie.config.HoodieIndexConfig;
import com.uber.hoodie.config.HoodieWriteConfig;
import com.uber.hoodie.index.HoodieIndex;
import org.apache.log4j.Logger;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SQLContext;
/**
 * Entry point for Spark jobs launched from the CLI via {@code SparkLauncher}
 * (see CommitsCommand and RecordsCommand). args[0] selects the command; the
 * remaining args are command specific. The process exits with 0 on success and
 * a non-zero code on failure, which is what the launching commands check.
 */
public class SparkMain {

  protected final static Logger LOG = Logger.getLogger(SparkMain.class);

  /**
   * Commands this application can run; the enum name is passed as args[0].
   */
  enum SparkCommand {
    ROLLBACK,
    DEDUPLICATE
  }

  public static void main(String[] args) throws Exception {
    String command = args[0];
    LOG.info("Invoking SparkMain:" + command);
    SparkCommand cmd = SparkCommand.valueOf(command);
    JavaSparkContext jsc = SparkUtil.initJavaSparkConf("hoodie-cli-" + command);
    int returnCode = 0;
    if (SparkCommand.ROLLBACK.equals(cmd)) {
      // ROLLBACK <commitTime> <basePath>. Explicit check instead of `assert`:
      // asserts are disabled by default at runtime and would let bad input through.
      if (args.length != 3) {
        throw new IllegalArgumentException(
            "ROLLBACK expects <commitTime> <basePath>; got " + (args.length - 1) + " argument(s)");
      }
      returnCode = rollback(jsc, args[1], args[2]);
    } else if (SparkCommand.DEDUPLICATE.equals(cmd)) {
      // DEDUPLICATE <duplicatedPartitionPath> <repairedOutputPath> <basePath>
      if (args.length != 4) {
        throw new IllegalArgumentException(
            "DEDUPLICATE expects <duplicatedPartitionPath> <repairedOutputPath> <basePath>; got "
                + (args.length - 1) + " argument(s)");
      }
      returnCode = deduplicatePartitionPath(jsc, args[1], args[2], args[3]);
    }
    System.exit(returnCode);
  }

  /**
   * Repairs a partition containing duplicates, writing fixed files to repairedOutputPath.
   *
   * @return 0 always; DedupeSparkJob throws on failure
   */
  private static int deduplicatePartitionPath(JavaSparkContext jsc,
      String duplicatedPartitionPath,
      String repairedOutputPath,
      String basePath)
      throws Exception {
    DedupeSparkJob job = new DedupeSparkJob(basePath,
        duplicatedPartitionPath, repairedOutputPath, new SQLContext(jsc), FSUtils.getFs());
    job.fixDuplicates(true);
    return 0;
  }

  /**
   * Rolls back the given commit.
   *
   * @return 0 if the rollback succeeded, -1 otherwise. Callers (CommitsCommand)
   *     treat a non-zero process exit code as failure, so success MUST return 0;
   *     the original implementation had these two return values inverted.
   */
  private static int rollback(JavaSparkContext jsc, String commitTime, String basePath)
      throws Exception {
    HoodieWriteClient client = createHoodieClient(jsc, basePath);
    if (client.rollback(commitTime)) {
      LOG.info(String.format("The commit \"%s\" rolled back.", commitTime));
      return 0;
    }
    LOG.info(String.format("The commit \"%s\" failed to roll back.", commitTime));
    return -1;
  }

  /**
   * Builds a write client for the dataset at basePath, configured with the Bloom index.
   */
  private static HoodieWriteClient createHoodieClient(JavaSparkContext jsc, String basePath)
      throws Exception {
    HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath)
        .withIndexConfig(
            HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build())
        .build();
    return new HoodieWriteClient(jsc, config);
  }
}

View File

@@ -0,0 +1,136 @@
/*
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.uber.hoodie.cli.commands;
import com.codahale.metrics.Histogram;
import com.codahale.metrics.Snapshot;
import com.codahale.metrics.UniformReservoir;
import com.uber.hoodie.cli.HoodieCLI;
import com.uber.hoodie.cli.HoodiePrintHelper;
import com.uber.hoodie.common.model.HoodieCommitMetadata;
import com.uber.hoodie.common.util.FSUtils;
import com.uber.hoodie.common.util.NumericUtils;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.springframework.shell.core.CommandMarker;
import org.springframework.shell.core.annotation.CliAvailabilityIndicator;
import org.springframework.shell.core.annotation.CliCommand;
import org.springframework.shell.core.annotation.CliOption;
import org.springframework.stereotype.Component;
import java.io.IOException;
import java.text.DecimalFormat;
import java.util.HashMap;
import java.util.Map;
@Component
public class StatsCommand implements CommandMarker {

  @CliAvailabilityIndicator({"stats wa"})
  public boolean isWriteAmpAvailable() {
    return HoodieCLI.tableMetadata != null;
  }

  /**
   * Prints per-commit write amplification (total records written / update records
   * written) plus an aggregate row across all commits.
   *
   * @return a formatted table of write-amplification stats
   * @throws IOException if commit metadata cannot be read
   */
  @CliCommand(value = "stats wa", help = "Write Amplification. Ratio of how many records were upserted to how many records were actually written")
  public String writeAmplificationStats() throws IOException {
    long totalRecordsUpserted = 0;
    long totalRecordsWritten = 0;
    String[][] rows = new String[HoodieCLI.tableMetadata.getAllCommitMetadata().size() + 1][];
    int i = 0;
    DecimalFormat df = new DecimalFormat("#.00");
    for (Map.Entry<String, HoodieCommitMetadata> commit : HoodieCLI.tableMetadata
        .getAllCommitMetadata().entrySet()) {
      HoodieCommitMetadata metadata = commit.getValue();
      String waf = "0";
      // Guard against divide-by-zero for commits with no updated records.
      if (metadata.fetchTotalUpdateRecordsWritten() > 0) {
        waf = df.format(
            (float) metadata.fetchTotalRecordsWritten() / metadata
                .fetchTotalUpdateRecordsWritten());
      }
      rows[i++] = new String[] {commit.getKey(),
          String.valueOf(metadata.fetchTotalUpdateRecordsWritten()),
          String.valueOf(metadata.fetchTotalRecordsWritten()), waf};
      totalRecordsUpserted += metadata.fetchTotalUpdateRecordsWritten();
      totalRecordsWritten += metadata.fetchTotalRecordsWritten();
    }
    String waf = "0";
    if (totalRecordsUpserted > 0) {
      waf = df.format((float) totalRecordsWritten / totalRecordsUpserted);
    }
    rows[i] = new String[] {"Total", String.valueOf(totalRecordsUpserted),
        String.valueOf(totalRecordsWritten), waf};
    // Fixed typo in the displayed column header: "Amplifiation" -> "Amplification".
    return HoodiePrintHelper.print(
        new String[] {"CommitTime", "Total Upserted", "Total Written",
            "Write Amplification Factor"}, rows);
  }

  /**
   * Renders one row of file-size summary stats (min/percentiles/mean/max/count/stddev)
   * for a single commit from a histogram snapshot.
   */
  private String[] printFileSizeHistogram(String commitTime, Snapshot s) {
    return new String[]{
        commitTime,
        NumericUtils.humanReadableByteCount(s.getMin()),
        NumericUtils.humanReadableByteCount(s.getValue(0.1)),
        NumericUtils.humanReadableByteCount(s.getMedian()),
        NumericUtils.humanReadableByteCount(s.getMean()),
        NumericUtils.humanReadableByteCount(s.get95thPercentile()),
        NumericUtils.humanReadableByteCount(s.getMax()),
        String.valueOf(s.size()),
        NumericUtils.humanReadableByteCount(s.getStdDev())
    };
  }

  /**
   * Summarizes file-size distributions (per commit and overall) for files selected
   * by a partition glob under the dataset base path.
   *
   * @param globRegex partition glob, e.g. {@code 2016/08/02}; defaults to all partitions
   * @return a formatted table of size stats, one row per commit plus an "ALL" row
   * @throws IOException on file-system access errors
   */
  @CliCommand(value = "stats filesizes", help = "File Sizes. Display summary stats on sizes of files")
  public String fileSizeStats(
      @CliOption(key = {"partitionPath"}, help = "regex to select files, eg: 2016/08/02", unspecifiedDefaultValue = "*/*/*")
      final String globRegex) throws IOException {
    FileSystem fs = HoodieCLI.fs;
    String globPath = String.format("%s/%s/*",
        HoodieCLI.tableMetadata.getBasePath(),
        globRegex);
    FileStatus[] statuses = fs.globStatus(new Path(globPath));
    // max, min, #small files < 10MB, 50th, avg, 95th
    final int MAX_FILES = 1000000;
    Histogram globalHistogram = new Histogram(new UniformReservoir(MAX_FILES));
    HashMap<String, Histogram> commitHistoMap = new HashMap<String, Histogram>();
    for (FileStatus fileStatus : statuses) {
      // Commit time is encoded in the data file name.
      String commitTime = FSUtils.getCommitTime(fileStatus.getPath().getName());
      long sz = fileStatus.getLen();
      if (!commitHistoMap.containsKey(commitTime)) {
        commitHistoMap.put(commitTime, new Histogram(new UniformReservoir(MAX_FILES)));
      }
      commitHistoMap.get(commitTime).update(sz);
      globalHistogram.update(sz);
    }
    String[][] rows = new String[commitHistoMap.size() + 1][];
    int ind = 0;
    // Iterate entries directly instead of keySet + get.
    for (Map.Entry<String, Histogram> entry : commitHistoMap.entrySet()) {
      rows[ind++] = printFileSizeHistogram(entry.getKey(), entry.getValue().getSnapshot());
    }
    Snapshot s = globalHistogram.getSnapshot();
    rows[ind++] = printFileSizeHistogram("ALL", s);
    return HoodiePrintHelper.print(
        new String[] {"CommitTime", "Min", "10th", "50th", "avg", "95th", "Max", "NumFiles", "StdDev"}, rows);
  }
}

View File

@@ -0,0 +1,34 @@
/*
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.uber.hoodie.cli.commands;
import org.springframework.shell.core.CommandMarker;
import org.springframework.shell.core.annotation.CliCommand;
import org.springframework.shell.core.annotation.CliOption;
import org.springframework.stereotype.Component;
@Component
public class UtilsCommand implements CommandMarker {

  /**
   * Loads a class by fully-qualified name and reports the location it was loaded from.
   * Classes loaded by the bootstrap classloader (e.g. {@code java.lang.String}) have
   * no {@code CodeSource}; the original code would NPE on them — handle that case.
   *
   * @param clazz fully-qualified class name to load
   * @return the external form of the code-source URL, or a message for bootstrap classes
   * @throws Exception if the class cannot be found
   */
  @CliCommand(value = "utils loadClass", help = "Load a class" )
  public String loadClass(
      @CliOption(key = {"class"}, help = "Check mode" ) final String clazz
  ) throws Exception {
    Class<?> klass = Class.forName(clazz);
    java.security.CodeSource codeSource = klass.getProtectionDomain().getCodeSource();
    if (codeSource == null) {
      // Bootstrap-loaded classes have no code source; avoid a NullPointerException.
      return "Class " + klass.getName() + " has no code source (bootstrap classloader)";
    }
    return codeSource.getLocation().toExternalForm();
  }
}

View File

@@ -0,0 +1,38 @@
/*
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.uber.hoodie.cli.utils;
import com.uber.hoodie.common.model.HoodieCommitMetadata;
import com.uber.hoodie.common.model.HoodieTableMetadata;
import java.io.IOException;
import java.util.List;
import java.util.SortedMap;
import java.util.TreeMap;
public class CommitUtil {

  /**
   * Sums the number of newly inserted records (total written minus updates) across
   * the given commits of the target dataset.
   *
   * @param target           metadata of the dataset to inspect
   * @param commitsToCatchup commit times whose insert counts should be summed
   * @return total number of newly inserted records across the commits
   * @throws IOException              if commit metadata cannot be read
   * @throws IllegalArgumentException if a requested commit is not present in the metadata
   */
  public static long countNewRecords(HoodieTableMetadata target, List<String> commitsToCatchup)
      throws IOException {
    long totalNew = 0;
    SortedMap<String, HoodieCommitMetadata> metadataByCommit = target.getAllCommitMetadata();
    for (String commit : commitsToCatchup) {
      HoodieCommitMetadata metadata = metadataByCommit.get(commit);
      if (metadata == null) {
        // Fail with a clear message instead of an NPE when an unknown commit is requested.
        throw new IllegalArgumentException(
            "Commit " + commit + " not found in commit metadata of target dataset");
      }
      totalNew += metadata.fetchTotalRecordsWritten() - metadata.fetchTotalUpdateRecordsWritten();
    }
    return totalNew;
  }
}

View File

@@ -0,0 +1,125 @@
/*
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.uber.hoodie.cli.utils;
import com.uber.hoodie.common.model.HoodieTableMetadata;
import com.uber.hoodie.hadoop.HoodieInputFormat;
import org.apache.commons.dbcp.BasicDataSource;
import org.joda.time.DateTime;
import javax.sql.DataSource;
import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
public class HiveUtil {
  private static String driverName = "org.apache.hive.jdbc.HiveDriver";

  static {
    try {
      Class.forName(driverName);
    } catch (ClassNotFoundException e) {
      throw new IllegalStateException("Could not find " + driverName + " in classpath. ", e);
    }
  }

  /** Opens a new JDBC connection to Hive via a freshly-built data source. */
  private static Connection getConnection(String jdbcUrl, String user, String pass) throws SQLException {
    DataSource ds = getDatasource(jdbcUrl, user, pass);
    return ds.getConnection();
  }

  /** Builds a simple DBCP data source pointed at the given Hive JDBC URL. */
  private static DataSource getDatasource(String jdbcUrl, String user, String pass) {
    BasicDataSource ds = new BasicDataSource();
    ds.setDriverClassName(driverName);
    ds.setUrl(jdbcUrl);
    ds.setUsername(user);
    ds.setPassword(pass);
    return ds;
  }

  /**
   * Counts the records of the dataset's Hive table via a {@code count(*)}-style query.
   * Uses try-with-resources so the Connection, Statement and ResultSet are always
   * closed — the original leaked the Connection (only rs/stmt were closed).
   *
   * @return the record count, or -1 if the query returned no row
   */
  public static long countRecords(String jdbcUrl, HoodieTableMetadata source, String dbName, String user, String pass) throws SQLException {
    try (Connection conn = HiveUtil.getConnection(jdbcUrl, user, pass);
        Statement stmt = conn.createStatement()) {
      //stmt.execute("set mapred.job.queue.name=<queue_name>");
      stmt.execute("set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat" );
      stmt.execute("set hive.stats.autogather=false" );
      System.out.println("Class " + HoodieInputFormat.class.getName());
      // NOTE(review): db/table names come from dataset metadata, not user input, but
      // this is still string-built SQL — confirm they are trusted upstream.
      try (ResultSet rs = stmt.executeQuery(
          "select count(`_hoodie_commit_time`) as cnt from " + dbName + "." + source
              .getTableName())) {
        long count = -1;
        if (rs.next()) {
          count = rs.getLong("cnt");
        }
        System.out.println("Total records in " + source.getTableName() + " is " + count);
        return count;
      }
    }
  }

  /**
   * Counts records restricted to the last {@code partitions} daily partitions,
   * computing the [start, end] datestr window from today's date.
   */
  public static long countRecords(String jdbcUrl, HoodieTableMetadata source, String srcDb,
      int partitions, String user, String pass) throws SQLException {
    DateTime dateTime = DateTime.now();
    String endDateStr =
        dateTime.getYear() + "-" + String.format("%02d", dateTime.getMonthOfYear()) + "-" +
            String.format("%02d", dateTime.getDayOfMonth());
    dateTime = dateTime.minusDays(partitions);
    String startDateStr =
        dateTime.getYear() + "-" + String.format("%02d", dateTime.getMonthOfYear()) + "-" +
            String.format("%02d", dateTime.getDayOfMonth());
    System.out.println("Start date " + startDateStr + " and end date " + endDateStr);
    return countRecords(jdbcUrl, source, srcDb, startDateStr, endDateStr, user, pass);
  }

  /**
   * Counts records with {@code datestr} in the half-open window (startDateStr, endDateStr].
   * All JDBC resources are closed via try-with-resources (Connection was leaked before).
   *
   * @return the record count, or -1 if the query returned no row
   */
  private static long countRecords(String jdbcUrl, HoodieTableMetadata source, String srcDb, String startDateStr,
      String endDateStr, String user, String pass) throws SQLException {
    try (Connection conn = HiveUtil.getConnection(jdbcUrl, user, pass);
        Statement stmt = conn.createStatement()) {
      //stmt.execute("set mapred.job.queue.name=<queue_name>");
      stmt.execute("set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat");
      stmt.execute("set hive.stats.autogather=false");
      try (ResultSet rs = stmt.executeQuery(
          "select count(`_hoodie_commit_time`) as cnt from " + srcDb + "." + source
              .getTableName() + " where datestr>'" + startDateStr + "' and datestr<='"
              + endDateStr + "'")) {
        if (rs.next()) {
          return rs.getLong("cnt");
        }
        return -1;
      }
    }
  }
}

View File

@@ -0,0 +1,56 @@
/*
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.uber.hoodie.cli.utils;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.logging.Logger;
public class InputStreamConsumer extends Thread {
  protected final static Logger LOG = Logger.getLogger(InputStreamConsumer.class.getName());
  private InputStream is;

  /**
   * A thread that drains an InputStream line by line into the logger, preventing a
   * child process from blocking on a full stdout/stderr pipe buffer.
   *
   * @param is the stream to consume (typically a process's stdout or stderr)
   */
  public InputStreamConsumer(InputStream is) {
    this.is = is;
  }

  @Override
  public void run() {
    // try-with-resources closes the readers even on failure (the original never did).
    try (BufferedReader br = new BufferedReader(new InputStreamReader(is))) {
      String line;
      while ((line = br.readLine()) != null) {
        LOG.info(line);
      }
    } catch (IOException ioe) {
      // Log through the logger only; printStackTrace() bypassed the logging pipeline.
      LOG.severe(ioe.toString());
    }
  }

  /**
   * Starts two daemon-style consumer threads draining the given process's
   * stdout and stderr so the process cannot deadlock on full pipes.
   */
  public static void captureOutput(Process p) {
    InputStreamConsumer stdout;
    InputStreamConsumer errout;
    errout = new InputStreamConsumer(p.getErrorStream());
    stdout = new InputStreamConsumer(p.getInputStream());
    errout.start();
    stdout.start();
  }
}

View File

@@ -0,0 +1,75 @@
/*
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.uber.hoodie.cli.utils;
import com.uber.hoodie.HoodieWriteClient;
import com.uber.hoodie.cli.commands.SparkMain;
import org.apache.log4j.Logger;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.launcher.SparkLauncher;
import java.io.File;
import java.net.URISyntaxException;
public class SparkUtil {
  public static Logger logger = Logger.getLogger(SparkUtil.class);

  /**
   * Builds a SparkLauncher that runs {@link SparkMain} from the currently-executing
   * jar, adding every jar found in the sibling {@code lib} directory.
   *
   * TODO: Need to fix a bunch of hardcoded stuff here eg: history server, spark distro
   *
   * @param propertiesFile Spark properties file to pass to the launcher
   * @return a configured SparkLauncher
   * @throws URISyntaxException if the current jar's location cannot be resolved
   */
  public static SparkLauncher initLauncher(String propertiesFile) throws URISyntaxException {
    String currentJar = new File(
        SparkUtil.class.getProtectionDomain().getCodeSource().getLocation().toURI().getPath())
        .getAbsolutePath();
    SparkLauncher sparkLauncher =
        new SparkLauncher().setAppResource(currentJar)
            .setMainClass(SparkMain.class.getName())
            .setPropertiesFile(propertiesFile);
    File libDirectory = new File(new File(currentJar).getParent(), "lib");
    // File.list() returns null when the directory is missing or unreadable —
    // guard against the NPE and proceed without extra jars in that case.
    String[] libraries = libDirectory.list();
    if (libraries != null) {
      for (String library : libraries) {
        sparkLauncher.addJar(new File(libDirectory, library).getAbsolutePath());
      }
    } else {
      logger.warn("Could not list lib directory " + libDirectory.getAbsolutePath()
          + "; no additional jars added");
    }
    return sparkLauncher;
  }

  /**
   * Creates a Kryo-enabled yarn-client JavaSparkContext with event logging and
   * gzip-compressed block output configured.
   *
   * @param name application name for the Spark UI
   * @return an initialized JavaSparkContext
   */
  public static JavaSparkContext initJavaSparkConf(String name) {
    SparkConf sparkConf = new SparkConf().setAppName(name);
    sparkConf.setMaster("yarn-client");
    sparkConf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
    sparkConf.set("spark.driver.maxResultSize", "2g");
    sparkConf.set("spark.eventLog.overwrite", "true");
    sparkConf.set("spark.eventLog.enabled", "true");
    // Configure hadoop conf
    sparkConf.set("spark.hadoop.mapred.output.compress", "true");
    // Removed a bogus duplicate set of the codec key to "true" that was
    // immediately overwritten by the real codec class below.
    sparkConf.set("spark.hadoop.mapred.output.compression.codec", "org.apache.hadoop.io.compress.GzipCodec");
    sparkConf.set("spark.hadoop.mapred.output.compression.type", "BLOCK");
    sparkConf = HoodieWriteClient.registerClasses(sparkConf);
    JavaSparkContext jsc = new JavaSparkContext(sparkConf);
    jsc.hadoopConfiguration().setBoolean("parquet.enable.summary-metadata", false);
    return jsc;
  }
}