Import from Hoodie private repo: Part 1
This commit is contained in:
54
hoodie-cli/src/main/java/com/uber/hoodie/cli/HoodieCLI.java
Normal file
54
hoodie-cli/src/main/java/com/uber/hoodie/cli/HoodieCLI.java
Normal file
@@ -0,0 +1,54 @@
|
||||
/*
|
||||
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package com.uber.hoodie.cli;
|
||||
|
||||
import com.uber.hoodie.common.model.HoodieTableMetadata;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
public class HoodieCLI {
|
||||
public static Configuration conf;
|
||||
public static FileSystem fs;
|
||||
public static CLIState state = CLIState.INIT;
|
||||
public static HoodieTableMetadata tableMetadata;
|
||||
public static HoodieTableMetadata syncTableMetadata;
|
||||
|
||||
|
||||
public enum CLIState {
|
||||
INIT, DATASET, SYNC
|
||||
}
|
||||
|
||||
public static boolean initConf() {
|
||||
if (HoodieCLI.conf == null) {
|
||||
HoodieCLI.conf = new Configuration();
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
public static void initFS(boolean force) throws IOException {
|
||||
if(fs == null || force) {
|
||||
fs = FileSystem.get(conf);
|
||||
}
|
||||
}
|
||||
|
||||
public static void setTableMetadata(HoodieTableMetadata tableMetadata) {
|
||||
HoodieCLI.tableMetadata = tableMetadata;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,37 @@
|
||||
/*
|
||||
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package com.uber.hoodie.cli;
|
||||
|
||||
import org.springframework.core.Ordered;
|
||||
import org.springframework.core.annotation.Order;
|
||||
import org.springframework.shell.plugin.support.DefaultHistoryFileNameProvider;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
@Component
|
||||
@Order(Ordered.HIGHEST_PRECEDENCE)
|
||||
public class HoodieHistoryFileNameProvider extends DefaultHistoryFileNameProvider {
|
||||
|
||||
public String getHistoryFileName() {
|
||||
return "hoodie-cmd.log";
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getProviderName() {
|
||||
return "Hoodie file name provider";
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,34 @@
|
||||
/*
|
||||
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package com.uber.hoodie.cli;
|
||||
|
||||
import dnl.utils.text.table.TextTable;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.PrintStream;
|
||||
import java.nio.charset.Charset;
|
||||
|
||||
public class HoodiePrintHelper {
|
||||
|
||||
public static String print(String[] header, String[][] rows) {
|
||||
TextTable textTable = new TextTable(header, rows);
|
||||
ByteArrayOutputStream baos = new ByteArrayOutputStream();
|
||||
PrintStream ps = new PrintStream(baos);
|
||||
textTable.printTable(ps, 4);
|
||||
return new String(baos.toByteArray(), Charset.forName("utf-8"));
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,49 @@
|
||||
/*
|
||||
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package com.uber.hoodie.cli;
|
||||
|
||||
import org.springframework.core.Ordered;
|
||||
import org.springframework.core.annotation.Order;
|
||||
import org.springframework.shell.plugin.support.DefaultPromptProvider;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
@Component
|
||||
@Order(Ordered.HIGHEST_PRECEDENCE)
|
||||
public class HoodiePrompt extends DefaultPromptProvider {
|
||||
|
||||
@Override
|
||||
public String getPrompt() {
|
||||
switch (HoodieCLI.state) {
|
||||
case INIT:
|
||||
return "hoodie->";
|
||||
case DATASET:
|
||||
return "hoodie:" + HoodieCLI.tableMetadata.getTableName() + "->";
|
||||
case SYNC:
|
||||
return "hoodie:" + HoodieCLI.tableMetadata.getTableName() + " <==> "
|
||||
+ HoodieCLI.syncTableMetadata.getTableName() + "->";
|
||||
}
|
||||
if (HoodieCLI.tableMetadata != null)
|
||||
return "hoodie:" + HoodieCLI.tableMetadata.getTableName() + "->";
|
||||
return "hoodie->";
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getProviderName() {
|
||||
return "Hoodie provider";
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,55 @@
|
||||
/*
|
||||
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package com.uber.hoodie.cli;
|
||||
|
||||
import org.springframework.core.Ordered;
|
||||
import org.springframework.core.annotation.Order;
|
||||
import org.springframework.shell.plugin.support.DefaultBannerProvider;
|
||||
import org.springframework.shell.support.util.OsUtils;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
@Component @Order(Ordered.HIGHEST_PRECEDENCE) public class HoodieSplashScreen
|
||||
extends DefaultBannerProvider {
|
||||
private static String screen = "============================================" + OsUtils.LINE_SEPARATOR +
|
||||
"* *" + OsUtils.LINE_SEPARATOR +
|
||||
"* _ _ _ _ *" + OsUtils.LINE_SEPARATOR +
|
||||
"* | | | | | (_) *" + OsUtils.LINE_SEPARATOR +
|
||||
"* | |__| | ___ ___ __| |_ ___ *" + OsUtils.LINE_SEPARATOR +
|
||||
"* | __ |/ _ \\ / _ \\ / _` | |/ _ \\ *" +
|
||||
OsUtils.LINE_SEPARATOR +
|
||||
"* | | | | (_) | (_) | (_| | | __/ *" + OsUtils.LINE_SEPARATOR +
|
||||
"* |_| |_|\\___/ \\___/ \\__,_|_|\\___| *" +
|
||||
OsUtils.LINE_SEPARATOR +
|
||||
"* *" + OsUtils.LINE_SEPARATOR +
|
||||
"============================================" + OsUtils.LINE_SEPARATOR;
|
||||
|
||||
public String getBanner() {
|
||||
return screen;
|
||||
}
|
||||
|
||||
public String getVersion() {
|
||||
return "1.0";
|
||||
}
|
||||
|
||||
public String getWelcomeMessage() {
|
||||
return "Welcome to Hoodie CLI. Please type help if you are looking for help. ";
|
||||
}
|
||||
|
||||
@Override public String getProviderName() {
|
||||
return "Hoodie Banner";
|
||||
}
|
||||
}
|
||||
33
hoodie-cli/src/main/java/com/uber/hoodie/cli/Main.java
Normal file
33
hoodie-cli/src/main/java/com/uber/hoodie/cli/Main.java
Normal file
@@ -0,0 +1,33 @@
|
||||
/*
|
||||
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package com.uber.hoodie.cli;
|
||||
|
||||
import org.springframework.shell.Bootstrap;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
public class Main {
|
||||
/**
|
||||
* Main class that delegates to Spring Shell's Bootstrap class in order to simplify debugging inside an IDE
|
||||
*
|
||||
* @param args
|
||||
* @throws IOException
|
||||
*/
|
||||
public static void main(String[] args) throws IOException {
|
||||
Bootstrap.main(args);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,244 @@
|
||||
/*
|
||||
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package com.uber.hoodie.cli.commands;
|
||||
|
||||
import com.uber.hoodie.cli.HoodieCLI;
|
||||
import com.uber.hoodie.cli.HoodiePrintHelper;
|
||||
import com.uber.hoodie.cli.utils.InputStreamConsumer;
|
||||
import com.uber.hoodie.cli.utils.SparkUtil;
|
||||
import com.uber.hoodie.common.model.HoodieCommitMetadata;
|
||||
import com.uber.hoodie.common.model.HoodieCommits;
|
||||
import com.uber.hoodie.common.model.HoodieTableMetadata;
|
||||
import com.uber.hoodie.common.model.HoodieWriteStat;
|
||||
import com.uber.hoodie.common.util.NumericUtils;
|
||||
|
||||
import org.apache.spark.launcher.SparkLauncher;
|
||||
import org.springframework.shell.core.CommandMarker;
|
||||
import org.springframework.shell.core.annotation.CliAvailabilityIndicator;
|
||||
import org.springframework.shell.core.annotation.CliCommand;
|
||||
import org.springframework.shell.core.annotation.CliOption;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.SortedMap;
|
||||
|
||||
@Component
|
||||
public class CommitsCommand implements CommandMarker {
|
||||
@CliAvailabilityIndicator({"commits show"})
|
||||
public boolean isShowAvailable() {
|
||||
return HoodieCLI.tableMetadata != null;
|
||||
}
|
||||
|
||||
@CliAvailabilityIndicator({"commits refresh"})
|
||||
public boolean isRefreshAvailable() {
|
||||
return HoodieCLI.tableMetadata != null;
|
||||
}
|
||||
|
||||
@CliAvailabilityIndicator({"commit rollback"})
|
||||
public boolean isRollbackAvailable() {
|
||||
return HoodieCLI.tableMetadata != null;
|
||||
}
|
||||
|
||||
@CliAvailabilityIndicator({"commit show"})
|
||||
public boolean isCommitShowAvailable() {
|
||||
return HoodieCLI.tableMetadata != null;
|
||||
}
|
||||
|
||||
@CliCommand(value = "commits show", help = "Show the commits")
|
||||
public String showCommits(
|
||||
@CliOption(key = {
|
||||
"limit"}, mandatory = false, help = "Limit commits", unspecifiedDefaultValue = "10")
|
||||
final Integer limit) throws IOException {
|
||||
SortedMap<String, HoodieCommitMetadata> map =
|
||||
HoodieCLI.tableMetadata.getAllCommitMetadata();
|
||||
int arraySize =
|
||||
Math.min(limit, HoodieCLI.tableMetadata.getAllCommits().getCommitList().size());
|
||||
String[][] rows = new String[arraySize][];
|
||||
ArrayList<String> commitList =
|
||||
new ArrayList<String>(HoodieCLI.tableMetadata.getAllCommits().getCommitList());
|
||||
Collections.reverse(commitList);
|
||||
for (int i = 0; i < arraySize; i++) {
|
||||
String commit = commitList.get(i);
|
||||
HoodieCommitMetadata commitMetadata = map.get(commit);
|
||||
rows[i] = new String[] {commit,
|
||||
NumericUtils.humanReadableByteCount(commitMetadata.fetchTotalBytesWritten()),
|
||||
String.valueOf(commitMetadata.fetchTotalFilesInsert()),
|
||||
String.valueOf(commitMetadata.fetchTotalFilesUpdated()),
|
||||
String.valueOf(commitMetadata.fetchTotalPartitionsWritten()),
|
||||
String.valueOf(commitMetadata.fetchTotalRecordsWritten()),
|
||||
String.valueOf(commitMetadata.fetchTotalUpdateRecordsWritten()),
|
||||
String.valueOf(commitMetadata.fetchTotalWriteErrors())};
|
||||
}
|
||||
return HoodiePrintHelper.print(
|
||||
new String[] {"CommitTime", "Total Written (B)", "Total Files Added",
|
||||
"Total Files Updated", "Total Partitions Written", "Total Records Written",
|
||||
"Total Update Records Written", "Total Errors"}, rows);
|
||||
}
|
||||
|
||||
@CliCommand(value = "commits refresh", help = "Refresh the commits")
|
||||
public String refreshCommits() throws IOException {
|
||||
HoodieTableMetadata metadata =
|
||||
new HoodieTableMetadata(HoodieCLI.fs, HoodieCLI.tableMetadata.getBasePath());
|
||||
HoodieCLI.setTableMetadata(metadata);
|
||||
return "Metadata for table " + metadata.getTableName() + " refreshed.";
|
||||
}
|
||||
|
||||
@CliCommand(value = "commit rollback", help = "Rollback a commit")
|
||||
public String rollbackCommit(
|
||||
@CliOption(key = {"commit"}, help = "Commit to rollback")
|
||||
final String commitTime,
|
||||
@CliOption(key = {"sparkProperties"}, help = "Spark Properites File Path")
|
||||
final String sparkPropertiesPath) throws Exception {
|
||||
if (!HoodieCLI.tableMetadata.getAllCommits().contains(commitTime)) {
|
||||
return "Commit " + commitTime + " not found in Commits " + HoodieCLI.tableMetadata
|
||||
.getAllCommits();
|
||||
}
|
||||
SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath);
|
||||
sparkLauncher.addAppArgs(SparkMain.SparkCommand.ROLLBACK.toString(),
|
||||
commitTime,
|
||||
HoodieCLI.tableMetadata.getBasePath());
|
||||
Process process = sparkLauncher.launch();
|
||||
InputStreamConsumer.captureOutput(process);
|
||||
int exitCode = process.waitFor();
|
||||
// Refresh the current
|
||||
refreshCommits();
|
||||
if (exitCode != 0) {
|
||||
return "Commit " + commitTime + " failed to roll back";
|
||||
}
|
||||
return "Commit " + commitTime + " rolled back";
|
||||
}
|
||||
|
||||
@CliCommand(value = "commit showpartitions", help = "Show partition level details of a commit")
|
||||
public String showCommitPartitions(
|
||||
@CliOption(key = {"commit"}, help = "Commit to show")
|
||||
final String commitTime) throws Exception {
|
||||
if (!HoodieCLI.tableMetadata.getAllCommits().contains(commitTime)) {
|
||||
return "Commit " + commitTime + " not found in Commits " + HoodieCLI.tableMetadata
|
||||
.getAllCommits();
|
||||
}
|
||||
HoodieCommitMetadata meta = HoodieCLI.tableMetadata.getAllCommitMetadata().get(commitTime);
|
||||
List<String[]> rows = new ArrayList<String[]>();
|
||||
for (Map.Entry<String, List<HoodieWriteStat>> entry : meta.getPartitionToWriteStats()
|
||||
.entrySet()) {
|
||||
String path = entry.getKey();
|
||||
List<HoodieWriteStat> stats = entry.getValue();
|
||||
long totalFilesAdded = 0;
|
||||
long totalFilesUpdated = 0;
|
||||
long totalRecordsUpdated = 0;
|
||||
long totalRecordsInserted = 0;
|
||||
long totalBytesWritten = 0;
|
||||
long totalWriteErrors = 0;
|
||||
for (HoodieWriteStat stat : stats) {
|
||||
if (stat.getPrevCommit().equals(HoodieWriteStat.NULL_COMMIT)) {
|
||||
totalFilesAdded += 1;
|
||||
totalRecordsInserted += stat.getNumWrites();
|
||||
} else {
|
||||
totalFilesUpdated += 1;
|
||||
totalRecordsUpdated += stat.getNumUpdateWrites();
|
||||
}
|
||||
totalBytesWritten += stat.getTotalWriteBytes();
|
||||
totalWriteErrors += stat.getTotalWriteErrors();
|
||||
}
|
||||
rows.add(new String[] {path, String.valueOf(totalFilesAdded),
|
||||
String.valueOf(totalFilesUpdated), String.valueOf(totalRecordsInserted),
|
||||
String.valueOf(totalRecordsUpdated),
|
||||
NumericUtils.humanReadableByteCount(totalBytesWritten),
|
||||
String.valueOf(totalWriteErrors)});
|
||||
|
||||
}
|
||||
return HoodiePrintHelper.print(
|
||||
new String[] {"Partition Path", "Total Files Added", "Total Files Updated",
|
||||
"Total Records Inserted", "Total Records Updated", "Total Bytes Written",
|
||||
"Total Errors"}, rows.toArray(new String[rows.size()][]));
|
||||
}
|
||||
|
||||
@CliCommand(value = "commit showfiles", help = "Show file level details of a commit")
|
||||
public String showCommitFiles(
|
||||
@CliOption(key = {"commit"}, help = "Commit to show")
|
||||
final String commitTime) throws Exception {
|
||||
if (!HoodieCLI.tableMetadata.getAllCommits().contains(commitTime)) {
|
||||
return "Commit " + commitTime + " not found in Commits " + HoodieCLI.tableMetadata
|
||||
.getAllCommits();
|
||||
}
|
||||
HoodieCommitMetadata meta = HoodieCLI.tableMetadata.getAllCommitMetadata().get(commitTime);
|
||||
List<String[]> rows = new ArrayList<String[]>();
|
||||
for (Map.Entry<String, List<HoodieWriteStat>> entry : meta.getPartitionToWriteStats()
|
||||
.entrySet()) {
|
||||
String path = entry.getKey();
|
||||
List<HoodieWriteStat> stats = entry.getValue();
|
||||
for (HoodieWriteStat stat : stats) {
|
||||
rows.add(new String[] {path, stat.getFileId(), stat.getPrevCommit(),
|
||||
String.valueOf(stat.getNumUpdateWrites()), String.valueOf(stat.getNumWrites()),
|
||||
String.valueOf(stat.getTotalWriteBytes()),
|
||||
String.valueOf(stat.getTotalWriteErrors())});
|
||||
}
|
||||
}
|
||||
return HoodiePrintHelper.print(
|
||||
new String[] {"Partition Path", "File ID", "Previous Commit", "Total Records Updated",
|
||||
"Total Records Written", "Total Bytes Written", "Total Errors"},
|
||||
rows.toArray(new String[rows.size()][]));
|
||||
}
|
||||
|
||||
@CliAvailabilityIndicator({"commits compare"})
|
||||
public boolean isCompareCommitsAvailable() {
|
||||
return HoodieCLI.tableMetadata != null;
|
||||
}
|
||||
|
||||
@CliCommand(value = "commits compare", help = "Compare commits with another Hoodie dataset")
|
||||
public String compareCommits(
|
||||
@CliOption(key = {"path"}, help = "Path of the dataset to compare to")
|
||||
final String path) throws Exception {
|
||||
HoodieTableMetadata target = new HoodieTableMetadata(HoodieCLI.fs, path);
|
||||
HoodieTableMetadata source = HoodieCLI.tableMetadata;
|
||||
String targetLatestCommit =
|
||||
target.isCommitsEmpty() ? "0" : target.getAllCommits().lastCommit();
|
||||
String sourceLatestCommit =
|
||||
source.isCommitsEmpty() ? "0" : source.getAllCommits().lastCommit();
|
||||
|
||||
if (sourceLatestCommit != null && HoodieCommits
|
||||
.isCommit1After(targetLatestCommit, sourceLatestCommit)) {
|
||||
// source is behind the target
|
||||
List<String> commitsToCatchup = target.findCommitsSinceTs(sourceLatestCommit);
|
||||
return "Source " + source.getTableName() + " is behind by " + commitsToCatchup.size()
|
||||
+ " commits. Commits to catch up - " + commitsToCatchup;
|
||||
} else {
|
||||
List<String> commitsToCatchup = source.findCommitsSinceTs(targetLatestCommit);
|
||||
return "Source " + source.getTableName() + " is ahead by " + commitsToCatchup.size()
|
||||
+ " commits. Commits to catch up - " + commitsToCatchup;
|
||||
}
|
||||
}
|
||||
|
||||
@CliAvailabilityIndicator({"commits sync"})
|
||||
public boolean isSyncCommitsAvailable() {
|
||||
return HoodieCLI.tableMetadata != null;
|
||||
}
|
||||
|
||||
@CliCommand(value = "commits sync", help = "Compare commits with another Hoodie dataset")
|
||||
public String syncCommits(
|
||||
@CliOption(key = {"path"}, help = "Path of the dataset to compare to")
|
||||
final String path) throws Exception {
|
||||
HoodieCLI.syncTableMetadata = new HoodieTableMetadata(HoodieCLI.fs, path);
|
||||
HoodieCLI.state = HoodieCLI.CLIState.SYNC;
|
||||
return "Load sync state between " + HoodieCLI.tableMetadata.getTableName() + " and "
|
||||
+ HoodieCLI.syncTableMetadata.getTableName();
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,42 @@
|
||||
/*
|
||||
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package com.uber.hoodie.cli.commands;
|
||||
|
||||
import com.uber.hoodie.cli.HoodieCLI;
|
||||
import com.uber.hoodie.common.model.HoodieTableMetadata;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.springframework.shell.core.CommandMarker;
|
||||
import org.springframework.shell.core.annotation.CliCommand;
|
||||
import org.springframework.shell.core.annotation.CliOption;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
@Component
|
||||
public class DatasetsCommand implements CommandMarker {
|
||||
@CliCommand(value = "connect", help = "Connect to a hoodie dataset")
|
||||
public String connect(
|
||||
@CliOption(key = {"path"}, mandatory = true, help = "Base Path of the dataset")
|
||||
final String path) throws IOException {
|
||||
boolean initialized = HoodieCLI.initConf();
|
||||
HoodieCLI.initFS(initialized);
|
||||
HoodieCLI.setTableMetadata(new HoodieTableMetadata(HoodieCLI.fs, path));
|
||||
HoodieCLI.state = HoodieCLI.CLIState.DATASET;
|
||||
return "Metadata for table " + HoodieCLI.tableMetadata.getTableName() + " loaded";
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,106 @@
|
||||
/*
|
||||
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package com.uber.hoodie.cli.commands;
|
||||
|
||||
import com.uber.hoodie.cli.utils.CommitUtil;
|
||||
import com.uber.hoodie.cli.utils.HiveUtil;
|
||||
import com.uber.hoodie.cli.HoodieCLI;
|
||||
import com.uber.hoodie.common.model.HoodieCommits;
|
||||
import com.uber.hoodie.common.model.HoodieTableMetadata;
|
||||
import org.springframework.shell.core.CommandMarker;
|
||||
import org.springframework.shell.core.annotation.CliAvailabilityIndicator;
|
||||
import org.springframework.shell.core.annotation.CliCommand;
|
||||
import org.springframework.shell.core.annotation.CliOption;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
@Component
|
||||
public class HoodieSyncCommand implements CommandMarker {
|
||||
@CliAvailabilityIndicator({"sync validate"})
|
||||
public boolean isSyncVerificationAvailable() {
|
||||
return HoodieCLI.tableMetadata != null && HoodieCLI.syncTableMetadata != null;
|
||||
}
|
||||
|
||||
@CliCommand(value = "sync validate", help = "Validate the sync by counting the number of records")
|
||||
public String validateSync(
|
||||
@CliOption(key = {"mode"}, unspecifiedDefaultValue = "complete", help = "Check mode")
|
||||
final String mode,
|
||||
@CliOption(key = {
|
||||
"sourceDb"}, unspecifiedDefaultValue = "rawdata", help = "source database")
|
||||
final String srcDb,
|
||||
@CliOption(key = {
|
||||
"targetDb"}, unspecifiedDefaultValue = "dwh_hoodie", help = "target database")
|
||||
final String tgtDb,
|
||||
@CliOption(key = {
|
||||
"partitionCount"}, unspecifiedDefaultValue = "5", help = "total number of recent partitions to validate")
|
||||
final int partitionCount,
|
||||
@CliOption(key = {
|
||||
"hiveServerUrl"}, mandatory = true, help = "hiveServerURL to connect to")
|
||||
final String hiveServerUrl,
|
||||
@CliOption(key = {
|
||||
"hiveUser"}, mandatory = false, unspecifiedDefaultValue = "", help = "hive username to connect to")
|
||||
final String hiveUser,
|
||||
@CliOption(key = {
|
||||
"hivePass"}, mandatory = true, unspecifiedDefaultValue = "", help = "hive password to connect to")
|
||||
final String hivePass) throws Exception {
|
||||
HoodieTableMetadata target = HoodieCLI.syncTableMetadata;
|
||||
HoodieTableMetadata source = HoodieCLI.tableMetadata;
|
||||
long sourceCount = 0;
|
||||
long targetCount = 0;
|
||||
if ("complete".equals(mode)) {
|
||||
sourceCount = HiveUtil.countRecords(hiveServerUrl, source, srcDb, hiveUser, hivePass);
|
||||
targetCount = HiveUtil.countRecords(hiveServerUrl, target, tgtDb, hiveUser, hivePass);
|
||||
} else if ("latestPartitions".equals(mode)) {
|
||||
sourceCount = HiveUtil.countRecords(hiveServerUrl, source, srcDb, partitionCount, hiveUser, hivePass);
|
||||
targetCount = HiveUtil.countRecords(hiveServerUrl, target, tgtDb, partitionCount, hiveUser, hivePass);
|
||||
}
|
||||
|
||||
String targetLatestCommit =
|
||||
target.isCommitsEmpty() ? "0" : target.getAllCommits().lastCommit();
|
||||
String sourceLatestCommit =
|
||||
source.isCommitsEmpty() ? "0" : source.getAllCommits().lastCommit();
|
||||
|
||||
if (sourceLatestCommit != null && HoodieCommits
|
||||
.isCommit1After(targetLatestCommit, sourceLatestCommit)) {
|
||||
// source is behind the target
|
||||
List<String> commitsToCatchup = target.findCommitsSinceTs(sourceLatestCommit);
|
||||
if (commitsToCatchup.isEmpty()) {
|
||||
return "Count difference now is (count(" + target.getTableName() + ") - count("
|
||||
+ source.getTableName() + ") == " + (targetCount - sourceCount);
|
||||
} else {
|
||||
long newInserts = CommitUtil.countNewRecords(target, commitsToCatchup);
|
||||
return "Count difference now is (count(" + target.getTableName() + ") - count("
|
||||
+ source.getTableName() + ") == " + (targetCount - sourceCount)
|
||||
+ ". Catch up count is " + newInserts;
|
||||
}
|
||||
} else {
|
||||
List<String> commitsToCatchup = source.findCommitsSinceTs(targetLatestCommit);
|
||||
if (commitsToCatchup.isEmpty()) {
|
||||
return "Count difference now is (count(" + source.getTableName() + ") - count("
|
||||
+ target.getTableName() + ") == " + (sourceCount - targetCount);
|
||||
} else {
|
||||
long newInserts = CommitUtil.countNewRecords(source, commitsToCatchup);
|
||||
return "Count difference now is (count(" + source.getTableName() + ") - count("
|
||||
+ target.getTableName() + ") == " + (sourceCount - targetCount)
|
||||
+ ". Catch up count is " + newInserts;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,78 @@
|
||||
/*
|
||||
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package com.uber.hoodie.cli.commands;
|
||||
|
||||
import com.uber.hoodie.cli.HoodieCLI;
|
||||
import com.uber.hoodie.cli.utils.InputStreamConsumer;
|
||||
import com.uber.hoodie.cli.utils.SparkUtil;
|
||||
import org.apache.spark.launcher.SparkLauncher;
|
||||
import org.springframework.shell.core.CommandMarker;
|
||||
import org.springframework.shell.core.annotation.CliAvailabilityIndicator;
|
||||
import org.springframework.shell.core.annotation.CliCommand;
|
||||
import org.springframework.shell.core.annotation.CliOption;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
@Component
|
||||
public class RecordsCommand implements CommandMarker {
|
||||
|
||||
@CliAvailabilityIndicator({"records deduplicate"})
|
||||
public boolean isRecordsDeduplicateAvailable() {
|
||||
return HoodieCLI.tableMetadata != null;
|
||||
}
|
||||
|
||||
@CliCommand(value = "records deduplicate", help = "De-duplicate a partition path contains duplicates & produce repaired files to replace with")
|
||||
public String deduplicate(
|
||||
@CliOption(key = {
|
||||
"duplicatedPartitionPath"}, help = "Partition Path containing the duplicates")
|
||||
final String duplicatedPartitionPath,
|
||||
@CliOption(key = {"repairedOutputPath"}, help = "Location to place the repaired files")
|
||||
final String repairedOutputPath,
|
||||
@CliOption(key = {"sparkProperties"}, help = "Spark Properites File Path")
|
||||
final String sparkPropertiesPath) throws Exception {
|
||||
SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath);
|
||||
sparkLauncher
|
||||
.addAppArgs(SparkMain.SparkCommand.DEDUPLICATE.toString(), duplicatedPartitionPath,
|
||||
repairedOutputPath, HoodieCLI.tableMetadata.getBasePath());
|
||||
Process process = sparkLauncher.launch();
|
||||
InputStreamConsumer.captureOutput(process);
|
||||
int exitCode = process.waitFor();
|
||||
|
||||
if (exitCode != 0) {
|
||||
return "Deduplicated files placed in: " + repairedOutputPath;
|
||||
}
|
||||
return "Deduplication failed ";
|
||||
}
|
||||
|
||||
// @CliCommand(value = "records find", help = "Find Records in a hoodie dataset")
|
||||
// public String findRecords(
|
||||
// @CliOption(key = {"keys"}, help = "Keys To Find (Comma seperated)")
|
||||
// final String hoodieKeys,
|
||||
// @CliOption(key = {"sparkProperties"}, help = "Spark Properites File Path")
|
||||
// final String sparkPropertiesPath) throws Exception {
|
||||
// SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath);
|
||||
// sparkLauncher
|
||||
// .addAppArgs(SparkMain.RECORD_FIND, hoodieKeys, HoodieCLI.tableMetadata.getBasePath());
|
||||
// Process process = sparkLauncher.launch();
|
||||
// InputStreamConsumer.captureOutput(process);
|
||||
// int exitCode = process.waitFor();
|
||||
//
|
||||
// if (exitCode != 0) {
|
||||
// return "Deduplicated files placed in: " + repairedOutputPath;
|
||||
// }
|
||||
// return "Deduplication failed ";
|
||||
// }
|
||||
}
|
||||
@@ -0,0 +1,94 @@
|
||||
/*
|
||||
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package com.uber.hoodie.cli.commands;
|
||||
|
||||
import com.uber.hoodie.HoodieWriteClient;
|
||||
import com.uber.hoodie.cli.DedupeSparkJob;
|
||||
import com.uber.hoodie.cli.utils.SparkUtil;
|
||||
import com.uber.hoodie.common.util.FSUtils;
|
||||
import com.uber.hoodie.config.HoodieIndexConfig;
|
||||
import com.uber.hoodie.config.HoodieWriteConfig;
|
||||
import com.uber.hoodie.index.HoodieIndex;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.apache.spark.api.java.JavaSparkContext;
|
||||
import org.apache.spark.sql.SQLContext;
|
||||
|
||||
public class SparkMain {
|
||||
|
||||
protected final static Logger LOG = Logger.getLogger(SparkMain.class);
|
||||
|
||||
|
||||
/**
|
||||
* Commands
|
||||
*/
|
||||
enum SparkCommand {
|
||||
ROLLBACK,
|
||||
DEDUPLICATE
|
||||
}
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
String command = args[0];
|
||||
LOG.info("Invoking SparkMain:" + command);
|
||||
|
||||
SparkCommand cmd = SparkCommand.valueOf(command);
|
||||
|
||||
JavaSparkContext jsc = SparkUtil.initJavaSparkConf("hoodie-cli-" + command);
|
||||
int returnCode = 0;
|
||||
if (SparkCommand.ROLLBACK.equals(cmd)) {
|
||||
assert (args.length == 3);
|
||||
returnCode = rollback(jsc, args[1], args[2]);
|
||||
} else if(SparkCommand.DEDUPLICATE.equals(cmd)) {
|
||||
assert (args.length == 4);
|
||||
returnCode = deduplicatePartitionPath(jsc, args[1], args[2], args[3]);
|
||||
}
|
||||
|
||||
System.exit(returnCode);
|
||||
}
|
||||
|
||||
private static int deduplicatePartitionPath(JavaSparkContext jsc,
|
||||
String duplicatedPartitionPath,
|
||||
String repairedOutputPath,
|
||||
String basePath)
|
||||
throws Exception {
|
||||
DedupeSparkJob job = new DedupeSparkJob(basePath,
|
||||
duplicatedPartitionPath,repairedOutputPath,new SQLContext(jsc), FSUtils.getFs());
|
||||
job.fixDuplicates(true);
|
||||
return 0;
|
||||
}
|
||||
|
||||
private static int rollback(JavaSparkContext jsc, String commitTime, String basePath)
|
||||
throws Exception {
|
||||
HoodieWriteClient client = createHoodieClient(jsc, basePath);
|
||||
if (client.rollback(commitTime)) {
|
||||
LOG.info(String.format("The commit \"%s\" rolled back.", commitTime));
|
||||
return -1;
|
||||
} else {
|
||||
LOG.info(String.format("The commit \"%s\" failed to roll back.", commitTime));
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
private static HoodieWriteClient createHoodieClient(JavaSparkContext jsc, String basePath)
|
||||
throws Exception {
|
||||
HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath)
|
||||
.withIndexConfig(
|
||||
HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build())
|
||||
.build();
|
||||
return new HoodieWriteClient(jsc, config);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,136 @@
|
||||
/*
|
||||
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package com.uber.hoodie.cli.commands;
|
||||
|
||||
|
||||
import com.codahale.metrics.Histogram;
|
||||
import com.codahale.metrics.Snapshot;
|
||||
import com.codahale.metrics.UniformReservoir;
|
||||
import com.uber.hoodie.cli.HoodieCLI;
|
||||
import com.uber.hoodie.cli.HoodiePrintHelper;
|
||||
import com.uber.hoodie.common.model.HoodieCommitMetadata;
|
||||
import com.uber.hoodie.common.util.FSUtils;
|
||||
import com.uber.hoodie.common.util.NumericUtils;
|
||||
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.springframework.shell.core.CommandMarker;
|
||||
import org.springframework.shell.core.annotation.CliAvailabilityIndicator;
|
||||
import org.springframework.shell.core.annotation.CliCommand;
|
||||
import org.springframework.shell.core.annotation.CliOption;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.text.DecimalFormat;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
@Component
|
||||
public class StatsCommand implements CommandMarker {
|
||||
@CliAvailabilityIndicator({"stats wa"})
|
||||
public boolean isWriteAmpAvailable() {
|
||||
return HoodieCLI.tableMetadata != null;
|
||||
}
|
||||
|
||||
@CliCommand(value = "stats wa", help = "Write Amplification. Ratio of how many records were upserted to how many records were actually written")
|
||||
public String writeAmplificationStats() throws IOException {
|
||||
long totalRecordsUpserted = 0;
|
||||
long totalRecordsWritten = 0;
|
||||
|
||||
String[][] rows = new String[HoodieCLI.tableMetadata.getAllCommitMetadata().size() + 1][];
|
||||
int i = 0;
|
||||
DecimalFormat df = new DecimalFormat("#.00");
|
||||
for (Map.Entry<String, HoodieCommitMetadata> commit : HoodieCLI.tableMetadata
|
||||
.getAllCommitMetadata().entrySet()) {
|
||||
String waf = "0";
|
||||
if (commit.getValue().fetchTotalUpdateRecordsWritten() > 0) {
|
||||
waf = df.format(
|
||||
(float) commit.getValue().fetchTotalRecordsWritten() / commit.getValue()
|
||||
.fetchTotalUpdateRecordsWritten());
|
||||
}
|
||||
rows[i++] = new String[] {commit.getKey(),
|
||||
String.valueOf(commit.getValue().fetchTotalUpdateRecordsWritten()),
|
||||
String.valueOf(commit.getValue().fetchTotalRecordsWritten()), waf};
|
||||
totalRecordsUpserted += commit.getValue().fetchTotalUpdateRecordsWritten();
|
||||
totalRecordsWritten += commit.getValue().fetchTotalRecordsWritten();
|
||||
}
|
||||
String waf = "0";
|
||||
if (totalRecordsUpserted > 0) {
|
||||
waf = df.format((float) totalRecordsWritten / totalRecordsUpserted);
|
||||
}
|
||||
rows[i] = new String[] {"Total", String.valueOf(totalRecordsUpserted),
|
||||
String.valueOf(totalRecordsWritten), waf};
|
||||
return HoodiePrintHelper.print(
|
||||
new String[] {"CommitTime", "Total Upserted", "Total Written",
|
||||
"Write Amplifiation Factor"}, rows);
|
||||
|
||||
}
|
||||
|
||||
|
||||
private String[] printFileSizeHistogram(String commitTime, Snapshot s) {
|
||||
return new String[]{
|
||||
commitTime,
|
||||
NumericUtils.humanReadableByteCount(s.getMin()),
|
||||
NumericUtils.humanReadableByteCount(s.getValue(0.1)),
|
||||
NumericUtils.humanReadableByteCount(s.getMedian()),
|
||||
NumericUtils.humanReadableByteCount(s.getMean()),
|
||||
NumericUtils.humanReadableByteCount(s.get95thPercentile()),
|
||||
NumericUtils.humanReadableByteCount(s.getMax()),
|
||||
String.valueOf(s.size()),
|
||||
NumericUtils.humanReadableByteCount(s.getStdDev())
|
||||
};
|
||||
}
|
||||
|
||||
@CliCommand(value = "stats filesizes", help = "File Sizes. Display summary stats on sizes of files")
|
||||
public String fileSizeStats(
|
||||
@CliOption(key = {"partitionPath"}, help = "regex to select files, eg: 2016/08/02", unspecifiedDefaultValue = "*/*/*")
|
||||
final String globRegex) throws IOException {
|
||||
|
||||
FileSystem fs = HoodieCLI.fs;
|
||||
String globPath = String.format("%s/%s/*",
|
||||
HoodieCLI.tableMetadata.getBasePath(),
|
||||
globRegex);
|
||||
FileStatus[] statuses = fs.globStatus(new Path(globPath));
|
||||
|
||||
// max, min, #small files < 10MB, 50th, avg, 95th
|
||||
final int MAX_FILES = 1000000;
|
||||
Histogram globalHistogram = new Histogram(new UniformReservoir(MAX_FILES));
|
||||
HashMap<String, Histogram> commitHistoMap = new HashMap<String, Histogram>();
|
||||
for (FileStatus fileStatus: statuses) {
|
||||
String commitTime = FSUtils.getCommitTime(fileStatus.getPath().getName());
|
||||
long sz = fileStatus.getLen();
|
||||
if (!commitHistoMap.containsKey(commitTime)) {
|
||||
commitHistoMap.put(commitTime, new Histogram(new UniformReservoir(MAX_FILES)));
|
||||
}
|
||||
commitHistoMap.get(commitTime).update(sz);
|
||||
globalHistogram.update(sz);
|
||||
}
|
||||
|
||||
String[][] rows = new String[commitHistoMap.size() + 1][];
|
||||
int ind = 0;
|
||||
for (String commitTime: commitHistoMap.keySet()) {
|
||||
Snapshot s = commitHistoMap.get(commitTime).getSnapshot();
|
||||
rows[ind++] = printFileSizeHistogram(commitTime, s);
|
||||
}
|
||||
Snapshot s = globalHistogram.getSnapshot();
|
||||
rows[ind++] = printFileSizeHistogram("ALL", s);
|
||||
|
||||
return HoodiePrintHelper.print(
|
||||
new String[] {"CommitTime", "Min", "10th", "50th", "avg", "95th", "Max", "NumFiles", "StdDev"}, rows);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,34 @@
|
||||
/*
|
||||
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package com.uber.hoodie.cli.commands;
|
||||
|
||||
import org.springframework.shell.core.CommandMarker;
|
||||
import org.springframework.shell.core.annotation.CliCommand;
|
||||
import org.springframework.shell.core.annotation.CliOption;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
@Component
|
||||
public class UtilsCommand implements CommandMarker {
|
||||
@CliCommand(value = "utils loadClass", help = "Load a class" )
|
||||
public String loadClass(
|
||||
@CliOption(key = {"class"}, help = "Check mode" ) final String clazz
|
||||
) throws Exception {
|
||||
Class klass = Class.forName(clazz);
|
||||
return klass.getProtectionDomain().getCodeSource().getLocation().toExternalForm();
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,38 @@
|
||||
/*
|
||||
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package com.uber.hoodie.cli.utils;
|
||||
|
||||
import com.uber.hoodie.common.model.HoodieCommitMetadata;
|
||||
import com.uber.hoodie.common.model.HoodieTableMetadata;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
import java.util.SortedMap;
|
||||
import java.util.TreeMap;
|
||||
|
||||
public class CommitUtil {
|
||||
public static long countNewRecords(HoodieTableMetadata target, List<String> commitsToCatchup)
|
||||
throws IOException {
|
||||
long totalNew = 0;
|
||||
SortedMap<String, HoodieCommitMetadata> meta = target.getAllCommitMetadata();
|
||||
for(String commit:commitsToCatchup) {
|
||||
HoodieCommitMetadata c = meta.get(commit);
|
||||
totalNew += c.fetchTotalRecordsWritten() - c.fetchTotalUpdateRecordsWritten();
|
||||
}
|
||||
return totalNew;
|
||||
}
|
||||
}
|
||||
125
hoodie-cli/src/main/java/com/uber/hoodie/cli/utils/HiveUtil.java
Normal file
125
hoodie-cli/src/main/java/com/uber/hoodie/cli/utils/HiveUtil.java
Normal file
@@ -0,0 +1,125 @@
|
||||
/*
|
||||
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package com.uber.hoodie.cli.utils;
|
||||
|
||||
import com.uber.hoodie.common.model.HoodieTableMetadata;
|
||||
import com.uber.hoodie.hadoop.HoodieInputFormat;
|
||||
import org.apache.commons.dbcp.BasicDataSource;
|
||||
import org.joda.time.DateTime;
|
||||
|
||||
import javax.sql.DataSource;
|
||||
import java.sql.Connection;
|
||||
import java.sql.ResultSet;
|
||||
import java.sql.SQLException;
|
||||
import java.sql.Statement;
|
||||
|
||||
public class HiveUtil {
|
||||
private static String driverName = "org.apache.hive.jdbc.HiveDriver";
|
||||
|
||||
static {
|
||||
try {
|
||||
Class.forName(driverName);
|
||||
} catch (ClassNotFoundException e) {
|
||||
throw new IllegalStateException("Could not find " + driverName + " in classpath. ", e);
|
||||
}
|
||||
}
|
||||
|
||||
private static Connection connection;
|
||||
|
||||
private static Connection getConnection(String jdbcUrl, String user, String pass) throws SQLException {
|
||||
DataSource ds = getDatasource(jdbcUrl, user, pass);
|
||||
return ds.getConnection();
|
||||
}
|
||||
|
||||
private static DataSource getDatasource(String jdbcUrl, String user, String pass) {
|
||||
BasicDataSource ds = new BasicDataSource();
|
||||
ds.setDriverClassName(driverName);
|
||||
ds.setUrl(jdbcUrl);
|
||||
ds.setUsername(user);
|
||||
ds.setPassword(pass);
|
||||
return ds;
|
||||
}
|
||||
|
||||
public static long countRecords(String jdbcUrl, HoodieTableMetadata source, String dbName, String user, String pass) throws SQLException {
|
||||
Connection conn = HiveUtil.getConnection(jdbcUrl, user, pass);
|
||||
ResultSet rs = null;
|
||||
Statement stmt = conn.createStatement();
|
||||
try {
|
||||
//stmt.execute("set mapred.job.queue.name=<queue_name>");
|
||||
stmt.execute("set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat" );
|
||||
stmt.execute("set hive.stats.autogather=false" );
|
||||
System.out.println("Class " + HoodieInputFormat.class.getName());
|
||||
rs = stmt.executeQuery(
|
||||
"select count(`_hoodie_commit_time`) as cnt from " + dbName + "." + source
|
||||
.getTableName());
|
||||
long count = -1;
|
||||
if(rs.next()) {
|
||||
count = rs.getLong("cnt");
|
||||
}
|
||||
System.out.println("Total records in " + source.getTableName() + " is " + count);
|
||||
return count;
|
||||
} finally {
|
||||
if (rs != null) {
|
||||
rs.close();
|
||||
}
|
||||
if (stmt != null) {
|
||||
stmt.close();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public static long countRecords(String jdbcUrl, HoodieTableMetadata source, String srcDb,
|
||||
int partitions, String user, String pass) throws SQLException {
|
||||
DateTime dateTime = DateTime.now();
|
||||
String endDateStr =
|
||||
dateTime.getYear() + "-" + String.format("%02d", dateTime.getMonthOfYear()) + "-" +
|
||||
String.format("%02d", dateTime.getDayOfMonth());
|
||||
dateTime = dateTime.minusDays(partitions);
|
||||
String startDateStr =
|
||||
dateTime.getYear() + "-" + String.format("%02d", dateTime.getMonthOfYear()) + "-" +
|
||||
String.format("%02d", dateTime.getDayOfMonth());
|
||||
System.out.println("Start date " + startDateStr + " and end date " + endDateStr);
|
||||
return countRecords(jdbcUrl, source, srcDb, startDateStr, endDateStr, user, pass);
|
||||
}
|
||||
|
||||
private static long countRecords(String jdbcUrl, HoodieTableMetadata source, String srcDb, String startDateStr,
|
||||
String endDateStr, String user, String pass) throws SQLException {
|
||||
Connection conn = HiveUtil.getConnection(jdbcUrl, user, pass);
|
||||
ResultSet rs = null;
|
||||
Statement stmt = conn.createStatement();
|
||||
try {
|
||||
//stmt.execute("set mapred.job.queue.name=<queue_name>");
|
||||
stmt.execute("set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat");
|
||||
stmt.execute("set hive.stats.autogather=false");
|
||||
rs = stmt.executeQuery(
|
||||
"select count(`_hoodie_commit_time`) as cnt from " + srcDb + "." + source
|
||||
.getTableName() + " where datestr>'" + startDateStr + "' and datestr<='"
|
||||
+ endDateStr + "'");
|
||||
if(rs.next()) {
|
||||
return rs.getLong("cnt");
|
||||
}
|
||||
return -1;
|
||||
} finally {
|
||||
if (rs != null) {
|
||||
rs.close();
|
||||
}
|
||||
if (stmt != null) {
|
||||
stmt.close();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,56 @@
|
||||
/*
|
||||
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package com.uber.hoodie.cli.utils;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.InputStreamReader;
|
||||
import java.util.logging.Logger;
|
||||
|
||||
public class InputStreamConsumer extends Thread {
|
||||
protected final static Logger LOG = Logger.getLogger(InputStreamConsumer.class.getName());
|
||||
private InputStream is;
|
||||
public InputStreamConsumer(InputStream is) {
|
||||
this.is = is;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void run() {
|
||||
try {
|
||||
InputStreamReader isr = new InputStreamReader(is);
|
||||
BufferedReader br = new BufferedReader(isr);
|
||||
String line;
|
||||
while ( (line = br.readLine()) != null)
|
||||
LOG.info(line);
|
||||
} catch (IOException ioe) {
|
||||
LOG.severe(ioe.toString());
|
||||
ioe.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
public static void captureOutput(Process p) {
|
||||
InputStreamConsumer stdout;
|
||||
InputStreamConsumer errout;
|
||||
errout = new InputStreamConsumer(p.getErrorStream());
|
||||
stdout = new InputStreamConsumer(p.getInputStream());
|
||||
errout.start();
|
||||
stdout.start();
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
@@ -0,0 +1,75 @@
|
||||
/*
|
||||
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package com.uber.hoodie.cli.utils;
|
||||
|
||||
import com.uber.hoodie.HoodieWriteClient;
|
||||
import com.uber.hoodie.cli.commands.SparkMain;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.apache.spark.SparkConf;
|
||||
import org.apache.spark.api.java.JavaSparkContext;
|
||||
import org.apache.spark.launcher.SparkLauncher;
|
||||
|
||||
import java.io.File;
|
||||
import java.net.URISyntaxException;
|
||||
|
||||
public class SparkUtil {
|
||||
|
||||
public static Logger logger = Logger.getLogger(SparkUtil.class);
|
||||
|
||||
/**
|
||||
*
|
||||
* TODO: Need to fix a bunch of hardcoded stuff here eg: history server, spark distro
|
||||
*
|
||||
* @return
|
||||
* @throws URISyntaxException
|
||||
*/
|
||||
public static SparkLauncher initLauncher(String propertiesFile) throws URISyntaxException {
|
||||
String currentJar = new File(
|
||||
SparkUtil.class.getProtectionDomain().getCodeSource().getLocation().toURI().getPath())
|
||||
.getAbsolutePath();
|
||||
SparkLauncher sparkLauncher =
|
||||
new SparkLauncher().setAppResource(currentJar)
|
||||
.setMainClass(SparkMain.class.getName())
|
||||
.setPropertiesFile(propertiesFile);
|
||||
File libDirectory = new File(new File(currentJar).getParent(), "lib");
|
||||
for (String library : libDirectory.list()) {
|
||||
sparkLauncher.addJar(new File(libDirectory, library).getAbsolutePath());
|
||||
}
|
||||
return sparkLauncher;
|
||||
}
|
||||
|
||||
public static JavaSparkContext initJavaSparkConf(String name) {
|
||||
SparkConf sparkConf = new SparkConf().setAppName(name);
|
||||
sparkConf.setMaster("yarn-client");
|
||||
sparkConf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
|
||||
sparkConf.set("spark.driver.maxResultSize", "2g");
|
||||
sparkConf.set("spark.eventLog.overwrite", "true");
|
||||
sparkConf.set("spark.eventLog.enabled", "true");
|
||||
|
||||
// Configure hadoop conf
|
||||
sparkConf.set("spark.hadoop.mapred.output.compress", "true");
|
||||
sparkConf.set("spark.hadoop.mapred.output.compression.codec", "true");
|
||||
sparkConf.set("spark.hadoop.mapred.output.compression.codec", "org.apache.hadoop.io.compress.GzipCodec");
|
||||
sparkConf.set("spark.hadoop.mapred.output.compression.type", "BLOCK");
|
||||
|
||||
sparkConf = HoodieWriteClient.registerClasses(sparkConf);
|
||||
JavaSparkContext jsc = new JavaSparkContext(sparkConf);
|
||||
jsc.hadoopConfiguration().setBoolean("parquet.enable.summary-metadata", false);
|
||||
return jsc;
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user