1
0

Code-style formatting to conform to basic Checkstyle rules.

The code-style rules follow Google style with some changes:

1. Increase line length from 100 to 120
2. Disable JavaDoc-related Checkstyle rules, as these need more manual work.

Both source and test code are checked for code style.
This commit is contained in:
Balaji Varadarajan
2018-03-20 16:29:20 -07:00
committed by vinoth chandar
parent 987f5d6b96
commit 788e4f2d2e
200 changed files with 6209 additions and 5975 deletions

View File

@@ -35,10 +35,10 @@ public class HoodiePrompt extends DefaultPromptProvider {
case DATASET:
return "hoodie:" + tableName + "->";
case SYNC:
return "hoodie:" + tableName + " <==> "
+ HoodieCLI.syncTableMetadata.getTableConfig().getTableName() + "->";
return "hoodie:" + tableName + " <==> " + HoodieCLI.syncTableMetadata.getTableConfig().getTableName() + "->";
default:
return "hoodie:" + tableName + "->";
}
return "hoodie:" + tableName + "->";
}
return "hoodie->";
}

View File

@@ -24,22 +24,18 @@ import org.springframework.stereotype.Component;
@Component
@Order(Ordered.HIGHEST_PRECEDENCE)
public class HoodieSplashScreen
extends DefaultBannerProvider {
public class HoodieSplashScreen extends DefaultBannerProvider {
private static String screen =
"============================================" + OsUtils.LINE_SEPARATOR +
"* *" + OsUtils.LINE_SEPARATOR +
"* _ _ _ _ *" + OsUtils.LINE_SEPARATOR +
"* | | | | | (_) *" + OsUtils.LINE_SEPARATOR +
"* | |__| | ___ ___ __| |_ ___ *" + OsUtils.LINE_SEPARATOR +
"* | __ |/ _ \\ / _ \\ / _` | |/ _ \\ *" +
OsUtils.LINE_SEPARATOR +
"* | | | | (_) | (_) | (_| | | __/ *" + OsUtils.LINE_SEPARATOR +
"* |_| |_|\\___/ \\___/ \\__,_|_|\\___| *" +
OsUtils.LINE_SEPARATOR +
"* *" + OsUtils.LINE_SEPARATOR +
"============================================" + OsUtils.LINE_SEPARATOR;
private static String screen = "============================================" + OsUtils.LINE_SEPARATOR
+ "* *" + OsUtils.LINE_SEPARATOR
+ "* _ _ _ _ *" + OsUtils.LINE_SEPARATOR
+ "* | | | | | (_) *" + OsUtils.LINE_SEPARATOR
+ "* | |__| | ___ ___ __| |_ ___ *" + OsUtils.LINE_SEPARATOR
+ "* | __ |/ _ \\ / _ \\ / _` | |/ _ \\ *" + OsUtils.LINE_SEPARATOR
+ "* | | | | (_) | (_) | (_| | | __/ *" + OsUtils.LINE_SEPARATOR
+ "* |_| |_|\\___/ \\___/ \\__,_|_|\\___| *" + OsUtils.LINE_SEPARATOR
+ "* *" + OsUtils.LINE_SEPARATOR
+ "============================================" + OsUtils.LINE_SEPARATOR;
public String getBanner() {
return screen;

View File

@@ -22,8 +22,7 @@ import org.springframework.shell.Bootstrap;
public class Main {
/**
* Main class that delegates to Spring Shell's Bootstrap class in order to simplify debugging
* inside an IDE
* Main class that delegates to Spring Shell's Bootstrap class in order to simplify debugging inside an IDE
*/
public static void main(String[] args) throws IOException {
Bootstrap.main(args);

View File

@@ -47,13 +47,11 @@ public class ArchivedCommitsCommand implements CommandMarker {
}
@CliCommand(value = "show archived commits", help = "Read commits from archived files and show details")
public String showCommits(
@CliOption(key = {
"limit"}, mandatory = false, help = "Limit commits", unspecifiedDefaultValue = "10")
final Integer limit) throws IOException {
public String showCommits(@CliOption(key = {
"limit"}, mandatory = false, help = "Limit commits", unspecifiedDefaultValue = "10") final Integer limit)
throws IOException {
System.out
.println("===============> Showing only " + limit + " archived commits <===============");
System.out.println("===============> Showing only " + limit + " archived commits <===============");
String basePath = HoodieCLI.tableMetadata.getBasePath();
FileStatus[] fsStatuses = FSUtils.getFs(basePath, HoodieCLI.conf)
.globStatus(new Path(basePath + "/.hoodie/.commits_.archive*"));
@@ -61,8 +59,7 @@ public class ArchivedCommitsCommand implements CommandMarker {
int commits = 0;
for (FileStatus fs : fsStatuses) {
//read the archived file
HoodieLogFormat.Reader reader = HoodieLogFormat
.newReader(FSUtils.getFs(basePath, HoodieCLI.conf),
HoodieLogFormat.Reader reader = HoodieLogFormat.newReader(FSUtils.getFs(basePath, HoodieCLI.conf),
new HoodieLogFile(fs.getPath()), HoodieArchivedMetaEntry.getClassSchema());
List<IndexedRecord> readRecords = new ArrayList<>();
@@ -71,20 +68,19 @@ public class ArchivedCommitsCommand implements CommandMarker {
HoodieAvroDataBlock blk = (HoodieAvroDataBlock) reader.next();
List<IndexedRecord> records = blk.getRecords();
readRecords.addAll(records);
if(commits == limit) {
if (commits == limit) {
break;
}
commits++;
}
List<String[]> readCommits = readRecords.stream().map(r -> (GenericRecord) r)
.map(r -> readCommit(r)).collect(Collectors.toList());
List<String[]> readCommits = readRecords.stream().map(r -> (GenericRecord) r).map(r -> readCommit(r))
.collect(Collectors.toList());
allCommits.addAll(readCommits);
if(commits == limit) {
if (commits == limit) {
break;
}
}
return HoodiePrintHelper.print(
new String[]{"CommitTime", "CommitType", "CommitDetails"},
return HoodiePrintHelper.print(new String[] {"CommitTime", "CommitType", "CommitDetails"},
allCommits.toArray(new String[allCommits.size()][]));
}
@@ -122,6 +118,8 @@ public class ArchivedCommitsCommand implements CommandMarker {
commitDetails.add(record.get("hoodieSavePointMetadata").toString());
break;
}
default:
return commitDetails.toArray(new String[commitDetails.size()]);
}
} catch (Exception e) {
e.printStackTrace();

View File

@@ -13,6 +13,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.uber.hoodie.cli.commands;
import com.uber.hoodie.avro.model.HoodieCleanMetadata;
@@ -63,51 +64,46 @@ public class CleansCommand implements CommandMarker {
Collections.reverse(cleans);
for (int i = 0; i < cleans.size(); i++) {
HoodieInstant clean = cleans.get(i);
HoodieCleanMetadata cleanMetadata =
AvroUtils.deserializeHoodieCleanMetadata(timeline.getInstantDetails(clean).get());
rows[i] = new String[]{clean.getTimestamp(), cleanMetadata.getEarliestCommitToRetain(),
String.valueOf(cleanMetadata.getTotalFilesDeleted()),
String.valueOf(cleanMetadata.getTimeTakenInMillis())};
HoodieCleanMetadata cleanMetadata = AvroUtils
.deserializeHoodieCleanMetadata(timeline.getInstantDetails(clean).get());
rows[i] = new String[] {clean.getTimestamp(), cleanMetadata.getEarliestCommitToRetain(),
String.valueOf(cleanMetadata.getTotalFilesDeleted()), String.valueOf(cleanMetadata.getTimeTakenInMillis())};
}
return HoodiePrintHelper.print(
new String[]{"CleanTime", "EarliestCommandRetained", "Total Files Deleted",
"Total Time Taken"}, rows);
return HoodiePrintHelper
.print(new String[] {"CleanTime", "EarliestCommandRetained", "Total Files Deleted", "Total Time Taken"},
rows);
}
@CliCommand(value = "cleans refresh", help = "Refresh the commits")
public String refreshCleans() throws IOException {
HoodieTableMetaClient metadata =
new HoodieTableMetaClient(HoodieCLI.conf, HoodieCLI.tableMetadata.getBasePath());
HoodieTableMetaClient metadata = new HoodieTableMetaClient(HoodieCLI.conf, HoodieCLI.tableMetadata.getBasePath());
HoodieCLI.setTableMetadata(metadata);
return "Metadata for table " + metadata.getTableConfig().getTableName() + " refreshed.";
}
@CliCommand(value = "clean showpartitions", help = "Show partition level details of a clean")
public String showCleanPartitions(
@CliOption(key = {"clean"}, help = "clean to show")
final String commitTime) throws Exception {
public String showCleanPartitions(@CliOption(key = {"clean"}, help = "clean to show") final String commitTime)
throws Exception {
HoodieActiveTimeline activeTimeline = HoodieCLI.tableMetadata.getActiveTimeline();
HoodieTimeline timeline = activeTimeline.getCleanerTimeline().filterCompletedInstants();
HoodieInstant cleanInstant =
new HoodieInstant(false, HoodieTimeline.CLEAN_ACTION, commitTime);
HoodieInstant cleanInstant = new HoodieInstant(false, HoodieTimeline.CLEAN_ACTION, commitTime);
if (!timeline.containsInstant(cleanInstant)) {
return "Clean " + commitTime + " not found in metadata " + timeline;
}
HoodieCleanMetadata cleanMetadata =
AvroUtils.deserializeHoodieCleanMetadata(timeline.getInstantDetails(cleanInstant).get());
HoodieCleanMetadata cleanMetadata = AvroUtils.deserializeHoodieCleanMetadata(
timeline.getInstantDetails(cleanInstant).get());
List<String[]> rows = new ArrayList<>();
for (Map.Entry<String, HoodieCleanPartitionMetadata> entry : cleanMetadata
.getPartitionMetadata().entrySet()) {
for (Map.Entry<String, HoodieCleanPartitionMetadata> entry : cleanMetadata.getPartitionMetadata().entrySet()) {
String path = entry.getKey();
HoodieCleanPartitionMetadata stats = entry.getValue();
String policy = stats.getPolicy();
String totalSuccessDeletedFiles = String.valueOf(stats.getSuccessDeleteFiles().size());
String totalFailedDeletedFiles = String.valueOf(stats.getFailedDeleteFiles().size());
rows.add(new String[]{path, policy, totalSuccessDeletedFiles, totalFailedDeletedFiles});
rows.add(new String[] {path, policy, totalSuccessDeletedFiles, totalFailedDeletedFiles});
}
return HoodiePrintHelper.print(
new String[]{"Partition Path", "Cleaning policy", "Total Files Successfully Deleted",
new String[] {"Partition Path", "Cleaning policy", "Total Files Successfully Deleted",
"Total Failed Deletions"}, rows.toArray(new String[rows.size()][]));
}
}

View File

@@ -64,21 +64,18 @@ public class CommitsCommand implements CommandMarker {
}
@CliCommand(value = "commits show", help = "Show the commits")
public String showCommits(
@CliOption(key = {
"limit"}, mandatory = false, help = "Limit commits", unspecifiedDefaultValue = "10")
final Integer limit) throws IOException {
public String showCommits(@CliOption(key = {
"limit"}, mandatory = false, help = "Limit commits", unspecifiedDefaultValue = "10") final Integer limit)
throws IOException {
HoodieActiveTimeline activeTimeline = HoodieCLI.tableMetadata.getActiveTimeline();
HoodieTimeline timeline = activeTimeline.getCommitsTimeline()
.filterCompletedInstants();
HoodieTimeline timeline = activeTimeline.getCommitsTimeline().filterCompletedInstants();
List<HoodieInstant> commits = timeline.getInstants().collect(Collectors.toList());
String[][] rows = new String[commits.size()][];
Collections.reverse(commits);
for (int i = 0; i < commits.size(); i++) {
HoodieInstant commit = commits.get(i);
HoodieCommitMetadata commitMetadata =
HoodieCommitMetadata.fromBytes(timeline.getInstantDetails(commit).get());
rows[i] = new String[]{commit.getTimestamp(),
HoodieCommitMetadata commitMetadata = HoodieCommitMetadata.fromBytes(timeline.getInstantDetails(commit).get());
rows[i] = new String[] {commit.getTimestamp(),
NumericUtils.humanReadableByteCount(commitMetadata.fetchTotalBytesWritten()),
String.valueOf(commitMetadata.fetchTotalFilesInsert()),
String.valueOf(commitMetadata.fetchTotalFilesUpdated()),
@@ -88,39 +85,32 @@ public class CommitsCommand implements CommandMarker {
String.valueOf(commitMetadata.fetchTotalWriteErrors())};
}
return HoodiePrintHelper.print(
new String[]{"CommitTime", "Total Written (B)", "Total Files Added",
"Total Files Updated", "Total Partitions Written", "Total Records Written",
"Total Update Records Written", "Total Errors"}, rows);
new String[] {"CommitTime", "Total Written (B)", "Total Files Added", "Total Files Updated",
"Total Partitions Written", "Total Records Written", "Total Update Records Written", "Total Errors"}, rows);
}
@CliCommand(value = "commits refresh", help = "Refresh the commits")
public String refreshCommits() throws IOException {
HoodieTableMetaClient metadata =
new HoodieTableMetaClient(HoodieCLI.conf, HoodieCLI.tableMetadata.getBasePath());
HoodieTableMetaClient metadata = new HoodieTableMetaClient(HoodieCLI.conf, HoodieCLI.tableMetadata.getBasePath());
HoodieCLI.setTableMetadata(metadata);
return "Metadata for table " + metadata.getTableConfig().getTableName() + " refreshed.";
}
@CliCommand(value = "commit rollback", help = "Rollback a commit")
public String rollbackCommit(
@CliOption(key = {"commit"}, help = "Commit to rollback")
final String commitTime,
@CliOption(key = {"sparkProperties"}, help = "Spark Properites File Path")
final String sparkPropertiesPath) throws Exception {
public String rollbackCommit(@CliOption(key = {"commit"}, help = "Commit to rollback") final String commitTime,
@CliOption(key = {"sparkProperties"}, help = "Spark Properites File Path") final String sparkPropertiesPath)
throws Exception {
HoodieActiveTimeline activeTimeline = HoodieCLI.tableMetadata.getActiveTimeline();
HoodieTimeline timeline = activeTimeline.getCommitsTimeline()
.filterCompletedInstants();
HoodieInstant commitInstant = new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION,
commitTime);
HoodieTimeline timeline = activeTimeline.getCommitsTimeline().filterCompletedInstants();
HoodieInstant commitInstant = new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, commitTime);
if (!timeline.containsInstant(commitInstant)) {
return "Commit " + commitTime + " not found in Commits " + timeline;
}
SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath);
sparkLauncher.addAppArgs(SparkMain.SparkCommand.ROLLBACK.toString(),
commitTime,
HoodieCLI.tableMetadata.getBasePath());
sparkLauncher
.addAppArgs(SparkMain.SparkCommand.ROLLBACK.toString(), commitTime, HoodieCLI.tableMetadata.getBasePath());
Process process = sparkLauncher.launch();
InputStreamConsumer.captureOutput(process);
int exitCode = process.waitFor();
@@ -133,23 +123,18 @@ public class CommitsCommand implements CommandMarker {
}
@CliCommand(value = "commit showpartitions", help = "Show partition level details of a commit")
public String showCommitPartitions(
@CliOption(key = {"commit"}, help = "Commit to show")
final String commitTime) throws Exception {
public String showCommitPartitions(@CliOption(key = {"commit"}, help = "Commit to show") final String commitTime)
throws Exception {
HoodieActiveTimeline activeTimeline = HoodieCLI.tableMetadata.getActiveTimeline();
HoodieTimeline timeline = activeTimeline.getCommitsTimeline()
.filterCompletedInstants();
HoodieInstant commitInstant = new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION,
commitTime);
HoodieTimeline timeline = activeTimeline.getCommitsTimeline().filterCompletedInstants();
HoodieInstant commitInstant = new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, commitTime);
if (!timeline.containsInstant(commitInstant)) {
return "Commit " + commitTime + " not found in Commits " + timeline;
}
HoodieCommitMetadata meta =
HoodieCommitMetadata.fromBytes(activeTimeline.getInstantDetails(commitInstant).get());
HoodieCommitMetadata meta = HoodieCommitMetadata.fromBytes(activeTimeline.getInstantDetails(commitInstant).get());
List<String[]> rows = new ArrayList<String[]>();
for (Map.Entry<String, List<HoodieWriteStat>> entry : meta.getPartitionToWriteStats()
.entrySet()) {
for (Map.Entry<String, List<HoodieWriteStat>> entry : meta.getPartitionToWriteStats().entrySet()) {
String path = entry.getKey();
List<HoodieWriteStat> stats = entry.getValue();
long totalFilesAdded = 0;
@@ -169,50 +154,40 @@ public class CommitsCommand implements CommandMarker {
totalBytesWritten += stat.getTotalWriteBytes();
totalWriteErrors += stat.getTotalWriteErrors();
}
rows.add(new String[]{path, String.valueOf(totalFilesAdded),
String.valueOf(totalFilesUpdated), String.valueOf(totalRecordsInserted),
String.valueOf(totalRecordsUpdated),
NumericUtils.humanReadableByteCount(totalBytesWritten),
String.valueOf(totalWriteErrors)});
rows.add(new String[] {path, String.valueOf(totalFilesAdded), String.valueOf(totalFilesUpdated),
String.valueOf(totalRecordsInserted), String.valueOf(totalRecordsUpdated),
NumericUtils.humanReadableByteCount(totalBytesWritten), String.valueOf(totalWriteErrors)});
}
return HoodiePrintHelper.print(
new String[]{"Partition Path", "Total Files Added", "Total Files Updated",
"Total Records Inserted", "Total Records Updated", "Total Bytes Written",
"Total Errors"}, rows.toArray(new String[rows.size()][]));
new String[] {"Partition Path", "Total Files Added", "Total Files Updated", "Total Records Inserted",
"Total Records Updated", "Total Bytes Written", "Total Errors"}, rows.toArray(new String[rows.size()][]));
}
@CliCommand(value = "commit showfiles", help = "Show file level details of a commit")
public String showCommitFiles(
@CliOption(key = {"commit"}, help = "Commit to show")
final String commitTime) throws Exception {
public String showCommitFiles(@CliOption(key = {"commit"}, help = "Commit to show") final String commitTime)
throws Exception {
HoodieActiveTimeline activeTimeline = HoodieCLI.tableMetadata.getActiveTimeline();
HoodieTimeline timeline = activeTimeline.getCommitsTimeline()
.filterCompletedInstants();
HoodieInstant commitInstant = new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION,
commitTime);
HoodieTimeline timeline = activeTimeline.getCommitsTimeline().filterCompletedInstants();
HoodieInstant commitInstant = new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, commitTime);
if (!timeline.containsInstant(commitInstant)) {
return "Commit " + commitTime + " not found in Commits " + timeline;
}
HoodieCommitMetadata meta =
HoodieCommitMetadata.fromBytes(activeTimeline.getInstantDetails(commitInstant).get());
HoodieCommitMetadata meta = HoodieCommitMetadata.fromBytes(activeTimeline.getInstantDetails(commitInstant).get());
List<String[]> rows = new ArrayList<String[]>();
for (Map.Entry<String, List<HoodieWriteStat>> entry : meta.getPartitionToWriteStats()
.entrySet()) {
for (Map.Entry<String, List<HoodieWriteStat>> entry : meta.getPartitionToWriteStats().entrySet()) {
String path = entry.getKey();
List<HoodieWriteStat> stats = entry.getValue();
for (HoodieWriteStat stat : stats) {
rows.add(new String[]{path, stat.getFileId(), stat.getPrevCommit(),
String.valueOf(stat.getNumUpdateWrites()), String.valueOf(stat.getNumWrites()),
String.valueOf(stat.getTotalWriteBytes()),
rows.add(new String[] {path, stat.getFileId(), stat.getPrevCommit(), String.valueOf(stat.getNumUpdateWrites()),
String.valueOf(stat.getNumWrites()), String.valueOf(stat.getTotalWriteBytes()),
String.valueOf(stat.getTotalWriteErrors())});
}
}
return HoodiePrintHelper.print(
new String[]{"Partition Path", "File ID", "Previous Commit", "Total Records Updated",
"Total Records Written", "Total Bytes Written", "Total Errors"},
rows.toArray(new String[rows.size()][]));
new String[] {"Partition Path", "File ID", "Previous Commit", "Total Records Updated", "Total Records Written",
"Total Bytes Written", "Total Errors"}, rows.toArray(new String[rows.size()][]));
}
@CliAvailabilityIndicator({"commits compare"})
@@ -221,38 +196,30 @@ public class CommitsCommand implements CommandMarker {
}
@CliCommand(value = "commits compare", help = "Compare commits with another Hoodie dataset")
public String compareCommits(
@CliOption(key = {"path"}, help = "Path of the dataset to compare to")
final String path) throws Exception {
public String compareCommits(@CliOption(key = {"path"}, help = "Path of the dataset to compare to") final String path)
throws Exception {
HoodieTableMetaClient target = new HoodieTableMetaClient(HoodieCLI.conf, path);
HoodieTimeline targetTimeline = target.getActiveTimeline().getCommitsTimeline()
.filterCompletedInstants();
HoodieTimeline targetTimeline = target.getActiveTimeline().getCommitsTimeline().filterCompletedInstants();
HoodieTableMetaClient source = HoodieCLI.tableMetadata;
HoodieTimeline sourceTimeline = source.getActiveTimeline().getCommitsTimeline()
.filterCompletedInstants();
HoodieTimeline sourceTimeline = source.getActiveTimeline().getCommitsTimeline().filterCompletedInstants();
String targetLatestCommit =
targetTimeline.getInstants().iterator().hasNext() ? "0"
: targetTimeline.lastInstant().get().getTimestamp();
targetTimeline.getInstants().iterator().hasNext() ? "0" : targetTimeline.lastInstant().get().getTimestamp();
String sourceLatestCommit =
sourceTimeline.getInstants().iterator().hasNext() ? "0"
: sourceTimeline.lastInstant().get().getTimestamp();
sourceTimeline.getInstants().iterator().hasNext() ? "0" : sourceTimeline.lastInstant().get().getTimestamp();
if (sourceLatestCommit != null &&
HoodieTimeline
.compareTimestamps(targetLatestCommit, sourceLatestCommit, HoodieTimeline.GREATER)) {
if (sourceLatestCommit != null && HoodieTimeline.compareTimestamps(targetLatestCommit, sourceLatestCommit,
HoodieTimeline.GREATER)) {
// source is behind the target
List<String> commitsToCatchup =
targetTimeline.findInstantsAfter(sourceLatestCommit, Integer.MAX_VALUE)
.getInstants().map(HoodieInstant::getTimestamp).collect(Collectors.toList());
return "Source " + source.getTableConfig().getTableName() + " is behind by "
+ commitsToCatchup.size() + " commits. Commits to catch up - " + commitsToCatchup;
List<String> commitsToCatchup = targetTimeline.findInstantsAfter(sourceLatestCommit, Integer.MAX_VALUE)
.getInstants().map(HoodieInstant::getTimestamp).collect(Collectors.toList());
return "Source " + source.getTableConfig().getTableName() + " is behind by " + commitsToCatchup.size()
+ " commits. Commits to catch up - " + commitsToCatchup;
} else {
List<String> commitsToCatchup =
sourceTimeline.findInstantsAfter(targetLatestCommit, Integer.MAX_VALUE)
.getInstants().map(HoodieInstant::getTimestamp).collect(Collectors.toList());
return "Source " + source.getTableConfig().getTableName() + " is ahead by "
+ commitsToCatchup.size() + " commits. Commits to catch up - " + commitsToCatchup;
List<String> commitsToCatchup = sourceTimeline.findInstantsAfter(targetLatestCommit, Integer.MAX_VALUE)
.getInstants().map(HoodieInstant::getTimestamp).collect(Collectors.toList());
return "Source " + source.getTableConfig().getTableName() + " is ahead by " + commitsToCatchup.size()
+ " commits. Commits to catch up - " + commitsToCatchup;
}
}
@@ -262,13 +229,12 @@ public class CommitsCommand implements CommandMarker {
}
@CliCommand(value = "commits sync", help = "Compare commits with another Hoodie dataset")
public String syncCommits(
@CliOption(key = {"path"}, help = "Path of the dataset to compare to")
final String path) throws Exception {
public String syncCommits(@CliOption(key = {"path"}, help = "Path of the dataset to compare to") final String path)
throws Exception {
HoodieCLI.syncTableMetadata = new HoodieTableMetaClient(HoodieCLI.conf, path);
HoodieCLI.state = HoodieCLI.CLIState.SYNC;
return "Load sync state between " + HoodieCLI.tableMetadata.getTableConfig().getTableName()
+ " and " + HoodieCLI.syncTableMetadata.getTableConfig().getTableName();
return "Load sync state between " + HoodieCLI.tableMetadata.getTableConfig().getTableName() + " and "
+ HoodieCLI.syncTableMetadata.getTableConfig().getTableName();
}
}

View File

@@ -29,13 +29,12 @@ public class DatasetsCommand implements CommandMarker {
@CliCommand(value = "connect", help = "Connect to a hoodie dataset")
public String connect(
@CliOption(key = {"path"}, mandatory = true, help = "Base Path of the dataset")
final String path) throws IOException {
@CliOption(key = {"path"}, mandatory = true, help = "Base Path of the dataset") final String path)
throws IOException {
boolean initialized = HoodieCLI.initConf();
HoodieCLI.initFS(initialized);
HoodieCLI.setTableMetadata(new HoodieTableMetaClient(HoodieCLI.conf, path));
HoodieCLI.state = HoodieCLI.CLIState.DATASET;
return "Metadata for table " + HoodieCLI.tableMetadata.getTableConfig().getTableName()
+ " loaded";
return "Metadata for table " + HoodieCLI.tableMetadata.getTableConfig().getTableName() + " loaded";
}
}

View File

@@ -37,44 +37,33 @@ public class HDFSParquetImportCommand implements CommandMarker {
@CliCommand(value = "hdfsparquetimport", help = "Imports hdfs dataset to a hoodie dataset")
public String convert(
@CliOption(key = "srcPath", mandatory = true, help = "Base path for the input dataset")
final String srcPath,
@CliOption(key = "srcType", mandatory = true, help = "Source type for the input dataset")
final String srcType,
@CliOption(key = "targetPath", mandatory = true, help = "Base path for the target hoodie dataset")
final String targetPath,
@CliOption(key = "tableName", mandatory = true, help = "Table name")
final String tableName,
@CliOption(key = "tableType", mandatory = true, help = "Table type")
final String tableType,
@CliOption(key = "rowKeyField", mandatory = true, help = "Row key field name")
final String rowKeyField,
@CliOption(key = "partitionPathField", mandatory = true, help = "Partition path field name")
final String partitionPathField,
@CliOption(key = {"parallelism"}, mandatory = true, help = "Parallelism for hoodie insert")
final String parallelism,
@CliOption(key = "schemaFilePath", mandatory = true, help = "Path for Avro schema file")
final String schemaFilePath,
@CliOption(key = "format", mandatory = true, help = "Format for the input data")
final String format,
@CliOption(key = "sparkMemory", mandatory = true, help = "Spark executor memory")
final String sparkMemory,
@CliOption(key = "retry", mandatory = true, help = "Number of retries")
final String retry)
throws Exception {
@CliOption(key = "srcPath", mandatory = true, help = "Base path for the input dataset") final String srcPath,
@CliOption(key = "srcType", mandatory = true, help = "Source type for the input dataset") final String srcType,
@CliOption(key = "targetPath", mandatory = true, help = "Base path for the target hoodie dataset") final String
targetPath,
@CliOption(key = "tableName", mandatory = true, help = "Table name") final String tableName,
@CliOption(key = "tableType", mandatory = true, help = "Table type") final String tableType,
@CliOption(key = "rowKeyField", mandatory = true, help = "Row key field name") final String rowKeyField,
@CliOption(key = "partitionPathField", mandatory = true, help = "Partition path field name") final String
partitionPathField,
@CliOption(key = {
"parallelism"}, mandatory = true, help = "Parallelism for hoodie insert") final String parallelism,
@CliOption(key = "schemaFilePath", mandatory = true, help = "Path for Avro schema file") final String
schemaFilePath,
@CliOption(key = "format", mandatory = true, help = "Format for the input data") final String format,
@CliOption(key = "sparkMemory", mandatory = true, help = "Spark executor memory") final String sparkMemory,
@CliOption(key = "retry", mandatory = true, help = "Number of retries") final String retry) throws Exception {
validate(format, srcType);
boolean initialized = HoodieCLI.initConf();
HoodieCLI.initFS(initialized);
String sparkPropertiesPath = Utils
.getDefaultPropertiesFile(
scala.collection.JavaConversions.propertiesAsScalaMap(System.getProperties()));
String sparkPropertiesPath = Utils.getDefaultPropertiesFile(
scala.collection.JavaConversions.propertiesAsScalaMap(System.getProperties()));
SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath);
sparkLauncher.addAppArgs(SparkCommand.IMPORT.toString(), srcPath, targetPath, tableName,
tableType, rowKeyField, partitionPathField, parallelism, schemaFilePath, sparkMemory,
retry);
sparkLauncher.addAppArgs(SparkCommand.IMPORT.toString(), srcPath, targetPath, tableName, tableType, rowKeyField,
partitionPathField, parallelism, schemaFilePath, sparkMemory, retry);
Process process = sparkLauncher.launch();
InputStreamConsumer.captureOutput(process);
int exitCode = process.waitFor();

View File

@@ -64,25 +64,25 @@ public class HoodieLogFileCommand implements CommandMarker {
@CliCommand(value = "show logfile metadata", help = "Read commit metadata from log files")
public String showLogFileCommits(
@CliOption(key = "logFilePathPattern", mandatory = true, help = "Fully qualified path for the log file")
final String logFilePathPattern) throws IOException {
@CliOption(key = "logFilePathPattern", mandatory = true, help = "Fully qualified path for the log file") final
String logFilePathPattern)
throws IOException {
FileSystem fs = HoodieCLI.tableMetadata.getFs();
List<String> logFilePaths = Arrays.stream(fs.globStatus(new Path(logFilePathPattern)))
.map(status -> status.getPath().toString()).collect(Collectors.toList());
Map<String, List<Tuple3<HoodieLogBlockType, Tuple2<Map<HeaderMetadataType, String>, Map<HeaderMetadataType, String>>, Integer>>> commitCountAndMetadata = Maps
.newHashMap();
Map<String, List<Tuple3<HoodieLogBlockType, Tuple2<Map<HeaderMetadataType, String>, Map<HeaderMetadataType,
String>>, Integer>>>
commitCountAndMetadata = Maps.newHashMap();
int totalEntries = 0;
int numCorruptBlocks = 0;
for (String logFilePath : logFilePaths) {
FileStatus[] fsStatus = fs.listStatus(
new Path(logFilePath));
Schema writerSchema = new AvroSchemaConverter()
.convert(SchemaUtil
.readSchemaFromLogFile(HoodieCLI.tableMetadata.getFs(), new Path(logFilePath)));
HoodieLogFormat.Reader reader = HoodieLogFormat.newReader(fs,
new HoodieLogFile(fsStatus[0].getPath()), writerSchema);
FileStatus[] fsStatus = fs.listStatus(new Path(logFilePath));
Schema writerSchema = new AvroSchemaConverter().convert(
SchemaUtil.readSchemaFromLogFile(HoodieCLI.tableMetadata.getFs(), new Path(logFilePath)));
HoodieLogFormat.Reader reader = HoodieLogFormat
.newReader(fs, new HoodieLogFile(fsStatus[0].getPath()), writerSchema);
// read the avro blocks
while (reader.hasNext()) {
@@ -104,15 +104,14 @@ public class HoodieLogFileCommand implements CommandMarker {
}
}
if (commitCountAndMetadata.containsKey(instantTime)) {
commitCountAndMetadata.get(instantTime)
.add(new Tuple3<>(n.getBlockType(),
new Tuple2<>(n.getLogBlockHeader(), n.getLogBlockFooter()), recordCount));
commitCountAndMetadata.get(instantTime).add(
new Tuple3<>(n.getBlockType(), new Tuple2<>(n.getLogBlockHeader(), n.getLogBlockFooter()), recordCount));
totalEntries++;
} else {
List<Tuple3<HoodieLogBlockType, Tuple2<Map<HeaderMetadataType, String>, Map<HeaderMetadataType, String>>, Integer>> list
= new ArrayList<>();
list.add(new Tuple3<>(n.getBlockType(),
new Tuple2<>(n.getLogBlockHeader(), n.getLogBlockFooter()), recordCount));
List<Tuple3<HoodieLogBlockType, Tuple2<Map<HeaderMetadataType, String>, Map<HeaderMetadataType, String>>,
Integer>> list = new ArrayList<>();
list.add(
new Tuple3<>(n.getBlockType(), new Tuple2<>(n.getLogBlockHeader(), n.getLogBlockFooter()), recordCount));
commitCountAndMetadata.put(instantTime, list);
totalEntries++;
}
@@ -121,11 +120,12 @@ public class HoodieLogFileCommand implements CommandMarker {
String[][] rows = new String[totalEntries + 1][];
int i = 0;
ObjectMapper objectMapper = new ObjectMapper();
for (Map.Entry<String, List<Tuple3<HoodieLogBlockType, Tuple2<Map<HeaderMetadataType, String>, Map<HeaderMetadataType, String>>, Integer>>> entry : commitCountAndMetadata
.entrySet()) {
for (Map.Entry<String, List<Tuple3<HoodieLogBlockType,
Tuple2<Map<HeaderMetadataType, String>, Map<HeaderMetadataType, String>>, Integer>>> entry
: commitCountAndMetadata.entrySet()) {
String instantTime = entry.getKey().toString();
for (Tuple3<HoodieLogBlockType, Tuple2<Map<HeaderMetadataType, String>, Map<HeaderMetadataType, String>>, Integer> tuple3 : entry
.getValue()) {
for (Tuple3<HoodieLogBlockType, Tuple2<Map<HeaderMetadataType, String>,
Map<HeaderMetadataType, String>>, Integer> tuple3 : entry.getValue()) {
String[] output = new String[5];
output[0] = instantTime;
output[1] = String.valueOf(tuple3._3());
@@ -136,24 +136,21 @@ public class HoodieLogFileCommand implements CommandMarker {
i++;
}
}
return HoodiePrintHelper.print(
new String[]{"InstantTime", "RecordCount", "BlockType", "HeaderMetadata", "FooterMetadata"},
rows);
return HoodiePrintHelper
.print(new String[] {"InstantTime", "RecordCount", "BlockType", "HeaderMetadata", "FooterMetadata"},
rows);
}
@CliCommand(value = "show logfile records", help = "Read records from log files")
public String showLogFileRecords(
@CliOption(key = {
"limit"}, mandatory = false, help = "Limit commits", unspecifiedDefaultValue = "10")
final Integer limit,
public String showLogFileRecords(@CliOption(key = {
"limit"}, mandatory = false, help = "Limit commits", unspecifiedDefaultValue = "10") final Integer limit,
@CliOption(key = "logFilePathPattern", mandatory = true, help = "Fully qualified paths for the log files")
final String logFilePathPattern,
@CliOption(key = "mergeRecords", mandatory = false, help = "If the records in the log files should be merged",
unspecifiedDefaultValue = "false")
final Boolean shouldMerge) throws IOException {
unspecifiedDefaultValue = "false") final Boolean shouldMerge)
throws IOException {
System.out
.println("===============> Showing only " + limit + " records <===============");
System.out.println("===============> Showing only " + limit + " records <===============");
FileSystem fs = HoodieCLI.tableMetadata.getFs();
List<String> logFilePaths = Arrays.stream(fs.globStatus(new Path(logFilePathPattern)))
@@ -162,9 +159,8 @@ public class HoodieLogFileCommand implements CommandMarker {
// TODO : readerSchema can change across blocks/log files, fix this inside Scanner
AvroSchemaConverter converter = new AvroSchemaConverter();
// get schema from last log file
Schema readerSchema = converter
.convert(SchemaUtil
.readSchemaFromLogFile(fs, new Path(logFilePaths.get(logFilePaths.size() - 1))));
Schema readerSchema = converter.convert(
SchemaUtil.readSchemaFromLogFile(fs, new Path(logFilePaths.get(logFilePaths.size() - 1))));
List<IndexedRecord> allRecords = new ArrayList<>();
@@ -186,11 +182,10 @@ public class HoodieLogFileCommand implements CommandMarker {
}
} else {
for (String logFile : logFilePaths) {
Schema writerSchema = new AvroSchemaConverter()
.convert(SchemaUtil
.readSchemaFromLogFile(HoodieCLI.tableMetadata.getFs(), new Path(logFile)));
HoodieLogFormat.Reader reader = HoodieLogFormat.newReader(fs,
new HoodieLogFile(new Path(logFile)), writerSchema);
Schema writerSchema = new AvroSchemaConverter().convert(
SchemaUtil.readSchemaFromLogFile(HoodieCLI.tableMetadata.getFs(), new Path(logFile)));
HoodieLogFormat.Reader reader = HoodieLogFormat
.newReader(fs, new HoodieLogFile(new Path(logFile)), writerSchema);
// read the avro blocks
while (reader.hasNext()) {
HoodieLogBlock n = reader.next();
@@ -216,7 +211,6 @@ public class HoodieLogFileCommand implements CommandMarker {
rows[i] = data;
i++;
}
return HoodiePrintHelper.print(
new String[]{"Records"}, rows);
return HoodiePrintHelper.print(new String[] {"Records"}, rows);
}
}

View File

@@ -40,26 +40,22 @@ public class HoodieSyncCommand implements CommandMarker {
@CliCommand(value = "sync validate", help = "Validate the sync by counting the number of records")
public String validateSync(
@CliOption(key = {"mode"}, unspecifiedDefaultValue = "complete", help = "Check mode")
final String mode,
@CliOption(key = {"mode"}, unspecifiedDefaultValue = "complete", help = "Check mode") final String mode,
@CliOption(key = {"sourceDb"}, unspecifiedDefaultValue = "rawdata", help = "source database") final String srcDb,
@CliOption(key = {
"sourceDb"}, unspecifiedDefaultValue = "rawdata", help = "source database")
final String srcDb,
@CliOption(key = {
"targetDb"}, unspecifiedDefaultValue = "dwh_hoodie", help = "target database")
final String tgtDb,
"targetDb"}, unspecifiedDefaultValue = "dwh_hoodie", help = "target database") final String tgtDb,
@CliOption(key = {
"partitionCount"}, unspecifiedDefaultValue = "5", help = "total number of recent partitions to validate")
final int partitionCount,
@CliOption(key = {
"hiveServerUrl"}, mandatory = true, help = "hiveServerURL to connect to")
final String hiveServerUrl,
"hiveServerUrl"}, mandatory = true, help = "hiveServerURL to connect to") final String hiveServerUrl,
@CliOption(key = {
"hiveUser"}, mandatory = false, unspecifiedDefaultValue = "", help = "hive username to connect to")
final String hiveUser,
"hiveUser"}, mandatory = false, unspecifiedDefaultValue = "", help = "hive username to connect to") final
String hiveUser,
@CliOption(key = {
"hivePass"}, mandatory = true, unspecifiedDefaultValue = "", help = "hive password to connect to")
final String hivePass) throws Exception {
"hivePass"}, mandatory = true, unspecifiedDefaultValue = "", help = "hive password to connect to") final
String hivePass)
throws Exception {
HoodieTableMetaClient target = HoodieCLI.syncTableMetadata;
HoodieTimeline targetTimeline = target.getActiveTimeline().getCommitsTimeline();
HoodieTableMetaClient source = HoodieCLI.tableMetadata;
@@ -70,52 +66,42 @@ public class HoodieSyncCommand implements CommandMarker {
sourceCount = HiveUtil.countRecords(hiveServerUrl, source, srcDb, hiveUser, hivePass);
targetCount = HiveUtil.countRecords(hiveServerUrl, target, tgtDb, hiveUser, hivePass);
} else if ("latestPartitions".equals(mode)) {
sourceCount = HiveUtil
.countRecords(hiveServerUrl, source, srcDb, partitionCount, hiveUser, hivePass);
targetCount = HiveUtil
.countRecords(hiveServerUrl, target, tgtDb, partitionCount, hiveUser, hivePass);
sourceCount = HiveUtil.countRecords(hiveServerUrl, source, srcDb, partitionCount, hiveUser, hivePass);
targetCount = HiveUtil.countRecords(hiveServerUrl, target, tgtDb, partitionCount, hiveUser, hivePass);
}
String targetLatestCommit =
targetTimeline.getInstants().iterator().hasNext() ? "0"
: targetTimeline.lastInstant().get().getTimestamp();
targetTimeline.getInstants().iterator().hasNext() ? "0" : targetTimeline.lastInstant().get().getTimestamp();
String sourceLatestCommit =
sourceTimeline.getInstants().iterator().hasNext() ? "0"
: sourceTimeline.lastInstant().get().getTimestamp();
sourceTimeline.getInstants().iterator().hasNext() ? "0" : sourceTimeline.lastInstant().get().getTimestamp();
if (sourceLatestCommit != null && HoodieTimeline
.compareTimestamps(targetLatestCommit, sourceLatestCommit, HoodieTimeline.GREATER)) {
if (sourceLatestCommit != null && HoodieTimeline.compareTimestamps(targetLatestCommit, sourceLatestCommit,
HoodieTimeline.GREATER)) {
// source is behind the target
List<HoodieInstant> commitsToCatchup =
targetTimeline.findInstantsAfter(sourceLatestCommit, Integer.MAX_VALUE).getInstants()
.collect(Collectors.toList());
List<HoodieInstant> commitsToCatchup = targetTimeline.findInstantsAfter(sourceLatestCommit, Integer.MAX_VALUE)
.getInstants().collect(Collectors.toList());
if (commitsToCatchup.isEmpty()) {
return "Count difference now is (count(" + target.getTableConfig().getTableName()
+ ") - count(" + source.getTableConfig().getTableName() + ") == " + (targetCount
- sourceCount);
return "Count difference now is (count(" + target.getTableConfig().getTableName() + ") - count("
+ source.getTableConfig().getTableName() + ") == " + (targetCount - sourceCount);
} else {
long newInserts = CommitUtil.countNewRecords(target,
commitsToCatchup.stream().map(HoodieInstant::getTimestamp)
.collect(Collectors.toList()));
return "Count difference now is (count(" + target.getTableConfig().getTableName()
+ ") - count(" + source.getTableConfig().getTableName() + ") == " + (targetCount
- sourceCount) + ". Catch up count is " + newInserts;
commitsToCatchup.stream().map(HoodieInstant::getTimestamp).collect(Collectors.toList()));
return "Count difference now is (count(" + target.getTableConfig().getTableName() + ") - count("
+ source.getTableConfig().getTableName()
+ ") == " + (targetCount - sourceCount) + ". Catch up count is " + newInserts;
}
} else {
List<HoodieInstant> commitsToCatchup =
sourceTimeline.findInstantsAfter(targetLatestCommit, Integer.MAX_VALUE).getInstants()
.collect(Collectors.toList());
List<HoodieInstant> commitsToCatchup = sourceTimeline.findInstantsAfter(targetLatestCommit, Integer.MAX_VALUE)
.getInstants().collect(Collectors.toList());
if (commitsToCatchup.isEmpty()) {
return "Count difference now is (count(" + source.getTableConfig().getTableName()
+ ") - count(" + target.getTableConfig().getTableName() + ") == " + (sourceCount
- targetCount);
return "Count difference now is (count(" + source.getTableConfig().getTableName() + ") - count("
+ target.getTableConfig().getTableName() + ") == " + (sourceCount - targetCount);
} else {
long newInserts = CommitUtil.countNewRecords(source,
commitsToCatchup.stream().map(HoodieInstant::getTimestamp)
.collect(Collectors.toList()));
return "Count difference now is (count(" + source.getTableConfig().getTableName()
+ ") - count(" + target.getTableConfig().getTableName() + ") == " + (sourceCount
- targetCount) + ". Catch up count is " + newInserts;
commitsToCatchup.stream().map(HoodieInstant::getTimestamp).collect(Collectors.toList()));
return "Count difference now is (count(" + source.getTableConfig().getTableName() + ") - count("
+ target.getTableConfig().getTableName()
+ ") == " + (sourceCount - targetCount) + ". Catch up count is " + newInserts;
}
}

View File

@@ -45,20 +45,20 @@ public class RepairsCommand implements CommandMarker {
return HoodieCLI.tableMetadata != null;
}
@CliCommand(value = "repair deduplicate", help = "De-duplicate a partition path contains duplicates & produce repaired files to replace with")
public String deduplicate(
@CliCommand(value = "repair deduplicate", help = "De-duplicate a partition path contains duplicates & produce "
+ "repaired files to replace with")
public String deduplicate(@CliOption(key = {
"duplicatedPartitionPath"}, help = "Partition Path containing the duplicates", mandatory = true) final String
duplicatedPartitionPath,
@CliOption(key = {
"duplicatedPartitionPath"}, help = "Partition Path containing the duplicates", mandatory = true)
final String duplicatedPartitionPath,
"repairedOutputPath"}, help = "Location to place the repaired files", mandatory = true) final String
repairedOutputPath,
@CliOption(key = {
"repairedOutputPath"}, help = "Location to place the repaired files", mandatory = true)
final String repairedOutputPath,
@CliOption(key = {"sparkProperties"}, help = "Spark Properites File Path", mandatory = true)
final String sparkPropertiesPath) throws Exception {
"sparkProperties"}, help = "Spark Properites File Path", mandatory = true) final String sparkPropertiesPath)
throws Exception {
SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath);
sparkLauncher
.addAppArgs(SparkMain.SparkCommand.DEDUPLICATE.toString(), duplicatedPartitionPath,
repairedOutputPath, HoodieCLI.tableMetadata.getBasePath());
sparkLauncher.addAppArgs(SparkMain.SparkCommand.DEDUPLICATE.toString(), duplicatedPartitionPath, repairedOutputPath,
HoodieCLI.tableMetadata.getBasePath());
Process process = sparkLauncher.launch();
InputStreamConsumer.captureOutput(process);
int exitCode = process.waitFor();
@@ -71,14 +71,12 @@ public class RepairsCommand implements CommandMarker {
@CliCommand(value = "repair addpartitionmeta", help = "Add partition metadata to a dataset, if not present")
public String addPartitionMeta(
@CliOption(key = {"dryrun"},
help = "Should we actually add or just print what would be done",
unspecifiedDefaultValue = "true")
public String addPartitionMeta(@CliOption(key = {
"dryrun"}, help = "Should we actually add or just print what would be done", unspecifiedDefaultValue = "true")
final boolean dryRun) throws IOException {
String latestCommit = HoodieCLI.tableMetadata.getActiveTimeline().getCommitTimeline()
.lastInstant().get().getTimestamp();
String latestCommit = HoodieCLI.tableMetadata.getActiveTimeline().getCommitTimeline().lastInstant().get()
.getTimestamp();
List<String> partitionPaths = FSUtils.getAllFoldersThreeLevelsDown(HoodieCLI.fs,
HoodieCLI.tableMetadata.getBasePath());
Path basePath = new Path(HoodieCLI.tableMetadata.getBasePath());
@@ -94,10 +92,7 @@ public class RepairsCommand implements CommandMarker {
if (!HoodiePartitionMetadata.hasPartitionMetadata(HoodieCLI.fs, partitionPath)) {
row[1] = "No";
if (!dryRun) {
HoodiePartitionMetadata partitionMetadata = new HoodiePartitionMetadata(
HoodieCLI.fs,
latestCommit,
basePath,
HoodiePartitionMetadata partitionMetadata = new HoodiePartitionMetadata(HoodieCLI.fs, latestCommit, basePath,
partitionPath);
partitionMetadata.trySave(0);
}
@@ -105,7 +100,6 @@ public class RepairsCommand implements CommandMarker {
rows[ind++] = row;
}
return HoodiePrintHelper.print(
new String[]{"Partition Path", "Metadata Present?", "Action"}, rows);
return HoodiePrintHelper.print(new String[] {"Partition Path", "Metadata Present?", "Action"}, rows);
}
}

View File

@@ -13,6 +13,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.uber.hoodie.cli.commands;
import com.uber.hoodie.HoodieWriteClient;
@@ -60,8 +61,8 @@ public class SavepointsCommand implements CommandMarker {
@CliAvailabilityIndicator({"savepoint rollback"})
public boolean isRollbackToSavepointAvailable() {
return HoodieCLI.tableMetadata != null && !HoodieCLI.tableMetadata.getActiveTimeline()
.getSavePointTimeline().filterCompletedInstants().empty();
return HoodieCLI.tableMetadata != null && !HoodieCLI.tableMetadata.getActiveTimeline().getSavePointTimeline()
.filterCompletedInstants().empty();
}
@CliCommand(value = "savepoints show", help = "Show the savepoints")
@@ -73,23 +74,19 @@ public class SavepointsCommand implements CommandMarker {
Collections.reverse(commits);
for (int i = 0; i < commits.size(); i++) {
HoodieInstant commit = commits.get(i);
rows[i] = new String[]{commit.getTimestamp()};
rows[i] = new String[] {commit.getTimestamp()};
}
return HoodiePrintHelper.print(new String[]{"SavepointTime"}, rows);
return HoodiePrintHelper.print(new String[] {"SavepointTime"}, rows);
}
@CliCommand(value = "savepoint create", help = "Savepoint a commit")
public String savepoint(
@CliOption(key = {"commit"}, help = "Commit to savepoint")
final String commitTime,
@CliOption(key = {"user"}, help = "User who is creating the savepoint")
final String user,
@CliOption(key = {"comments"}, help = "Comments for creating the savepoint")
final String comments) throws Exception {
public String savepoint(@CliOption(key = {"commit"}, help = "Commit to savepoint") final String commitTime,
@CliOption(key = {"user"}, help = "User who is creating the savepoint") final String user,
@CliOption(key = {"comments"}, help = "Comments for creating the savepoint") final String comments)
throws Exception {
HoodieActiveTimeline activeTimeline = HoodieCLI.tableMetadata.getActiveTimeline();
HoodieTimeline timeline = activeTimeline.getCommitTimeline().filterCompletedInstants();
HoodieInstant
commitInstant = new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, commitTime);
HoodieInstant commitInstant = new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, commitTime);
if (!timeline.containsInstant(commitInstant)) {
return "Commit " + commitTime + " not found in Commits " + timeline;
@@ -106,22 +103,19 @@ public class SavepointsCommand implements CommandMarker {
@CliCommand(value = "savepoint rollback", help = "Savepoint a commit")
public String rollbackToSavepoint(
@CliOption(key = {"savepoint"}, help = "Savepoint to rollback")
final String commitTime,
@CliOption(key = {"sparkProperties"}, help = "Spark Properites File Path")
final String sparkPropertiesPath) throws Exception {
@CliOption(key = {"savepoint"}, help = "Savepoint to rollback") final String commitTime,
@CliOption(key = {"sparkProperties"}, help = "Spark Properites File Path") final String sparkPropertiesPath)
throws Exception {
HoodieActiveTimeline activeTimeline = HoodieCLI.tableMetadata.getActiveTimeline();
HoodieTimeline timeline = activeTimeline.getCommitTimeline().filterCompletedInstants();
HoodieInstant
commitInstant = new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, commitTime);
HoodieInstant commitInstant = new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, commitTime);
if (!timeline.containsInstant(commitInstant)) {
return "Commit " + commitTime + " not found in Commits " + timeline;
}
SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath);
sparkLauncher.addAppArgs(SparkMain.SparkCommand.ROLLBACK_TO_SAVEPOINT.toString(),
commitTime,
sparkLauncher.addAppArgs(SparkMain.SparkCommand.ROLLBACK_TO_SAVEPOINT.toString(), commitTime,
HoodieCLI.tableMetadata.getBasePath());
Process process = sparkLauncher.launch();
InputStreamConsumer.captureOutput(process);
@@ -137,18 +131,14 @@ public class SavepointsCommand implements CommandMarker {
@CliCommand(value = "savepoints refresh", help = "Refresh the savepoints")
public String refreshMetaClient() throws IOException {
HoodieTableMetaClient metadata =
new HoodieTableMetaClient(HoodieCLI.conf, HoodieCLI.tableMetadata.getBasePath());
HoodieTableMetaClient metadata = new HoodieTableMetaClient(HoodieCLI.conf, HoodieCLI.tableMetadata.getBasePath());
HoodieCLI.setTableMetadata(metadata);
return "Metadata for table " + metadata.getTableConfig().getTableName() + " refreshed.";
}
private static HoodieWriteClient createHoodieClient(JavaSparkContext jsc, String basePath)
throws Exception {
HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath)
.withIndexConfig(
HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build())
.build();
private static HoodieWriteClient createHoodieClient(JavaSparkContext jsc, String basePath) throws Exception {
HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath).withIndexConfig(
HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build()).build();
return new HoodieWriteClient(jsc, config, false);
}

View File

@@ -30,18 +30,14 @@ import org.apache.spark.sql.SQLContext;
public class SparkMain {
protected final static Logger LOG = Logger.getLogger(SparkMain.class);
protected static final Logger LOG = Logger.getLogger(SparkMain.class);
/**
* Commands
*/
enum SparkCommand {
ROLLBACK,
DEDUPLICATE,
ROLLBACK_TO_SAVEPOINT,
SAVEPOINT,
IMPORT
ROLLBACK, DEDUPLICATE, ROLLBACK_TO_SAVEPOINT, SAVEPOINT, IMPORT
}
public static void main(String[] args) throws Exception {
@@ -67,18 +63,19 @@ public class SparkMain {
break;
case IMPORT:
assert (args.length == 11);
returnCode = dataImport(jsc, args[1], args[2], args[3], args[4], args[5], args[6],
Integer.parseInt(args[7]), args[8], SparkUtil.DEFUALT_SPARK_MASTER, args[9],
Integer.parseInt(args[10]));
returnCode = dataImport(jsc, args[1], args[2], args[3], args[4], args[5], args[6], Integer.parseInt(args[7]),
args[8], SparkUtil.DEFUALT_SPARK_MASTER, args[9], Integer.parseInt(args[10]));
break;
default:
break;
}
System.exit(returnCode);
}
private static int dataImport(JavaSparkContext jsc, String srcPath, String targetPath,
String tableName, String tableType, String rowKey, String partitionKey, int parallelism,
String schemaFile, String sparkMaster, String sparkMemory, int retry) throws Exception {
private static int dataImport(JavaSparkContext jsc, String srcPath, String targetPath, String tableName,
String tableType, String rowKey, String partitionKey, int parallelism, String schemaFile, String sparkMaster,
String sparkMemory, int retry) throws Exception {
HDFSParquetImporter.Config cfg = new HDFSParquetImporter.Config();
cfg.srcPath = srcPath;
cfg.targetPath = targetPath;
@@ -92,19 +89,15 @@ public class SparkMain {
return new HDFSParquetImporter(cfg).dataImport(jsc, retry);
}
private static int deduplicatePartitionPath(JavaSparkContext jsc,
String duplicatedPartitionPath,
String repairedOutputPath,
String basePath)
throws Exception {
DedupeSparkJob job = new DedupeSparkJob(basePath, duplicatedPartitionPath, repairedOutputPath,
new SQLContext(jsc), FSUtils.getFs(basePath, jsc.hadoopConfiguration()));
private static int deduplicatePartitionPath(JavaSparkContext jsc, String duplicatedPartitionPath,
String repairedOutputPath, String basePath) throws Exception {
DedupeSparkJob job = new DedupeSparkJob(basePath, duplicatedPartitionPath, repairedOutputPath, new SQLContext(jsc),
FSUtils.getFs(basePath, jsc.hadoopConfiguration()));
job.fixDuplicates(true);
return 0;
}
private static int rollback(JavaSparkContext jsc, String commitTime, String basePath)
throws Exception {
private static int rollback(JavaSparkContext jsc, String commitTime, String basePath) throws Exception {
HoodieWriteClient client = createHoodieClient(jsc, basePath);
if (client.rollback(commitTime)) {
LOG.info(String.format("The commit \"%s\" rolled back.", commitTime));
@@ -115,9 +108,7 @@ public class SparkMain {
}
}
private static int rollbackToSavepoint(JavaSparkContext jsc, String savepointTime,
String basePath)
throws Exception {
private static int rollbackToSavepoint(JavaSparkContext jsc, String savepointTime, String basePath) throws Exception {
HoodieWriteClient client = createHoodieClient(jsc, basePath);
if (client.rollbackToSavepoint(savepointTime)) {
LOG.info(String.format("The commit \"%s\" rolled back.", savepointTime));
@@ -128,12 +119,9 @@ public class SparkMain {
}
}
private static HoodieWriteClient createHoodieClient(JavaSparkContext jsc, String basePath)
throws Exception {
HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath)
.withIndexConfig(
HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build())
.build();
private static HoodieWriteClient createHoodieClient(JavaSparkContext jsc, String basePath) throws Exception {
HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath).withIndexConfig(
HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build()).build();
return new HoodieWriteClient(jsc, config);
}
}

View File

@@ -16,7 +16,6 @@
package com.uber.hoodie.cli.commands;
import com.codahale.metrics.Histogram;
import com.codahale.metrics.Snapshot;
import com.codahale.metrics.UniformReservoir;
@@ -44,12 +43,15 @@ import org.springframework.stereotype.Component;
@Component
public class StatsCommand implements CommandMarker {
private static final int MAX_FILES = 1000000;
@CliAvailabilityIndicator({"stats wa"})
public boolean isWriteAmpAvailable() {
return HoodieCLI.tableMetadata != null;
}
@CliCommand(value = "stats wa", help = "Write Amplification. Ratio of how many records were upserted to how many records were actually written")
@CliCommand(value = "stats wa", help = "Write Amplification. Ratio of how many records were upserted to how many "
+ "records were actually written")
public String writeAmplificationStats() throws IOException {
long totalRecordsUpserted = 0;
long totalRecordsWritten = 0;
@@ -60,18 +62,13 @@ public class StatsCommand implements CommandMarker {
String[][] rows = new String[new Long(timeline.countInstants()).intValue() + 1][];
int i = 0;
DecimalFormat df = new DecimalFormat("#.00");
for (HoodieInstant commitTime : timeline.getInstants().collect(
Collectors.toList())) {
for (HoodieInstant commitTime : timeline.getInstants().collect(Collectors.toList())) {
String waf = "0";
HoodieCommitMetadata commit = HoodieCommitMetadata
.fromBytes(activeTimeline.getInstantDetails(commitTime).get());
HoodieCommitMetadata commit = HoodieCommitMetadata.fromBytes(activeTimeline.getInstantDetails(commitTime).get());
if (commit.fetchTotalUpdateRecordsWritten() > 0) {
waf = df.format(
(float) commit.fetchTotalRecordsWritten() / commit
.fetchTotalUpdateRecordsWritten());
waf = df.format((float) commit.fetchTotalRecordsWritten() / commit.fetchTotalUpdateRecordsWritten());
}
rows[i++] = new String[]{commitTime.getTimestamp(),
String.valueOf(commit.fetchTotalUpdateRecordsWritten()),
rows[i++] = new String[] {commitTime.getTimestamp(), String.valueOf(commit.fetchTotalUpdateRecordsWritten()),
String.valueOf(commit.fetchTotalRecordsWritten()), waf};
totalRecordsUpserted += commit.fetchTotalUpdateRecordsWritten();
totalRecordsWritten += commit.fetchTotalRecordsWritten();
@@ -80,43 +77,32 @@ public class StatsCommand implements CommandMarker {
if (totalRecordsUpserted > 0) {
waf = df.format((float) totalRecordsWritten / totalRecordsUpserted);
}
rows[i] = new String[]{"Total", String.valueOf(totalRecordsUpserted),
String.valueOf(totalRecordsWritten), waf};
return HoodiePrintHelper.print(
new String[]{"CommitTime", "Total Upserted", "Total Written",
"Write Amplifiation Factor"}, rows);
rows[i] = new String[] {"Total", String.valueOf(totalRecordsUpserted), String.valueOf(totalRecordsWritten), waf};
return HoodiePrintHelper
.print(new String[] {"CommitTime", "Total Upserted", "Total Written", "Write Amplifiation Factor"},
rows);
}
private String[] printFileSizeHistogram(String commitTime, Snapshot s) {
return new String[]{
commitTime,
NumericUtils.humanReadableByteCount(s.getMin()),
NumericUtils.humanReadableByteCount(s.getValue(0.1)),
NumericUtils.humanReadableByteCount(s.getMedian()),
NumericUtils.humanReadableByteCount(s.getMean()),
NumericUtils.humanReadableByteCount(s.get95thPercentile()),
NumericUtils.humanReadableByteCount(s.getMax()),
String.valueOf(s.size()),
NumericUtils.humanReadableByteCount(s.getStdDev())
};
return new String[] {commitTime, NumericUtils.humanReadableByteCount(s.getMin()),
NumericUtils.humanReadableByteCount(s.getValue(0.1)), NumericUtils.humanReadableByteCount(s.getMedian()),
NumericUtils.humanReadableByteCount(s.getMean()), NumericUtils.humanReadableByteCount(s.get95thPercentile()),
NumericUtils.humanReadableByteCount(s.getMax()), String.valueOf(s.size()),
NumericUtils.humanReadableByteCount(s.getStdDev())};
}
@CliCommand(value = "stats filesizes", help = "File Sizes. Display summary stats on sizes of files")
public String fileSizeStats(
@CliOption(key = {
"partitionPath"}, help = "regex to select files, eg: 2016/08/02", unspecifiedDefaultValue = "*/*/*")
final String globRegex) throws IOException {
public String fileSizeStats(@CliOption(key = {
"partitionPath"}, help = "regex to select files, eg: 2016/08/02", unspecifiedDefaultValue = "*/*/*") final
String globRegex) throws IOException {
FileSystem fs = HoodieCLI.fs;
String globPath = String.format("%s/%s/*",
HoodieCLI.tableMetadata.getBasePath(),
globRegex);
String globPath = String.format("%s/%s/*", HoodieCLI.tableMetadata.getBasePath(), globRegex);
FileStatus[] statuses = fs.globStatus(new Path(globPath));
// max, min, #small files < 10MB, 50th, avg, 95th
final int MAX_FILES = 1000000;
Histogram globalHistogram = new Histogram(new UniformReservoir(MAX_FILES));
HashMap<String, Histogram> commitHistoMap = new HashMap<String, Histogram>();
for (FileStatus fileStatus : statuses) {
@@ -138,8 +124,8 @@ public class StatsCommand implements CommandMarker {
Snapshot s = globalHistogram.getSnapshot();
rows[ind++] = printFileSizeHistogram("ALL", s);
return HoodiePrintHelper.print(
new String[]{"CommitTime", "Min", "10th", "50th", "avg", "95th", "Max", "NumFiles",
"StdDev"}, rows);
return HoodiePrintHelper
.print(new String[] {"CommitTime", "Min", "10th", "50th", "avg", "95th", "Max", "NumFiles", "StdDev"},
rows);
}
}

View File

@@ -25,9 +25,7 @@ import org.springframework.stereotype.Component;
public class UtilsCommand implements CommandMarker {
@CliCommand(value = "utils loadClass", help = "Load a class")
public String loadClass(
@CliOption(key = {"class"}, help = "Check mode") final String clazz
) throws Exception {
public String loadClass(@CliOption(key = {"class"}, help = "Check mode") final String clazz) throws Exception {
Class klass = Class.forName(clazz);
return klass.getProtectionDomain().getCodeSource().getLocation().toExternalForm();
}

View File

@@ -25,15 +25,12 @@ import java.util.List;
public class CommitUtil {
public static long countNewRecords(HoodieTableMetaClient target, List<String> commitsToCatchup)
throws IOException {
public static long countNewRecords(HoodieTableMetaClient target, List<String> commitsToCatchup) throws IOException {
long totalNew = 0;
HoodieTimeline timeline = target.getActiveTimeline().reload().getCommitTimeline()
.filterCompletedInstants();
HoodieTimeline timeline = target.getActiveTimeline().reload().getCommitTimeline().filterCompletedInstants();
for (String commit : commitsToCatchup) {
HoodieCommitMetadata c = HoodieCommitMetadata.fromBytes(timeline
.getInstantDetails(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, commit))
.get());
HoodieCommitMetadata c = HoodieCommitMetadata.fromBytes(
timeline.getInstantDetails(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, commit)).get());
totalNew += c.fetchTotalRecordsWritten() - c.fetchTotalUpdateRecordsWritten();
}
return totalNew;

View File

@@ -27,7 +27,7 @@ import org.joda.time.DateTime;
public class HiveUtil {
private static String driverName = "org.apache.hive.jdbc.HiveDriver";
private static final String driverName = "org.apache.hive.jdbc.HiveDriver";
static {
try {
@@ -39,8 +39,7 @@ public class HiveUtil {
private static Connection connection;
private static Connection getConnection(String jdbcUrl, String user, String pass)
throws SQLException {
private static Connection getConnection(String jdbcUrl, String user, String pass) throws SQLException {
DataSource ds = getDatasource(jdbcUrl, user, pass);
return ds.getConnection();
}
@@ -54,8 +53,8 @@ public class HiveUtil {
return ds;
}
public static long countRecords(String jdbcUrl, HoodieTableMetaClient source, String dbName,
String user, String pass) throws SQLException {
public static long countRecords(String jdbcUrl, HoodieTableMetaClient source, String dbName, String user, String pass)
throws SQLException {
Connection conn = HiveUtil.getConnection(jdbcUrl, user, pass);
ResultSet rs = null;
Statement stmt = conn.createStatement();
@@ -64,15 +63,13 @@ public class HiveUtil {
stmt.execute("set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat");
stmt.execute("set hive.stats.autogather=false");
rs = stmt.executeQuery(
"select count(`_hoodie_commit_time`) as cnt from " + dbName + "." + source
.getTableConfig()
.getTableName());
"select count(`_hoodie_commit_time`) as cnt from " + dbName + "."
+ source.getTableConfig().getTableName());
long count = -1;
if (rs.next()) {
count = rs.getLong("cnt");
}
System.out
.println("Total records in " + source.getTableConfig().getTableName() + " is " + count);
System.out.println("Total records in " + source.getTableConfig().getTableName() + " is " + count);
return count;
} finally {
if (rs != null) {
@@ -84,22 +81,19 @@ public class HiveUtil {
}
}
public static long countRecords(String jdbcUrl, HoodieTableMetaClient source, String srcDb,
int partitions, String user, String pass) throws SQLException {
public static long countRecords(String jdbcUrl, HoodieTableMetaClient source, String srcDb, int partitions,
String user, String pass) throws SQLException {
DateTime dateTime = DateTime.now();
String endDateStr =
dateTime.getYear() + "-" + String.format("%02d", dateTime.getMonthOfYear()) + "-" +
String.format("%02d", dateTime.getDayOfMonth());
String endDateStr = dateTime.getYear() + "-" + String.format("%02d", dateTime.getMonthOfYear()) + "-"
+ String.format("%02d", dateTime.getDayOfMonth());
dateTime = dateTime.minusDays(partitions);
String startDateStr =
dateTime.getYear() + "-" + String.format("%02d", dateTime.getMonthOfYear()) + "-" +
String.format("%02d", dateTime.getDayOfMonth());
String startDateStr = dateTime.getYear() + "-" + String.format("%02d", dateTime.getMonthOfYear()) + "-"
+ String.format("%02d", dateTime.getDayOfMonth());
System.out.println("Start date " + startDateStr + " and end date " + endDateStr);
return countRecords(jdbcUrl, source, srcDb, startDateStr, endDateStr, user, pass);
}
private static long countRecords(String jdbcUrl, HoodieTableMetaClient source, String srcDb,
String startDateStr,
private static long countRecords(String jdbcUrl, HoodieTableMetaClient source, String srcDb, String startDateStr,
String endDateStr, String user, String pass) throws SQLException {
Connection conn = HiveUtil.getConnection(jdbcUrl, user, pass);
ResultSet rs = null;
@@ -109,9 +103,8 @@ public class HiveUtil {
stmt.execute("set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat");
stmt.execute("set hive.stats.autogather=false");
rs = stmt.executeQuery(
"select count(`_hoodie_commit_time`) as cnt from " + srcDb + "." + source.getTableConfig()
.getTableName() + " where datestr>'" + startDateStr + "' and datestr<='"
+ endDateStr + "'");
"select count(`_hoodie_commit_time`) as cnt from " + srcDb + "." + source.getTableConfig().getTableName()
+ " where datestr>'" + startDateStr + "' and datestr<='" + endDateStr + "'");
if (rs.next()) {
return rs.getLong("cnt");
}

View File

@@ -24,7 +24,7 @@ import java.util.logging.Logger;
public class InputStreamConsumer extends Thread {
protected final static Logger LOG = Logger.getLogger(InputStreamConsumer.class.getName());
protected static final Logger LOG = Logger.getLogger(InputStreamConsumer.class.getName());
private InputStream is;
public InputStreamConsumer(InputStream is) {

View File

@@ -35,13 +35,10 @@ public class SparkUtil {
* TODO: Need to fix a bunch of hardcoded stuff here eg: history server, spark distro
*/
public static SparkLauncher initLauncher(String propertiesFile) throws URISyntaxException {
String currentJar = new File(
SparkUtil.class.getProtectionDomain().getCodeSource().getLocation().toURI().getPath())
String currentJar = new File(SparkUtil.class.getProtectionDomain().getCodeSource().getLocation().toURI().getPath())
.getAbsolutePath();
SparkLauncher sparkLauncher =
new SparkLauncher().setAppResource(currentJar)
.setMainClass(SparkMain.class.getName())
.setPropertiesFile(propertiesFile);
SparkLauncher sparkLauncher = new SparkLauncher().setAppResource(currentJar).setMainClass(SparkMain.class.getName())
.setPropertiesFile(propertiesFile);
File libDirectory = new File(new File(currentJar).getParent(), "lib");
for (String library : libDirectory.list()) {
sparkLauncher.addJar(new File(libDirectory, library).getAbsolutePath());
@@ -60,8 +57,7 @@ public class SparkUtil {
// Configure hadoop conf
sparkConf.set("spark.hadoop.mapred.output.compress", "true");
sparkConf.set("spark.hadoop.mapred.output.compression.codec", "true");
sparkConf.set("spark.hadoop.mapred.output.compression.codec",
"org.apache.hadoop.io.compress.GzipCodec");
sparkConf.set("spark.hadoop.mapred.output.compression.codec", "org.apache.hadoop.io.compress.GzipCodec");
sparkConf.set("spark.hadoop.mapred.output.compression.type", "BLOCK");
sparkConf = HoodieWriteClient.registerClasses(sparkConf);