CodeStyle formatting to conform to basic Checkstyle rules.
The code-style rules follow Google style with two changes: (1) the maximum line length is increased from 100 to 120 characters, and (2) Javadoc-related Checkstyle checks are disabled, since satisfying them needs more manual work. Both source and test code are checked for code style.
This commit is contained in:
committed by
vinoth chandar
parent
987f5d6b96
commit
788e4f2d2e
@@ -35,10 +35,10 @@ public class HoodiePrompt extends DefaultPromptProvider {
|
||||
case DATASET:
|
||||
return "hoodie:" + tableName + "->";
|
||||
case SYNC:
|
||||
return "hoodie:" + tableName + " <==> "
|
||||
+ HoodieCLI.syncTableMetadata.getTableConfig().getTableName() + "->";
|
||||
return "hoodie:" + tableName + " <==> " + HoodieCLI.syncTableMetadata.getTableConfig().getTableName() + "->";
|
||||
default:
|
||||
return "hoodie:" + tableName + "->";
|
||||
}
|
||||
return "hoodie:" + tableName + "->";
|
||||
}
|
||||
return "hoodie->";
|
||||
}
|
||||
|
||||
@@ -24,22 +24,18 @@ import org.springframework.stereotype.Component;
|
||||
|
||||
@Component
|
||||
@Order(Ordered.HIGHEST_PRECEDENCE)
|
||||
public class HoodieSplashScreen
|
||||
extends DefaultBannerProvider {
|
||||
public class HoodieSplashScreen extends DefaultBannerProvider {
|
||||
|
||||
private static String screen =
|
||||
"============================================" + OsUtils.LINE_SEPARATOR +
|
||||
"* *" + OsUtils.LINE_SEPARATOR +
|
||||
"* _ _ _ _ *" + OsUtils.LINE_SEPARATOR +
|
||||
"* | | | | | (_) *" + OsUtils.LINE_SEPARATOR +
|
||||
"* | |__| | ___ ___ __| |_ ___ *" + OsUtils.LINE_SEPARATOR +
|
||||
"* | __ |/ _ \\ / _ \\ / _` | |/ _ \\ *" +
|
||||
OsUtils.LINE_SEPARATOR +
|
||||
"* | | | | (_) | (_) | (_| | | __/ *" + OsUtils.LINE_SEPARATOR +
|
||||
"* |_| |_|\\___/ \\___/ \\__,_|_|\\___| *" +
|
||||
OsUtils.LINE_SEPARATOR +
|
||||
"* *" + OsUtils.LINE_SEPARATOR +
|
||||
"============================================" + OsUtils.LINE_SEPARATOR;
|
||||
private static String screen = "============================================" + OsUtils.LINE_SEPARATOR
|
||||
+ "* *" + OsUtils.LINE_SEPARATOR
|
||||
+ "* _ _ _ _ *" + OsUtils.LINE_SEPARATOR
|
||||
+ "* | | | | | (_) *" + OsUtils.LINE_SEPARATOR
|
||||
+ "* | |__| | ___ ___ __| |_ ___ *" + OsUtils.LINE_SEPARATOR
|
||||
+ "* | __ |/ _ \\ / _ \\ / _` | |/ _ \\ *" + OsUtils.LINE_SEPARATOR
|
||||
+ "* | | | | (_) | (_) | (_| | | __/ *" + OsUtils.LINE_SEPARATOR
|
||||
+ "* |_| |_|\\___/ \\___/ \\__,_|_|\\___| *" + OsUtils.LINE_SEPARATOR
|
||||
+ "* *" + OsUtils.LINE_SEPARATOR
|
||||
+ "============================================" + OsUtils.LINE_SEPARATOR;
|
||||
|
||||
public String getBanner() {
|
||||
return screen;
|
||||
|
||||
@@ -22,8 +22,7 @@ import org.springframework.shell.Bootstrap;
|
||||
public class Main {
|
||||
|
||||
/**
|
||||
* Main class that delegates to Spring Shell's Bootstrap class in order to simplify debugging
|
||||
* inside an IDE
|
||||
* Main class that delegates to Spring Shell's Bootstrap class in order to simplify debugging inside an IDE
|
||||
*/
|
||||
public static void main(String[] args) throws IOException {
|
||||
Bootstrap.main(args);
|
||||
|
||||
@@ -47,13 +47,11 @@ public class ArchivedCommitsCommand implements CommandMarker {
|
||||
}
|
||||
|
||||
@CliCommand(value = "show archived commits", help = "Read commits from archived files and show details")
|
||||
public String showCommits(
|
||||
@CliOption(key = {
|
||||
"limit"}, mandatory = false, help = "Limit commits", unspecifiedDefaultValue = "10")
|
||||
final Integer limit) throws IOException {
|
||||
public String showCommits(@CliOption(key = {
|
||||
"limit"}, mandatory = false, help = "Limit commits", unspecifiedDefaultValue = "10") final Integer limit)
|
||||
throws IOException {
|
||||
|
||||
System.out
|
||||
.println("===============> Showing only " + limit + " archived commits <===============");
|
||||
System.out.println("===============> Showing only " + limit + " archived commits <===============");
|
||||
String basePath = HoodieCLI.tableMetadata.getBasePath();
|
||||
FileStatus[] fsStatuses = FSUtils.getFs(basePath, HoodieCLI.conf)
|
||||
.globStatus(new Path(basePath + "/.hoodie/.commits_.archive*"));
|
||||
@@ -61,8 +59,7 @@ public class ArchivedCommitsCommand implements CommandMarker {
|
||||
int commits = 0;
|
||||
for (FileStatus fs : fsStatuses) {
|
||||
//read the archived file
|
||||
HoodieLogFormat.Reader reader = HoodieLogFormat
|
||||
.newReader(FSUtils.getFs(basePath, HoodieCLI.conf),
|
||||
HoodieLogFormat.Reader reader = HoodieLogFormat.newReader(FSUtils.getFs(basePath, HoodieCLI.conf),
|
||||
new HoodieLogFile(fs.getPath()), HoodieArchivedMetaEntry.getClassSchema());
|
||||
|
||||
List<IndexedRecord> readRecords = new ArrayList<>();
|
||||
@@ -71,20 +68,19 @@ public class ArchivedCommitsCommand implements CommandMarker {
|
||||
HoodieAvroDataBlock blk = (HoodieAvroDataBlock) reader.next();
|
||||
List<IndexedRecord> records = blk.getRecords();
|
||||
readRecords.addAll(records);
|
||||
if(commits == limit) {
|
||||
if (commits == limit) {
|
||||
break;
|
||||
}
|
||||
commits++;
|
||||
}
|
||||
List<String[]> readCommits = readRecords.stream().map(r -> (GenericRecord) r)
|
||||
.map(r -> readCommit(r)).collect(Collectors.toList());
|
||||
List<String[]> readCommits = readRecords.stream().map(r -> (GenericRecord) r).map(r -> readCommit(r))
|
||||
.collect(Collectors.toList());
|
||||
allCommits.addAll(readCommits);
|
||||
if(commits == limit) {
|
||||
if (commits == limit) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
return HoodiePrintHelper.print(
|
||||
new String[]{"CommitTime", "CommitType", "CommitDetails"},
|
||||
return HoodiePrintHelper.print(new String[] {"CommitTime", "CommitType", "CommitDetails"},
|
||||
allCommits.toArray(new String[allCommits.size()][]));
|
||||
}
|
||||
|
||||
@@ -122,6 +118,8 @@ public class ArchivedCommitsCommand implements CommandMarker {
|
||||
commitDetails.add(record.get("hoodieSavePointMetadata").toString());
|
||||
break;
|
||||
}
|
||||
default:
|
||||
return commitDetails.toArray(new String[commitDetails.size()]);
|
||||
}
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
|
||||
@@ -13,6 +13,7 @@
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package com.uber.hoodie.cli.commands;
|
||||
|
||||
import com.uber.hoodie.avro.model.HoodieCleanMetadata;
|
||||
@@ -63,51 +64,46 @@ public class CleansCommand implements CommandMarker {
|
||||
Collections.reverse(cleans);
|
||||
for (int i = 0; i < cleans.size(); i++) {
|
||||
HoodieInstant clean = cleans.get(i);
|
||||
HoodieCleanMetadata cleanMetadata =
|
||||
AvroUtils.deserializeHoodieCleanMetadata(timeline.getInstantDetails(clean).get());
|
||||
rows[i] = new String[]{clean.getTimestamp(), cleanMetadata.getEarliestCommitToRetain(),
|
||||
String.valueOf(cleanMetadata.getTotalFilesDeleted()),
|
||||
String.valueOf(cleanMetadata.getTimeTakenInMillis())};
|
||||
HoodieCleanMetadata cleanMetadata = AvroUtils
|
||||
.deserializeHoodieCleanMetadata(timeline.getInstantDetails(clean).get());
|
||||
rows[i] = new String[] {clean.getTimestamp(), cleanMetadata.getEarliestCommitToRetain(),
|
||||
String.valueOf(cleanMetadata.getTotalFilesDeleted()), String.valueOf(cleanMetadata.getTimeTakenInMillis())};
|
||||
}
|
||||
return HoodiePrintHelper.print(
|
||||
new String[]{"CleanTime", "EarliestCommandRetained", "Total Files Deleted",
|
||||
"Total Time Taken"}, rows);
|
||||
return HoodiePrintHelper
|
||||
.print(new String[] {"CleanTime", "EarliestCommandRetained", "Total Files Deleted", "Total Time Taken"},
|
||||
rows);
|
||||
}
|
||||
|
||||
@CliCommand(value = "cleans refresh", help = "Refresh the commits")
|
||||
public String refreshCleans() throws IOException {
|
||||
HoodieTableMetaClient metadata =
|
||||
new HoodieTableMetaClient(HoodieCLI.conf, HoodieCLI.tableMetadata.getBasePath());
|
||||
HoodieTableMetaClient metadata = new HoodieTableMetaClient(HoodieCLI.conf, HoodieCLI.tableMetadata.getBasePath());
|
||||
HoodieCLI.setTableMetadata(metadata);
|
||||
return "Metadata for table " + metadata.getTableConfig().getTableName() + " refreshed.";
|
||||
}
|
||||
|
||||
@CliCommand(value = "clean showpartitions", help = "Show partition level details of a clean")
|
||||
public String showCleanPartitions(
|
||||
@CliOption(key = {"clean"}, help = "clean to show")
|
||||
final String commitTime) throws Exception {
|
||||
public String showCleanPartitions(@CliOption(key = {"clean"}, help = "clean to show") final String commitTime)
|
||||
throws Exception {
|
||||
HoodieActiveTimeline activeTimeline = HoodieCLI.tableMetadata.getActiveTimeline();
|
||||
HoodieTimeline timeline = activeTimeline.getCleanerTimeline().filterCompletedInstants();
|
||||
HoodieInstant cleanInstant =
|
||||
new HoodieInstant(false, HoodieTimeline.CLEAN_ACTION, commitTime);
|
||||
HoodieInstant cleanInstant = new HoodieInstant(false, HoodieTimeline.CLEAN_ACTION, commitTime);
|
||||
|
||||
if (!timeline.containsInstant(cleanInstant)) {
|
||||
return "Clean " + commitTime + " not found in metadata " + timeline;
|
||||
}
|
||||
HoodieCleanMetadata cleanMetadata =
|
||||
AvroUtils.deserializeHoodieCleanMetadata(timeline.getInstantDetails(cleanInstant).get());
|
||||
HoodieCleanMetadata cleanMetadata = AvroUtils.deserializeHoodieCleanMetadata(
|
||||
timeline.getInstantDetails(cleanInstant).get());
|
||||
List<String[]> rows = new ArrayList<>();
|
||||
for (Map.Entry<String, HoodieCleanPartitionMetadata> entry : cleanMetadata
|
||||
.getPartitionMetadata().entrySet()) {
|
||||
for (Map.Entry<String, HoodieCleanPartitionMetadata> entry : cleanMetadata.getPartitionMetadata().entrySet()) {
|
||||
String path = entry.getKey();
|
||||
HoodieCleanPartitionMetadata stats = entry.getValue();
|
||||
String policy = stats.getPolicy();
|
||||
String totalSuccessDeletedFiles = String.valueOf(stats.getSuccessDeleteFiles().size());
|
||||
String totalFailedDeletedFiles = String.valueOf(stats.getFailedDeleteFiles().size());
|
||||
rows.add(new String[]{path, policy, totalSuccessDeletedFiles, totalFailedDeletedFiles});
|
||||
rows.add(new String[] {path, policy, totalSuccessDeletedFiles, totalFailedDeletedFiles});
|
||||
}
|
||||
return HoodiePrintHelper.print(
|
||||
new String[]{"Partition Path", "Cleaning policy", "Total Files Successfully Deleted",
|
||||
new String[] {"Partition Path", "Cleaning policy", "Total Files Successfully Deleted",
|
||||
"Total Failed Deletions"}, rows.toArray(new String[rows.size()][]));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -64,21 +64,18 @@ public class CommitsCommand implements CommandMarker {
|
||||
}
|
||||
|
||||
@CliCommand(value = "commits show", help = "Show the commits")
|
||||
public String showCommits(
|
||||
@CliOption(key = {
|
||||
"limit"}, mandatory = false, help = "Limit commits", unspecifiedDefaultValue = "10")
|
||||
final Integer limit) throws IOException {
|
||||
public String showCommits(@CliOption(key = {
|
||||
"limit"}, mandatory = false, help = "Limit commits", unspecifiedDefaultValue = "10") final Integer limit)
|
||||
throws IOException {
|
||||
HoodieActiveTimeline activeTimeline = HoodieCLI.tableMetadata.getActiveTimeline();
|
||||
HoodieTimeline timeline = activeTimeline.getCommitsTimeline()
|
||||
.filterCompletedInstants();
|
||||
HoodieTimeline timeline = activeTimeline.getCommitsTimeline().filterCompletedInstants();
|
||||
List<HoodieInstant> commits = timeline.getInstants().collect(Collectors.toList());
|
||||
String[][] rows = new String[commits.size()][];
|
||||
Collections.reverse(commits);
|
||||
for (int i = 0; i < commits.size(); i++) {
|
||||
HoodieInstant commit = commits.get(i);
|
||||
HoodieCommitMetadata commitMetadata =
|
||||
HoodieCommitMetadata.fromBytes(timeline.getInstantDetails(commit).get());
|
||||
rows[i] = new String[]{commit.getTimestamp(),
|
||||
HoodieCommitMetadata commitMetadata = HoodieCommitMetadata.fromBytes(timeline.getInstantDetails(commit).get());
|
||||
rows[i] = new String[] {commit.getTimestamp(),
|
||||
NumericUtils.humanReadableByteCount(commitMetadata.fetchTotalBytesWritten()),
|
||||
String.valueOf(commitMetadata.fetchTotalFilesInsert()),
|
||||
String.valueOf(commitMetadata.fetchTotalFilesUpdated()),
|
||||
@@ -88,39 +85,32 @@ public class CommitsCommand implements CommandMarker {
|
||||
String.valueOf(commitMetadata.fetchTotalWriteErrors())};
|
||||
}
|
||||
return HoodiePrintHelper.print(
|
||||
new String[]{"CommitTime", "Total Written (B)", "Total Files Added",
|
||||
"Total Files Updated", "Total Partitions Written", "Total Records Written",
|
||||
"Total Update Records Written", "Total Errors"}, rows);
|
||||
new String[] {"CommitTime", "Total Written (B)", "Total Files Added", "Total Files Updated",
|
||||
"Total Partitions Written", "Total Records Written", "Total Update Records Written", "Total Errors"}, rows);
|
||||
}
|
||||
|
||||
@CliCommand(value = "commits refresh", help = "Refresh the commits")
|
||||
public String refreshCommits() throws IOException {
|
||||
HoodieTableMetaClient metadata =
|
||||
new HoodieTableMetaClient(HoodieCLI.conf, HoodieCLI.tableMetadata.getBasePath());
|
||||
HoodieTableMetaClient metadata = new HoodieTableMetaClient(HoodieCLI.conf, HoodieCLI.tableMetadata.getBasePath());
|
||||
HoodieCLI.setTableMetadata(metadata);
|
||||
return "Metadata for table " + metadata.getTableConfig().getTableName() + " refreshed.";
|
||||
}
|
||||
|
||||
@CliCommand(value = "commit rollback", help = "Rollback a commit")
|
||||
public String rollbackCommit(
|
||||
@CliOption(key = {"commit"}, help = "Commit to rollback")
|
||||
final String commitTime,
|
||||
@CliOption(key = {"sparkProperties"}, help = "Spark Properites File Path")
|
||||
final String sparkPropertiesPath) throws Exception {
|
||||
public String rollbackCommit(@CliOption(key = {"commit"}, help = "Commit to rollback") final String commitTime,
|
||||
@CliOption(key = {"sparkProperties"}, help = "Spark Properites File Path") final String sparkPropertiesPath)
|
||||
throws Exception {
|
||||
HoodieActiveTimeline activeTimeline = HoodieCLI.tableMetadata.getActiveTimeline();
|
||||
HoodieTimeline timeline = activeTimeline.getCommitsTimeline()
|
||||
.filterCompletedInstants();
|
||||
HoodieInstant commitInstant = new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION,
|
||||
commitTime);
|
||||
HoodieTimeline timeline = activeTimeline.getCommitsTimeline().filterCompletedInstants();
|
||||
HoodieInstant commitInstant = new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, commitTime);
|
||||
|
||||
if (!timeline.containsInstant(commitInstant)) {
|
||||
return "Commit " + commitTime + " not found in Commits " + timeline;
|
||||
}
|
||||
|
||||
SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath);
|
||||
sparkLauncher.addAppArgs(SparkMain.SparkCommand.ROLLBACK.toString(),
|
||||
commitTime,
|
||||
HoodieCLI.tableMetadata.getBasePath());
|
||||
sparkLauncher
|
||||
.addAppArgs(SparkMain.SparkCommand.ROLLBACK.toString(), commitTime, HoodieCLI.tableMetadata.getBasePath());
|
||||
Process process = sparkLauncher.launch();
|
||||
InputStreamConsumer.captureOutput(process);
|
||||
int exitCode = process.waitFor();
|
||||
@@ -133,23 +123,18 @@ public class CommitsCommand implements CommandMarker {
|
||||
}
|
||||
|
||||
@CliCommand(value = "commit showpartitions", help = "Show partition level details of a commit")
|
||||
public String showCommitPartitions(
|
||||
@CliOption(key = {"commit"}, help = "Commit to show")
|
||||
final String commitTime) throws Exception {
|
||||
public String showCommitPartitions(@CliOption(key = {"commit"}, help = "Commit to show") final String commitTime)
|
||||
throws Exception {
|
||||
HoodieActiveTimeline activeTimeline = HoodieCLI.tableMetadata.getActiveTimeline();
|
||||
HoodieTimeline timeline = activeTimeline.getCommitsTimeline()
|
||||
.filterCompletedInstants();
|
||||
HoodieInstant commitInstant = new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION,
|
||||
commitTime);
|
||||
HoodieTimeline timeline = activeTimeline.getCommitsTimeline().filterCompletedInstants();
|
||||
HoodieInstant commitInstant = new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, commitTime);
|
||||
|
||||
if (!timeline.containsInstant(commitInstant)) {
|
||||
return "Commit " + commitTime + " not found in Commits " + timeline;
|
||||
}
|
||||
HoodieCommitMetadata meta =
|
||||
HoodieCommitMetadata.fromBytes(activeTimeline.getInstantDetails(commitInstant).get());
|
||||
HoodieCommitMetadata meta = HoodieCommitMetadata.fromBytes(activeTimeline.getInstantDetails(commitInstant).get());
|
||||
List<String[]> rows = new ArrayList<String[]>();
|
||||
for (Map.Entry<String, List<HoodieWriteStat>> entry : meta.getPartitionToWriteStats()
|
||||
.entrySet()) {
|
||||
for (Map.Entry<String, List<HoodieWriteStat>> entry : meta.getPartitionToWriteStats().entrySet()) {
|
||||
String path = entry.getKey();
|
||||
List<HoodieWriteStat> stats = entry.getValue();
|
||||
long totalFilesAdded = 0;
|
||||
@@ -169,50 +154,40 @@ public class CommitsCommand implements CommandMarker {
|
||||
totalBytesWritten += stat.getTotalWriteBytes();
|
||||
totalWriteErrors += stat.getTotalWriteErrors();
|
||||
}
|
||||
rows.add(new String[]{path, String.valueOf(totalFilesAdded),
|
||||
String.valueOf(totalFilesUpdated), String.valueOf(totalRecordsInserted),
|
||||
String.valueOf(totalRecordsUpdated),
|
||||
NumericUtils.humanReadableByteCount(totalBytesWritten),
|
||||
String.valueOf(totalWriteErrors)});
|
||||
rows.add(new String[] {path, String.valueOf(totalFilesAdded), String.valueOf(totalFilesUpdated),
|
||||
String.valueOf(totalRecordsInserted), String.valueOf(totalRecordsUpdated),
|
||||
NumericUtils.humanReadableByteCount(totalBytesWritten), String.valueOf(totalWriteErrors)});
|
||||
|
||||
}
|
||||
return HoodiePrintHelper.print(
|
||||
new String[]{"Partition Path", "Total Files Added", "Total Files Updated",
|
||||
"Total Records Inserted", "Total Records Updated", "Total Bytes Written",
|
||||
"Total Errors"}, rows.toArray(new String[rows.size()][]));
|
||||
new String[] {"Partition Path", "Total Files Added", "Total Files Updated", "Total Records Inserted",
|
||||
"Total Records Updated", "Total Bytes Written", "Total Errors"}, rows.toArray(new String[rows.size()][]));
|
||||
}
|
||||
|
||||
@CliCommand(value = "commit showfiles", help = "Show file level details of a commit")
|
||||
public String showCommitFiles(
|
||||
@CliOption(key = {"commit"}, help = "Commit to show")
|
||||
final String commitTime) throws Exception {
|
||||
public String showCommitFiles(@CliOption(key = {"commit"}, help = "Commit to show") final String commitTime)
|
||||
throws Exception {
|
||||
HoodieActiveTimeline activeTimeline = HoodieCLI.tableMetadata.getActiveTimeline();
|
||||
HoodieTimeline timeline = activeTimeline.getCommitsTimeline()
|
||||
.filterCompletedInstants();
|
||||
HoodieInstant commitInstant = new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION,
|
||||
commitTime);
|
||||
HoodieTimeline timeline = activeTimeline.getCommitsTimeline().filterCompletedInstants();
|
||||
HoodieInstant commitInstant = new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, commitTime);
|
||||
|
||||
if (!timeline.containsInstant(commitInstant)) {
|
||||
return "Commit " + commitTime + " not found in Commits " + timeline;
|
||||
}
|
||||
HoodieCommitMetadata meta =
|
||||
HoodieCommitMetadata.fromBytes(activeTimeline.getInstantDetails(commitInstant).get());
|
||||
HoodieCommitMetadata meta = HoodieCommitMetadata.fromBytes(activeTimeline.getInstantDetails(commitInstant).get());
|
||||
List<String[]> rows = new ArrayList<String[]>();
|
||||
for (Map.Entry<String, List<HoodieWriteStat>> entry : meta.getPartitionToWriteStats()
|
||||
.entrySet()) {
|
||||
for (Map.Entry<String, List<HoodieWriteStat>> entry : meta.getPartitionToWriteStats().entrySet()) {
|
||||
String path = entry.getKey();
|
||||
List<HoodieWriteStat> stats = entry.getValue();
|
||||
for (HoodieWriteStat stat : stats) {
|
||||
rows.add(new String[]{path, stat.getFileId(), stat.getPrevCommit(),
|
||||
String.valueOf(stat.getNumUpdateWrites()), String.valueOf(stat.getNumWrites()),
|
||||
String.valueOf(stat.getTotalWriteBytes()),
|
||||
rows.add(new String[] {path, stat.getFileId(), stat.getPrevCommit(), String.valueOf(stat.getNumUpdateWrites()),
|
||||
String.valueOf(stat.getNumWrites()), String.valueOf(stat.getTotalWriteBytes()),
|
||||
String.valueOf(stat.getTotalWriteErrors())});
|
||||
}
|
||||
}
|
||||
return HoodiePrintHelper.print(
|
||||
new String[]{"Partition Path", "File ID", "Previous Commit", "Total Records Updated",
|
||||
"Total Records Written", "Total Bytes Written", "Total Errors"},
|
||||
rows.toArray(new String[rows.size()][]));
|
||||
new String[] {"Partition Path", "File ID", "Previous Commit", "Total Records Updated", "Total Records Written",
|
||||
"Total Bytes Written", "Total Errors"}, rows.toArray(new String[rows.size()][]));
|
||||
}
|
||||
|
||||
@CliAvailabilityIndicator({"commits compare"})
|
||||
@@ -221,38 +196,30 @@ public class CommitsCommand implements CommandMarker {
|
||||
}
|
||||
|
||||
@CliCommand(value = "commits compare", help = "Compare commits with another Hoodie dataset")
|
||||
public String compareCommits(
|
||||
@CliOption(key = {"path"}, help = "Path of the dataset to compare to")
|
||||
final String path) throws Exception {
|
||||
public String compareCommits(@CliOption(key = {"path"}, help = "Path of the dataset to compare to") final String path)
|
||||
throws Exception {
|
||||
|
||||
HoodieTableMetaClient target = new HoodieTableMetaClient(HoodieCLI.conf, path);
|
||||
HoodieTimeline targetTimeline = target.getActiveTimeline().getCommitsTimeline()
|
||||
.filterCompletedInstants();
|
||||
HoodieTimeline targetTimeline = target.getActiveTimeline().getCommitsTimeline().filterCompletedInstants();
|
||||
HoodieTableMetaClient source = HoodieCLI.tableMetadata;
|
||||
HoodieTimeline sourceTimeline = source.getActiveTimeline().getCommitsTimeline()
|
||||
.filterCompletedInstants();
|
||||
HoodieTimeline sourceTimeline = source.getActiveTimeline().getCommitsTimeline().filterCompletedInstants();
|
||||
String targetLatestCommit =
|
||||
targetTimeline.getInstants().iterator().hasNext() ? "0"
|
||||
: targetTimeline.lastInstant().get().getTimestamp();
|
||||
targetTimeline.getInstants().iterator().hasNext() ? "0" : targetTimeline.lastInstant().get().getTimestamp();
|
||||
String sourceLatestCommit =
|
||||
sourceTimeline.getInstants().iterator().hasNext() ? "0"
|
||||
: sourceTimeline.lastInstant().get().getTimestamp();
|
||||
sourceTimeline.getInstants().iterator().hasNext() ? "0" : sourceTimeline.lastInstant().get().getTimestamp();
|
||||
|
||||
if (sourceLatestCommit != null &&
|
||||
HoodieTimeline
|
||||
.compareTimestamps(targetLatestCommit, sourceLatestCommit, HoodieTimeline.GREATER)) {
|
||||
if (sourceLatestCommit != null && HoodieTimeline.compareTimestamps(targetLatestCommit, sourceLatestCommit,
|
||||
HoodieTimeline.GREATER)) {
|
||||
// source is behind the target
|
||||
List<String> commitsToCatchup =
|
||||
targetTimeline.findInstantsAfter(sourceLatestCommit, Integer.MAX_VALUE)
|
||||
.getInstants().map(HoodieInstant::getTimestamp).collect(Collectors.toList());
|
||||
return "Source " + source.getTableConfig().getTableName() + " is behind by "
|
||||
+ commitsToCatchup.size() + " commits. Commits to catch up - " + commitsToCatchup;
|
||||
List<String> commitsToCatchup = targetTimeline.findInstantsAfter(sourceLatestCommit, Integer.MAX_VALUE)
|
||||
.getInstants().map(HoodieInstant::getTimestamp).collect(Collectors.toList());
|
||||
return "Source " + source.getTableConfig().getTableName() + " is behind by " + commitsToCatchup.size()
|
||||
+ " commits. Commits to catch up - " + commitsToCatchup;
|
||||
} else {
|
||||
List<String> commitsToCatchup =
|
||||
sourceTimeline.findInstantsAfter(targetLatestCommit, Integer.MAX_VALUE)
|
||||
.getInstants().map(HoodieInstant::getTimestamp).collect(Collectors.toList());
|
||||
return "Source " + source.getTableConfig().getTableName() + " is ahead by "
|
||||
+ commitsToCatchup.size() + " commits. Commits to catch up - " + commitsToCatchup;
|
||||
List<String> commitsToCatchup = sourceTimeline.findInstantsAfter(targetLatestCommit, Integer.MAX_VALUE)
|
||||
.getInstants().map(HoodieInstant::getTimestamp).collect(Collectors.toList());
|
||||
return "Source " + source.getTableConfig().getTableName() + " is ahead by " + commitsToCatchup.size()
|
||||
+ " commits. Commits to catch up - " + commitsToCatchup;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -262,13 +229,12 @@ public class CommitsCommand implements CommandMarker {
|
||||
}
|
||||
|
||||
@CliCommand(value = "commits sync", help = "Compare commits with another Hoodie dataset")
|
||||
public String syncCommits(
|
||||
@CliOption(key = {"path"}, help = "Path of the dataset to compare to")
|
||||
final String path) throws Exception {
|
||||
public String syncCommits(@CliOption(key = {"path"}, help = "Path of the dataset to compare to") final String path)
|
||||
throws Exception {
|
||||
HoodieCLI.syncTableMetadata = new HoodieTableMetaClient(HoodieCLI.conf, path);
|
||||
HoodieCLI.state = HoodieCLI.CLIState.SYNC;
|
||||
return "Load sync state between " + HoodieCLI.tableMetadata.getTableConfig().getTableName()
|
||||
+ " and " + HoodieCLI.syncTableMetadata.getTableConfig().getTableName();
|
||||
return "Load sync state between " + HoodieCLI.tableMetadata.getTableConfig().getTableName() + " and "
|
||||
+ HoodieCLI.syncTableMetadata.getTableConfig().getTableName();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -29,13 +29,12 @@ public class DatasetsCommand implements CommandMarker {
|
||||
|
||||
@CliCommand(value = "connect", help = "Connect to a hoodie dataset")
|
||||
public String connect(
|
||||
@CliOption(key = {"path"}, mandatory = true, help = "Base Path of the dataset")
|
||||
final String path) throws IOException {
|
||||
@CliOption(key = {"path"}, mandatory = true, help = "Base Path of the dataset") final String path)
|
||||
throws IOException {
|
||||
boolean initialized = HoodieCLI.initConf();
|
||||
HoodieCLI.initFS(initialized);
|
||||
HoodieCLI.setTableMetadata(new HoodieTableMetaClient(HoodieCLI.conf, path));
|
||||
HoodieCLI.state = HoodieCLI.CLIState.DATASET;
|
||||
return "Metadata for table " + HoodieCLI.tableMetadata.getTableConfig().getTableName()
|
||||
+ " loaded";
|
||||
return "Metadata for table " + HoodieCLI.tableMetadata.getTableConfig().getTableName() + " loaded";
|
||||
}
|
||||
}
|
||||
|
||||
@@ -37,44 +37,33 @@ public class HDFSParquetImportCommand implements CommandMarker {
|
||||
|
||||
@CliCommand(value = "hdfsparquetimport", help = "Imports hdfs dataset to a hoodie dataset")
|
||||
public String convert(
|
||||
@CliOption(key = "srcPath", mandatory = true, help = "Base path for the input dataset")
|
||||
final String srcPath,
|
||||
@CliOption(key = "srcType", mandatory = true, help = "Source type for the input dataset")
|
||||
final String srcType,
|
||||
@CliOption(key = "targetPath", mandatory = true, help = "Base path for the target hoodie dataset")
|
||||
final String targetPath,
|
||||
@CliOption(key = "tableName", mandatory = true, help = "Table name")
|
||||
final String tableName,
|
||||
@CliOption(key = "tableType", mandatory = true, help = "Table type")
|
||||
final String tableType,
|
||||
@CliOption(key = "rowKeyField", mandatory = true, help = "Row key field name")
|
||||
final String rowKeyField,
|
||||
@CliOption(key = "partitionPathField", mandatory = true, help = "Partition path field name")
|
||||
final String partitionPathField,
|
||||
@CliOption(key = {"parallelism"}, mandatory = true, help = "Parallelism for hoodie insert")
|
||||
final String parallelism,
|
||||
@CliOption(key = "schemaFilePath", mandatory = true, help = "Path for Avro schema file")
|
||||
final String schemaFilePath,
|
||||
@CliOption(key = "format", mandatory = true, help = "Format for the input data")
|
||||
final String format,
|
||||
@CliOption(key = "sparkMemory", mandatory = true, help = "Spark executor memory")
|
||||
final String sparkMemory,
|
||||
@CliOption(key = "retry", mandatory = true, help = "Number of retries")
|
||||
final String retry)
|
||||
throws Exception {
|
||||
@CliOption(key = "srcPath", mandatory = true, help = "Base path for the input dataset") final String srcPath,
|
||||
@CliOption(key = "srcType", mandatory = true, help = "Source type for the input dataset") final String srcType,
|
||||
@CliOption(key = "targetPath", mandatory = true, help = "Base path for the target hoodie dataset") final String
|
||||
targetPath,
|
||||
@CliOption(key = "tableName", mandatory = true, help = "Table name") final String tableName,
|
||||
@CliOption(key = "tableType", mandatory = true, help = "Table type") final String tableType,
|
||||
@CliOption(key = "rowKeyField", mandatory = true, help = "Row key field name") final String rowKeyField,
|
||||
@CliOption(key = "partitionPathField", mandatory = true, help = "Partition path field name") final String
|
||||
partitionPathField,
|
||||
@CliOption(key = {
|
||||
"parallelism"}, mandatory = true, help = "Parallelism for hoodie insert") final String parallelism,
|
||||
@CliOption(key = "schemaFilePath", mandatory = true, help = "Path for Avro schema file") final String
|
||||
schemaFilePath,
|
||||
@CliOption(key = "format", mandatory = true, help = "Format for the input data") final String format,
|
||||
@CliOption(key = "sparkMemory", mandatory = true, help = "Spark executor memory") final String sparkMemory,
|
||||
@CliOption(key = "retry", mandatory = true, help = "Number of retries") final String retry) throws Exception {
|
||||
|
||||
validate(format, srcType);
|
||||
|
||||
boolean initialized = HoodieCLI.initConf();
|
||||
HoodieCLI.initFS(initialized);
|
||||
String sparkPropertiesPath = Utils
|
||||
.getDefaultPropertiesFile(
|
||||
scala.collection.JavaConversions.propertiesAsScalaMap(System.getProperties()));
|
||||
String sparkPropertiesPath = Utils.getDefaultPropertiesFile(
|
||||
scala.collection.JavaConversions.propertiesAsScalaMap(System.getProperties()));
|
||||
SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath);
|
||||
|
||||
sparkLauncher.addAppArgs(SparkCommand.IMPORT.toString(), srcPath, targetPath, tableName,
|
||||
tableType, rowKeyField, partitionPathField, parallelism, schemaFilePath, sparkMemory,
|
||||
retry);
|
||||
sparkLauncher.addAppArgs(SparkCommand.IMPORT.toString(), srcPath, targetPath, tableName, tableType, rowKeyField,
|
||||
partitionPathField, parallelism, schemaFilePath, sparkMemory, retry);
|
||||
Process process = sparkLauncher.launch();
|
||||
InputStreamConsumer.captureOutput(process);
|
||||
int exitCode = process.waitFor();
|
||||
|
||||
@@ -64,25 +64,25 @@ public class HoodieLogFileCommand implements CommandMarker {
|
||||
|
||||
@CliCommand(value = "show logfile metadata", help = "Read commit metadata from log files")
|
||||
public String showLogFileCommits(
|
||||
@CliOption(key = "logFilePathPattern", mandatory = true, help = "Fully qualified path for the log file")
|
||||
final String logFilePathPattern) throws IOException {
|
||||
@CliOption(key = "logFilePathPattern", mandatory = true, help = "Fully qualified path for the log file") final
|
||||
String logFilePathPattern)
|
||||
throws IOException {
|
||||
|
||||
FileSystem fs = HoodieCLI.tableMetadata.getFs();
|
||||
List<String> logFilePaths = Arrays.stream(fs.globStatus(new Path(logFilePathPattern)))
|
||||
.map(status -> status.getPath().toString()).collect(Collectors.toList());
|
||||
Map<String, List<Tuple3<HoodieLogBlockType, Tuple2<Map<HeaderMetadataType, String>, Map<HeaderMetadataType, String>>, Integer>>> commitCountAndMetadata = Maps
|
||||
.newHashMap();
|
||||
Map<String, List<Tuple3<HoodieLogBlockType, Tuple2<Map<HeaderMetadataType, String>, Map<HeaderMetadataType,
|
||||
String>>, Integer>>>
|
||||
commitCountAndMetadata = Maps.newHashMap();
|
||||
int totalEntries = 0;
|
||||
int numCorruptBlocks = 0;
|
||||
|
||||
for (String logFilePath : logFilePaths) {
|
||||
FileStatus[] fsStatus = fs.listStatus(
|
||||
new Path(logFilePath));
|
||||
Schema writerSchema = new AvroSchemaConverter()
|
||||
.convert(SchemaUtil
|
||||
.readSchemaFromLogFile(HoodieCLI.tableMetadata.getFs(), new Path(logFilePath)));
|
||||
HoodieLogFormat.Reader reader = HoodieLogFormat.newReader(fs,
|
||||
new HoodieLogFile(fsStatus[0].getPath()), writerSchema);
|
||||
FileStatus[] fsStatus = fs.listStatus(new Path(logFilePath));
|
||||
Schema writerSchema = new AvroSchemaConverter().convert(
|
||||
SchemaUtil.readSchemaFromLogFile(HoodieCLI.tableMetadata.getFs(), new Path(logFilePath)));
|
||||
HoodieLogFormat.Reader reader = HoodieLogFormat
|
||||
.newReader(fs, new HoodieLogFile(fsStatus[0].getPath()), writerSchema);
|
||||
|
||||
// read the avro blocks
|
||||
while (reader.hasNext()) {
|
||||
@@ -104,15 +104,14 @@ public class HoodieLogFileCommand implements CommandMarker {
|
||||
}
|
||||
}
|
||||
if (commitCountAndMetadata.containsKey(instantTime)) {
|
||||
commitCountAndMetadata.get(instantTime)
|
||||
.add(new Tuple3<>(n.getBlockType(),
|
||||
new Tuple2<>(n.getLogBlockHeader(), n.getLogBlockFooter()), recordCount));
|
||||
commitCountAndMetadata.get(instantTime).add(
|
||||
new Tuple3<>(n.getBlockType(), new Tuple2<>(n.getLogBlockHeader(), n.getLogBlockFooter()), recordCount));
|
||||
totalEntries++;
|
||||
} else {
|
||||
List<Tuple3<HoodieLogBlockType, Tuple2<Map<HeaderMetadataType, String>, Map<HeaderMetadataType, String>>, Integer>> list
|
||||
= new ArrayList<>();
|
||||
list.add(new Tuple3<>(n.getBlockType(),
|
||||
new Tuple2<>(n.getLogBlockHeader(), n.getLogBlockFooter()), recordCount));
|
||||
List<Tuple3<HoodieLogBlockType, Tuple2<Map<HeaderMetadataType, String>, Map<HeaderMetadataType, String>>,
|
||||
Integer>> list = new ArrayList<>();
|
||||
list.add(
|
||||
new Tuple3<>(n.getBlockType(), new Tuple2<>(n.getLogBlockHeader(), n.getLogBlockFooter()), recordCount));
|
||||
commitCountAndMetadata.put(instantTime, list);
|
||||
totalEntries++;
|
||||
}
|
||||
@@ -121,11 +120,12 @@ public class HoodieLogFileCommand implements CommandMarker {
|
||||
String[][] rows = new String[totalEntries + 1][];
|
||||
int i = 0;
|
||||
ObjectMapper objectMapper = new ObjectMapper();
|
||||
for (Map.Entry<String, List<Tuple3<HoodieLogBlockType, Tuple2<Map<HeaderMetadataType, String>, Map<HeaderMetadataType, String>>, Integer>>> entry : commitCountAndMetadata
|
||||
.entrySet()) {
|
||||
for (Map.Entry<String, List<Tuple3<HoodieLogBlockType,
|
||||
Tuple2<Map<HeaderMetadataType, String>, Map<HeaderMetadataType, String>>, Integer>>> entry
|
||||
: commitCountAndMetadata.entrySet()) {
|
||||
String instantTime = entry.getKey().toString();
|
||||
for (Tuple3<HoodieLogBlockType, Tuple2<Map<HeaderMetadataType, String>, Map<HeaderMetadataType, String>>, Integer> tuple3 : entry
|
||||
.getValue()) {
|
||||
for (Tuple3<HoodieLogBlockType, Tuple2<Map<HeaderMetadataType, String>,
|
||||
Map<HeaderMetadataType, String>>, Integer> tuple3 : entry.getValue()) {
|
||||
String[] output = new String[5];
|
||||
output[0] = instantTime;
|
||||
output[1] = String.valueOf(tuple3._3());
|
||||
@@ -136,24 +136,21 @@ public class HoodieLogFileCommand implements CommandMarker {
|
||||
i++;
|
||||
}
|
||||
}
|
||||
return HoodiePrintHelper.print(
|
||||
new String[]{"InstantTime", "RecordCount", "BlockType", "HeaderMetadata", "FooterMetadata"},
|
||||
rows);
|
||||
return HoodiePrintHelper
|
||||
.print(new String[] {"InstantTime", "RecordCount", "BlockType", "HeaderMetadata", "FooterMetadata"},
|
||||
rows);
|
||||
}
|
||||
|
||||
@CliCommand(value = "show logfile records", help = "Read records from log files")
|
||||
public String showLogFileRecords(
|
||||
@CliOption(key = {
|
||||
"limit"}, mandatory = false, help = "Limit commits", unspecifiedDefaultValue = "10")
|
||||
final Integer limit,
|
||||
public String showLogFileRecords(@CliOption(key = {
|
||||
"limit"}, mandatory = false, help = "Limit commits", unspecifiedDefaultValue = "10") final Integer limit,
|
||||
@CliOption(key = "logFilePathPattern", mandatory = true, help = "Fully qualified paths for the log files")
|
||||
final String logFilePathPattern,
|
||||
@CliOption(key = "mergeRecords", mandatory = false, help = "If the records in the log files should be merged",
|
||||
unspecifiedDefaultValue = "false")
|
||||
final Boolean shouldMerge) throws IOException {
|
||||
unspecifiedDefaultValue = "false") final Boolean shouldMerge)
|
||||
throws IOException {
|
||||
|
||||
System.out
|
||||
.println("===============> Showing only " + limit + " records <===============");
|
||||
System.out.println("===============> Showing only " + limit + " records <===============");
|
||||
|
||||
FileSystem fs = HoodieCLI.tableMetadata.getFs();
|
||||
List<String> logFilePaths = Arrays.stream(fs.globStatus(new Path(logFilePathPattern)))
|
||||
@@ -162,9 +159,8 @@ public class HoodieLogFileCommand implements CommandMarker {
|
||||
// TODO : readerSchema can change across blocks/log files, fix this inside Scanner
|
||||
AvroSchemaConverter converter = new AvroSchemaConverter();
|
||||
// get schema from last log file
|
||||
Schema readerSchema = converter
|
||||
.convert(SchemaUtil
|
||||
.readSchemaFromLogFile(fs, new Path(logFilePaths.get(logFilePaths.size() - 1))));
|
||||
Schema readerSchema = converter.convert(
|
||||
SchemaUtil.readSchemaFromLogFile(fs, new Path(logFilePaths.get(logFilePaths.size() - 1))));
|
||||
|
||||
List<IndexedRecord> allRecords = new ArrayList<>();
|
||||
|
||||
@@ -186,11 +182,10 @@ public class HoodieLogFileCommand implements CommandMarker {
|
||||
}
|
||||
} else {
|
||||
for (String logFile : logFilePaths) {
|
||||
Schema writerSchema = new AvroSchemaConverter()
|
||||
.convert(SchemaUtil
|
||||
.readSchemaFromLogFile(HoodieCLI.tableMetadata.getFs(), new Path(logFile)));
|
||||
HoodieLogFormat.Reader reader = HoodieLogFormat.newReader(fs,
|
||||
new HoodieLogFile(new Path(logFile)), writerSchema);
|
||||
Schema writerSchema = new AvroSchemaConverter().convert(
|
||||
SchemaUtil.readSchemaFromLogFile(HoodieCLI.tableMetadata.getFs(), new Path(logFile)));
|
||||
HoodieLogFormat.Reader reader = HoodieLogFormat
|
||||
.newReader(fs, new HoodieLogFile(new Path(logFile)), writerSchema);
|
||||
// read the avro blocks
|
||||
while (reader.hasNext()) {
|
||||
HoodieLogBlock n = reader.next();
|
||||
@@ -216,7 +211,6 @@ public class HoodieLogFileCommand implements CommandMarker {
|
||||
rows[i] = data;
|
||||
i++;
|
||||
}
|
||||
return HoodiePrintHelper.print(
|
||||
new String[]{"Records"}, rows);
|
||||
return HoodiePrintHelper.print(new String[] {"Records"}, rows);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -40,26 +40,22 @@ public class HoodieSyncCommand implements CommandMarker {
|
||||
|
||||
@CliCommand(value = "sync validate", help = "Validate the sync by counting the number of records")
|
||||
public String validateSync(
|
||||
@CliOption(key = {"mode"}, unspecifiedDefaultValue = "complete", help = "Check mode")
|
||||
final String mode,
|
||||
@CliOption(key = {"mode"}, unspecifiedDefaultValue = "complete", help = "Check mode") final String mode,
|
||||
@CliOption(key = {"sourceDb"}, unspecifiedDefaultValue = "rawdata", help = "source database") final String srcDb,
|
||||
@CliOption(key = {
|
||||
"sourceDb"}, unspecifiedDefaultValue = "rawdata", help = "source database")
|
||||
final String srcDb,
|
||||
@CliOption(key = {
|
||||
"targetDb"}, unspecifiedDefaultValue = "dwh_hoodie", help = "target database")
|
||||
final String tgtDb,
|
||||
"targetDb"}, unspecifiedDefaultValue = "dwh_hoodie", help = "target database") final String tgtDb,
|
||||
@CliOption(key = {
|
||||
"partitionCount"}, unspecifiedDefaultValue = "5", help = "total number of recent partitions to validate")
|
||||
final int partitionCount,
|
||||
@CliOption(key = {
|
||||
"hiveServerUrl"}, mandatory = true, help = "hiveServerURL to connect to")
|
||||
final String hiveServerUrl,
|
||||
"hiveServerUrl"}, mandatory = true, help = "hiveServerURL to connect to") final String hiveServerUrl,
|
||||
@CliOption(key = {
|
||||
"hiveUser"}, mandatory = false, unspecifiedDefaultValue = "", help = "hive username to connect to")
|
||||
final String hiveUser,
|
||||
"hiveUser"}, mandatory = false, unspecifiedDefaultValue = "", help = "hive username to connect to") final
|
||||
String hiveUser,
|
||||
@CliOption(key = {
|
||||
"hivePass"}, mandatory = true, unspecifiedDefaultValue = "", help = "hive password to connect to")
|
||||
final String hivePass) throws Exception {
|
||||
"hivePass"}, mandatory = true, unspecifiedDefaultValue = "", help = "hive password to connect to") final
|
||||
String hivePass)
|
||||
throws Exception {
|
||||
HoodieTableMetaClient target = HoodieCLI.syncTableMetadata;
|
||||
HoodieTimeline targetTimeline = target.getActiveTimeline().getCommitsTimeline();
|
||||
HoodieTableMetaClient source = HoodieCLI.tableMetadata;
|
||||
@@ -70,52 +66,42 @@ public class HoodieSyncCommand implements CommandMarker {
|
||||
sourceCount = HiveUtil.countRecords(hiveServerUrl, source, srcDb, hiveUser, hivePass);
|
||||
targetCount = HiveUtil.countRecords(hiveServerUrl, target, tgtDb, hiveUser, hivePass);
|
||||
} else if ("latestPartitions".equals(mode)) {
|
||||
sourceCount = HiveUtil
|
||||
.countRecords(hiveServerUrl, source, srcDb, partitionCount, hiveUser, hivePass);
|
||||
targetCount = HiveUtil
|
||||
.countRecords(hiveServerUrl, target, tgtDb, partitionCount, hiveUser, hivePass);
|
||||
sourceCount = HiveUtil.countRecords(hiveServerUrl, source, srcDb, partitionCount, hiveUser, hivePass);
|
||||
targetCount = HiveUtil.countRecords(hiveServerUrl, target, tgtDb, partitionCount, hiveUser, hivePass);
|
||||
}
|
||||
|
||||
String targetLatestCommit =
|
||||
targetTimeline.getInstants().iterator().hasNext() ? "0"
|
||||
: targetTimeline.lastInstant().get().getTimestamp();
|
||||
targetTimeline.getInstants().iterator().hasNext() ? "0" : targetTimeline.lastInstant().get().getTimestamp();
|
||||
String sourceLatestCommit =
|
||||
sourceTimeline.getInstants().iterator().hasNext() ? "0"
|
||||
: sourceTimeline.lastInstant().get().getTimestamp();
|
||||
sourceTimeline.getInstants().iterator().hasNext() ? "0" : sourceTimeline.lastInstant().get().getTimestamp();
|
||||
|
||||
if (sourceLatestCommit != null && HoodieTimeline
|
||||
.compareTimestamps(targetLatestCommit, sourceLatestCommit, HoodieTimeline.GREATER)) {
|
||||
if (sourceLatestCommit != null && HoodieTimeline.compareTimestamps(targetLatestCommit, sourceLatestCommit,
|
||||
HoodieTimeline.GREATER)) {
|
||||
// source is behind the target
|
||||
List<HoodieInstant> commitsToCatchup =
|
||||
targetTimeline.findInstantsAfter(sourceLatestCommit, Integer.MAX_VALUE).getInstants()
|
||||
.collect(Collectors.toList());
|
||||
List<HoodieInstant> commitsToCatchup = targetTimeline.findInstantsAfter(sourceLatestCommit, Integer.MAX_VALUE)
|
||||
.getInstants().collect(Collectors.toList());
|
||||
if (commitsToCatchup.isEmpty()) {
|
||||
return "Count difference now is (count(" + target.getTableConfig().getTableName()
|
||||
+ ") - count(" + source.getTableConfig().getTableName() + ") == " + (targetCount
|
||||
- sourceCount);
|
||||
return "Count difference now is (count(" + target.getTableConfig().getTableName() + ") - count("
|
||||
+ source.getTableConfig().getTableName() + ") == " + (targetCount - sourceCount);
|
||||
} else {
|
||||
long newInserts = CommitUtil.countNewRecords(target,
|
||||
commitsToCatchup.stream().map(HoodieInstant::getTimestamp)
|
||||
.collect(Collectors.toList()));
|
||||
return "Count difference now is (count(" + target.getTableConfig().getTableName()
|
||||
+ ") - count(" + source.getTableConfig().getTableName() + ") == " + (targetCount
|
||||
- sourceCount) + ". Catch up count is " + newInserts;
|
||||
commitsToCatchup.stream().map(HoodieInstant::getTimestamp).collect(Collectors.toList()));
|
||||
return "Count difference now is (count(" + target.getTableConfig().getTableName() + ") - count("
|
||||
+ source.getTableConfig().getTableName()
|
||||
+ ") == " + (targetCount - sourceCount) + ". Catch up count is " + newInserts;
|
||||
}
|
||||
} else {
|
||||
List<HoodieInstant> commitsToCatchup =
|
||||
sourceTimeline.findInstantsAfter(targetLatestCommit, Integer.MAX_VALUE).getInstants()
|
||||
.collect(Collectors.toList());
|
||||
List<HoodieInstant> commitsToCatchup = sourceTimeline.findInstantsAfter(targetLatestCommit, Integer.MAX_VALUE)
|
||||
.getInstants().collect(Collectors.toList());
|
||||
if (commitsToCatchup.isEmpty()) {
|
||||
return "Count difference now is (count(" + source.getTableConfig().getTableName()
|
||||
+ ") - count(" + target.getTableConfig().getTableName() + ") == " + (sourceCount
|
||||
- targetCount);
|
||||
return "Count difference now is (count(" + source.getTableConfig().getTableName() + ") - count("
|
||||
+ target.getTableConfig().getTableName() + ") == " + (sourceCount - targetCount);
|
||||
} else {
|
||||
long newInserts = CommitUtil.countNewRecords(source,
|
||||
commitsToCatchup.stream().map(HoodieInstant::getTimestamp)
|
||||
.collect(Collectors.toList()));
|
||||
return "Count difference now is (count(" + source.getTableConfig().getTableName()
|
||||
+ ") - count(" + target.getTableConfig().getTableName() + ") == " + (sourceCount
|
||||
- targetCount) + ". Catch up count is " + newInserts;
|
||||
commitsToCatchup.stream().map(HoodieInstant::getTimestamp).collect(Collectors.toList()));
|
||||
return "Count difference now is (count(" + source.getTableConfig().getTableName() + ") - count("
|
||||
+ target.getTableConfig().getTableName()
|
||||
+ ") == " + (sourceCount - targetCount) + ". Catch up count is " + newInserts;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -45,20 +45,20 @@ public class RepairsCommand implements CommandMarker {
|
||||
return HoodieCLI.tableMetadata != null;
|
||||
}
|
||||
|
||||
@CliCommand(value = "repair deduplicate", help = "De-duplicate a partition path contains duplicates & produce repaired files to replace with")
|
||||
public String deduplicate(
|
||||
@CliCommand(value = "repair deduplicate", help = "De-duplicate a partition path contains duplicates & produce "
|
||||
+ "repaired files to replace with")
|
||||
public String deduplicate(@CliOption(key = {
|
||||
"duplicatedPartitionPath"}, help = "Partition Path containing the duplicates", mandatory = true) final String
|
||||
duplicatedPartitionPath,
|
||||
@CliOption(key = {
|
||||
"duplicatedPartitionPath"}, help = "Partition Path containing the duplicates", mandatory = true)
|
||||
final String duplicatedPartitionPath,
|
||||
"repairedOutputPath"}, help = "Location to place the repaired files", mandatory = true) final String
|
||||
repairedOutputPath,
|
||||
@CliOption(key = {
|
||||
"repairedOutputPath"}, help = "Location to place the repaired files", mandatory = true)
|
||||
final String repairedOutputPath,
|
||||
@CliOption(key = {"sparkProperties"}, help = "Spark Properites File Path", mandatory = true)
|
||||
final String sparkPropertiesPath) throws Exception {
|
||||
"sparkProperties"}, help = "Spark Properites File Path", mandatory = true) final String sparkPropertiesPath)
|
||||
throws Exception {
|
||||
SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath);
|
||||
sparkLauncher
|
||||
.addAppArgs(SparkMain.SparkCommand.DEDUPLICATE.toString(), duplicatedPartitionPath,
|
||||
repairedOutputPath, HoodieCLI.tableMetadata.getBasePath());
|
||||
sparkLauncher.addAppArgs(SparkMain.SparkCommand.DEDUPLICATE.toString(), duplicatedPartitionPath, repairedOutputPath,
|
||||
HoodieCLI.tableMetadata.getBasePath());
|
||||
Process process = sparkLauncher.launch();
|
||||
InputStreamConsumer.captureOutput(process);
|
||||
int exitCode = process.waitFor();
|
||||
@@ -71,14 +71,12 @@ public class RepairsCommand implements CommandMarker {
|
||||
|
||||
|
||||
@CliCommand(value = "repair addpartitionmeta", help = "Add partition metadata to a dataset, if not present")
|
||||
public String addPartitionMeta(
|
||||
@CliOption(key = {"dryrun"},
|
||||
help = "Should we actually add or just print what would be done",
|
||||
unspecifiedDefaultValue = "true")
|
||||
public String addPartitionMeta(@CliOption(key = {
|
||||
"dryrun"}, help = "Should we actually add or just print what would be done", unspecifiedDefaultValue = "true")
|
||||
final boolean dryRun) throws IOException {
|
||||
|
||||
String latestCommit = HoodieCLI.tableMetadata.getActiveTimeline().getCommitTimeline()
|
||||
.lastInstant().get().getTimestamp();
|
||||
String latestCommit = HoodieCLI.tableMetadata.getActiveTimeline().getCommitTimeline().lastInstant().get()
|
||||
.getTimestamp();
|
||||
List<String> partitionPaths = FSUtils.getAllFoldersThreeLevelsDown(HoodieCLI.fs,
|
||||
HoodieCLI.tableMetadata.getBasePath());
|
||||
Path basePath = new Path(HoodieCLI.tableMetadata.getBasePath());
|
||||
@@ -94,10 +92,7 @@ public class RepairsCommand implements CommandMarker {
|
||||
if (!HoodiePartitionMetadata.hasPartitionMetadata(HoodieCLI.fs, partitionPath)) {
|
||||
row[1] = "No";
|
||||
if (!dryRun) {
|
||||
HoodiePartitionMetadata partitionMetadata = new HoodiePartitionMetadata(
|
||||
HoodieCLI.fs,
|
||||
latestCommit,
|
||||
basePath,
|
||||
HoodiePartitionMetadata partitionMetadata = new HoodiePartitionMetadata(HoodieCLI.fs, latestCommit, basePath,
|
||||
partitionPath);
|
||||
partitionMetadata.trySave(0);
|
||||
}
|
||||
@@ -105,7 +100,6 @@ public class RepairsCommand implements CommandMarker {
|
||||
rows[ind++] = row;
|
||||
}
|
||||
|
||||
return HoodiePrintHelper.print(
|
||||
new String[]{"Partition Path", "Metadata Present?", "Action"}, rows);
|
||||
return HoodiePrintHelper.print(new String[] {"Partition Path", "Metadata Present?", "Action"}, rows);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,6 +13,7 @@
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package com.uber.hoodie.cli.commands;
|
||||
|
||||
import com.uber.hoodie.HoodieWriteClient;
|
||||
@@ -60,8 +61,8 @@ public class SavepointsCommand implements CommandMarker {
|
||||
|
||||
@CliAvailabilityIndicator({"savepoint rollback"})
|
||||
public boolean isRollbackToSavepointAvailable() {
|
||||
return HoodieCLI.tableMetadata != null && !HoodieCLI.tableMetadata.getActiveTimeline()
|
||||
.getSavePointTimeline().filterCompletedInstants().empty();
|
||||
return HoodieCLI.tableMetadata != null && !HoodieCLI.tableMetadata.getActiveTimeline().getSavePointTimeline()
|
||||
.filterCompletedInstants().empty();
|
||||
}
|
||||
|
||||
@CliCommand(value = "savepoints show", help = "Show the savepoints")
|
||||
@@ -73,23 +74,19 @@ public class SavepointsCommand implements CommandMarker {
|
||||
Collections.reverse(commits);
|
||||
for (int i = 0; i < commits.size(); i++) {
|
||||
HoodieInstant commit = commits.get(i);
|
||||
rows[i] = new String[]{commit.getTimestamp()};
|
||||
rows[i] = new String[] {commit.getTimestamp()};
|
||||
}
|
||||
return HoodiePrintHelper.print(new String[]{"SavepointTime"}, rows);
|
||||
return HoodiePrintHelper.print(new String[] {"SavepointTime"}, rows);
|
||||
}
|
||||
|
||||
@CliCommand(value = "savepoint create", help = "Savepoint a commit")
|
||||
public String savepoint(
|
||||
@CliOption(key = {"commit"}, help = "Commit to savepoint")
|
||||
final String commitTime,
|
||||
@CliOption(key = {"user"}, help = "User who is creating the savepoint")
|
||||
final String user,
|
||||
@CliOption(key = {"comments"}, help = "Comments for creating the savepoint")
|
||||
final String comments) throws Exception {
|
||||
public String savepoint(@CliOption(key = {"commit"}, help = "Commit to savepoint") final String commitTime,
|
||||
@CliOption(key = {"user"}, help = "User who is creating the savepoint") final String user,
|
||||
@CliOption(key = {"comments"}, help = "Comments for creating the savepoint") final String comments)
|
||||
throws Exception {
|
||||
HoodieActiveTimeline activeTimeline = HoodieCLI.tableMetadata.getActiveTimeline();
|
||||
HoodieTimeline timeline = activeTimeline.getCommitTimeline().filterCompletedInstants();
|
||||
HoodieInstant
|
||||
commitInstant = new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, commitTime);
|
||||
HoodieInstant commitInstant = new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, commitTime);
|
||||
|
||||
if (!timeline.containsInstant(commitInstant)) {
|
||||
return "Commit " + commitTime + " not found in Commits " + timeline;
|
||||
@@ -106,22 +103,19 @@ public class SavepointsCommand implements CommandMarker {
|
||||
|
||||
@CliCommand(value = "savepoint rollback", help = "Savepoint a commit")
|
||||
public String rollbackToSavepoint(
|
||||
@CliOption(key = {"savepoint"}, help = "Savepoint to rollback")
|
||||
final String commitTime,
|
||||
@CliOption(key = {"sparkProperties"}, help = "Spark Properites File Path")
|
||||
final String sparkPropertiesPath) throws Exception {
|
||||
@CliOption(key = {"savepoint"}, help = "Savepoint to rollback") final String commitTime,
|
||||
@CliOption(key = {"sparkProperties"}, help = "Spark Properites File Path") final String sparkPropertiesPath)
|
||||
throws Exception {
|
||||
HoodieActiveTimeline activeTimeline = HoodieCLI.tableMetadata.getActiveTimeline();
|
||||
HoodieTimeline timeline = activeTimeline.getCommitTimeline().filterCompletedInstants();
|
||||
HoodieInstant
|
||||
commitInstant = new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, commitTime);
|
||||
HoodieInstant commitInstant = new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, commitTime);
|
||||
|
||||
if (!timeline.containsInstant(commitInstant)) {
|
||||
return "Commit " + commitTime + " not found in Commits " + timeline;
|
||||
}
|
||||
|
||||
SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath);
|
||||
sparkLauncher.addAppArgs(SparkMain.SparkCommand.ROLLBACK_TO_SAVEPOINT.toString(),
|
||||
commitTime,
|
||||
sparkLauncher.addAppArgs(SparkMain.SparkCommand.ROLLBACK_TO_SAVEPOINT.toString(), commitTime,
|
||||
HoodieCLI.tableMetadata.getBasePath());
|
||||
Process process = sparkLauncher.launch();
|
||||
InputStreamConsumer.captureOutput(process);
|
||||
@@ -137,18 +131,14 @@ public class SavepointsCommand implements CommandMarker {
|
||||
|
||||
@CliCommand(value = "savepoints refresh", help = "Refresh the savepoints")
|
||||
public String refreshMetaClient() throws IOException {
|
||||
HoodieTableMetaClient metadata =
|
||||
new HoodieTableMetaClient(HoodieCLI.conf, HoodieCLI.tableMetadata.getBasePath());
|
||||
HoodieTableMetaClient metadata = new HoodieTableMetaClient(HoodieCLI.conf, HoodieCLI.tableMetadata.getBasePath());
|
||||
HoodieCLI.setTableMetadata(metadata);
|
||||
return "Metadata for table " + metadata.getTableConfig().getTableName() + " refreshed.";
|
||||
}
|
||||
|
||||
private static HoodieWriteClient createHoodieClient(JavaSparkContext jsc, String basePath)
|
||||
throws Exception {
|
||||
HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath)
|
||||
.withIndexConfig(
|
||||
HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build())
|
||||
.build();
|
||||
private static HoodieWriteClient createHoodieClient(JavaSparkContext jsc, String basePath) throws Exception {
|
||||
HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath).withIndexConfig(
|
||||
HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build()).build();
|
||||
return new HoodieWriteClient(jsc, config, false);
|
||||
}
|
||||
|
||||
|
||||
@@ -30,18 +30,14 @@ import org.apache.spark.sql.SQLContext;
|
||||
|
||||
public class SparkMain {
|
||||
|
||||
protected final static Logger LOG = Logger.getLogger(SparkMain.class);
|
||||
protected static final Logger LOG = Logger.getLogger(SparkMain.class);
|
||||
|
||||
|
||||
/**
|
||||
* Commands
|
||||
*/
|
||||
enum SparkCommand {
|
||||
ROLLBACK,
|
||||
DEDUPLICATE,
|
||||
ROLLBACK_TO_SAVEPOINT,
|
||||
SAVEPOINT,
|
||||
IMPORT
|
||||
ROLLBACK, DEDUPLICATE, ROLLBACK_TO_SAVEPOINT, SAVEPOINT, IMPORT
|
||||
}
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
@@ -67,18 +63,19 @@ public class SparkMain {
|
||||
break;
|
||||
case IMPORT:
|
||||
assert (args.length == 11);
|
||||
returnCode = dataImport(jsc, args[1], args[2], args[3], args[4], args[5], args[6],
|
||||
Integer.parseInt(args[7]), args[8], SparkUtil.DEFUALT_SPARK_MASTER, args[9],
|
||||
Integer.parseInt(args[10]));
|
||||
returnCode = dataImport(jsc, args[1], args[2], args[3], args[4], args[5], args[6], Integer.parseInt(args[7]),
|
||||
args[8], SparkUtil.DEFUALT_SPARK_MASTER, args[9], Integer.parseInt(args[10]));
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
System.exit(returnCode);
|
||||
}
|
||||
|
||||
private static int dataImport(JavaSparkContext jsc, String srcPath, String targetPath,
|
||||
String tableName, String tableType, String rowKey, String partitionKey, int parallelism,
|
||||
String schemaFile, String sparkMaster, String sparkMemory, int retry) throws Exception {
|
||||
private static int dataImport(JavaSparkContext jsc, String srcPath, String targetPath, String tableName,
|
||||
String tableType, String rowKey, String partitionKey, int parallelism, String schemaFile, String sparkMaster,
|
||||
String sparkMemory, int retry) throws Exception {
|
||||
HDFSParquetImporter.Config cfg = new HDFSParquetImporter.Config();
|
||||
cfg.srcPath = srcPath;
|
||||
cfg.targetPath = targetPath;
|
||||
@@ -92,19 +89,15 @@ public class SparkMain {
|
||||
return new HDFSParquetImporter(cfg).dataImport(jsc, retry);
|
||||
}
|
||||
|
||||
private static int deduplicatePartitionPath(JavaSparkContext jsc,
|
||||
String duplicatedPartitionPath,
|
||||
String repairedOutputPath,
|
||||
String basePath)
|
||||
throws Exception {
|
||||
DedupeSparkJob job = new DedupeSparkJob(basePath, duplicatedPartitionPath, repairedOutputPath,
|
||||
new SQLContext(jsc), FSUtils.getFs(basePath, jsc.hadoopConfiguration()));
|
||||
private static int deduplicatePartitionPath(JavaSparkContext jsc, String duplicatedPartitionPath,
|
||||
String repairedOutputPath, String basePath) throws Exception {
|
||||
DedupeSparkJob job = new DedupeSparkJob(basePath, duplicatedPartitionPath, repairedOutputPath, new SQLContext(jsc),
|
||||
FSUtils.getFs(basePath, jsc.hadoopConfiguration()));
|
||||
job.fixDuplicates(true);
|
||||
return 0;
|
||||
}
|
||||
|
||||
private static int rollback(JavaSparkContext jsc, String commitTime, String basePath)
|
||||
throws Exception {
|
||||
private static int rollback(JavaSparkContext jsc, String commitTime, String basePath) throws Exception {
|
||||
HoodieWriteClient client = createHoodieClient(jsc, basePath);
|
||||
if (client.rollback(commitTime)) {
|
||||
LOG.info(String.format("The commit \"%s\" rolled back.", commitTime));
|
||||
@@ -115,9 +108,7 @@ public class SparkMain {
|
||||
}
|
||||
}
|
||||
|
||||
private static int rollbackToSavepoint(JavaSparkContext jsc, String savepointTime,
|
||||
String basePath)
|
||||
throws Exception {
|
||||
private static int rollbackToSavepoint(JavaSparkContext jsc, String savepointTime, String basePath) throws Exception {
|
||||
HoodieWriteClient client = createHoodieClient(jsc, basePath);
|
||||
if (client.rollbackToSavepoint(savepointTime)) {
|
||||
LOG.info(String.format("The commit \"%s\" rolled back.", savepointTime));
|
||||
@@ -128,12 +119,9 @@ public class SparkMain {
|
||||
}
|
||||
}
|
||||
|
||||
private static HoodieWriteClient createHoodieClient(JavaSparkContext jsc, String basePath)
|
||||
throws Exception {
|
||||
HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath)
|
||||
.withIndexConfig(
|
||||
HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build())
|
||||
.build();
|
||||
private static HoodieWriteClient createHoodieClient(JavaSparkContext jsc, String basePath) throws Exception {
|
||||
HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath).withIndexConfig(
|
||||
HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build()).build();
|
||||
return new HoodieWriteClient(jsc, config);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -16,7 +16,6 @@
|
||||
|
||||
package com.uber.hoodie.cli.commands;
|
||||
|
||||
|
||||
import com.codahale.metrics.Histogram;
|
||||
import com.codahale.metrics.Snapshot;
|
||||
import com.codahale.metrics.UniformReservoir;
|
||||
@@ -44,12 +43,15 @@ import org.springframework.stereotype.Component;
|
||||
@Component
|
||||
public class StatsCommand implements CommandMarker {
|
||||
|
||||
private static final int MAX_FILES = 1000000;
|
||||
|
||||
@CliAvailabilityIndicator({"stats wa"})
|
||||
public boolean isWriteAmpAvailable() {
|
||||
return HoodieCLI.tableMetadata != null;
|
||||
}
|
||||
|
||||
@CliCommand(value = "stats wa", help = "Write Amplification. Ratio of how many records were upserted to how many records were actually written")
|
||||
@CliCommand(value = "stats wa", help = "Write Amplification. Ratio of how many records were upserted to how many "
|
||||
+ "records were actually written")
|
||||
public String writeAmplificationStats() throws IOException {
|
||||
long totalRecordsUpserted = 0;
|
||||
long totalRecordsWritten = 0;
|
||||
@@ -60,18 +62,13 @@ public class StatsCommand implements CommandMarker {
|
||||
String[][] rows = new String[new Long(timeline.countInstants()).intValue() + 1][];
|
||||
int i = 0;
|
||||
DecimalFormat df = new DecimalFormat("#.00");
|
||||
for (HoodieInstant commitTime : timeline.getInstants().collect(
|
||||
Collectors.toList())) {
|
||||
for (HoodieInstant commitTime : timeline.getInstants().collect(Collectors.toList())) {
|
||||
String waf = "0";
|
||||
HoodieCommitMetadata commit = HoodieCommitMetadata
|
||||
.fromBytes(activeTimeline.getInstantDetails(commitTime).get());
|
||||
HoodieCommitMetadata commit = HoodieCommitMetadata.fromBytes(activeTimeline.getInstantDetails(commitTime).get());
|
||||
if (commit.fetchTotalUpdateRecordsWritten() > 0) {
|
||||
waf = df.format(
|
||||
(float) commit.fetchTotalRecordsWritten() / commit
|
||||
.fetchTotalUpdateRecordsWritten());
|
||||
waf = df.format((float) commit.fetchTotalRecordsWritten() / commit.fetchTotalUpdateRecordsWritten());
|
||||
}
|
||||
rows[i++] = new String[]{commitTime.getTimestamp(),
|
||||
String.valueOf(commit.fetchTotalUpdateRecordsWritten()),
|
||||
rows[i++] = new String[] {commitTime.getTimestamp(), String.valueOf(commit.fetchTotalUpdateRecordsWritten()),
|
||||
String.valueOf(commit.fetchTotalRecordsWritten()), waf};
|
||||
totalRecordsUpserted += commit.fetchTotalUpdateRecordsWritten();
|
||||
totalRecordsWritten += commit.fetchTotalRecordsWritten();
|
||||
@@ -80,43 +77,32 @@ public class StatsCommand implements CommandMarker {
|
||||
if (totalRecordsUpserted > 0) {
|
||||
waf = df.format((float) totalRecordsWritten / totalRecordsUpserted);
|
||||
}
|
||||
rows[i] = new String[]{"Total", String.valueOf(totalRecordsUpserted),
|
||||
String.valueOf(totalRecordsWritten), waf};
|
||||
return HoodiePrintHelper.print(
|
||||
new String[]{"CommitTime", "Total Upserted", "Total Written",
|
||||
"Write Amplifiation Factor"}, rows);
|
||||
rows[i] = new String[] {"Total", String.valueOf(totalRecordsUpserted), String.valueOf(totalRecordsWritten), waf};
|
||||
return HoodiePrintHelper
|
||||
.print(new String[] {"CommitTime", "Total Upserted", "Total Written", "Write Amplifiation Factor"},
|
||||
rows);
|
||||
|
||||
}
|
||||
|
||||
|
||||
private String[] printFileSizeHistogram(String commitTime, Snapshot s) {
|
||||
return new String[]{
|
||||
commitTime,
|
||||
NumericUtils.humanReadableByteCount(s.getMin()),
|
||||
NumericUtils.humanReadableByteCount(s.getValue(0.1)),
|
||||
NumericUtils.humanReadableByteCount(s.getMedian()),
|
||||
NumericUtils.humanReadableByteCount(s.getMean()),
|
||||
NumericUtils.humanReadableByteCount(s.get95thPercentile()),
|
||||
NumericUtils.humanReadableByteCount(s.getMax()),
|
||||
String.valueOf(s.size()),
|
||||
NumericUtils.humanReadableByteCount(s.getStdDev())
|
||||
};
|
||||
return new String[] {commitTime, NumericUtils.humanReadableByteCount(s.getMin()),
|
||||
NumericUtils.humanReadableByteCount(s.getValue(0.1)), NumericUtils.humanReadableByteCount(s.getMedian()),
|
||||
NumericUtils.humanReadableByteCount(s.getMean()), NumericUtils.humanReadableByteCount(s.get95thPercentile()),
|
||||
NumericUtils.humanReadableByteCount(s.getMax()), String.valueOf(s.size()),
|
||||
NumericUtils.humanReadableByteCount(s.getStdDev())};
|
||||
}
|
||||
|
||||
@CliCommand(value = "stats filesizes", help = "File Sizes. Display summary stats on sizes of files")
|
||||
public String fileSizeStats(
|
||||
@CliOption(key = {
|
||||
"partitionPath"}, help = "regex to select files, eg: 2016/08/02", unspecifiedDefaultValue = "*/*/*")
|
||||
final String globRegex) throws IOException {
|
||||
public String fileSizeStats(@CliOption(key = {
|
||||
"partitionPath"}, help = "regex to select files, eg: 2016/08/02", unspecifiedDefaultValue = "*/*/*") final
|
||||
String globRegex) throws IOException {
|
||||
|
||||
FileSystem fs = HoodieCLI.fs;
|
||||
String globPath = String.format("%s/%s/*",
|
||||
HoodieCLI.tableMetadata.getBasePath(),
|
||||
globRegex);
|
||||
String globPath = String.format("%s/%s/*", HoodieCLI.tableMetadata.getBasePath(), globRegex);
|
||||
FileStatus[] statuses = fs.globStatus(new Path(globPath));
|
||||
|
||||
// max, min, #small files < 10MB, 50th, avg, 95th
|
||||
final int MAX_FILES = 1000000;
|
||||
Histogram globalHistogram = new Histogram(new UniformReservoir(MAX_FILES));
|
||||
HashMap<String, Histogram> commitHistoMap = new HashMap<String, Histogram>();
|
||||
for (FileStatus fileStatus : statuses) {
|
||||
@@ -138,8 +124,8 @@ public class StatsCommand implements CommandMarker {
|
||||
Snapshot s = globalHistogram.getSnapshot();
|
||||
rows[ind++] = printFileSizeHistogram("ALL", s);
|
||||
|
||||
return HoodiePrintHelper.print(
|
||||
new String[]{"CommitTime", "Min", "10th", "50th", "avg", "95th", "Max", "NumFiles",
|
||||
"StdDev"}, rows);
|
||||
return HoodiePrintHelper
|
||||
.print(new String[] {"CommitTime", "Min", "10th", "50th", "avg", "95th", "Max", "NumFiles", "StdDev"},
|
||||
rows);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -25,9 +25,7 @@ import org.springframework.stereotype.Component;
|
||||
public class UtilsCommand implements CommandMarker {
|
||||
|
||||
@CliCommand(value = "utils loadClass", help = "Load a class")
|
||||
public String loadClass(
|
||||
@CliOption(key = {"class"}, help = "Check mode") final String clazz
|
||||
) throws Exception {
|
||||
public String loadClass(@CliOption(key = {"class"}, help = "Check mode") final String clazz) throws Exception {
|
||||
Class klass = Class.forName(clazz);
|
||||
return klass.getProtectionDomain().getCodeSource().getLocation().toExternalForm();
|
||||
}
|
||||
|
||||
@@ -25,15 +25,12 @@ import java.util.List;
|
||||
|
||||
public class CommitUtil {
|
||||
|
||||
public static long countNewRecords(HoodieTableMetaClient target, List<String> commitsToCatchup)
|
||||
throws IOException {
|
||||
public static long countNewRecords(HoodieTableMetaClient target, List<String> commitsToCatchup) throws IOException {
|
||||
long totalNew = 0;
|
||||
HoodieTimeline timeline = target.getActiveTimeline().reload().getCommitTimeline()
|
||||
.filterCompletedInstants();
|
||||
HoodieTimeline timeline = target.getActiveTimeline().reload().getCommitTimeline().filterCompletedInstants();
|
||||
for (String commit : commitsToCatchup) {
|
||||
HoodieCommitMetadata c = HoodieCommitMetadata.fromBytes(timeline
|
||||
.getInstantDetails(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, commit))
|
||||
.get());
|
||||
HoodieCommitMetadata c = HoodieCommitMetadata.fromBytes(
|
||||
timeline.getInstantDetails(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, commit)).get());
|
||||
totalNew += c.fetchTotalRecordsWritten() - c.fetchTotalUpdateRecordsWritten();
|
||||
}
|
||||
return totalNew;
|
||||
|
||||
@@ -27,7 +27,7 @@ import org.joda.time.DateTime;
|
||||
|
||||
public class HiveUtil {
|
||||
|
||||
private static String driverName = "org.apache.hive.jdbc.HiveDriver";
|
||||
private static final String driverName = "org.apache.hive.jdbc.HiveDriver";
|
||||
|
||||
static {
|
||||
try {
|
||||
@@ -39,8 +39,7 @@ public class HiveUtil {
|
||||
|
||||
private static Connection connection;
|
||||
|
||||
private static Connection getConnection(String jdbcUrl, String user, String pass)
|
||||
throws SQLException {
|
||||
private static Connection getConnection(String jdbcUrl, String user, String pass) throws SQLException {
|
||||
DataSource ds = getDatasource(jdbcUrl, user, pass);
|
||||
return ds.getConnection();
|
||||
}
|
||||
@@ -54,8 +53,8 @@ public class HiveUtil {
|
||||
return ds;
|
||||
}
|
||||
|
||||
public static long countRecords(String jdbcUrl, HoodieTableMetaClient source, String dbName,
|
||||
String user, String pass) throws SQLException {
|
||||
public static long countRecords(String jdbcUrl, HoodieTableMetaClient source, String dbName, String user, String pass)
|
||||
throws SQLException {
|
||||
Connection conn = HiveUtil.getConnection(jdbcUrl, user, pass);
|
||||
ResultSet rs = null;
|
||||
Statement stmt = conn.createStatement();
|
||||
@@ -64,15 +63,13 @@ public class HiveUtil {
|
||||
stmt.execute("set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat");
|
||||
stmt.execute("set hive.stats.autogather=false");
|
||||
rs = stmt.executeQuery(
|
||||
"select count(`_hoodie_commit_time`) as cnt from " + dbName + "." + source
|
||||
.getTableConfig()
|
||||
.getTableName());
|
||||
"select count(`_hoodie_commit_time`) as cnt from " + dbName + "."
|
||||
+ source.getTableConfig().getTableName());
|
||||
long count = -1;
|
||||
if (rs.next()) {
|
||||
count = rs.getLong("cnt");
|
||||
}
|
||||
System.out
|
||||
.println("Total records in " + source.getTableConfig().getTableName() + " is " + count);
|
||||
System.out.println("Total records in " + source.getTableConfig().getTableName() + " is " + count);
|
||||
return count;
|
||||
} finally {
|
||||
if (rs != null) {
|
||||
@@ -84,22 +81,19 @@ public class HiveUtil {
|
||||
}
|
||||
}
|
||||
|
||||
public static long countRecords(String jdbcUrl, HoodieTableMetaClient source, String srcDb,
|
||||
int partitions, String user, String pass) throws SQLException {
|
||||
public static long countRecords(String jdbcUrl, HoodieTableMetaClient source, String srcDb, int partitions,
|
||||
String user, String pass) throws SQLException {
|
||||
DateTime dateTime = DateTime.now();
|
||||
String endDateStr =
|
||||
dateTime.getYear() + "-" + String.format("%02d", dateTime.getMonthOfYear()) + "-" +
|
||||
String.format("%02d", dateTime.getDayOfMonth());
|
||||
String endDateStr = dateTime.getYear() + "-" + String.format("%02d", dateTime.getMonthOfYear()) + "-"
|
||||
+ String.format("%02d", dateTime.getDayOfMonth());
|
||||
dateTime = dateTime.minusDays(partitions);
|
||||
String startDateStr =
|
||||
dateTime.getYear() + "-" + String.format("%02d", dateTime.getMonthOfYear()) + "-" +
|
||||
String.format("%02d", dateTime.getDayOfMonth());
|
||||
String startDateStr = dateTime.getYear() + "-" + String.format("%02d", dateTime.getMonthOfYear()) + "-"
|
||||
+ String.format("%02d", dateTime.getDayOfMonth());
|
||||
System.out.println("Start date " + startDateStr + " and end date " + endDateStr);
|
||||
return countRecords(jdbcUrl, source, srcDb, startDateStr, endDateStr, user, pass);
|
||||
}
|
||||
|
||||
private static long countRecords(String jdbcUrl, HoodieTableMetaClient source, String srcDb,
|
||||
String startDateStr,
|
||||
private static long countRecords(String jdbcUrl, HoodieTableMetaClient source, String srcDb, String startDateStr,
|
||||
String endDateStr, String user, String pass) throws SQLException {
|
||||
Connection conn = HiveUtil.getConnection(jdbcUrl, user, pass);
|
||||
ResultSet rs = null;
|
||||
@@ -109,9 +103,8 @@ public class HiveUtil {
|
||||
stmt.execute("set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat");
|
||||
stmt.execute("set hive.stats.autogather=false");
|
||||
rs = stmt.executeQuery(
|
||||
"select count(`_hoodie_commit_time`) as cnt from " + srcDb + "." + source.getTableConfig()
|
||||
.getTableName() + " where datestr>'" + startDateStr + "' and datestr<='"
|
||||
+ endDateStr + "'");
|
||||
"select count(`_hoodie_commit_time`) as cnt from " + srcDb + "." + source.getTableConfig().getTableName()
|
||||
+ " where datestr>'" + startDateStr + "' and datestr<='" + endDateStr + "'");
|
||||
if (rs.next()) {
|
||||
return rs.getLong("cnt");
|
||||
}
|
||||
|
||||
@@ -24,7 +24,7 @@ import java.util.logging.Logger;
|
||||
|
||||
public class InputStreamConsumer extends Thread {
|
||||
|
||||
protected final static Logger LOG = Logger.getLogger(InputStreamConsumer.class.getName());
|
||||
protected static final Logger LOG = Logger.getLogger(InputStreamConsumer.class.getName());
|
||||
private InputStream is;
|
||||
|
||||
public InputStreamConsumer(InputStream is) {
|
||||
|
||||
@@ -35,13 +35,10 @@ public class SparkUtil {
|
||||
* TODO: Need to fix a bunch of hardcoded stuff here eg: history server, spark distro
|
||||
*/
|
||||
public static SparkLauncher initLauncher(String propertiesFile) throws URISyntaxException {
|
||||
String currentJar = new File(
|
||||
SparkUtil.class.getProtectionDomain().getCodeSource().getLocation().toURI().getPath())
|
||||
String currentJar = new File(SparkUtil.class.getProtectionDomain().getCodeSource().getLocation().toURI().getPath())
|
||||
.getAbsolutePath();
|
||||
SparkLauncher sparkLauncher =
|
||||
new SparkLauncher().setAppResource(currentJar)
|
||||
.setMainClass(SparkMain.class.getName())
|
||||
.setPropertiesFile(propertiesFile);
|
||||
SparkLauncher sparkLauncher = new SparkLauncher().setAppResource(currentJar).setMainClass(SparkMain.class.getName())
|
||||
.setPropertiesFile(propertiesFile);
|
||||
File libDirectory = new File(new File(currentJar).getParent(), "lib");
|
||||
for (String library : libDirectory.list()) {
|
||||
sparkLauncher.addJar(new File(libDirectory, library).getAbsolutePath());
|
||||
@@ -60,8 +57,7 @@ public class SparkUtil {
|
||||
// Configure hadoop conf
|
||||
sparkConf.set("spark.hadoop.mapred.output.compress", "true");
|
||||
sparkConf.set("spark.hadoop.mapred.output.compression.codec", "true");
|
||||
sparkConf.set("spark.hadoop.mapred.output.compression.codec",
|
||||
"org.apache.hadoop.io.compress.GzipCodec");
|
||||
sparkConf.set("spark.hadoop.mapred.output.compression.codec", "org.apache.hadoop.io.compress.GzipCodec");
|
||||
sparkConf.set("spark.hadoop.mapred.output.compression.type", "BLOCK");
|
||||
|
||||
sparkConf = HoodieWriteClient.registerClasses(sparkConf);
|
||||
|
||||
Reference in New Issue
Block a user