diff --git a/hudi-cli/src/main/java/org/apache/hudi/cli/HoodieTableHeaderFields.java b/hudi-cli/src/main/java/org/apache/hudi/cli/HoodieTableHeaderFields.java index 77f486b3e..743273244 100644 --- a/hudi-cli/src/main/java/org/apache/hudi/cli/HoodieTableHeaderFields.java +++ b/hudi-cli/src/main/java/org/apache/hudi/cli/HoodieTableHeaderFields.java @@ -96,4 +96,21 @@ public class HoodieTableHeaderFields { public static final String HEADER_TOTAL_PARTITIONS = "Total Partitions"; public static final String HEADER_DELETED_FILE = "Deleted File"; public static final String HEADER_SUCCEEDED = "Succeeded"; + + /** + * Fields of Stats. + */ + public static final String HEADER_COMMIT_TIME = "CommitTime"; + public static final String HEADER_TOTAL_UPSERTED = "Total Upserted"; + public static final String HEADER_TOTAL_WRITTEN = "Total Written"; + public static final String HEADER_WRITE_AMPLIFICATION_FACTOR = "Write Amplification Factor"; + public static final String HEADER_HISTOGRAM_MIN = "Min"; + public static final String HEADER_HISTOGRAM_10TH = "10th"; + public static final String HEADER_HISTOGRAM_50TH = "50th"; + public static final String HEADER_HISTOGRAM_AVG = "avg"; + public static final String HEADER_HISTOGRAM_95TH = "95th"; + public static final String HEADER_HISTOGRAM_MAX = "Max"; + public static final String HEADER_HISTOGRAM_NUM_FILES = "NumFiles"; + public static final String HEADER_HISTOGRAM_STD_DEV = "StdDev"; + } diff --git a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/StatsCommand.java b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/StatsCommand.java index e5be0e42f..72cf6c02d 100644 --- a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/StatsCommand.java +++ b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/StatsCommand.java @@ -20,6 +20,7 @@ package org.apache.hudi.cli.commands; import org.apache.hudi.cli.HoodieCLI; import org.apache.hudi.cli.HoodiePrintHelper; +import org.apache.hudi.cli.HoodieTableHeaderFields; import org.apache.hudi.cli.TableHeader; import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.model.HoodieCommitMetadata; @@ -54,7 +55,7 @@ import java.util.stream.Collectors; @Component public class StatsCommand implements CommandMarker { - private static final int MAX_FILES = 1000000; + public static final int MAX_FILES = 1000000; @CliCommand(value = "stats wa", help = "Write Amplification. Ratio of how many records were upserted to how many " + "records were actually written") @@ -92,12 +93,14 @@ public class StatsCommand implements CommandMarker { } rows.add(new Comparable[] {"Total", totalRecordsUpserted, totalRecordsWritten, waf}); - TableHeader header = new TableHeader().addTableHeaderField("CommitTime").addTableHeaderField("Total Upserted") - .addTableHeaderField("Total Written").addTableHeaderField("Write Amplification Factor"); + TableHeader header = new TableHeader().addTableHeaderField(HoodieTableHeaderFields.HEADER_COMMIT_TIME) + .addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_UPSERTED) + .addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_WRITTEN) + .addTableHeaderField(HoodieTableHeaderFields.HEADER_WRITE_AMPLIFICATION_FACTOR); return HoodiePrintHelper.print(header, new HashMap<>(), sortByField, descending, limit, headerOnly, rows); } - private Comparable[] printFileSizeHistogram(String instantTime, Snapshot s) { + public Comparable[] printFileSizeHistogram(String instantTime, Snapshot s) { return new Comparable[] {instantTime, s.getMin(), s.getValue(0.1), s.getMedian(), s.getMean(), s.get95thPercentile(), s.getMax(), s.size(), s.getStdDev()}; } @@ -138,6 +141,20 @@ public class StatsCommand implements CommandMarker { Snapshot s = globalHistogram.getSnapshot(); rows.add(printFileSizeHistogram("ALL", s)); + TableHeader header = new TableHeader() + .addTableHeaderField(HoodieTableHeaderFields.HEADER_COMMIT_TIME) + .addTableHeaderField(HoodieTableHeaderFields.HEADER_HISTOGRAM_MIN) + .addTableHeaderField(HoodieTableHeaderFields.HEADER_HISTOGRAM_10TH) + .addTableHeaderField(HoodieTableHeaderFields.HEADER_HISTOGRAM_50TH) + .addTableHeaderField(HoodieTableHeaderFields.HEADER_HISTOGRAM_AVG) + .addTableHeaderField(HoodieTableHeaderFields.HEADER_HISTOGRAM_95TH) + .addTableHeaderField(HoodieTableHeaderFields.HEADER_HISTOGRAM_MAX) + .addTableHeaderField(HoodieTableHeaderFields.HEADER_HISTOGRAM_NUM_FILES) + .addTableHeaderField(HoodieTableHeaderFields.HEADER_HISTOGRAM_STD_DEV); + return HoodiePrintHelper.print(header, getFieldNameToConverterMap(), sortByField, descending, limit, headerOnly, rows); + } + + public Map> getFieldNameToConverterMap() { Function converterFunction = entry -> NumericUtils.humanReadableByteCount((Double.parseDouble(entry.toString()))); Map> fieldNameToConverterMap = new HashMap<>(); @@ -148,10 +165,6 @@ public class StatsCommand implements CommandMarker { fieldNameToConverterMap.put("95th", converterFunction); fieldNameToConverterMap.put("Max", converterFunction); fieldNameToConverterMap.put("StdDev", converterFunction); - - TableHeader header = new TableHeader().addTableHeaderField("CommitTime").addTableHeaderField("Min") - .addTableHeaderField("10th").addTableHeaderField("50th").addTableHeaderField("avg").addTableHeaderField("95th") - .addTableHeaderField("Max").addTableHeaderField("NumFiles").addTableHeaderField("StdDev"); - return HoodiePrintHelper.print(header, fieldNameToConverterMap, sortByField, descending, limit, headerOnly, rows); + return fieldNameToConverterMap; } } diff --git a/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestArchivedCommitsCommand.java b/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestArchivedCommitsCommand.java index 2b75b55b1..88fcb39e4 100644 --- a/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestArchivedCommitsCommand.java +++ b/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestArchivedCommitsCommand.java @@ -177,10 +177,10 @@ public class TestArchivedCommitsCommand extends AbstractShellIntegrationTest { rows.clear(); - HoodieCommitMetadata metadata = HoodieTestCommitMetadataGenerator.generateCommitMetadata(tablePath); for (int i = 100; i < 104; i++) { String instant = String.valueOf(i); // Since HoodiePrintHelper order data by default, need to order commitMetadata + HoodieCommitMetadata metadata = HoodieTestCommitMetadataGenerator.generateCommitMetadata(tablePath, instant); Comparable[] result = new Comparable[] { instant, "commit", HoodieTestCommitUtilities.convertAndOrderCommitMetadata(metadata)}; rows.add(result); diff --git a/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestRepairsCommand.java b/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestRepairsCommand.java index 452e24969..9fc49181d 100644 --- a/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestRepairsCommand.java +++ b/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestRepairsCommand.java @@ -65,7 +65,7 @@ public class TestRepairsCommand extends AbstractShellIntegrationTest { // Create table and connect new TableCommand().createTable( - tablePath, "test_table", HoodieTableType.COPY_ON_WRITE.name(), + tablePath, tableName, HoodieTableType.COPY_ON_WRITE.name(), "", TimelineLayoutVersion.VERSION_1, "org.apache.hudi.common.model.HoodieAvroPayload"); } diff --git a/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestStatsCommand.java b/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestStatsCommand.java new file mode 100644 index 000000000..85fbc0ab0 --- /dev/null +++ b/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestStatsCommand.java @@ -0,0 +1,176 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.cli.commands; + +import com.codahale.metrics.Histogram; +import com.codahale.metrics.Snapshot; +import com.codahale.metrics.UniformReservoir; +import org.apache.hudi.cli.AbstractShellIntegrationTest; +import org.apache.hudi.cli.HoodieCLI; +import org.apache.hudi.cli.HoodiePrintHelper; +import org.apache.hudi.cli.HoodieTableHeaderFields; +import org.apache.hudi.cli.TableHeader; +import org.apache.hudi.cli.common.HoodieTestCommitMetadataGenerator; +import org.apache.hudi.common.HoodieTestDataGenerator; +import org.apache.hudi.common.model.HoodieTableType; +import org.apache.hudi.common.model.HoodieTestUtils; +import org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion; +import org.apache.hudi.common.util.Option; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.springframework.shell.core.CommandResult; + +import java.io.File; +import java.io.IOException; +import java.text.DecimalFormat; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +/** + * Test class of {@link org.apache.hudi.cli.commands.StatsCommand}. + */ +public class TestStatsCommand extends AbstractShellIntegrationTest { + + private String tablePath; + + @BeforeEach + public void init() throws IOException { + String tableName = "test_table"; + tablePath = basePath + File.separator + tableName; + + HoodieCLI.conf = jsc.hadoopConfiguration(); + // Create table and connect + new TableCommand().createTable( + tablePath, tableName, HoodieTableType.COPY_ON_WRITE.name(), + "", TimelineLayoutVersion.VERSION_1, "org.apache.hudi.common.model.HoodieAvroPayload"); + } + + /** + * Test case for command 'stats wa'. + */ + @Test + public void testWriteAmplificationStats() { + // generate data and metadata + Map data = new LinkedHashMap<>(); + data.put("100", new Integer[] {15, 10}); + data.put("101", new Integer[] {20, 10}); + data.put("102", new Integer[] {15, 15}); + + data.forEach((key, value) -> { + HoodieTestCommitMetadataGenerator.createCommitFileWithMetadata(tablePath, key, jsc.hadoopConfiguration(), + Option.of(value[0]), Option.of(value[1])); + }); + + CommandResult cr = getShell().executeCommand("stats wa"); + assertTrue(cr.isSuccess()); + + // generate expect + List rows = new ArrayList<>(); + DecimalFormat df = new DecimalFormat("#.00"); + data.forEach((key, value) -> { + // there are two partitions, so need to *2 + rows.add(new Comparable[]{key, value[1] * 2, value[0] * 2, df.format((float) value[0] / value[1])}); + }); + int totalWrite = data.values().stream().map(integers -> integers[0] * 2).mapToInt(s -> s).sum(); + int totalUpdate = data.values().stream().map(integers -> integers[1] * 2).mapToInt(s -> s).sum(); + rows.add(new Comparable[]{"Total", totalUpdate, totalWrite, df.format((float) totalWrite / totalUpdate)}); + + TableHeader header = new TableHeader().addTableHeaderField(HoodieTableHeaderFields.HEADER_COMMIT_TIME) + .addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_UPSERTED) + .addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_WRITTEN) + .addTableHeaderField(HoodieTableHeaderFields.HEADER_WRITE_AMPLIFICATION_FACTOR); + String expected = HoodiePrintHelper.print(header, new HashMap<>(), "", false, -1, false, rows); + + assertEquals(expected, cr.getResult().toString()); + } + + /** + * Test case for command 'stats filesizes'. + */ + @Test + public void testFileSizeStats() throws IOException { + String commit1 = "100"; + String commit2 = "101"; + Map data = new LinkedHashMap<>(); + data.put(commit1, new Integer[] {100, 120, 150}); + data.put(commit2, new Integer[] {200, 180, 250, 300}); + + // generate data file + String partition1 = HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH; + String partition2 = HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH; + String partition3 = HoodieTestDataGenerator.DEFAULT_THIRD_PARTITION_PATH; + + Integer[] data1 = data.get(commit1); + assertTrue(3 <= data1.length); + HoodieTestUtils.createNewDataFile(tablePath, partition1, commit1, data1[0]); + HoodieTestUtils.createNewDataFile(tablePath, partition2, commit1, data1[1]); + HoodieTestUtils.createNewDataFile(tablePath, partition3, commit1, data1[2]); + + Integer[] data2 = data.get(commit2); + assertTrue(4 <= data2.length); + HoodieTestUtils.createNewDataFile(tablePath, partition1, commit2, data2[0]); + HoodieTestUtils.createNewDataFile(tablePath, partition2, commit2, data2[1]); + HoodieTestUtils.createNewDataFile(tablePath, partition2, commit2, data2[2]); + HoodieTestUtils.createNewDataFile(tablePath, partition3, commit2, data2[3]); + + CommandResult cr = getShell().executeCommand("stats filesizes"); + assertTrue(cr.isSuccess()); + + Histogram globalHistogram = new Histogram(new UniformReservoir(StatsCommand.MAX_FILES)); + HashMap commitHistoMap = new HashMap<>(); + data.forEach((k, v) -> { + commitHistoMap.put(k, new Histogram(new UniformReservoir(StatsCommand.MAX_FILES))); + for (int value : v) { + commitHistoMap.get(k).update(value); + globalHistogram.update(value); + } + }); + + // generate expect + List rows = new ArrayList<>(); + for (Map.Entry entry : commitHistoMap.entrySet()) { + Snapshot s = entry.getValue().getSnapshot(); + rows.add(new StatsCommand().printFileSizeHistogram(entry.getKey(), s)); + } + Snapshot s = globalHistogram.getSnapshot(); + rows.add(new StatsCommand().printFileSizeHistogram("ALL", s)); + + TableHeader header = new TableHeader() + .addTableHeaderField(HoodieTableHeaderFields.HEADER_COMMIT_TIME) + .addTableHeaderField(HoodieTableHeaderFields.HEADER_HISTOGRAM_MIN) + .addTableHeaderField(HoodieTableHeaderFields.HEADER_HISTOGRAM_10TH) + .addTableHeaderField(HoodieTableHeaderFields.HEADER_HISTOGRAM_50TH) + .addTableHeaderField(HoodieTableHeaderFields.HEADER_HISTOGRAM_AVG) + .addTableHeaderField(HoodieTableHeaderFields.HEADER_HISTOGRAM_95TH) + .addTableHeaderField(HoodieTableHeaderFields.HEADER_HISTOGRAM_MAX) + .addTableHeaderField(HoodieTableHeaderFields.HEADER_HISTOGRAM_NUM_FILES) + .addTableHeaderField(HoodieTableHeaderFields.HEADER_HISTOGRAM_STD_DEV); + String expect = HoodiePrintHelper.print(header, new StatsCommand().getFieldNameToConverterMap(), + "", false, -1, false, rows); + + assertEquals(expect, cr.getResult().toString()); + } +} diff --git a/hudi-cli/src/test/java/org/apache/hudi/cli/common/HoodieTestCommitMetadataGenerator.java b/hudi-cli/src/test/java/org/apache/hudi/cli/common/HoodieTestCommitMetadataGenerator.java index 57828b121..2c4719cb2 100644 --- a/hudi-cli/src/test/java/org/apache/hudi/cli/common/HoodieTestCommitMetadataGenerator.java +++ b/hudi-cli/src/test/java/org/apache/hudi/cli/common/HoodieTestCommitMetadataGenerator.java @@ -26,6 +26,7 @@ import org.apache.hudi.common.model.HoodieWriteStat; import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.table.timeline.HoodieTimeline; import org.apache.hudi.common.util.CollectionUtils; +import org.apache.hudi.common.util.Option; import org.apache.hudi.exception.HoodieIOException; import org.apache.hadoop.conf.Configuration; @@ -61,6 +62,11 @@ public class HoodieTestCommitMetadataGenerator extends HoodieTestDataGenerator { * Create a commit file with default CommitMetadata. */ public static void createCommitFileWithMetadata(String basePath, String commitTime, Configuration configuration) { + createCommitFileWithMetadata(basePath, commitTime, configuration, Option.empty(), Option.empty()); + } + + public static void createCommitFileWithMetadata(String basePath, String commitTime, Configuration configuration, + Option writes, Option updates) { Arrays.asList(HoodieTimeline.makeCommitFileName(commitTime), HoodieTimeline.makeInflightCommitFileName(commitTime), HoodieTimeline.makeRequestedCommitFileName(commitTime)) .forEach(f -> { @@ -71,7 +77,7 @@ public class HoodieTestCommitMetadataGenerator extends HoodieTestDataGenerator { FileSystem fs = FSUtils.getFs(basePath, configuration); os = fs.create(commitFile, true); // Generate commitMetadata - HoodieCommitMetadata commitMetadata = generateCommitMetadata(basePath); + HoodieCommitMetadata commitMetadata = generateCommitMetadata(basePath, commitTime, writes, updates); // Write empty commit metadata os.writeBytes(new String(commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8))); } catch (IOException ioe) { @@ -91,21 +97,27 @@ public class HoodieTestCommitMetadataGenerator extends HoodieTestDataGenerator { /** * Generate commitMetadata in path. */ - public static HoodieCommitMetadata generateCommitMetadata(String basePath) throws IOException { - String file1P0C0 = HoodieTestUtils.createNewDataFile(basePath, DEFAULT_FIRST_PARTITION_PATH, "000"); - String file1P1C0 = HoodieTestUtils.createNewDataFile(basePath, DEFAULT_SECOND_PARTITION_PATH, "000"); + public static HoodieCommitMetadata generateCommitMetadata(String basePath, String commitTime) throws IOException { + return generateCommitMetadata(basePath, commitTime, Option.empty(), Option.empty()); + } + + public static HoodieCommitMetadata generateCommitMetadata(String basePath, String commitTime, + Option writes, Option updates) throws IOException { + String file1P0C0 = HoodieTestUtils.createNewDataFile(basePath, DEFAULT_FIRST_PARTITION_PATH, commitTime); + String file1P1C0 = HoodieTestUtils.createNewDataFile(basePath, DEFAULT_SECOND_PARTITION_PATH, commitTime); return generateCommitMetadata(new HashMap>() { { put(DEFAULT_FIRST_PARTITION_PATH, CollectionUtils.createImmutableList(file1P0C0)); put(DEFAULT_SECOND_PARTITION_PATH, CollectionUtils.createImmutableList(file1P1C0)); } - }); + }, writes, updates); } /** * Method to generate commit metadata. */ - private static HoodieCommitMetadata generateCommitMetadata(Map> partitionToFilePaths) { + private static HoodieCommitMetadata generateCommitMetadata(Map> partitionToFilePaths, + Option writes, Option updates) { HoodieCommitMetadata metadata = new HoodieCommitMetadata(); partitionToFilePaths.forEach((key, value) -> value.forEach(f -> { HoodieWriteStat writeStat = new HoodieWriteStat(); @@ -114,8 +126,8 @@ public class HoodieTestCommitMetadataGenerator extends HoodieTestDataGenerator { writeStat.setFileId(DEFAULT_FILEID); writeStat.setTotalWriteBytes(DEFAULT_TOTAL_WRITE_BYTES); writeStat.setPrevCommit(DEFAULT_PRE_COMMIT); - writeStat.setNumWrites(DEFAULT_NUM_WRITES); - writeStat.setNumUpdateWrites(DEFAULT_NUM_UPDATE_WRITES); + writeStat.setNumWrites(writes.orElse(DEFAULT_NUM_WRITES)); + writeStat.setNumUpdateWrites(updates.orElse(DEFAULT_NUM_UPDATE_WRITES)); writeStat.setTotalLogBlocks(DEFAULT_TOTAL_LOG_BLOCKS); writeStat.setTotalLogRecords(DEFAULT_TOTAL_LOG_RECORDS); metadata.addWriteStat(key, writeStat); diff --git a/hudi-common/src/test/java/org/apache/hudi/common/model/HoodieTestUtils.java b/hudi-common/src/test/java/org/apache/hudi/common/model/HoodieTestUtils.java index 945b8103b..a718cef82 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/model/HoodieTestUtils.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/model/HoodieTestUtils.java @@ -62,8 +62,13 @@ import org.apache.hadoop.util.StringUtils; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.File; +import java.io.FileOutputStream; import java.io.IOException; import java.io.Serializable; +import java.nio.ByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.file.Files; +import java.nio.file.Paths; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Arrays; @@ -223,6 +228,12 @@ public class HoodieTestUtils { return createDataFile(basePath, partitionPath, instantTime, fileID); } + public static String createNewDataFile(String basePath, String partitionPath, String instantTime, long length) + throws IOException { + String fileID = UUID.randomUUID().toString(); + return createDataFileFixLength(basePath, partitionPath, instantTime, fileID, length); + } + public static String createNewMarkerFile(String basePath, String partitionPath, String instantTime) throws IOException { String fileID = UUID.randomUUID().toString(); @@ -237,6 +248,18 @@ public class HoodieTestUtils { return fileID; } + public static String createDataFileFixLength(String basePath, String partitionPath, String instantTime, String fileID, + long length) throws IOException { + String folderPath = basePath + "/" + partitionPath + "/"; + Files.createDirectories(Paths.get(folderPath)); + String filePath = folderPath + FSUtils.makeDataFileName(instantTime, DEFAULT_WRITE_TOKEN, fileID); + Files.createFile(Paths.get(filePath)); + try (FileChannel output = new FileOutputStream(new File(filePath)).getChannel()) { + output.write(ByteBuffer.allocate(1), length - 1); + } + return fileID; + } + public static String createMarkerFile(String basePath, String partitionPath, String instantTime, String fileID) throws IOException { String folderPath =