
[HUDI-707] Add unit test for StatsCommand (#1645)

commit 802d16c8c9 (parent 6a0aa9a645)
Author: hongdd
Date: 2020-05-21 18:28:04 +08:00
Committed by: GitHub
7 changed files with 260 additions and 19 deletions
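In short: the table header strings used by `stats wa` and `stats filesizes` move into HoodieTableHeaderFields, a few StatsCommand members are opened up for reuse (MAX_FILES, printFileSizeHistogram, and a new getFieldNameToConverterMap), HoodieTestCommitMetadataGenerator gains overloads that accept explicit write/update counts, and a new TestStatsCommand exercises both commands end to end.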


@@ -96,4 +96,21 @@ public class HoodieTableHeaderFields {
   public static final String HEADER_TOTAL_PARTITIONS = "Total Partitions";
   public static final String HEADER_DELETED_FILE = "Deleted File";
   public static final String HEADER_SUCCEEDED = "Succeeded";
+
+  /**
+   * Fields of Stats.
+   */
+  public static final String HEADER_COMMIT_TIME = "CommitTime";
+  public static final String HEADER_TOTAL_UPSERTED = "Total Upserted";
+  public static final String HEADER_TOTAL_WRITTEN = "Total Written";
+  public static final String HEADER_WRITE_AMPLIFICATION_FACTOR = "Write Amplification Factor";
+  public static final String HEADER_HISTOGRAM_MIN = "Min";
+  public static final String HEADER_HISTOGRAM_10TH = "10th";
+  public static final String HEADER_HISTOGRAM_50TH = "50th";
+  public static final String HEADER_HISTOGRAM_AVG = "avg";
+  public static final String HEADER_HISTOGRAM_95TH = "95th";
+  public static final String HEADER_HISTOGRAM_MAX = "Max";
+  public static final String HEADER_HISTOGRAM_NUM_FILES = "NumFiles";
+  public static final String HEADER_HISTOGRAM_STD_DEV = "StdDev";
 }
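These constants replace the string literals previously inlined in StatsCommand (next file), so the command and the new test build their TableHeader rows from a single source of truth.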


@@ -20,6 +20,7 @@ package org.apache.hudi.cli.commands;
 import org.apache.hudi.cli.HoodieCLI;
 import org.apache.hudi.cli.HoodiePrintHelper;
+import org.apache.hudi.cli.HoodieTableHeaderFields;
 import org.apache.hudi.cli.TableHeader;
 import org.apache.hudi.common.fs.FSUtils;
 import org.apache.hudi.common.model.HoodieCommitMetadata;
@@ -54,7 +55,7 @@ import java.util.stream.Collectors;
 @Component
 public class StatsCommand implements CommandMarker {

-  private static final int MAX_FILES = 1000000;
+  public static final int MAX_FILES = 1000000;

   @CliCommand(value = "stats wa", help = "Write Amplification. Ratio of how many records were upserted to how many "
       + "records were actually written")
@@ -92,12 +93,14 @@ public class StatsCommand implements CommandMarker {
     }
     rows.add(new Comparable[] {"Total", totalRecordsUpserted, totalRecordsWritten, waf});

-    TableHeader header = new TableHeader().addTableHeaderField("CommitTime").addTableHeaderField("Total Upserted")
-        .addTableHeaderField("Total Written").addTableHeaderField("Write Amplification Factor");
+    TableHeader header = new TableHeader().addTableHeaderField(HoodieTableHeaderFields.HEADER_COMMIT_TIME)
+        .addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_UPSERTED)
+        .addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_WRITTEN)
+        .addTableHeaderField(HoodieTableHeaderFields.HEADER_WRITE_AMPLIFICATION_FACTOR);
     return HoodiePrintHelper.print(header, new HashMap<>(), sortByField, descending, limit, headerOnly, rows);
   }

-  private Comparable[] printFileSizeHistogram(String instantTime, Snapshot s) {
+  public Comparable[] printFileSizeHistogram(String instantTime, Snapshot s) {
     return new Comparable[] {instantTime, s.getMin(), s.getValue(0.1), s.getMedian(), s.getMean(), s.get95thPercentile(),
         s.getMax(), s.size(), s.getStdDev()};
   }
@@ -138,6 +141,20 @@ public class StatsCommand implements CommandMarker {
     Snapshot s = globalHistogram.getSnapshot();
     rows.add(printFileSizeHistogram("ALL", s));
+    TableHeader header = new TableHeader()
+        .addTableHeaderField(HoodieTableHeaderFields.HEADER_COMMIT_TIME)
+        .addTableHeaderField(HoodieTableHeaderFields.HEADER_HISTOGRAM_MIN)
+        .addTableHeaderField(HoodieTableHeaderFields.HEADER_HISTOGRAM_10TH)
+        .addTableHeaderField(HoodieTableHeaderFields.HEADER_HISTOGRAM_50TH)
+        .addTableHeaderField(HoodieTableHeaderFields.HEADER_HISTOGRAM_AVG)
+        .addTableHeaderField(HoodieTableHeaderFields.HEADER_HISTOGRAM_95TH)
+        .addTableHeaderField(HoodieTableHeaderFields.HEADER_HISTOGRAM_MAX)
+        .addTableHeaderField(HoodieTableHeaderFields.HEADER_HISTOGRAM_NUM_FILES)
+        .addTableHeaderField(HoodieTableHeaderFields.HEADER_HISTOGRAM_STD_DEV);
+    return HoodiePrintHelper.print(header, getFieldNameToConverterMap(), sortByField, descending, limit, headerOnly, rows);
+  }
+
+  public Map<String, Function<Object, String>> getFieldNameToConverterMap() {
     Function<Object, String> converterFunction =
         entry -> NumericUtils.humanReadableByteCount((Double.parseDouble(entry.toString())));
     Map<String, Function<Object, String>> fieldNameToConverterMap = new HashMap<>();
@@ -148,10 +165,6 @@ public class StatsCommand implements CommandMarker {
     fieldNameToConverterMap.put("95th", converterFunction);
     fieldNameToConverterMap.put("Max", converterFunction);
     fieldNameToConverterMap.put("StdDev", converterFunction);
-    TableHeader header = new TableHeader().addTableHeaderField("CommitTime").addTableHeaderField("Min")
-        .addTableHeaderField("10th").addTableHeaderField("50th").addTableHeaderField("avg").addTableHeaderField("95th")
-        .addTableHeaderField("Max").addTableHeaderField("NumFiles").addTableHeaderField("StdDev");
-    return HoodiePrintHelper.print(header, fieldNameToConverterMap, sortByField, descending, limit, headerOnly, rows);
+    return fieldNameToConverterMap;
   }
 }
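For context, the write amplification factor that `stats wa` prints is simply total records written divided by total records upserted, formatted with `DecimalFormat("#.00")`. A minimal standalone sketch of that arithmetic, using the same (written, upserted) pairs the new test feeds in (the class name is hypothetical, not part of the commit):

import java.text.DecimalFormat;

public class WafExample {
  public static void main(String[] args) {
    // Per-commit (written, upserted) pairs, mirroring the test fixtures:
    // commit 100 -> 15/10, commit 101 -> 20/10, commit 102 -> 15/15.
    int[][] commits = {{15, 10}, {20, 10}, {15, 15}};
    DecimalFormat df = new DecimalFormat("#.00"); // same format the CLI uses
    long totalWritten = 0;
    long totalUpserted = 0;
    for (int[] c : commits) {
      totalWritten += c[0];
      totalUpserted += c[1];
      System.out.println("WAF = " + df.format((float) c[0] / c[1])); // 1.50, 2.00, 1.00
    }
    // Aggregate WAF over all commits: 50 / 35 ~= 1.43
    System.out.println("Total WAF = " + df.format((float) totalWritten / totalUpserted));
  }
}

Widening MAX_FILES, printFileSizeHistogram, and the extracted getFieldNameToConverterMap to public is what lets the new test below rebuild the command's expected output instead of string-matching it.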


@@ -177,10 +177,10 @@ public class TestArchivedCommitsCommand extends AbstractShellIntegrationTest {
     rows.clear();
-    HoodieCommitMetadata metadata = HoodieTestCommitMetadataGenerator.generateCommitMetadata(tablePath);
     for (int i = 100; i < 104; i++) {
       String instant = String.valueOf(i);
+      // Since HoodiePrintHelper order data by default, need to order commitMetadata
+      HoodieCommitMetadata metadata = HoodieTestCommitMetadataGenerator.generateCommitMetadata(tablePath, instant);
+      Comparable[] result = new Comparable[] {
+          instant, "commit", HoodieTestCommitUtilities.convertAndOrderCommitMetadata(metadata)};
       rows.add(result);


@@ -65,7 +65,7 @@ public class TestRepairsCommand extends AbstractShellIntegrationTest {
     // Create table and connect
     new TableCommand().createTable(
-        tablePath, "test_table", HoodieTableType.COPY_ON_WRITE.name(),
+        tablePath, tableName, HoodieTableType.COPY_ON_WRITE.name(),
         "", TimelineLayoutVersion.VERSION_1, "org.apache.hudi.common.model.HoodieAvroPayload");
   }


@@ -0,0 +1,176 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hudi.cli.commands;

import com.codahale.metrics.Histogram;
import com.codahale.metrics.Snapshot;
import com.codahale.metrics.UniformReservoir;

import org.apache.hudi.cli.AbstractShellIntegrationTest;
import org.apache.hudi.cli.HoodieCLI;
import org.apache.hudi.cli.HoodiePrintHelper;
import org.apache.hudi.cli.HoodieTableHeaderFields;
import org.apache.hudi.cli.TableHeader;
import org.apache.hudi.cli.common.HoodieTestCommitMetadataGenerator;
import org.apache.hudi.common.HoodieTestDataGenerator;
import org.apache.hudi.common.model.HoodieTableType;
import org.apache.hudi.common.model.HoodieTestUtils;
import org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion;
import org.apache.hudi.common.util.Option;

import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.springframework.shell.core.CommandResult;

import java.io.File;
import java.io.IOException;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;

/**
 * Test class of {@link org.apache.hudi.cli.commands.StatsCommand}.
 */
public class TestStatsCommand extends AbstractShellIntegrationTest {

  private String tablePath;

  @BeforeEach
  public void init() throws IOException {
    String tableName = "test_table";
    tablePath = basePath + File.separator + tableName;
    HoodieCLI.conf = jsc.hadoopConfiguration();

    // Create table and connect
    new TableCommand().createTable(
        tablePath, tableName, HoodieTableType.COPY_ON_WRITE.name(),
        "", TimelineLayoutVersion.VERSION_1, "org.apache.hudi.common.model.HoodieAvroPayload");
  }

  /**
   * Test case for command 'stats wa'.
   */
  @Test
  public void testWriteAmplificationStats() {
    // generate data and metadata
    Map<String, Integer[]> data = new LinkedHashMap<>();
    data.put("100", new Integer[] {15, 10});
    data.put("101", new Integer[] {20, 10});
    data.put("102", new Integer[] {15, 15});

    data.forEach((key, value) -> {
      HoodieTestCommitMetadataGenerator.createCommitFileWithMetadata(tablePath, key, jsc.hadoopConfiguration(),
          Option.of(value[0]), Option.of(value[1]));
    });

    CommandResult cr = getShell().executeCommand("stats wa");
    assertTrue(cr.isSuccess());

    // generate expect
    List<Comparable[]> rows = new ArrayList<>();
    DecimalFormat df = new DecimalFormat("#.00");
    data.forEach((key, value) -> {
      // there are two partitions, so need to *2
      rows.add(new Comparable[] {key, value[1] * 2, value[0] * 2, df.format((float) value[0] / value[1])});
    });
    int totalWrite = data.values().stream().map(integers -> integers[0] * 2).mapToInt(s -> s).sum();
    int totalUpdate = data.values().stream().map(integers -> integers[1] * 2).mapToInt(s -> s).sum();
    rows.add(new Comparable[] {"Total", totalUpdate, totalWrite, df.format((float) totalWrite / totalUpdate)});

    TableHeader header = new TableHeader().addTableHeaderField(HoodieTableHeaderFields.HEADER_COMMIT_TIME)
        .addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_UPSERTED)
        .addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_WRITTEN)
        .addTableHeaderField(HoodieTableHeaderFields.HEADER_WRITE_AMPLIFICATION_FACTOR);
    String expected = HoodiePrintHelper.print(header, new HashMap<>(), "", false, -1, false, rows);
    assertEquals(expected, cr.getResult().toString());
  }

  /**
   * Test case for command 'stats filesizes'.
   */
  @Test
  public void testFileSizeStats() throws IOException {
    String commit1 = "100";
    String commit2 = "101";
    Map<String, Integer[]> data = new LinkedHashMap<>();
    data.put(commit1, new Integer[] {100, 120, 150});
    data.put(commit2, new Integer[] {200, 180, 250, 300});

    // generate data file
    String partition1 = HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH;
    String partition2 = HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH;
    String partition3 = HoodieTestDataGenerator.DEFAULT_THIRD_PARTITION_PATH;
    Integer[] data1 = data.get(commit1);
    assertTrue(3 <= data1.length);
    HoodieTestUtils.createNewDataFile(tablePath, partition1, commit1, data1[0]);
    HoodieTestUtils.createNewDataFile(tablePath, partition2, commit1, data1[1]);
    HoodieTestUtils.createNewDataFile(tablePath, partition3, commit1, data1[2]);
    Integer[] data2 = data.get(commit2);
    assertTrue(4 <= data2.length);
    HoodieTestUtils.createNewDataFile(tablePath, partition1, commit2, data2[0]);
    HoodieTestUtils.createNewDataFile(tablePath, partition2, commit2, data2[1]);
    HoodieTestUtils.createNewDataFile(tablePath, partition2, commit2, data2[2]);
    HoodieTestUtils.createNewDataFile(tablePath, partition3, commit2, data2[3]);

    CommandResult cr = getShell().executeCommand("stats filesizes");
    assertTrue(cr.isSuccess());

    Histogram globalHistogram = new Histogram(new UniformReservoir(StatsCommand.MAX_FILES));
    HashMap<String, Histogram> commitHistoMap = new HashMap<>();
    data.forEach((k, v) -> {
      commitHistoMap.put(k, new Histogram(new UniformReservoir(StatsCommand.MAX_FILES)));
      for (int value : v) {
        commitHistoMap.get(k).update(value);
        globalHistogram.update(value);
      }
    });

    // generate expect
    List<Comparable[]> rows = new ArrayList<>();
    for (Map.Entry<String, Histogram> entry : commitHistoMap.entrySet()) {
      Snapshot s = entry.getValue().getSnapshot();
      rows.add(new StatsCommand().printFileSizeHistogram(entry.getKey(), s));
    }
    Snapshot s = globalHistogram.getSnapshot();
    rows.add(new StatsCommand().printFileSizeHistogram("ALL", s));

    TableHeader header = new TableHeader()
        .addTableHeaderField(HoodieTableHeaderFields.HEADER_COMMIT_TIME)
        .addTableHeaderField(HoodieTableHeaderFields.HEADER_HISTOGRAM_MIN)
        .addTableHeaderField(HoodieTableHeaderFields.HEADER_HISTOGRAM_10TH)
        .addTableHeaderField(HoodieTableHeaderFields.HEADER_HISTOGRAM_50TH)
        .addTableHeaderField(HoodieTableHeaderFields.HEADER_HISTOGRAM_AVG)
        .addTableHeaderField(HoodieTableHeaderFields.HEADER_HISTOGRAM_95TH)
        .addTableHeaderField(HoodieTableHeaderFields.HEADER_HISTOGRAM_MAX)
        .addTableHeaderField(HoodieTableHeaderFields.HEADER_HISTOGRAM_NUM_FILES)
        .addTableHeaderField(HoodieTableHeaderFields.HEADER_HISTOGRAM_STD_DEV);
    String expect = HoodiePrintHelper.print(header, new StatsCommand().getFieldNameToConverterMap(),
        "", false, -1, false, rows);
    assertEquals(expect, cr.getResult().toString());
  }
}
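The file-size test rebuilds the expected table by pushing the same sizes through the Dropwizard Metrics classes the command itself uses, then reusing the now-public StatsCommand helpers. A minimal standalone sketch of that histogram/snapshot flow (hypothetical class name; the sizes mirror the test's two commits):

import com.codahale.metrics.Histogram;
import com.codahale.metrics.Snapshot;
import com.codahale.metrics.UniformReservoir;

public class HistogramExample {
  public static void main(String[] args) {
    // Mirror of what `stats filesizes` does internally: feed file sizes
    // into a histogram and read the distribution off a point-in-time snapshot.
    Histogram histogram = new Histogram(new UniformReservoir(1_000_000));
    for (long fileSize : new long[] {100, 120, 150, 200, 180, 250, 300}) {
      histogram.update(fileSize);
    }
    Snapshot s = histogram.getSnapshot();
    // Same columns as printFileSizeHistogram: Min, 10th, 50th, avg, 95th, Max, NumFiles, StdDev.
    System.out.printf("min=%d p10=%.1f median=%.1f mean=%.1f p95=%.1f max=%d n=%d stddev=%.1f%n",
        s.getMin(), s.getValue(0.1), s.getMedian(), s.getMean(), s.get95thPercentile(),
        s.getMax(), s.size(), s.getStdDev());
  }
}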


@@ -26,6 +26,7 @@ import org.apache.hudi.common.model.HoodieWriteStat;
 import org.apache.hudi.common.table.HoodieTableMetaClient;
 import org.apache.hudi.common.table.timeline.HoodieTimeline;
 import org.apache.hudi.common.util.CollectionUtils;
+import org.apache.hudi.common.util.Option;
 import org.apache.hudi.exception.HoodieIOException;

 import org.apache.hadoop.conf.Configuration;
@@ -61,6 +62,11 @@ public class HoodieTestCommitMetadataGenerator extends HoodieTestDataGenerator {
    * Create a commit file with default CommitMetadata.
    */
   public static void createCommitFileWithMetadata(String basePath, String commitTime, Configuration configuration) {
+    createCommitFileWithMetadata(basePath, commitTime, configuration, Option.empty(), Option.empty());
+  }
+
+  public static void createCommitFileWithMetadata(String basePath, String commitTime, Configuration configuration,
+      Option<Integer> writes, Option<Integer> updates) {
     Arrays.asList(HoodieTimeline.makeCommitFileName(commitTime), HoodieTimeline.makeInflightCommitFileName(commitTime),
         HoodieTimeline.makeRequestedCommitFileName(commitTime))
         .forEach(f -> {
@@ -71,7 +77,7 @@ public class HoodieTestCommitMetadataGenerator extends HoodieTestDataGenerator {
           FileSystem fs = FSUtils.getFs(basePath, configuration);
           os = fs.create(commitFile, true);
           // Generate commitMetadata
-          HoodieCommitMetadata commitMetadata = generateCommitMetadata(basePath);
+          HoodieCommitMetadata commitMetadata = generateCommitMetadata(basePath, commitTime, writes, updates);
           // Write empty commit metadata
           os.writeBytes(new String(commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8)));
         } catch (IOException ioe) {
@@ -91,21 +97,27 @@ public class HoodieTestCommitMetadataGenerator extends HoodieTestDataGenerator {
   /**
    * Generate commitMetadata in path.
    */
-  public static HoodieCommitMetadata generateCommitMetadata(String basePath) throws IOException {
-    String file1P0C0 = HoodieTestUtils.createNewDataFile(basePath, DEFAULT_FIRST_PARTITION_PATH, "000");
-    String file1P1C0 = HoodieTestUtils.createNewDataFile(basePath, DEFAULT_SECOND_PARTITION_PATH, "000");
+  public static HoodieCommitMetadata generateCommitMetadata(String basePath, String commitTime) throws IOException {
+    return generateCommitMetadata(basePath, commitTime, Option.empty(), Option.empty());
+  }
+
+  public static HoodieCommitMetadata generateCommitMetadata(String basePath, String commitTime,
+      Option<Integer> writes, Option<Integer> updates) throws IOException {
+    String file1P0C0 = HoodieTestUtils.createNewDataFile(basePath, DEFAULT_FIRST_PARTITION_PATH, commitTime);
+    String file1P1C0 = HoodieTestUtils.createNewDataFile(basePath, DEFAULT_SECOND_PARTITION_PATH, commitTime);
     return generateCommitMetadata(new HashMap<String, List<String>>() {
       {
         put(DEFAULT_FIRST_PARTITION_PATH, CollectionUtils.createImmutableList(file1P0C0));
         put(DEFAULT_SECOND_PARTITION_PATH, CollectionUtils.createImmutableList(file1P1C0));
       }
-    });
+    }, writes, updates);
   }

   /**
    * Method to generate commit metadata.
    */
-  private static HoodieCommitMetadata generateCommitMetadata(Map<String, List<String>> partitionToFilePaths) {
+  private static HoodieCommitMetadata generateCommitMetadata(Map<String, List<String>> partitionToFilePaths,
+      Option<Integer> writes, Option<Integer> updates) {
     HoodieCommitMetadata metadata = new HoodieCommitMetadata();
     partitionToFilePaths.forEach((key, value) -> value.forEach(f -> {
       HoodieWriteStat writeStat = new HoodieWriteStat();
@@ -114,8 +126,8 @@ public class HoodieTestCommitMetadataGenerator extends HoodieTestDataGenerator {
       writeStat.setFileId(DEFAULT_FILEID);
       writeStat.setTotalWriteBytes(DEFAULT_TOTAL_WRITE_BYTES);
       writeStat.setPrevCommit(DEFAULT_PRE_COMMIT);
-      writeStat.setNumWrites(DEFAULT_NUM_WRITES);
-      writeStat.setNumUpdateWrites(DEFAULT_NUM_UPDATE_WRITES);
+      writeStat.setNumWrites(writes.orElse(DEFAULT_NUM_WRITES));
+      writeStat.setNumUpdateWrites(updates.orElse(DEFAULT_NUM_UPDATE_WRITES));
       writeStat.setTotalLogBlocks(DEFAULT_TOTAL_LOG_BLOCKS);
       writeStat.setTotalLogRecords(DEFAULT_TOTAL_LOG_RECORDS);
       metadata.addWriteStat(key, writeStat);
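With these overloads, a test can pin the write and update counts that every generated HoodieWriteStat reports, rather than taking the defaults. A hedged sketch of a caller (the base path is a placeholder; in the tests it is the temp table created in init()):

import org.apache.hadoop.conf.Configuration;
import org.apache.hudi.cli.common.HoodieTestCommitMetadataGenerator;
import org.apache.hudi.common.util.Option;

public class GeneratorUsageExample {
  public static void main(String[] args) {
    String basePath = "/tmp/hoodie/test_table"; // placeholder path
    Configuration conf = new Configuration();
    // Commit "100": every generated write stat reports 15 writes and
    // 10 update-writes, so `stats wa` computes 15 / 10 = 1.50 for it.
    HoodieTestCommitMetadataGenerator.createCommitFileWithMetadata(
        basePath, "100", conf, Option.of(15), Option.of(10));
    // Passing Option.empty() falls back to DEFAULT_NUM_WRITES / DEFAULT_NUM_UPDATE_WRITES.
    HoodieTestCommitMetadataGenerator.createCommitFileWithMetadata(
        basePath, "101", conf, Option.empty(), Option.empty());
  }
}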