Timeline Service with Incremental View Syncing support
This commit is contained in:
committed by
vinoth chandar
parent
446f99aa0f
commit
64fec64097
@@ -0,0 +1,129 @@
|
||||
/*
|
||||
* Copyright (c) 2017 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*
|
||||
*/
|
||||
|
||||
package com.uber.hoodie.utilities;
|
||||
|
||||
import com.beust.jcommander.JCommander;
|
||||
import com.beust.jcommander.Parameter;
|
||||
import com.google.common.base.Preconditions;
|
||||
import io.javalin.Javalin;
|
||||
import java.io.BufferedReader;
|
||||
import java.io.InputStreamReader;
|
||||
import java.io.Serializable;
|
||||
import java.net.InetAddress;
|
||||
import java.net.UnknownHostException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.stream.IntStream;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.http.HttpResponse;
|
||||
import org.apache.http.client.methods.HttpGet;
|
||||
import org.apache.http.impl.client.CloseableHttpClient;
|
||||
import org.apache.http.impl.client.HttpClientBuilder;
|
||||
import org.apache.log4j.LogManager;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.apache.spark.api.java.JavaSparkContext;
|
||||
|
||||
public class HoodieWithTimelineServer implements Serializable {
|
||||
|
||||
// Keyed on this class (was HoodieCompactor.class, a copy-paste slip) so log lines are attributed correctly.
private static volatile Logger logger = LogManager.getLogger(HoodieWithTimelineServer.class);
// Parsed command-line options; read by startService(), run() and sendRequest().
private final Config cfg;
private transient FileSystem fs;

// Embedded HTTP server created by startService(); transient, so excluded from Java serialization.
private transient Javalin app = null;

/**
 * Creates the utility around an already-parsed configuration.
 *
 * @param cfg command-line options (server port, number of partitions, client delay)
 */
public HoodieWithTimelineServer(Config cfg) {
  this.cfg = cfg;
}
|
||||
|
||||
public static class Config implements Serializable {
|
||||
|
||||
@Parameter(names = {"--spark-master", "-ms"}, description = "Spark master", required = false)
|
||||
public String sparkMaster = null;
|
||||
@Parameter(names = {"--spark-memory",
|
||||
"-sm"}, description = "spark memory to use", required = true)
|
||||
public String sparkMemory = null;
|
||||
@Parameter(names = {"--num-partitions", "-n"}, description = "Num Partitions", required = false)
|
||||
public Integer numPartitions = 100;
|
||||
@Parameter(names = {"--server-port", "-p"}, description = " Server Port", required = false)
|
||||
public Integer serverPort = 26754;
|
||||
@Parameter(names = {"--delay-secs", "-d"}, description = "Delay(sec) before client connects", required = false)
|
||||
public Integer delaySecs = 30;
|
||||
@Parameter(names = {"--help", "-h"}, help = true)
|
||||
public Boolean help = false;
|
||||
}
|
||||
|
||||
public void startService() {
|
||||
app = Javalin.create().start(cfg.serverPort);
|
||||
app.get("/", ctx -> ctx.result("Hello World"));
|
||||
}
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
final Config cfg = new Config();
|
||||
JCommander cmd = new JCommander(cfg, args);
|
||||
if (cfg.help || args.length == 0) {
|
||||
cmd.usage();
|
||||
System.exit(1);
|
||||
}
|
||||
|
||||
HoodieWithTimelineServer service = new HoodieWithTimelineServer(cfg);
|
||||
service.run(UtilHelpers.buildSparkContext("client-server-hoodie", cfg.sparkMaster, cfg.sparkMemory));
|
||||
}
|
||||
|
||||
public void run(JavaSparkContext jsc) throws UnknownHostException {
|
||||
startService();
|
||||
final String driverHost = InetAddress.getLocalHost().getHostAddress();
|
||||
System.out.println("Driver Hostname is :" + driverHost);
|
||||
List<String> messages = new ArrayList<>();
|
||||
IntStream.range(0, cfg.numPartitions).forEach(i -> messages.add("Hello World"));
|
||||
List<String> gotMessages =
|
||||
jsc.parallelize(messages).map(msg -> sendRequest(driverHost, cfg.serverPort)).collect();
|
||||
System.out.println("Got Messages :" + gotMessages);
|
||||
Preconditions.checkArgument(gotMessages.equals(messages), "Got expected reply from Server");
|
||||
}
|
||||
|
||||
public String sendRequest(String driverHost, int port) throws RuntimeException {
|
||||
String url = String.format("http://%s:%d/", driverHost, port);
|
||||
try {
|
||||
|
||||
System.out.println("Sleeping for " + cfg.delaySecs + " secs ");
|
||||
Thread.sleep(cfg.delaySecs * 1000);
|
||||
System.out.println("Woke up after sleeping for " + cfg.delaySecs + " secs ");
|
||||
|
||||
CloseableHttpClient client = HttpClientBuilder.create().build();
|
||||
HttpGet request = new HttpGet(url);
|
||||
|
||||
HttpResponse response = client.execute(request);
|
||||
|
||||
System.out.println("Response Code from(" + url + ") : " + response.getStatusLine().getStatusCode());
|
||||
|
||||
BufferedReader rd = new BufferedReader(
|
||||
new InputStreamReader(response.getEntity().getContent()));
|
||||
|
||||
StringBuffer result = new StringBuffer();
|
||||
String line = "";
|
||||
while ((line = rd.readLine()) != null) {
|
||||
result.append(line);
|
||||
}
|
||||
System.out.println("Got result (" + result + ")");
|
||||
return result.toString();
|
||||
} catch (Exception ex) {
|
||||
throw new RuntimeException(ex);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,308 @@
|
||||
package com.uber.hoodie.utilities.perf;
|
||||
|
||||
import com.beust.jcommander.JCommander;
|
||||
import com.beust.jcommander.Parameter;
|
||||
import com.codahale.metrics.Histogram;
|
||||
import com.codahale.metrics.Snapshot;
|
||||
import com.codahale.metrics.UniformReservoir;
|
||||
import com.uber.hoodie.common.model.FileSlice;
|
||||
import com.uber.hoodie.common.table.HoodieTableMetaClient;
|
||||
import com.uber.hoodie.common.table.SyncableFileSystemView;
|
||||
import com.uber.hoodie.common.table.view.FileSystemViewStorageConfig;
|
||||
import com.uber.hoodie.common.table.view.FileSystemViewStorageType;
|
||||
import com.uber.hoodie.common.table.view.RemoteHoodieTableFileSystemView;
|
||||
import com.uber.hoodie.common.util.FSUtils;
|
||||
import com.uber.hoodie.common.util.Option;
|
||||
import com.uber.hoodie.timeline.service.TimelineService;
|
||||
import com.uber.hoodie.utilities.UtilHelpers;
|
||||
import java.io.IOException;
|
||||
import java.io.Serializable;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Random;
|
||||
import java.util.UUID;
|
||||
import java.util.concurrent.ExecutionException;
|
||||
import java.util.concurrent.ScheduledFuture;
|
||||
import java.util.concurrent.ScheduledThreadPoolExecutor;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.IntStream;
|
||||
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.log4j.LogManager;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.apache.spark.SparkConf;
|
||||
import org.apache.spark.api.java.JavaSparkContext;
|
||||
|
||||
/**
|
||||
* Perf Benchmark to run FileSystem View calls at scale and monitor performance
|
||||
*/
|
||||
public class TimelineServerPerf implements Serializable {
|
||||
|
||||
private static volatile Logger logger = LogManager.getLogger(TimelineServerPerf.class);
// Benchmark options parsed from the command line.
private final Config cfg;
// Timeline service built from cfg in the constructor; run() starts it only when no external host is configured.
private transient TimelineService timelineServer;
// True when cfg.serverHost was supplied, i.e. an externally managed server is targeted.
private final boolean useExternalTimelineServer;
// Host the remote file-system view connects to; assigned in run().
private String hostAddr;

/**
 * Builds the benchmark and its timeline service from the given options.
 *
 * @param cfg benchmark options
 * @throws IOException declared for failures while constructing the timeline service
 */
public TimelineServerPerf(Config cfg) throws IOException {
  this.cfg = cfg;
  this.useExternalTimelineServer = cfg.serverHost != null;
  this.timelineServer = new TimelineService(cfg.getTimelinServerConfig());
}
|
||||
|
||||
private void setHostAddrFromSparkConf(SparkConf sparkConf) {
|
||||
String hostAddr = sparkConf.get("spark.driver.host", null);
|
||||
if (hostAddr != null) {
|
||||
logger.info("Overriding hostIp to (" + hostAddr + ") found in spark-conf. It was " + this.hostAddr);
|
||||
this.hostAddr = hostAddr;
|
||||
} else {
|
||||
logger.warn("Unable to find driver bind address from spark config");
|
||||
}
|
||||
}
|
||||
|
||||
public void run() throws IOException {
|
||||
|
||||
List<String> allPartitionPaths = FSUtils.getAllPartitionPaths(timelineServer.getFs(), cfg.basePath, true);
|
||||
Collections.shuffle(allPartitionPaths);
|
||||
List<String> selected = allPartitionPaths.stream().filter(p -> !p.contains("error"))
|
||||
.limit(cfg.maxPartitions).collect(Collectors.toList());
|
||||
JavaSparkContext jsc = UtilHelpers.buildSparkContext("hudi-view-perf-" + cfg.basePath, cfg.sparkMaster);
|
||||
if (!useExternalTimelineServer) {
|
||||
this.timelineServer.startService();
|
||||
setHostAddrFromSparkConf(jsc.getConf());
|
||||
} else {
|
||||
this.hostAddr = cfg.serverHost;
|
||||
}
|
||||
|
||||
HoodieTableMetaClient metaClient = new HoodieTableMetaClient(timelineServer.getConf(), cfg.basePath, true);
|
||||
SyncableFileSystemView fsView = new RemoteHoodieTableFileSystemView(this.hostAddr, cfg.serverPort, metaClient);
|
||||
|
||||
String reportDir = cfg.reportDir;
|
||||
metaClient.getFs().mkdirs(new Path(reportDir));
|
||||
|
||||
String dumpPrefix = UUID.randomUUID().toString();
|
||||
System.out.println("First Iteration to load all partitions");
|
||||
Dumper d = new Dumper(metaClient.getFs(), new Path(reportDir,
|
||||
String.format("1_%s.csv", dumpPrefix)));
|
||||
d.init();
|
||||
d.dump(runLookups(jsc, selected, fsView, 1, 0));
|
||||
d.close();
|
||||
System.out.println("\n\n\n First Iteration is done");
|
||||
|
||||
Dumper d2 = new Dumper(metaClient.getFs(), new Path(reportDir,
|
||||
String.format("2_%s.csv", dumpPrefix)));
|
||||
d2.init();
|
||||
d2.dump(runLookups(jsc, selected, fsView, cfg.numIterations, cfg.numCoresPerExecutor));
|
||||
d2.close();
|
||||
|
||||
System.out.println("\n\n\nDumping all File Slices");
|
||||
selected.stream().forEach(p -> fsView.getAllFileSlices(p).forEach(s -> System.out.println("\tMyFileSlice=" + s)));
|
||||
|
||||
// Waiting for curl queries
|
||||
if (!useExternalTimelineServer && cfg.waitForManualQueries) {
|
||||
System.out.println("Timeline Server Host Address=" + hostAddr + ", port=" + timelineServer.getServerPort());
|
||||
while (true) {
|
||||
try {
|
||||
Thread.sleep(60000);
|
||||
} catch (InterruptedException e) {
|
||||
// skip it
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public List<PerfStats> runLookups(JavaSparkContext jsc, List<String> partitionPaths, SyncableFileSystemView fsView,
|
||||
int numIterations, int concurrency) {
|
||||
List<PerfStats> perfStats = jsc.parallelize(partitionPaths, cfg.numExecutors).flatMap(p -> {
|
||||
ScheduledThreadPoolExecutor executor = new ScheduledThreadPoolExecutor(100);
|
||||
final List<PerfStats> result = new ArrayList<>();
|
||||
final List<ScheduledFuture<PerfStats>> futures = new ArrayList<>();
|
||||
List<FileSlice> slices = fsView.getLatestFileSlices(p).collect(Collectors.toList());
|
||||
String fileId = slices.isEmpty() ? "dummyId"
|
||||
: slices.get(new Random(Double.doubleToLongBits(Math.random())).nextInt(slices.size())).getFileId();
|
||||
IntStream.range(0, concurrency).forEach(i -> {
|
||||
futures.add(executor.schedule(() -> runOneRound(fsView, p, fileId, i, numIterations), 0, TimeUnit.NANOSECONDS));
|
||||
});
|
||||
futures.stream().forEach(x -> {
|
||||
try {
|
||||
result.add(x.get());
|
||||
} catch (InterruptedException | ExecutionException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
});
|
||||
System.out.println("SLICES are=");
|
||||
slices.stream().forEach(s -> {
|
||||
System.out.println("\t\tFileSlice=" + s);
|
||||
});
|
||||
return result.iterator();
|
||||
}).collect();
|
||||
return perfStats;
|
||||
}
|
||||
|
||||
private static PerfStats runOneRound(SyncableFileSystemView fsView, String partition, String fileId, int id,
|
||||
int numIterations) {
|
||||
Histogram latencyHistogram = new Histogram(new UniformReservoir(10000));
|
||||
for (int i = 0; i < numIterations; i++) {
|
||||
long beginTs = System.currentTimeMillis();
|
||||
Option<FileSlice> c = fsView.getLatestFileSlice(partition, fileId);
|
||||
long endTs = System.currentTimeMillis();
|
||||
System.out.println("Latest File Slice for part=" + partition + ", fileId="
|
||||
+ fileId + ", Slice=" + c + ", Time=" + (endTs - beginTs));
|
||||
latencyHistogram.update(endTs - beginTs);
|
||||
}
|
||||
return new PerfStats(partition, id, latencyHistogram.getSnapshot());
|
||||
}
|
||||
|
||||
private static class Dumper implements Serializable {
|
||||
|
||||
private final Path dumpPath;
|
||||
private final FileSystem fileSystem;
|
||||
private FSDataOutputStream outputStream;
|
||||
|
||||
public Dumper(FileSystem fs, Path dumpPath) {
|
||||
this.dumpPath = dumpPath;
|
||||
this.fileSystem = fs;
|
||||
}
|
||||
|
||||
public void init() throws IOException {
|
||||
outputStream = fileSystem.create(dumpPath, true);
|
||||
addHeader();
|
||||
}
|
||||
|
||||
private void addHeader() throws IOException {
|
||||
String header = "Partition,Thread,Min,Max,Mean,Median,75th,95th\n";
|
||||
outputStream.write(header.getBytes());
|
||||
outputStream.flush();
|
||||
}
|
||||
|
||||
public void dump(List<PerfStats> stats) {
|
||||
stats.stream().forEach(x -> {
|
||||
String row = String.format("%s,%d,%d,%d,%f,%f,%f,%f\n", x.partition, x.id, x.minTime, x.maxTime, x.meanTime,
|
||||
x.medianTime, x.p75, x.p95);
|
||||
System.out.println(row);
|
||||
try {
|
||||
outputStream.write(row.getBytes());
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
public void close() throws IOException {
|
||||
outputStream.close();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private static class PerfStats implements Serializable {
|
||||
|
||||
private final String partition;
|
||||
private final int id;
|
||||
private final long minTime;
|
||||
private final long maxTime;
|
||||
private final double meanTime;
|
||||
private final double medianTime;
|
||||
private final double p95;
|
||||
private final double p75;
|
||||
|
||||
public PerfStats(String partition, int id, Snapshot s) {
|
||||
this(partition, id, s.getMin(), s.getMax(), s.getMean(), s.getMedian(), s.get95thPercentile(),
|
||||
s.get75thPercentile());
|
||||
}
|
||||
|
||||
public PerfStats(String partition, int id, long minTime, long maxTime, double meanTime, double medianTime,
|
||||
double p95, double p75) {
|
||||
this.partition = partition;
|
||||
this.id = id;
|
||||
this.minTime = minTime;
|
||||
this.maxTime = maxTime;
|
||||
this.meanTime = meanTime;
|
||||
this.medianTime = medianTime;
|
||||
this.p95 = p95;
|
||||
this.p75 = p75;
|
||||
}
|
||||
}
|
||||
|
||||
public static class Config implements Serializable {
|
||||
|
||||
@Parameter(names = {"--base-path", "-b"}, description = "Base Path", required = true)
|
||||
public String basePath = "";
|
||||
|
||||
@Parameter(names = {"--report-dir", "-rd"}, description = "Dir where reports are added", required = true)
|
||||
public String reportDir = "";
|
||||
|
||||
@Parameter(names = {"--max-partitions", "-m"}, description = "Mx partitions to be loaded")
|
||||
public Integer maxPartitions = 1000;
|
||||
|
||||
@Parameter(names = {"--num-executors", "-e"}, description = "num executors")
|
||||
public Integer numExecutors = 10;
|
||||
|
||||
@Parameter(names = {"--num-cores", "-c"}, description = "num cores")
|
||||
public Integer numCoresPerExecutor = 10;
|
||||
|
||||
@Parameter(names = {"--num-iterations", "-i"}, description = "Number of iterations for each partitions")
|
||||
public Integer numIterations = 10;
|
||||
|
||||
@Parameter(names = {"--spark-master", "-ms"}, description = "Spark master", required = false)
|
||||
public String sparkMaster = "local[2]";
|
||||
|
||||
@Parameter(names = {"--server-port", "-p"}, description = " Server Port")
|
||||
public Integer serverPort = 26754;
|
||||
|
||||
@Parameter(names = {"--server-host", "-sh"},
|
||||
description = " Server Host (Set it for externally managed timeline service")
|
||||
public String serverHost = null;
|
||||
|
||||
@Parameter(names = {"--view-storage", "-st"}, description = "View Storage Type. Defaut - SPILLABLE_DISK")
|
||||
public FileSystemViewStorageType viewStorageType = FileSystemViewStorageType.SPILLABLE_DISK;
|
||||
|
||||
@Parameter(names = {"--max-view-mem-per-table", "-mv"},
|
||||
description = "Maximum view memory per table in MB to be used for storing file-groups."
|
||||
+ " Overflow file-groups will be spilled to disk. Used for SPILLABLE_DISK storage type")
|
||||
public Integer maxViewMemPerTableInMB = 2048;
|
||||
|
||||
@Parameter(names = {"--mem-overhead-fraction-pending-compaction", "-cf"},
|
||||
description = "Memory Fraction of --max-view-mem-per-table to be allocated for managing pending compaction"
|
||||
+ " storage. Overflow entries will be spilled to disk. Used for SPILLABLE_DISK storage type")
|
||||
public Double memFractionForCompactionPerTable = 0.001;
|
||||
|
||||
@Parameter(names = {"--base-store-path", "-sp"},
|
||||
description = "Directory where spilled view entries will be stored. Used for SPILLABLE_DISK storage type")
|
||||
public String baseStorePathForFileGroups = FileSystemViewStorageConfig.DEFAULT_VIEW_SPILLABLE_DIR;
|
||||
|
||||
@Parameter(names = {"--rocksdb-path", "-rp"},
|
||||
description = "Root directory for RocksDB")
|
||||
public String rocksDBPath = FileSystemViewStorageConfig.DEFAULT_ROCKSDB_BASE_PATH;
|
||||
|
||||
@Parameter(names = {"--wait-for-manual-queries", "-ww"})
|
||||
public Boolean waitForManualQueries = false;
|
||||
|
||||
@Parameter(names = {"--help", "-h"})
|
||||
public Boolean help = false;
|
||||
|
||||
public TimelineService.Config getTimelinServerConfig() {
|
||||
TimelineService.Config c = new TimelineService.Config();
|
||||
c.viewStorageType = viewStorageType;
|
||||
c.baseStorePathForFileGroups = baseStorePathForFileGroups;
|
||||
c.maxViewMemPerTableInMB = maxViewMemPerTableInMB;
|
||||
c.memFractionForCompactionPerTable = memFractionForCompactionPerTable;
|
||||
c.rocksDBPath = rocksDBPath;
|
||||
c.serverPort = serverPort;
|
||||
return c;
|
||||
}
|
||||
}
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
final Config cfg = new Config();
|
||||
JCommander cmd = new JCommander(cfg, args);
|
||||
if (cfg.help || args.length == 0) {
|
||||
cmd.usage();
|
||||
System.exit(1);
|
||||
}
|
||||
TimelineServerPerf perf = new TimelineServerPerf(cfg);
|
||||
perf.run();
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user