Hoodie operability with S3
committed by vinoth chandar
parent a7cd021f26
commit d6f94b998d
@@ -1,3 +1,4 @@
# Hoodie

Hoodie manages storage of large analytical datasets on [HDFS](http://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-hdfs/HdfsDesign.html) and serves them out via two types of tables

* **Read Optimized Table** - Provides excellent query performance via purely columnar storage (e.g. [Parquet](https://parquet.apache.org/))
docs/s3_filesystem.md (new file, 47 lines)
@@ -0,0 +1,47 @@
---
title: S3 Filesystem (experimental)
keywords: sql hive s3 spark presto
sidebar: mydoc_sidebar
permalink: s3_hoodie.html
toc: false
summary: In this page, we go over how to configure Hoodie with the S3 filesystem.
---
Hoodie works with HDFS by default. Hoodie-S3 compatibility is experimental and under active development.

## S3 configs

Add the required configs to your core-site.xml, from where Hoodie can fetch them. Set `fs.defaultFS` to your S3 bucket name, and Hoodie should be able to read from and write to the bucket.

```xml
<property>
  <name>fs.defaultFS</name>
  <value>s3://ysharma</value>
</property>

<property>
  <name>fs.s3.impl</name>
  <value>org.apache.hadoop.fs.s3native.NativeS3FileSystem</value>
</property>

<property>
  <name>fs.s3.awsAccessKeyId</name>
  <value>AWS_KEY</value>
</property>

<property>
  <name>fs.s3.awsSecretAccessKey</name>
  <value>AWS_SECRET</value>
</property>

<property>
  <name>fs.s3n.awsAccessKeyId</name>
  <value>AWS_KEY</value>
</property>

<property>
  <name>fs.s3n.awsSecretAccessKey</name>
  <value>AWS_SECRET</value>
</property>
```
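
If you prefer not to hard-code credentials in core-site.xml, the same properties can also be set programmatically on a Hadoop `Configuration` before it is handed to Spark/Hoodie. A minimal sketch, not part of this commit; the bucket name and the `AWS_KEY`/`AWS_SECRET` environment variables are placeholder assumptions:

```java
import org.apache.hadoop.conf.Configuration;

public class S3ConfigSketch {
  public static Configuration s3Conf() {
    // Mirror the core-site.xml entries above; values here are placeholders.
    Configuration conf = new Configuration();
    conf.set("fs.defaultFS", "s3://your-bucket");
    conf.set("fs.s3.impl", "org.apache.hadoop.fs.s3native.NativeS3FileSystem");
    // Credentials pulled from the environment instead of being checked in.
    conf.set("fs.s3.awsAccessKeyId", System.getenv("AWS_KEY"));
    conf.set("fs.s3.awsSecretAccessKey", System.getenv("AWS_SECRET"));
    conf.set("fs.s3n.awsAccessKeyId", System.getenv("AWS_KEY"));
    conf.set("fs.s3n.awsSecretAccessKey", System.getenv("AWS_SECRET"));
    return conf;
  }
}
```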
@@ -49,9 +49,10 @@ public class HoodieWrapperFileSystem extends FileSystem {
   public static final String HOODIE_SCHEME_PREFIX = "hoodie-";
 
   static {
-    SUPPORT_SCHEMES = new HashSet<>(2);
+    SUPPORT_SCHEMES = new HashSet<>();
     SUPPORT_SCHEMES.add("file");
     SUPPORT_SCHEMES.add("hdfs");
+    SUPPORT_SCHEMES.add("s3");
   }
 
   private ConcurrentMap<String, SizeAwareFSDataOutputStream> openStreams =
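
The `SUPPORT_SCHEMES` whitelist above is what this commit widens: `s3` joins `file` and `hdfs` as a scheme the wrapper accepts. As a stand-alone illustration (the `isSupported` helper below is hypothetical, not Hoodie API), such a check boils down to comparing a path's URI scheme against the set:

```java
import java.net.URI;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;

class SchemeWhitelistSketch {
  // Same three schemes the commit leaves in SUPPORT_SCHEMES.
  private static final Set<String> SUPPORT_SCHEMES =
      new HashSet<>(Arrays.asList("file", "hdfs", "s3"));

  // Hypothetical helper: would the wrapper accept this base path?
  static boolean isSupported(String path) {
    String scheme = URI.create(path).getScheme();
    return scheme != null && SUPPORT_SCHEMES.contains(scheme);
  }

  public static void main(String[] args) {
    System.out.println(isSupported("s3://ysharma/sample-table")); // true after this commit
    System.out.println(isSupported("wasb://container/table"));    // false: not whitelisted
  }
}
```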
@@ -15,14 +15,20 @@
  */
 
 import com.uber.hoodie.HoodieWriteClient;
-import com.uber.hoodie.common.table.HoodieTableMetaClient;
-import com.uber.hoodie.common.util.FSUtils;
-import com.uber.hoodie.config.HoodieWriteConfig;
 import com.uber.hoodie.common.HoodieTestDataGenerator;
 import com.uber.hoodie.common.model.HoodieRecord;
+import com.uber.hoodie.common.table.HoodieTableMetaClient;
+import com.uber.hoodie.common.util.FSUtils;
 import com.uber.hoodie.config.HoodieIndexConfig;
+import com.uber.hoodie.config.HoodieWriteConfig;
 import com.uber.hoodie.index.HoodieIndex;
 
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.CommandLineParser;
+import org.apache.commons.cli.BasicParser;
+import org.apache.commons.cli.HelpFormatter;
+import org.apache.commons.cli.Option;
+import org.apache.commons.cli.Options;
+import org.apache.commons.cli.ParseException;
 import org.apache.log4j.LogManager;
 import org.apache.log4j.Logger;
 import org.apache.spark.SparkConf;
@@ -41,8 +47,34 @@ public class HoodieClientExample {
 
   private static Logger logger = LogManager.getLogger(HoodieClientExample.class);
 
+  private static final String DEFAULT_TABLE_PATH = "file:///tmp/hoodie/sample-table";
+  private static final String DEFAULT_TABLE_NAME = "sample-table";
+
   public static void main(String[] args) throws Exception {
-    String tablePath = args.length == 1 ? args[0] : "file:///tmp/hoodie/sample-table";
+    Options options = new Options();
+    Option path = new Option("p", "table-path", true, "input table path");
+    path.setRequired(false);
+    options.addOption(path);
+
+    Option name = new Option("n", "table-name", true, "input table name");
+    name.setRequired(false);
+    options.addOption(name);
+
+    CommandLineParser parser = new BasicParser();
+    HelpFormatter formatter = new HelpFormatter();
+    CommandLine cmd;
+
+    try {
+      cmd = parser.parse(options, args);
+    } catch (ParseException e) {
+      System.out.println(e.getMessage());
+      formatter.printHelp("HoodieClientExample", options);
+      System.exit(1);
+      return;
+    }
+
+    String inputTablePath = cmd.getOptionValue("table-path", DEFAULT_TABLE_PATH);
+    String inputTableName = cmd.getOptionValue("table-name", DEFAULT_TABLE_NAME);
 
     HoodieTestDataGenerator dataGen = new HoodieTestDataGenerator();
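
With the commons-cli parsing in place, the example can be pointed at any supported filesystem from the command line, for example `--table-path s3://ysharma/sample-table --table-name sample-table` (or the short `-p`/`-n` forms), falling back to the local `file:///tmp/hoodie/sample-table` defaults when the flags are omitted.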
@@ -54,16 +86,15 @@ public class HoodieClientExample {
 
     // generate some records to be loaded in.
     HoodieWriteConfig cfg =
-        HoodieWriteConfig.newBuilder().withPath(tablePath)
+        HoodieWriteConfig.newBuilder().withPath(inputTablePath)
             .withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA).withParallelism(2, 2)
-            .forTable("sample-table").withIndexConfig(
+            .forTable(inputTableName).withIndexConfig(
                 HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build())
             .build();
     Properties properties = new Properties();
-    properties.put(HoodieWriteConfig.TABLE_NAME, "sample-table");
+    properties.put(HoodieWriteConfig.TABLE_NAME, inputTableName);
     HoodieTableMetaClient
-        .initializePathAsHoodieDataset(FSUtils.getFs(), tablePath,
-            properties);
+        .initializePathAsHoodieDataset(FSUtils.getFs(), inputTablePath, properties);
     HoodieWriteClient client = new HoodieWriteClient(jsc, cfg);
 
     /**