Add GCS to supported filesystems
committed by vinoth chandar
parent 21e334592f
commit 43a55b09fd
@@ -81,4 +81,7 @@ summary: "Here we list all possible configurations and what they mean"
 - [S3Configs](s3_hoodie.html) (Hoodie S3 Configs) <br/>
 <span style="color:grey">Configurations required for S3 and Hoodie co-operability.</span>
+- [GCSConfigs](gcs_hoodie.html) (Hoodie GCS Configs) <br/>
+<span style="color:grey">Configurations required for GCS and Hoodie co-operability.</span>
+
 
 {% include callout.html content="Hoodie is a young project. A lot of pluggable interfaces and configurations to support diverse workloads need to be created. Get involved [here](https://github.com/uber/hoodie)" type="info" %}

docs/gcs_filesystem.md (new file, 62 lines)
@@ -0,0 +1,62 @@
---
title: GCS Filesystem (experimental)
keywords: sql hive gcs spark presto
sidebar: mydoc_sidebar
permalink: gcs_hoodie.html
toc: false
summary: In this page, we go over how to configure Hoodie with Google Cloud Storage.
---

Hoodie works with HDFS by default, and GCS **regional** buckets provide an HDFS-compatible API with strong consistency.

## GCS Configs

There are two configurations required for Hoodie GCS compatibility:

- Adding GCS credentials for Hoodie
- Adding the required jars to the classpath

### GCS Credentials

Add the required configs to your core-site.xml, from where Hoodie can fetch them. Replace `fs.defaultFS` with your GCS bucket name, and Hoodie should be able to read from and write to the bucket.

```xml
<property>
  <name>fs.defaultFS</name>
  <value>gs://hoodie-bucket</value>
</property>

<property>
  <name>fs.gs.impl</name>
  <value>com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystem</value>
  <description>The FileSystem for gs: (GCS) uris.</description>
</property>

<property>
  <name>fs.AbstractFileSystem.gs.impl</name>
  <value>com.google.cloud.hadoop.fs.gcs.GoogleHadoopFS</value>
  <description>The AbstractFileSystem for gs: (GCS) uris.</description>
</property>

<property>
  <name>fs.gs.project.id</name>
  <value>GCS_PROJECT_ID</value>
</property>

<property>
  <name>google.cloud.auth.service.account.enable</name>
  <value>true</value>
</property>

<property>
  <name>google.cloud.auth.service.account.email</name>
  <value>GCS_SERVICE_ACCOUNT_EMAIL</value>
</property>

<property>
  <name>google.cloud.auth.service.account.keyfile</name>
  <value>GCS_SERVICE_ACCOUNT_KEYFILE</value>
</property>
```
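
To sanity-check these settings, a minimal sketch using the standard Hadoop `FileSystem` API can list the bucket root. It assumes the core-site.xml above and the connector jar from the GCS Libs section below are on the classpath; the class name is only illustrative.

```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GcsSanityCheck {
  public static void main(String[] args) throws Exception {
    // Loads core-site.xml (including the GCS properties above) from the classpath.
    Configuration conf = new Configuration();

    // With fs.defaultFS set to gs://hoodie-bucket, this resolves to the GCS connector.
    FileSystem fs = FileSystem.get(conf);

    // Listing the bucket root exercises both the credentials and the gs: scheme wiring.
    for (FileStatus status : fs.listStatus(new Path("/"))) {
      System.out.println(status.getPath());
    }
  }
}
```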

### GCS Libs

GCS Hadoop libraries to add to your classpath (see the note below the list):

- com.google.cloud.bigdataoss:gcs-connector:1.6.0-hadoop2
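
The coordinate above is a Maven-style `groupId:artifactId:version` triple. At runtime, the corresponding jar must be visible to every process that reads or writes the dataset, for example by adding it to `HADOOP_CLASSPATH` or passing it to `spark-submit` via `--jars`.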
@@ -53,6 +53,10 @@ public class HoodieWrapperFileSystem extends FileSystem {
     SUPPORT_SCHEMES.add("file");
     SUPPORT_SCHEMES.add("hdfs");
     SUPPORT_SCHEMES.add("s3");
+
+    // Hoodie currently relies on underlying object store being fully
+    // consistent so only regional buckets should be used.
+    SUPPORT_SCHEMES.add("gs");
   }
 
   private ConcurrentMap<String, SizeAwareFSDataOutputStream> openStreams =