1
0

[HUDI-242] Support for RFC-12/Bootstrapping of external datasets to hudi (#1876)

- [HUDI-418] Bootstrap Index Implementation using HFile with unit-test
 - [HUDI-421] FileSystem View Changes to support Bootstrap with unit-tests
 - [HUDI-424] Implement Query Side Integration for querying tables containing bootstrap file slices
 - [HUDI-423] Implement upsert functionality for handling updates to these bootstrap file slices
 - [HUDI-421] Bootstrap Write Client with tests
 - [HUDI-425] Added HoodieDeltaStreamer support
 - [HUDI-899] Add a knob to change partition-path style while performing metadata bootstrap
 - [HUDI-900] Metadata Bootstrap Key Generator needs to handle complex keys correctly
 - [HUDI-424] Simplify Record reader implementation
 - [HUDI-423] Implement upsert functionality for handling updates to these bootstrap file slices
 - [HUDI-420] Hoodie Demo working with hive and sparkSQL. Also, Hoodie CLI working with bootstrap tables

Co-authored-by: Mehrotra <uditme@amazon.com>
Co-authored-by: Vinoth Chandar <vinoth@apache.org>
Co-authored-by: Balaji Varadarajan <varadarb@uber.com>
This commit is contained in:
vinoth chandar
2020-08-03 20:19:21 -07:00
committed by GitHub
parent 266bce12b3
commit 539621bd33
175 changed files with 7540 additions and 779 deletions

View File

@@ -0,0 +1,44 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
{
  "namespace": "org.apache.hudi.avro.model",
  "type": "record",
  "name": "HoodieBootstrapFilePartitionInfo",
  "fields": [
    {
      "name": "version",
      "type": ["int", "null"],
      "default": 1
    },
    {
      "name": "bootstrapPartitionPath",
      "type": ["null", "string"],
      "default": null
    },
    {
      "name": "bootstrapFileStatus",
      "type": ["null", "HoodieFileStatus"],
      "default": null
    },
    {
      "name": "partitionPath",
      "type": ["null", "string"],
      "default": null
    }
  ]
}

View File

@@ -0,0 +1,44 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
{
  "namespace": "org.apache.hudi.avro.model",
  "type": "record",
  "name": "HoodieBootstrapIndexInfo",
  "fields": [
    {
      "name": "version",
      "type": ["int", "null"],
      "default": 1
    },
    {
      "name": "bootstrapBasePath",
      "type": ["null", "string"],
      "default": null
    },
    {
      "name": "createdTimestamp",
      "type": ["null", "long"],
      "default": null
    },
    {
      "name": "numKeys",
      "type": ["null", "int"],
      "default": null
    }
  ]
}

View File

@@ -0,0 +1,47 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
{
  "namespace": "org.apache.hudi.avro.model",
  "type": "record",
  "name": "HoodieBootstrapPartitionMetadata",
  "fields": [
    {
      "name": "version",
      "type": ["int", "null"],
      "default": 1
    },
    {
      "name": "bootstrapPartitionPath",
      "type": ["null", "string"],
      "default": null
    },
    {
      "name": "partitionPath",
      "type": ["null", "string"],
      "default": null
    },
    {
      "name": "fileIdToBootstrapFile",
      "type": [
        "null",
        {
          "type": "map",
          "values": "HoodieFileStatus"
        }
      ],
      "default": null
    }
  ]
}

View File

@@ -61,6 +61,11 @@
"values":"double"
}],
"default": null
},
{
"name":"bootstrapFilePath",
"type":["null", "string"],
"default": null
}
]
}

View File

@@ -0,0 +1,49 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
{
  "namespace": "org.apache.hudi.avro.model",
  "type": "record",
  "name": "HoodieFSPermission",
  "fields": [
    {
      "name": "version",
      "type": ["int", "null"],
      "default": 1
    },
    {
      "name": "userAction",
      "type": ["null", "string"],
      "default": null
    },
    {
      "name": "groupAction",
      "type": ["null", "string"],
      "default": null
    },
    {
      "name": "otherAction",
      "type": ["null", "string"],
      "default": null
    },
    {
      "name": "stickyBit",
      "type": ["null", "boolean"],
      "default": null
    }
  ]
}

View File

@@ -0,0 +1,84 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
{
  "namespace": "org.apache.hudi.avro.model",
  "type": "record",
  "name": "HoodieFileStatus",
  "fields": [
    {
      "name": "version",
      "type": ["int", "null"],
      "default": 1
    },
    {
      "name": "path",
      "type": ["null", "HoodiePath"],
      "default": null
    },
    {
      "name": "length",
      "type": ["null", "long"],
      "default": null
    },
    {
      "name": "isDir",
      "type": ["null", "boolean"],
      "default": null
    },
    {
      "name": "blockReplication",
      "type": ["null", "int"],
      "default": null
    },
    {
      "name": "blockSize",
      "type": ["null", "long"],
      "default": null
    },
    {
      "name": "modificationTime",
      "type": ["null", "long"],
      "default": null
    },
    {
      "name": "accessTime",
      "type": ["null", "long"],
      "default": null
    },
    {
      "name": "permission",
      "type": ["null", "HoodieFSPermission"],
      "default": null
    },
    {
      "name": "owner",
      "type": ["null", "string"],
      "default": null
    },
    {
      "name": "group",
      "type": ["null", "string"],
      "default": null
    },
    {
      "name": "symlink",
      "type": ["null", "HoodiePath"],
      "default": null
    }
  ]
}

View File

@@ -0,0 +1,34 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
{
  "namespace": "org.apache.hudi.avro.model",
  "type": "record",
  "name": "HoodiePath",
  "fields": [
    {
      "name": "version",
      "type": ["int", "null"],
      "default": 1
    },
    {
      "name": "uri",
      "type": ["null", "string"],
      "default": null
    }
  ]
}