1
0

[HUDI-514] A schema provider to get metadata through Jdbc (#1200)

This commit is contained in:
openopen2
2020-02-14 10:06:06 +08:00
committed by GitHub
parent 175de0db7b
commit dfbee673ef
9 changed files with 378 additions and 4 deletions

View File

@@ -123,6 +123,7 @@ public class TestHoodieDeltaStreamer extends UtilitiesTestBase {
props.setProperty("hoodie.datasource.write.partitionpath.field", "not_there");
props.setProperty("hoodie.deltastreamer.schemaprovider.source.schema.file", dfsBasePath + "/source.avsc");
props.setProperty("hoodie.deltastreamer.schemaprovider.target.schema.file", dfsBasePath + "/target.avsc");
// Hive Configs
props.setProperty(DataSourceWriteOptions.HIVE_URL_OPT_KEY(), "jdbc:hive2://127.0.0.1:9999/");
props.setProperty(DataSourceWriteOptions.HIVE_DATABASE_OPT_KEY(), "testdb1");
@@ -526,7 +527,7 @@ public class TestHoodieDeltaStreamer extends UtilitiesTestBase {
assertTrue(e.getMessage().contains("Please provide a valid schema provider class!"));
}
}
@Test
public void testPayloadClassUpdate() throws Exception {
String dataSetBasePath = dfsBasePath + "/test_dataset_mor";

View File

@@ -0,0 +1,88 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hudi.utilities;
import org.apache.hudi.common.util.TypedProperties;
import org.apache.hudi.exception.HoodieException;
import org.apache.hudi.utilities.schema.JdbcbasedSchemaProvider;
import org.apache.avro.Schema;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.spark.api.java.JavaSparkContext;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import java.io.IOException;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import static org.junit.Assert.assertEquals;
public class TestJdbcbasedSchemaProvider {

  private static final Logger LOG = LogManager.getLogger(TestJdbcbasedSchemaProvider.class);
  private static final TypedProperties PROPS = new TypedProperties();

  protected transient JavaSparkContext jsc = null;

  @Before
  public void init() {
    jsc = UtilHelpers.buildSparkContext(this.getClass().getName() + "-hoodie", "local[2]");
    // JDBC settings pointing the schema provider at the in-memory H2 database
    // created by initH2Database().
    PROPS.setProperty("hoodie.deltastreamer.schemaprovider.source.schema.jdbc.connection.url", "jdbc:h2:mem:test_mem");
    PROPS.setProperty("hoodie.deltastreamer.schemaprovider.source.schema.jdbc.driver.type", "org.h2.Driver");
    PROPS.setProperty("hoodie.deltastreamer.schemaprovider.source.schema.jdbc.username", "sa");
    PROPS.setProperty("hoodie.deltastreamer.schemaprovider.source.schema.jdbc.password", "");
    PROPS.setProperty("hoodie.deltastreamer.schemaprovider.source.schema.jdbc.dbtable", "triprec");
    PROPS.setProperty("hoodie.deltastreamer.schemaprovider.source.schema.jdbc.timeout", "0");
    PROPS.setProperty("hoodie.deltastreamer.schemaprovider.source.schema.jdbc.nullable", "false");
  }

  @After
  public void teardown() throws Exception {
    if (jsc != null) {
      jsc.stop();
    }
  }

  /**
   * Verifies that {@link JdbcbasedSchemaProvider} derives the expected Avro schema
   * from the JDBC metadata of the {@code triprec} table.
   */
  @Test
  public void testJdbcbasedSchemaProvider() throws Exception {
    try {
      initH2Database();
      Schema sourceSchema = UtilHelpers.createSchemaProvider(JdbcbasedSchemaProvider.class.getName(), PROPS, jsc).getSourceSchema();
      // JUnit convention: expected value first, actual second, so failure messages read correctly.
      assertEquals(
          new Schema.Parser().parse(UtilitiesTestBase.Helpers.readFile("delta-streamer-config/source-jdbc.avsc")).toString().toUpperCase(),
          sourceSchema.toString().toUpperCase());
    } catch (HoodieException e) {
      // NOTE(review): swallowing the exception makes the test pass silently when the
      // JDBC connection fails — consider rethrowing so CI surfaces the problem.
      LOG.error("Failed to get connection through jdbc. ", e);
    }
  }

  /**
   * Initializes the in-memory H2 database and creates the {@code triprec} test table.
   * {@code DB_CLOSE_DELAY=-1} keeps the database alive for the JVM's lifetime, so the
   * connection and statement can be closed here without dropping the table before the
   * schema provider connects (a plain {@code jdbc:h2:mem:} database is discarded when
   * its last connection closes).
   *
   * @throws SQLException if creating the table fails
   * @throws IOException  if the DDL resource file cannot be read
   */
  private void initH2Database() throws SQLException, IOException {
    // try-with-resources guarantees the connection and statement are always released.
    try (Connection conn = DriverManager.getConnection("jdbc:h2:mem:test_mem;DB_CLOSE_DELAY=-1", "sa", "");
         PreparedStatement ps = conn.prepareStatement(UtilitiesTestBase.Helpers.readFile("delta-streamer-config/triprec.sql"))) {
      ps.executeUpdate();
    }
  }
}

View File

@@ -161,14 +161,20 @@ public class UtilitiesTestBase {
// to get hold of resources bundled with jar
private static ClassLoader classLoader = Helpers.class.getClassLoader();
public static void copyToDFS(String testResourcePath, FileSystem fs, String targetPath) throws IOException {
public static String readFile(String testResourcePath) throws IOException {
BufferedReader reader =
new BufferedReader(new InputStreamReader(classLoader.getResourceAsStream(testResourcePath)));
PrintStream os = new PrintStream(fs.create(new Path(targetPath), true));
StringBuffer sb = new StringBuffer();
String line;
while ((line = reader.readLine()) != null) {
os.println(line);
sb.append(line + "\n");
}
return sb.toString();
}
/**
 * Copies a classpath test resource to DFS at the given target path,
 * overwriting any existing file.
 *
 * @param testResourcePath resource path relative to the classpath root
 * @param fs               target file system
 * @param targetPath       destination path on {@code fs}
 * @throws IOException if the resource cannot be read or the target cannot be written
 */
public static void copyToDFS(String testResourcePath, FileSystem fs, String targetPath) throws IOException {
  // try-with-resources closes (and flushes) the stream even if print throws;
  // create(..., true) overwrites an existing file at targetPath.
  try (PrintStream os = new PrintStream(fs.create(new Path(targetPath), true))) {
    os.print(readFile(testResourcePath));
    os.flush();
  }
}

View File

@@ -0,0 +1,59 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
{
"type": "record",
"name": "triprec",
"namespace": "hoodie.triprec",
"fields": [
{
"name":"ID",
"type": "int"
},
{
"name": "TIMESTAMP",
"type": ["double", "null"]
},
{
"name": "RIDER",
"type": ["string", "null"]
},
{
"name": "DRIVER",
"type": ["string", "null"]
},
{
"name": "BEGIN_LAT",
"type": ["double", "null"]
},
{
"name": "BEGIN_LON",
"type": ["double", "null"]
},
{
"name": "END_LAT",
"type": ["double", "null"]
},
{
"name": "END_LON",
"type": ["double", "null"]
},
{
"name": "FARE",
"type": ["double", "null"]
} ]
}

View File

@@ -0,0 +1,28 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-- Test fixture for TestJdbcbasedSchemaProvider: a trip-record table created in the
-- in-memory H2 database; the schema provider reads its JDBC metadata to derive an
-- Avro schema (compared against delta-streamer-config/source-jdbc.avsc).
CREATE TABLE triprec (
id INT NOT NULL,
timestamp DOUBLE,
rider VARCHAR(200),
driver VARCHAR(200),
begin_lat DOUBLE,
begin_lon DOUBLE,
end_lat DOUBLE,
end_lon DOUBLE,
fare DOUBLE
);