New Features in DeltaStreamer:
(1) Apply transformations when using delta-streamer to ingest data (see the sketch below)
(2) Add Hudi Incremental Source for Delta Streamer
(3) Allow delta-streamer config properties to be passed on the command line
(4) Add Hive integration to Delta Streamer and address review comments
(5) Ensure MultiPartKeysValueExtractor handles Hive-style partition descriptions
(6) Reuse the same Spark session for both source and transformer
(7) Support extracting partition fields from _hoodie_partition_path for HoodieIncrSource
(8) Reuse binary Avro coders
(9) Add push-down filter for the incremental source
(10) Add Hoodie DeltaStreamer metrics to track total time taken
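For context on item (1): the transformation hook is the Transformer interface exercised by the tests in this commit. A minimal sketch, assuming only the interface signature visible in the diff below (the class name and added column are illustrative, not part of this commit):

import com.uber.hoodie.common.util.TypedProperties;
import com.uber.hoodie.utilities.transform.Transformer;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.functions;

// Hypothetical transformer: tags every ingested row with a constant column.
public class AddIngestFlagTransformer implements Transformer {
  @Override
  public Dataset<Row> apply(JavaSparkContext jsc, SparkSession sparkSession,
      Dataset<Row> rowDataset, TypedProperties properties) {
    return rowDataset.withColumn("ingest_flag", functions.lit(true));
  }
}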
committed by vinoth chandar
parent c70dbc13e9
commit 3a0044216c
@@ -19,8 +19,10 @@
package com.uber.hoodie.utilities;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

import com.uber.hoodie.DataSourceWriteOptions;
import com.uber.hoodie.common.model.HoodieCommitMetadata;
import com.uber.hoodie.common.table.HoodieTableMetaClient;
import com.uber.hoodie.common.table.HoodieTimeline;
@@ -28,17 +30,31 @@ import com.uber.hoodie.common.table.timeline.HoodieInstant;
import com.uber.hoodie.common.util.DFSPropertiesConfiguration;
import com.uber.hoodie.common.util.TypedProperties;
import com.uber.hoodie.exception.DatasetNotFoundException;
import com.uber.hoodie.hive.HiveSyncConfig;
import com.uber.hoodie.hive.HoodieHiveClient;
import com.uber.hoodie.hive.MultiPartKeysValueExtractor;
import com.uber.hoodie.utilities.deltastreamer.HoodieDeltaStreamer;
import com.uber.hoodie.utilities.deltastreamer.HoodieDeltaStreamer.Operation;
import com.uber.hoodie.utilities.schema.FilebasedSchemaProvider;
import com.uber.hoodie.utilities.sources.HoodieIncrSource;
import com.uber.hoodie.utilities.sources.TestDataSource;
import com.uber.hoodie.utilities.transform.SqlQueryBasedTransformer;
import com.uber.hoodie.utilities.transform.Transformer;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SQLContext;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.api.java.UDF4;
import org.apache.spark.sql.functions;
import org.apache.spark.sql.types.DataTypes;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Before;
@@ -55,17 +71,43 @@ public class TestHoodieDeltaStreamer extends UtilitiesTestBase {

  @BeforeClass
  public static void initClass() throws Exception {
    UtilitiesTestBase.initClass();
    UtilitiesTestBase.initClass(true);

    // prepare the configs.
    UtilitiesTestBase.Helpers.copyToDFS("delta-streamer-config/base.properties", dfs, dfsBasePath + "/base.properties");
    UtilitiesTestBase.Helpers.copyToDFS("delta-streamer-config/sql-transformer.properties", dfs,
        dfsBasePath + "/sql-transformer.properties");
    UtilitiesTestBase.Helpers.copyToDFS("delta-streamer-config/source.avsc", dfs, dfsBasePath + "/source.avsc");
    UtilitiesTestBase.Helpers.copyToDFS("delta-streamer-config/target.avsc", dfs, dfsBasePath + "/target.avsc");

    TypedProperties props = new TypedProperties();
    props.setProperty("include", "base.properties");
    props.setProperty("include", "sql-transformer.properties");
    props.setProperty("hoodie.datasource.write.recordkey.field", "_row_key");
    props.setProperty("hoodie.datasource.write.partitionpath.field", "not_there");
    props.setProperty("hoodie.deltastreamer.schemaprovider.source.schema.file", dfsBasePath + "/source.avsc");
    props.setProperty("hoodie.deltastreamer.schemaprovider.target.schema.file", dfsBasePath + "/target.avsc");
    // Hive Configs
    props.setProperty(DataSourceWriteOptions.HIVE_URL_OPT_KEY(), "jdbc:hive2://127.0.0.1:9999/");
    props.setProperty(DataSourceWriteOptions.HIVE_DATABASE_OPT_KEY(), "testdb1");
    props.setProperty(DataSourceWriteOptions.HIVE_TABLE_OPT_KEY(), "hive_trips");
    props.setProperty(DataSourceWriteOptions.HIVE_ASSUME_DATE_PARTITION_OPT_KEY(), "false");
    props.setProperty(DataSourceWriteOptions.HIVE_PARTITION_FIELDS_OPT_KEY(), "datestr");
    props.setProperty(DataSourceWriteOptions.HIVE_PARTITION_EXTRACTOR_CLASS_OPT_KEY(),
        MultiPartKeysValueExtractor.class.getName());
    UtilitiesTestBase.Helpers.savePropsToDFS(props, dfs, dfsBasePath + "/test-source.properties");

    // Properties used for the delta-streamer which incrementally pulls from upstream Hudi source table and writes to
    // downstream hudi table
    TypedProperties downstreamProps = new TypedProperties();
    downstreamProps.setProperty("include", "base.properties");
    downstreamProps.setProperty("hoodie.datasource.write.recordkey.field", "_row_key");
    downstreamProps.setProperty("hoodie.datasource.write.partitionpath.field", "not_there");

    // Source schema is the target schema of upstream table
    downstreamProps.setProperty("hoodie.deltastreamer.schemaprovider.source.schema.file", dfsBasePath + "/target.avsc");
    downstreamProps.setProperty("hoodie.deltastreamer.schemaprovider.target.schema.file", dfsBasePath + "/target.avsc");
    UtilitiesTestBase.Helpers.savePropsToDFS(downstreamProps, dfs,
        dfsBasePath + "/test-downstream-source.properties");
  }

  @AfterClass
@@ -86,17 +128,48 @@ public class TestHoodieDeltaStreamer extends UtilitiesTestBase {
  }

  static class TestHelpers {

    static HoodieDeltaStreamer.Config makeConfig(String basePath, Operation op) {
      return makeConfig(basePath, op, TripsWithDistanceTransformer.class.getName());
    }

    static HoodieDeltaStreamer.Config makeConfig(String basePath, Operation op, String transformerClassName) {
      return makeConfig(basePath, op, transformerClassName, false);
    }

    static HoodieDeltaStreamer.Config makeConfig(String basePath, Operation op, String transformerClassName,
        boolean enableHiveSync) {
      HoodieDeltaStreamer.Config cfg = new HoodieDeltaStreamer.Config();
      cfg.targetBasePath = basePath;
      cfg.targetTableName = "hoodie_trips";
      cfg.storageType = "COPY_ON_WRITE";
      cfg.sourceClassName = TestDataSource.class.getName();
      cfg.transformerClassName = transformerClassName;
      cfg.operation = op;
      cfg.enableHiveSync = enableHiveSync;
      cfg.sourceOrderingField = "timestamp";
      cfg.propsFilePath = dfsBasePath + "/test-source.properties";
      cfg.sourceLimit = 1000;
      cfg.schemaProviderClassName = FilebasedSchemaProvider.class.getName();
      return cfg;
    }

    static HoodieDeltaStreamer.Config makeConfigForHudiIncrSrc(String srcBasePath, String basePath, Operation op,
        boolean addReadLatestOnMissingCkpt) {
      HoodieDeltaStreamer.Config cfg = new HoodieDeltaStreamer.Config();
      cfg.targetBasePath = basePath;
      cfg.targetTableName = "hoodie_trips_copy";
      cfg.storageType = "COPY_ON_WRITE";
      cfg.sourceClassName = HoodieIncrSource.class.getName();
      cfg.operation = op;
      cfg.sourceOrderingField = "timestamp";
      cfg.propsFilePath = dfsBasePath + "/test-downstream-source.properties";
      cfg.sourceLimit = 1000;
      List<String> cfgs = new ArrayList<>();
      cfgs.add("hoodie.deltastreamer.source.hoodieincr.read_latest_on_missing_ckpt=" + addReadLatestOnMissingCkpt);
      cfgs.add("hoodie.deltastreamer.source.hoodieincr.path=" + srcBasePath);
      // No partition
      cfgs.add("hoodie.deltastreamer.source.hoodieincr.partition.fields=datestr");
      cfg.configs = cfgs;
      return cfg;
    }
@@ -110,15 +183,30 @@ public class TestHoodieDeltaStreamer extends UtilitiesTestBase {
          .sort("_hoodie_commit_time").collectAsList();
    }

    static void assertCommitMetadata(String expected, String datasetPath, FileSystem fs, int totalCommits)
    static void assertDistanceCount(long expected, String datasetPath, SQLContext sqlContext) {
      sqlContext.read().format("com.uber.hoodie").load(datasetPath).registerTempTable("tmp_trips");
      long recordCount =
          sqlContext.sparkSession().sql("select * from tmp_trips where haversine_distance is not NULL").count();
      assertEquals(expected, recordCount);
    }

    static void assertDistanceCountWithExactValue(long expected, String datasetPath, SQLContext sqlContext) {
      sqlContext.read().format("com.uber.hoodie").load(datasetPath).registerTempTable("tmp_trips");
      long recordCount =
          sqlContext.sparkSession().sql("select * from tmp_trips where haversine_distance = 1.0").count();
      assertEquals(expected, recordCount);
    }

    static String assertCommitMetadata(String expected, String datasetPath, FileSystem fs, int totalCommits)
        throws IOException {
      HoodieTableMetaClient meta = new HoodieTableMetaClient(fs.getConf(), datasetPath);
      HoodieTimeline timeline = meta.getActiveTimeline().getCommitsTimeline().filterCompletedInstants();
      HoodieInstant lastCommit = timeline.lastInstant().get();
      HoodieInstant lastInstant = timeline.lastInstant().get();
      HoodieCommitMetadata commitMetadata = HoodieCommitMetadata.fromBytes(
          timeline.getInstantDetails(lastCommit).get(), HoodieCommitMetadata.class);
          timeline.getInstantDetails(lastInstant).get(), HoodieCommitMetadata.class);
      assertEquals(totalCommits, timeline.countInstants());
      assertEquals(expected, commitMetadata.getMetadata(HoodieDeltaStreamer.CHECKPOINT_KEY));
      return lastInstant.getTimestamp();
    }
  }
@@ -152,12 +240,14 @@ public class TestHoodieDeltaStreamer extends UtilitiesTestBase {
    HoodieDeltaStreamer.Config cfg = TestHelpers.makeConfig(datasetBasePath, Operation.BULK_INSERT);
    new HoodieDeltaStreamer(cfg, jsc).sync();
    TestHelpers.assertRecordCount(1000, datasetBasePath + "/*/*.parquet", sqlContext);
    TestHelpers.assertDistanceCount(1000, datasetBasePath + "/*/*.parquet", sqlContext);
    TestHelpers.assertCommitMetadata("00000", datasetBasePath, dfs, 1);

    // No new data => no commits.
    cfg.sourceLimit = 0;
    new HoodieDeltaStreamer(cfg, jsc).sync();
    TestHelpers.assertRecordCount(1000, datasetBasePath + "/*/*.parquet", sqlContext);
    TestHelpers.assertDistanceCount(1000, datasetBasePath + "/*/*.parquet", sqlContext);
    TestHelpers.assertCommitMetadata("00000", datasetBasePath, dfs, 1);

    // upsert() #1
@@ -165,11 +255,94 @@ public class TestHoodieDeltaStreamer extends UtilitiesTestBase {
    cfg.operation = Operation.UPSERT;
    new HoodieDeltaStreamer(cfg, jsc).sync();
    TestHelpers.assertRecordCount(2000, datasetBasePath + "/*/*.parquet", sqlContext);
    TestHelpers.assertDistanceCount(2000, datasetBasePath + "/*/*.parquet", sqlContext);
    TestHelpers.assertCommitMetadata("00001", datasetBasePath, dfs, 2);
    List<Row> counts = TestHelpers.countsPerCommit(datasetBasePath + "/*/*.parquet", sqlContext);
    assertEquals(2000, counts.get(0).getLong(1));
  }

  /**
   * Test Bulk Insert and upserts with hive syncing. Tests Hudi incremental processing using a 2 step pipeline
   * The first step involves using a SQL template to transform a source
   * TEST-DATA-SOURCE ============================> HUDI TABLE 1 ===============> HUDI TABLE 2
   * (incr-pull with transform) (incr-pull)
   * Hudi Table 1 is synced with Hive.
   * @throws Exception
   */
  @Test
  public void testBulkInsertsAndUpsertsWithSQLBasedTransformerFor2StepPipeline() throws Exception {
    String datasetBasePath = dfsBasePath + "/test_dataset2";
    String downstreamDatasetBasePath = dfsBasePath + "/test_downstream_dataset2";

    HiveSyncConfig hiveSyncConfig = getHiveSyncConfig(datasetBasePath, "hive_trips");

    // Initial bulk insert to ingest to first hudi table
    HoodieDeltaStreamer.Config cfg = TestHelpers.makeConfig(datasetBasePath, Operation.BULK_INSERT,
        SqlQueryBasedTransformer.class.getName(), true);
    new HoodieDeltaStreamer(cfg, jsc, dfs, hiveServer.getHiveConf()).sync();
    TestHelpers.assertRecordCount(1000, datasetBasePath + "/*/*.parquet", sqlContext);
    TestHelpers.assertDistanceCount(1000, datasetBasePath + "/*/*.parquet", sqlContext);
    TestHelpers.assertDistanceCountWithExactValue(1000, datasetBasePath + "/*/*.parquet", sqlContext);
    String lastInstantForUpstreamTable = TestHelpers.assertCommitMetadata("00000", datasetBasePath, dfs, 1);

    // Now incrementally pull from the above hudi table and ingest to second table
    HoodieDeltaStreamer.Config downstreamCfg =
        TestHelpers.makeConfigForHudiIncrSrc(datasetBasePath, downstreamDatasetBasePath, Operation.BULK_INSERT, true);
    new HoodieDeltaStreamer(downstreamCfg, jsc, dfs, hiveServer.getHiveConf()).sync();
    TestHelpers.assertRecordCount(1000, downstreamDatasetBasePath + "/*/*.parquet", sqlContext);
    TestHelpers.assertDistanceCount(1000, downstreamDatasetBasePath + "/*/*.parquet", sqlContext);
    TestHelpers.assertDistanceCountWithExactValue(1000, downstreamDatasetBasePath + "/*/*.parquet", sqlContext);
    TestHelpers.assertCommitMetadata(lastInstantForUpstreamTable, downstreamDatasetBasePath, dfs, 1);

    // No new data => no commits for upstream table
    cfg.sourceLimit = 0;
    new HoodieDeltaStreamer(cfg, jsc, dfs, hiveServer.getHiveConf()).sync();
    TestHelpers.assertRecordCount(1000, datasetBasePath + "/*/*.parquet", sqlContext);
    TestHelpers.assertDistanceCount(1000, datasetBasePath + "/*/*.parquet", sqlContext);
    TestHelpers.assertDistanceCountWithExactValue(1000, datasetBasePath + "/*/*.parquet", sqlContext);
    TestHelpers.assertCommitMetadata("00000", datasetBasePath, dfs, 1);

    // with no change in upstream table, no change in downstream too when pulled.
    new HoodieDeltaStreamer(downstreamCfg, jsc).sync();
    TestHelpers.assertRecordCount(1000, downstreamDatasetBasePath + "/*/*.parquet", sqlContext);
    TestHelpers.assertDistanceCount(1000, downstreamDatasetBasePath + "/*/*.parquet", sqlContext);
    TestHelpers.assertDistanceCountWithExactValue(1000, downstreamDatasetBasePath + "/*/*.parquet", sqlContext);
    TestHelpers.assertCommitMetadata(lastInstantForUpstreamTable, downstreamDatasetBasePath, dfs, 1);

    // upsert() #1 on upstream hudi table
    cfg.sourceLimit = 2000;
    cfg.operation = Operation.UPSERT;
    new HoodieDeltaStreamer(cfg, jsc, dfs, hiveServer.getHiveConf()).sync();
    TestHelpers.assertRecordCount(2000, datasetBasePath + "/*/*.parquet", sqlContext);
    TestHelpers.assertDistanceCount(2000, datasetBasePath + "/*/*.parquet", sqlContext);
    TestHelpers.assertDistanceCountWithExactValue(2000, datasetBasePath + "/*/*.parquet", sqlContext);
    lastInstantForUpstreamTable = TestHelpers.assertCommitMetadata("00001", datasetBasePath, dfs, 2);
    List<Row> counts = TestHelpers.countsPerCommit(datasetBasePath + "/*/*.parquet", sqlContext);
    assertEquals(2000, counts.get(0).getLong(1));

    // Incrementally pull changes in upstream hudi table and apply to downstream table
    downstreamCfg =
        TestHelpers.makeConfigForHudiIncrSrc(datasetBasePath, downstreamDatasetBasePath, Operation.UPSERT, false);
    downstreamCfg.sourceLimit = 2000;
    new HoodieDeltaStreamer(downstreamCfg, jsc).sync();
    TestHelpers.assertRecordCount(2000, downstreamDatasetBasePath + "/*/*.parquet", sqlContext);
    TestHelpers.assertDistanceCount(2000, downstreamDatasetBasePath + "/*/*.parquet", sqlContext);
    TestHelpers.assertDistanceCountWithExactValue(2000, downstreamDatasetBasePath + "/*/*.parquet", sqlContext);
    String finalInstant =
        TestHelpers.assertCommitMetadata(lastInstantForUpstreamTable, downstreamDatasetBasePath, dfs, 2);
    counts = TestHelpers.countsPerCommit(downstreamDatasetBasePath + "/*/*.parquet", sqlContext);
    assertEquals(2000, counts.get(0).getLong(1));

    // Test Hive integration
    HoodieHiveClient hiveClient = new HoodieHiveClient(hiveSyncConfig, hiveServer.getHiveConf(), dfs);
    assertTrue("Table " + hiveSyncConfig.tableName + " should exist",
        hiveClient.doesTableExist());
    assertEquals("Table partitions should match the number of partitions we wrote", 1,
        hiveClient.scanTablePartitions().size());
    assertEquals("The last commit that was synced should be updated in the TBLPROPERTIES",
        lastInstantForUpstreamTable, hiveClient.getLastCommitTimeSynced().get());
  }

  @Test
  public void testFilterDupes() throws Exception {
    String datasetBasePath = dfsBasePath + "/test_dupes_dataset";
@@ -192,4 +365,57 @@ public class TestHoodieDeltaStreamer extends UtilitiesTestBase {
    assertEquals(1000, counts.get(0).getLong(1));
    assertEquals(1000, counts.get(1).getLong(1));
  }

  /**
   * UDF to calculate Haversine distance
   */
  public static class DistanceUDF implements UDF4<Double, Double, Double, Double, Double> {

    /**
     *
     * Taken from https://stackoverflow.com/questions/3694380/calculating-distance-between-two-points-using-latitude-
     * longitude-what-am-i-doi
     * Calculate distance between two points in latitude and longitude taking
     * into account height difference. If you are not interested in height
     * difference pass 0.0. Uses Haversine method as its base.
     *
     * lat1, lon1 Start point lat2, lon2 End point el1 Start altitude in meters
     * el2 End altitude in meters
     * @returns Distance in Meters
     */
    @Override
    public Double call(Double lat1, Double lat2, Double lon1, Double lon2) {

      final int R = 6371; // Radius of the earth

      double latDistance = Math.toRadians(lat2 - lat1);
      double lonDistance = Math.toRadians(lon2 - lon1);
      double a = Math.sin(latDistance / 2) * Math.sin(latDistance / 2)
          + Math.cos(Math.toRadians(lat1)) * Math.cos(Math.toRadians(lat2))
          * Math.sin(lonDistance / 2) * Math.sin(lonDistance / 2);
      double c = 2 * Math.atan2(Math.sqrt(a), Math.sqrt(1 - a));
      double distance = R * c * 1000; // convert to meters

      double height = 0;

      distance = Math.pow(distance, 2) + Math.pow(height, 2);

      return Math.sqrt(distance);
    }
  }

  /**
   * Adds a new field "haversine_distance" to the row
   */
  public static class TripsWithDistanceTransformer implements Transformer {

    @Override
    public Dataset<Row> apply(JavaSparkContext jsc, SparkSession sparkSession,
        Dataset<Row> rowDataset, TypedProperties properties) {
      rowDataset.sqlContext().udf().register("distance_udf", new DistanceUDF(), DataTypes.DoubleType);
      return rowDataset.withColumn("haversine_distance",
          functions.callUDF("distance_udf", functions.col("begin_lat"),
              functions.col("end_lat"), functions.col("begin_lon"), functions.col("end_lat")));
    }
  }
}
@@ -18,10 +18,16 @@

package com.uber.hoodie.utilities;

import com.google.common.collect.ImmutableList;
import com.uber.hoodie.common.TestRawTripPayload;
import com.uber.hoodie.common.minicluster.HdfsTestService;
import com.uber.hoodie.common.model.HoodieRecord;
import com.uber.hoodie.common.model.HoodieTableType;
import com.uber.hoodie.common.table.HoodieTableMetaClient;
import com.uber.hoodie.common.util.TypedProperties;
import com.uber.hoodie.hive.HiveSyncConfig;
import com.uber.hoodie.hive.HoodieHiveClient;
import com.uber.hoodie.hive.util.HiveTestService;
import com.uber.hoodie.utilities.sources.TestDataSource;
import java.io.BufferedReader;
import java.io.IOException;
@@ -32,8 +38,11 @@ import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hive.service.server.HiveServer2;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SQLContext;
import org.apache.spark.sql.SparkSession;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Before;
@@ -51,15 +60,26 @@ public class UtilitiesTestBase {
  protected static MiniDFSCluster dfsCluster;
  protected static DistributedFileSystem dfs;
  protected transient JavaSparkContext jsc = null;
  protected transient SparkSession sparkSession = null;
  protected transient SQLContext sqlContext;
  protected static HiveServer2 hiveServer;

  @BeforeClass
  public static void initClass() throws Exception {
    initClass(false);
  }

  static void initClass(boolean startHiveService) throws Exception {
    hdfsTestService = new HdfsTestService();
    dfsCluster = hdfsTestService.start(true);
    dfs = dfsCluster.getFileSystem();
    dfsBasePath = dfs.getWorkingDirectory().toString();
    dfs.mkdirs(new Path(dfsBasePath));
    if (startHiveService) {
      HiveTestService hiveService = new HiveTestService(hdfsTestService.getHadoopConf());
      hiveServer = hiveService.start();
      clearHiveDb();
    }
  }

  @AfterClass
@@ -67,6 +87,9 @@ public class UtilitiesTestBase {
    if (hdfsTestService != null) {
      hdfsTestService.stop();
    }
    if (hiveServer != null) {
      hiveServer.stop();
    }
  }

  @Before
@@ -74,6 +97,7 @@ public class UtilitiesTestBase {
    TestDataSource.initDataGen();
    jsc = UtilHelpers.buildSparkContext(this.getClass().getName() + "-hoodie", "local[2]");
    sqlContext = new SQLContext(jsc);
    sparkSession = SparkSession.builder().config(jsc.getConf()).getOrCreate();
  }

  @After
@@ -84,6 +108,42 @@ public class UtilitiesTestBase {
    }
  }

  /**
   * Helper to get hive sync config
   * @param basePath
   * @param tableName
   * @return
   */
  protected static HiveSyncConfig getHiveSyncConfig(String basePath, String tableName) {
    HiveSyncConfig hiveSyncConfig = new HiveSyncConfig();
    hiveSyncConfig.jdbcUrl = "jdbc:hive2://127.0.0.1:9999/";
    hiveSyncConfig.hiveUser = "";
    hiveSyncConfig.hivePass = "";
    hiveSyncConfig.databaseName = "testdb1";
    hiveSyncConfig.tableName = tableName;
    hiveSyncConfig.basePath = basePath;
    hiveSyncConfig.assumeDatePartitioning = false;
    hiveSyncConfig.partitionFields = new ImmutableList.Builder<String>().add("datestr").build();
    return hiveSyncConfig;
  }

  /**
   * Initialize Hive DB
   * @throws IOException
   */
  private static void clearHiveDb() throws IOException {
    HiveConf hiveConf = new HiveConf();
    // Create Dummy hive sync config
    HiveSyncConfig hiveSyncConfig = getHiveSyncConfig("/dummy", "dummy");
    hiveConf.addResource(hiveServer.getHiveConf());
    HoodieTableMetaClient.initTableType(dfs.getConf(), hiveSyncConfig.basePath, HoodieTableType.COPY_ON_WRITE,
        hiveSyncConfig.tableName, null);
    HoodieHiveClient client = new HoodieHiveClient(hiveSyncConfig, hiveConf, dfs);
    client.updateHiveSQL("drop database if exists " + hiveSyncConfig.databaseName);
    client.updateHiveSQL("create database " + hiveSyncConfig.databaseName);
    client.close();
  }

  public static class Helpers {

    // to get hold of resources bundled with jar
@@ -20,16 +20,20 @@ package com.uber.hoodie.utilities.sources;

import static org.junit.Assert.assertEquals;

import com.uber.hoodie.AvroConversionUtils;
import com.uber.hoodie.common.HoodieTestDataGenerator;
import com.uber.hoodie.common.util.TypedProperties;
import com.uber.hoodie.common.util.collection.Pair;
import com.uber.hoodie.utilities.UtilitiesTestBase;
import com.uber.hoodie.utilities.deltastreamer.SourceFormatAdapter;
import com.uber.hoodie.utilities.schema.FilebasedSchemaProvider;
import java.io.IOException;
import java.util.Optional;
import org.apache.avro.generic.GenericRecord;
import org.apache.hadoop.fs.Path;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SQLContext;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Before;
@@ -37,7 +41,7 @@ import org.junit.BeforeClass;
import org.junit.Test;

/**
 * Basic tests against all subclasses of {@link DFSSource}
 * Basic tests against all subclasses of {@link JsonDFSSource}
 */
public class TestDFSSource extends UtilitiesTestBase {
@@ -71,34 +75,47 @@ public class TestDFSSource extends UtilitiesTestBase {

    TypedProperties props = new TypedProperties();
    props.setProperty("hoodie.deltastreamer.source.dfs.root", dfsBasePath + "/jsonFiles");
    JsonDFSSource jsonSource = new JsonDFSSource(props, jsc, schemaProvider);
    JsonDFSSource jsonDFSSource = new JsonDFSSource(props, jsc, sparkSession, schemaProvider);
    SourceFormatAdapter jsonSource = new SourceFormatAdapter(jsonDFSSource);

    // 1. Extract without any checkpoint => get all the data, respecting sourceLimit
    assertEquals(Optional.empty(), jsonSource.fetchNewData(Optional.empty(), Long.MAX_VALUE).getKey());
    assertEquals(Optional.empty(), jsonSource.fetchNewDataInAvroFormat(Optional.empty(), Long.MAX_VALUE).getBatch());
    UtilitiesTestBase.Helpers.saveStringsToDFS(
        Helpers.jsonifyRecords(dataGenerator.generateInserts("000", 100)), dfs,
        dfsBasePath + "/jsonFiles/1.json");
    assertEquals(Optional.empty(), jsonSource.fetchNewData(Optional.empty(), 10).getKey());
    Pair<Optional<JavaRDD<GenericRecord>>, String> fetch1 = jsonSource.fetchNewData(Optional.empty(), 1000000);
    assertEquals(100, fetch1.getKey().get().count());
    assertEquals(Optional.empty(), jsonSource.fetchNewDataInAvroFormat(Optional.empty(), 10).getBatch());
    InputBatch<JavaRDD<GenericRecord>> fetch1 =
        jsonSource.fetchNewDataInAvroFormat(Optional.empty(), 1000000);
    assertEquals(100, fetch1.getBatch().get().count());
    // Test json -> Row format
    InputBatch<Dataset<Row>> fetch1AsRows =
        jsonSource.fetchNewDataInRowFormat(Optional.empty(), 1000000);
    assertEquals(100, fetch1AsRows.getBatch().get().count());
    // Test Avro -> Row format
    Dataset<Row> fetch1Rows = AvroConversionUtils.createDataFrame(JavaRDD.toRDD(fetch1.getBatch().get()),
        schemaProvider.getSourceSchema().toString(), jsonDFSSource.getSparkSession());
    assertEquals(100, fetch1Rows.count());

    // 2. Produce new data, extract new data
    UtilitiesTestBase.Helpers.saveStringsToDFS(
        Helpers.jsonifyRecords(dataGenerator.generateInserts("001", 10000)),
        dfs, dfsBasePath + "/jsonFiles/2.json");
    Pair<Optional<JavaRDD<GenericRecord>>, String> fetch2 = jsonSource.fetchNewData(
        Optional.of(fetch1.getValue()), Long.MAX_VALUE);
    assertEquals(10000, fetch2.getKey().get().count());
    InputBatch<Dataset<Row>> fetch2 = jsonSource.fetchNewDataInRowFormat(
        Optional.of(fetch1.getCheckpointForNextBatch()), Long.MAX_VALUE);
    assertEquals(10000, fetch2.getBatch().get().count());

    // 3. Extract with previous checkpoint => gives same data back (idempotent)
    Pair<Optional<JavaRDD<GenericRecord>>, String> fetch3 = jsonSource.fetchNewData(
        Optional.of(fetch1.getValue()), Long.MAX_VALUE);
    assertEquals(10000, fetch3.getKey().get().count());
    assertEquals(fetch2.getValue(), fetch3.getValue());
    InputBatch<Dataset<Row>> fetch3 = jsonSource.fetchNewDataInRowFormat(
        Optional.of(fetch1.getCheckpointForNextBatch()), Long.MAX_VALUE);
    assertEquals(10000, fetch3.getBatch().get().count());
    assertEquals(fetch2.getCheckpointForNextBatch(), fetch3.getCheckpointForNextBatch());
    fetch3.getBatch().get().registerTempTable("test_dfs_table");
    Dataset<Row> rowDataset = new SQLContext(jsc.sc()).sql("select * from test_dfs_table");
    assertEquals(10000, rowDataset.count());

    // 4. Extract with latest checkpoint => no new data returned
    Pair<Optional<JavaRDD<GenericRecord>>, String> fetch4 = jsonSource.fetchNewData(
        Optional.of(fetch2.getValue()), Long.MAX_VALUE);
    assertEquals(Optional.empty(), fetch4.getKey());
    InputBatch<JavaRDD<GenericRecord>> fetch4 = jsonSource.fetchNewDataInAvroFormat(
        Optional.of(fetch2.getCheckpointForNextBatch()), Long.MAX_VALUE);
    assertEquals(Optional.empty(), fetch4.getBatch());
    }
  }
}
@@ -21,8 +21,6 @@ package com.uber.hoodie.utilities.sources;
import com.uber.hoodie.common.HoodieTestDataGenerator;
import com.uber.hoodie.common.model.HoodieRecord;
import com.uber.hoodie.common.util.TypedProperties;
import com.uber.hoodie.common.util.collection.ImmutablePair;
import com.uber.hoodie.common.util.collection.Pair;
import com.uber.hoodie.utilities.schema.SchemaProvider;
import java.io.IOException;
import java.util.ArrayList;
@@ -35,11 +33,12 @@ import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SparkSession;

/**
 * An implementation of {@link Source}, that emits test upserts.
 */
public class TestDataSource extends Source {
public class TestDataSource extends AvroSource {

  private static volatile Logger log = LogManager.getLogger(TestDataSource.class);
@@ -54,8 +53,9 @@ public class TestDataSource extends Source {
    dataGenerator = null;
  }

  public TestDataSource(TypedProperties props, JavaSparkContext sparkContext, SchemaProvider schemaProvider) {
    super(props, sparkContext, schemaProvider);
  public TestDataSource(TypedProperties props, JavaSparkContext sparkContext, SparkSession sparkSession,
      SchemaProvider schemaProvider) {
    super(props, sparkContext, sparkSession, schemaProvider);
  }

  private GenericRecord toGenericRecord(HoodieRecord hoodieRecord) {
@@ -68,14 +68,14 @@ public class TestDataSource extends Source {
  }

  @Override
  public Pair<Optional<JavaRDD<GenericRecord>>, String> fetchNewData(Optional<String> lastCheckpointStr,
  protected InputBatch<JavaRDD<GenericRecord>> fetchNewData(Optional<String> lastCheckpointStr,
      long sourceLimit) {

    int nextCommitNum = lastCheckpointStr.map(s -> Integer.parseInt(s) + 1).orElse(0);
    String commitTime = String.format("%05d", nextCommitNum);
    // No new data.
    if (sourceLimit <= 0) {
      return new ImmutablePair<>(Optional.empty(), commitTime);
      return new InputBatch<>(Optional.empty(), commitTime);
    }

    // generate `sourceLimit` number of upserts each time.
@@ -94,6 +94,6 @@ public class TestDataSource extends Source {
    }

    JavaRDD<GenericRecord> avroRDD = sparkContext.<GenericRecord>parallelize(records, 4);
    return new ImmutablePair<>(Optional.of(avroRDD), commitTime);
    return new InputBatch<>(Optional.of(avroRDD), commitTime);
  }
}
@@ -18,20 +18,23 @@

package com.uber.hoodie.utilities.sources;

import static com.uber.hoodie.utilities.sources.KafkaSource.CheckpointUtils;
import static org.junit.Assert.assertEquals;

import com.uber.hoodie.AvroConversionUtils;
import com.uber.hoodie.common.HoodieTestDataGenerator;
import com.uber.hoodie.common.util.TypedProperties;
import com.uber.hoodie.common.util.collection.Pair;
import com.uber.hoodie.utilities.UtilitiesTestBase;
import com.uber.hoodie.utilities.deltastreamer.SourceFormatAdapter;
import com.uber.hoodie.utilities.schema.FilebasedSchemaProvider;
import com.uber.hoodie.utilities.sources.helpers.KafkaOffsetGen.CheckpointUtils;
import java.io.IOException;
import java.util.HashMap;
import java.util.Optional;
import kafka.common.TopicAndPartition;
import org.apache.avro.generic.GenericRecord;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.streaming.kafka.KafkaCluster.LeaderOffset;
import org.apache.spark.streaming.kafka.KafkaTestUtils;
import org.apache.spark.streaming.kafka.OffsetRange;
@@ -42,7 +45,7 @@ import org.junit.BeforeClass;
import org.junit.Test;

/**
 * Tests against {@link KafkaSource}
 * Tests against {@link AvroKafkaSource}
 */
public class TestKafkaSource extends UtilitiesTestBase {
@@ -89,30 +92,44 @@ public class TestKafkaSource extends UtilitiesTestBase {
    props.setProperty("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
    props.setProperty("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");

    Source kafkaSource = new JsonKafkaSource(props, jsc, schemaProvider);
    Source jsonSource = new JsonKafkaSource(props, jsc, sparkSession, schemaProvider);
    SourceFormatAdapter kafkaSource = new SourceFormatAdapter(jsonSource);

    // 1. Extract without any checkpoint => get all the data, respecting sourceLimit
    assertEquals(Optional.empty(), kafkaSource.fetchNewData(Optional.empty(), Long.MAX_VALUE).getKey());
    assertEquals(Optional.empty(), kafkaSource.fetchNewDataInAvroFormat(Optional.empty(), Long.MAX_VALUE).getBatch());
    testUtils.sendMessages(TEST_TOPIC_NAME, Helpers.jsonifyRecords(dataGenerator.generateInserts("000", 1000)));
    Pair<Optional<JavaRDD<GenericRecord>>, String> fetch1 = kafkaSource.fetchNewData(Optional.empty(), 900);
    assertEquals(900, fetch1.getKey().get().count());
    InputBatch<JavaRDD<GenericRecord>> fetch1 = kafkaSource.fetchNewDataInAvroFormat(Optional.empty(), 900);
    assertEquals(900, fetch1.getBatch().get().count());
    // Test Avro To DataFrame<Row> path
    Dataset<Row> fetch1AsRows = AvroConversionUtils.createDataFrame(JavaRDD.toRDD(fetch1.getBatch().get()),
        schemaProvider.getSourceSchema().toString(), jsonSource.getSparkSession());
    assertEquals(900, fetch1AsRows.count());

    // 2. Produce new data, extract new data
    testUtils.sendMessages(TEST_TOPIC_NAME, Helpers.jsonifyRecords(dataGenerator.generateInserts("001", 1000)));
    Pair<Optional<JavaRDD<GenericRecord>>, String> fetch2 = kafkaSource.fetchNewData(
        Optional.of(fetch1.getValue()), Long.MAX_VALUE);
    assertEquals(1100, fetch2.getKey().get().count());
    InputBatch<Dataset<Row>> fetch2 = kafkaSource.fetchNewDataInRowFormat(
        Optional.of(fetch1.getCheckpointForNextBatch()), Long.MAX_VALUE);
    assertEquals(1100, fetch2.getBatch().get().count());

    // 3. Extract with previous checkpoint => gives same data back (idempotent)
    Pair<Optional<JavaRDD<GenericRecord>>, String> fetch3 = kafkaSource.fetchNewData(
        Optional.of(fetch1.getValue()), Long.MAX_VALUE);
    assertEquals(fetch2.getKey().get().count(), fetch3.getKey().get().count());
    assertEquals(fetch2.getValue(), fetch3.getValue());
    InputBatch<JavaRDD<GenericRecord>> fetch3 = kafkaSource.fetchNewDataInAvroFormat(
        Optional.of(fetch1.getCheckpointForNextBatch()), Long.MAX_VALUE);
    assertEquals(fetch2.getBatch().get().count(), fetch3.getBatch().get().count());
    assertEquals(fetch2.getCheckpointForNextBatch(), fetch3.getCheckpointForNextBatch());
    // Same using Row API
    InputBatch<Dataset<Row>> fetch3AsRows =
        kafkaSource.fetchNewDataInRowFormat(Optional.of(fetch1.getCheckpointForNextBatch()), Long.MAX_VALUE);
    assertEquals(fetch2.getBatch().get().count(), fetch3AsRows.getBatch().get().count());
    assertEquals(fetch2.getCheckpointForNextBatch(), fetch3AsRows.getCheckpointForNextBatch());

    // 4. Extract with latest checkpoint => no new data returned
    Pair<Optional<JavaRDD<GenericRecord>>, String> fetch4 = kafkaSource.fetchNewData(
        Optional.of(fetch2.getValue()), Long.MAX_VALUE);
    assertEquals(Optional.empty(), fetch4.getKey());
    InputBatch<JavaRDD<GenericRecord>> fetch4 = kafkaSource.fetchNewDataInAvroFormat(
        Optional.of(fetch2.getCheckpointForNextBatch()), Long.MAX_VALUE);
    assertEquals(Optional.empty(), fetch4.getBatch());
    // Same using Row API
    InputBatch<Dataset<Row>> fetch4AsRows =
        kafkaSource.fetchNewDataInRowFormat(Optional.of(fetch2.getCheckpointForNextBatch()), Long.MAX_VALUE);
    assertEquals(Optional.empty(), fetch4AsRows.getBatch());
  }
@@ -0,0 +1,19 @@
#
# Copyright (c) 2018 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
#
#
include=base.properties
hoodie.deltastreamer.transformer.sql=SELECT a.timestamp, a._row_key, a.rider, a.driver, a.begin_lat, a.begin_lon, a.end_lat, a.end_lon, a.fare, CAST(1.0 AS DOUBLE) AS haversine_distance FROM <SRC> a
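The sql-transformer.properties file above feeds SqlQueryBasedTransformer. A rough, hedged sketch of what such a SQL-template transform does (assuming the incoming batch is exposed to the query through a temp view; the helper class and view name below are illustrative, not part of this commit):

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

// Illustrative helper: run a <SRC>-templated SQL query over an incoming batch of rows.
public class SqlTemplateTransformSketch {
  public static Dataset<Row> transform(SparkSession spark, Dataset<Row> incoming, String sqlTemplate) {
    String srcView = "HOODIE_SRC_TMP_TABLE"; // illustrative temp view name
    incoming.createOrReplaceTempView(srcView);
    return spark.sql(sqlTemplate.replace("<SRC>", srcView));
  }
}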
@@ -0,0 +1,37 @@
{
  "type" : "record",
  "name" : "triprec",
  "fields" : [
    {
      "name" : "timestamp",
      "type" : "double"
    }, {
      "name" : "_row_key",
      "type" : "string"
    }, {
      "name" : "rider",
      "type" : "string"
    }, {
      "name" : "driver",
      "type" : "string"
    }, {
      "name" : "begin_lat",
      "type" : "double"
    }, {
      "name" : "begin_lon",
      "type" : "double"
    }, {
      "name" : "end_lat",
      "type" : "double"
    }, {
      "name" : "end_lon",
      "type" : "double"
    }, {
      "name" : "fare",
      "type" : "double"
    }, {
      "name" : "haversine_distance",
      "type" : "double"
    }]
}