[HUDI-681]Remove embeddedTimelineService from HoodieReadClient (#1388)
* [HUDI-681]Remove embeddedTimelineService from HoodieReadClient
This commit is contained in:
@@ -19,7 +19,6 @@
|
|||||||
package org.apache.hudi.client;
|
package org.apache.hudi.client;
|
||||||
|
|
||||||
import org.apache.hudi.avro.model.HoodieCompactionPlan;
|
import org.apache.hudi.avro.model.HoodieCompactionPlan;
|
||||||
import org.apache.hudi.client.embedded.EmbeddedTimelineService;
|
|
||||||
import org.apache.hudi.common.model.HoodieBaseFile;
|
import org.apache.hudi.common.model.HoodieBaseFile;
|
||||||
import org.apache.hudi.common.model.HoodieKey;
|
import org.apache.hudi.common.model.HoodieKey;
|
||||||
import org.apache.hudi.common.model.HoodieRecord;
|
import org.apache.hudi.common.model.HoodieRecord;
|
||||||
@@ -72,18 +71,10 @@ public class HoodieReadClient<T extends HoodieRecordPayload> implements Serializ
|
|||||||
/**
|
/**
|
||||||
* @param basePath path to Hoodie table
|
* @param basePath path to Hoodie table
|
||||||
*/
|
*/
|
||||||
public HoodieReadClient(JavaSparkContext jsc, String basePath, Option<EmbeddedTimelineService> timelineService) {
|
public HoodieReadClient(JavaSparkContext jsc, String basePath) {
|
||||||
this(jsc, HoodieWriteConfig.newBuilder().withPath(basePath)
|
this(jsc, HoodieWriteConfig.newBuilder().withPath(basePath)
|
||||||
// by default we use HoodieBloomIndex
|
// by default we use HoodieBloomIndex
|
||||||
.withIndexConfig(HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build()).build(),
|
.withIndexConfig(HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build()).build());
|
||||||
timelineService);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @param basePath path to Hoodie table
|
|
||||||
*/
|
|
||||||
public HoodieReadClient(JavaSparkContext jsc, String basePath) {
|
|
||||||
this(jsc, basePath, Option.empty());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -100,14 +91,6 @@ public class HoodieReadClient<T extends HoodieRecordPayload> implements Serializ
|
|||||||
* @param clientConfig instance of HoodieWriteConfig
|
* @param clientConfig instance of HoodieWriteConfig
|
||||||
*/
|
*/
|
||||||
public HoodieReadClient(JavaSparkContext jsc, HoodieWriteConfig clientConfig) {
|
public HoodieReadClient(JavaSparkContext jsc, HoodieWriteConfig clientConfig) {
|
||||||
this(jsc, clientConfig, Option.empty());
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @param clientConfig instance of HoodieWriteConfig
|
|
||||||
*/
|
|
||||||
public HoodieReadClient(JavaSparkContext jsc, HoodieWriteConfig clientConfig,
|
|
||||||
Option<EmbeddedTimelineService> timelineService) {
|
|
||||||
this.jsc = jsc;
|
this.jsc = jsc;
|
||||||
final String basePath = clientConfig.getBasePath();
|
final String basePath = clientConfig.getBasePath();
|
||||||
// Create a Hoodie table which encapsulated the commits and files visible
|
// Create a Hoodie table which encapsulated the commits and files visible
|
||||||
|
|||||||
@@ -23,11 +23,9 @@ import org.apache.avro.Schema;
|
|||||||
import org.apache.hudi.client.HoodieReadClient;
|
import org.apache.hudi.client.HoodieReadClient;
|
||||||
import org.apache.hudi.client.HoodieWriteClient;
|
import org.apache.hudi.client.HoodieWriteClient;
|
||||||
import org.apache.hudi.client.WriteStatus;
|
import org.apache.hudi.client.WriteStatus;
|
||||||
import org.apache.hudi.client.embedded.EmbeddedTimelineService;
|
|
||||||
import org.apache.hudi.common.model.HoodieKey;
|
import org.apache.hudi.common.model.HoodieKey;
|
||||||
import org.apache.hudi.common.model.HoodieRecord;
|
import org.apache.hudi.common.model.HoodieRecord;
|
||||||
import org.apache.hudi.common.model.HoodieRecordPayload;
|
import org.apache.hudi.common.model.HoodieRecordPayload;
|
||||||
import org.apache.hudi.common.util.Option;
|
|
||||||
import org.apache.hudi.common.util.ReflectionUtils;
|
import org.apache.hudi.common.util.ReflectionUtils;
|
||||||
import org.apache.hudi.common.util.TypedProperties;
|
import org.apache.hudi.common.util.TypedProperties;
|
||||||
import org.apache.hudi.config.HoodieCompactionConfig;
|
import org.apache.hudi.config.HoodieCompactionConfig;
|
||||||
@@ -222,9 +220,9 @@ public class DataSourceUtils {
|
|||||||
|
|
||||||
@SuppressWarnings("unchecked")
|
@SuppressWarnings("unchecked")
|
||||||
public static JavaRDD<HoodieRecord> dropDuplicates(JavaSparkContext jssc, JavaRDD<HoodieRecord> incomingHoodieRecords,
|
public static JavaRDD<HoodieRecord> dropDuplicates(JavaSparkContext jssc, JavaRDD<HoodieRecord> incomingHoodieRecords,
|
||||||
HoodieWriteConfig writeConfig, Option<EmbeddedTimelineService> timelineService) {
|
HoodieWriteConfig writeConfig) {
|
||||||
try {
|
try {
|
||||||
HoodieReadClient client = new HoodieReadClient<>(jssc, writeConfig, timelineService);
|
HoodieReadClient client = new HoodieReadClient<>(jssc, writeConfig);
|
||||||
return client.tagLocation(incomingHoodieRecords)
|
return client.tagLocation(incomingHoodieRecords)
|
||||||
.filter(r -> !((HoodieRecord<HoodieRecordPayload>) r).isCurrentLocationKnown());
|
.filter(r -> !((HoodieRecord<HoodieRecordPayload>) r).isCurrentLocationKnown());
|
||||||
} catch (TableNotFoundException e) {
|
} catch (TableNotFoundException e) {
|
||||||
@@ -236,10 +234,10 @@ public class DataSourceUtils {
|
|||||||
|
|
||||||
@SuppressWarnings("unchecked")
|
@SuppressWarnings("unchecked")
|
||||||
public static JavaRDD<HoodieRecord> dropDuplicates(JavaSparkContext jssc, JavaRDD<HoodieRecord> incomingHoodieRecords,
|
public static JavaRDD<HoodieRecord> dropDuplicates(JavaSparkContext jssc, JavaRDD<HoodieRecord> incomingHoodieRecords,
|
||||||
Map<String, String> parameters, Option<EmbeddedTimelineService> timelineService) {
|
Map<String, String> parameters) {
|
||||||
HoodieWriteConfig writeConfig =
|
HoodieWriteConfig writeConfig =
|
||||||
HoodieWriteConfig.newBuilder().withPath(parameters.get("path")).withProps(parameters).build();
|
HoodieWriteConfig.newBuilder().withPath(parameters.get("path")).withProps(parameters).build();
|
||||||
return dropDuplicates(jssc, incomingHoodieRecords, writeConfig, timelineService);
|
return dropDuplicates(jssc, incomingHoodieRecords, writeConfig);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static HiveSyncConfig buildHiveSyncConfig(TypedProperties props, String basePath) {
|
public static HiveSyncConfig buildHiveSyncConfig(TypedProperties props, String basePath) {
|
||||||
|
|||||||
@@ -131,10 +131,7 @@ private[hudi] object HoodieSparkSqlWriter {
|
|||||||
|
|
||||||
val hoodieRecords =
|
val hoodieRecords =
|
||||||
if (parameters(INSERT_DROP_DUPS_OPT_KEY).toBoolean) {
|
if (parameters(INSERT_DROP_DUPS_OPT_KEY).toBoolean) {
|
||||||
DataSourceUtils.dropDuplicates(
|
DataSourceUtils.dropDuplicates(jsc, hoodieAllIncomingRecords, mapAsJavaMap(parameters))
|
||||||
jsc,
|
|
||||||
hoodieAllIncomingRecords,
|
|
||||||
mapAsJavaMap(parameters), client.getTimelineServer)
|
|
||||||
} else {
|
} else {
|
||||||
hoodieAllIncomingRecords
|
hoodieAllIncomingRecords
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -357,7 +357,7 @@ public class DeltaSync implements Serializable {
|
|||||||
if (cfg.filterDupes) {
|
if (cfg.filterDupes) {
|
||||||
// turn upserts to insert
|
// turn upserts to insert
|
||||||
cfg.operation = cfg.operation == Operation.UPSERT ? Operation.INSERT : cfg.operation;
|
cfg.operation = cfg.operation == Operation.UPSERT ? Operation.INSERT : cfg.operation;
|
||||||
records = DataSourceUtils.dropDuplicates(jssc, records, writeClient.getConfig(), writeClient.getTimelineServer());
|
records = DataSourceUtils.dropDuplicates(jssc, records, writeClient.getConfig());
|
||||||
}
|
}
|
||||||
|
|
||||||
boolean isEmpty = records.isEmpty();
|
boolean isEmpty = records.isEmpty();
|
||||||
|
|||||||
Reference in New Issue
Block a user