Fixes to RealtimeInputFormat and RealtimeRecordReader and update documentation for HiveSyncTool
commit 4b26be9f61
parent 521555c576
committed by prazanna

@@ -70,21 +70,23 @@ bin/hiveserver2 \

 #### Hive Sync Tool

 Once Hive is up and running, the sync tool can be used to sync commits done above to a Hive table, as follows.
 The Hive Sync Tool updates/creates the necessary metadata (schema and partitions) in the Hive metastore.
 This allows for schema evolution and incremental addition of newly written partitions.
 It takes an incremental approach, storing the last synced commit time in TBLPROPERTIES and syncing only the commits made after that time.
 It can be run as frequently as the ingestion pipeline, to ensure new partitions and schema changes are reflected immediately.

 ```
-java -cp target/hoodie-hive-0.3.1-SNAPSHOT-jar-with-dependencies.jar:target/jars/* com.uber.hoodie.hive.HiveSyncTool \
-  --base-path file:///tmp/hoodie/sample-table/ \
-  --database default \
-  --table hoodie_test \
-  --user hive \
-  --pass hive \
-  --jdbc-url jdbc:hive2://localhost:10010/
+{JAVA8}/bin/java -cp "/etc/hive/conf:./hoodie-hive-0.3.8-SNAPSHOT-jar-with-dependencies.jar:/opt/hadoop/lib/hadoop-mapreduce/*" com.uber.hoodie.hive.HiveSyncTool \
+  --user hive \
+  --pass hive \
+  --database default \
+  --jdbc-url "jdbc:hive2://localhost:10010/" \
+  --base-path tmp/hoodie/sample-table/ \
+  --table hoodie_test \
+  --partitioned-by field1,field2
 ```

+{% include callout.html content="Hive sync tool does not yet support Merge-On-Read tables." type="info" %}

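Since the tool records its sync point in TBLPROPERTIES, a quick way to verify a sync from code is to list the table properties over the same JDBC connection the tool uses. A minimal, hedged sketch (it assumes the `hoodie_test` table and credentials from the command above, plus the Hive JDBC driver on the classpath; the exact property key varies by version, so all properties are listed rather than guessed):

```
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;

public class ShowSyncPoint {
  public static void main(String[] args) throws Exception {
    Class.forName("org.apache.hive.jdbc.HiveDriver");
    try (Connection conn = DriverManager.getConnection(
             "jdbc:hive2://localhost:10010/default", "hive", "hive");
         Statement stmt = conn.createStatement();
         // List all table properties; the last synced commit time is stored among them.
         ResultSet rs = stmt.executeQuery("SHOW TBLPROPERTIES hoodie_test")) {
      while (rs.next()) {
        System.out.println(rs.getString(1));
      }
    }
  }
}
```
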
 #### Manually via Beeline

@@ -28,11 +28,11 @@ import org.apache.hadoop.fs.Path;
 @JsonIgnoreProperties(ignoreUnknown = true)
 public class CompactionWriteStat implements Serializable {

-  private final HoodieWriteStat writeStat;
+  private HoodieWriteStat writeStat;
   private String partitionPath;
-  private final long totalLogRecords;
-  private final long totalLogFiles;
-  private final long totalRecordsToBeUpdate;
+  private long totalLogRecords;
+  private long totalLogFiles;
+  private long totalRecordsToBeUpdate;

   public CompactionWriteStat(HoodieWriteStat writeStat, String partitionPath, long totalLogFiles, long totalLogRecords,
       long totalRecordsToUpdate) {
@@ -43,6 +43,10 @@ public class CompactionWriteStat implements Serializable {
     this.totalRecordsToBeUpdate = totalRecordsToUpdate;
   }

+  public CompactionWriteStat() {
+    // For de-serialization
+  }
+
   public long getTotalLogRecords() {
     return totalLogRecords;
   }
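The two changes above go together: a no-arg constructor cannot leave `final` fields unassigned (the class would not compile), so the fields become mutable to let the JSON mapper instantiate the bean and fill it in afterwards. A self-contained, hedged sketch of the pattern (a simplified stand-in class, not the real `CompactionWriteStat`; shown with the FasterXML `ObjectMapper`, though the same constraint applies to other Jackson variants):

```
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import com.fasterxml.jackson.databind.ObjectMapper;

@JsonIgnoreProperties(ignoreUnknown = true)
class StatBean {
  private long totalLogFiles; // non-final, so it can be set after construction

  public StatBean() {
    // For de-serialization
  }

  public long getTotalLogFiles() {
    return totalLogFiles;
  }

  public void setTotalLogFiles(long totalLogFiles) {
    this.totalLogFiles = totalLogFiles;
  }
}

public class RoundTrip {
  public static void main(String[] args) throws Exception {
    ObjectMapper mapper = new ObjectMapper();
    StatBean in = new StatBean();
    in.setTotalLogFiles(3);
    String json = mapper.writeValueAsString(in);   // {"totalLogFiles":3}
    StatBean out = mapper.readValue(json, StatBean.class);
    System.out.println(out.getTotalLogFiles());    // 3
  }
}
```
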
@@ -75,6 +75,10 @@
       <groupId>org.apache.parquet</groupId>
       <artifactId>parquet-avro</artifactId>
     </dependency>
+    <dependency>
+      <groupId>com.twitter</groupId>
+      <artifactId>parquet-avro</artifactId>
+    </dependency>
     <dependency>
       <groupId>org.apache.avro</groupId>
       <artifactId>avro</artifactId>
@@ -108,6 +112,7 @@
           <artifactSet>
             <includes>
               <include>com.uber.hoodie:hoodie-common</include>
+              <include>com.twitter:parquet-avro</include>
             </includes>
           </artifactSet>
         </configuration>

@@ -20,9 +20,11 @@ package com.uber.hoodie.hadoop.realtime;

 import com.google.common.base.Preconditions;
+import com.google.common.collect.Sets;
 import com.uber.hoodie.common.model.HoodieDataFile;
 import com.uber.hoodie.common.model.HoodieRecord;
 import com.uber.hoodie.common.table.HoodieTableMetaClient;
+import com.uber.hoodie.common.table.HoodieTimeline;
 import com.uber.hoodie.common.table.log.HoodieLogFile;
 import com.uber.hoodie.common.table.view.HoodieTableFileSystemView;
 import com.uber.hoodie.common.util.FSUtils;
@@ -66,6 +68,7 @@ public class HoodieRealtimeInputFormat extends HoodieInputFormat implements Conf
   // These positions have to be deterministic across all tables
   public static final int HOODIE_COMMIT_TIME_COL_POS = 0;
   public static final int HOODIE_RECORD_KEY_COL_POS = 2;
+  public static final int HOODIE_PARTITION_PATH_COL_POS = 3;

   @Override
   public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
@@ -112,9 +115,18 @@ public class HoodieRealtimeInputFormat extends HoodieInputFormat implements Conf
         List<FileSplit> dataFileSplits = groupedInputSplits.get(dataFile.getFileId());
         dataFileSplits.forEach(split -> {
           try {
-            List<String> logFilePaths = logFiles.stream().map(logFile -> logFile.getPath().toString()).collect(Collectors.toList());
-            String maxCommitTime = metaClient.getActiveTimeline().getDeltaCommitTimeline().filterCompletedInstants().lastInstant().get().getTimestamp();
-            rtSplits.add(new HoodieRealtimeFileSplit(split, logFilePaths, maxCommitTime));
+            List<String> logFilePaths = logFiles.stream()
+                .map(logFile -> logFile.getPath().toString())
+                .collect(Collectors.toList());
+            // Get the maxCommit from the last delta or compaction or commit - when bootstrapped from COW table
+            String maxCommitTime = metaClient.getActiveTimeline()
+                .getTimelineOfActions(
+                    Sets.newHashSet(HoodieTimeline.COMMIT_ACTION,
+                        HoodieTimeline.COMPACTION_ACTION,
+                        HoodieTimeline.DELTA_COMMIT_ACTION))
+                .filterCompletedInstants().lastInstant().get().getTimestamp();
+            rtSplits.add(
+                new HoodieRealtimeFileSplit(split, logFilePaths, maxCommitTime));
           } catch (IOException e) {
             throw new HoodieIOException("Error creating hoodie real time split ", e);
           }
@@ -124,7 +136,7 @@ public class HoodieRealtimeInputFormat extends HoodieInputFormat implements Conf
       throw new HoodieIOException("Error obtaining data file/log file grouping: " + partitionPath, e);
     }
   });

   LOG.info("Returning a total splits of " + rtSplits.size());
   return rtSplits.toArray(new InputSplit[rtSplits.size()]);
 }
@@ -135,35 +147,48 @@ public class HoodieRealtimeInputFormat extends HoodieInputFormat implements Conf
     return super.listStatus(job);
   }

-  private static Configuration addExtraReadColsIfNeeded(Configuration configuration) {
-    String readColNames = configuration.get(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR);
-    String readColIds = configuration.get(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR);
+  /**
+   * Add a field to the existing fields projected
+   */
+  private static Configuration addProjectionField(Configuration conf, String fieldName,
+      int fieldIndex) {
+    String readColNames = conf.get(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR, "");
+    String readColIds = conf.get(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "");

-    if (!readColNames.contains(HoodieRecord.RECORD_KEY_METADATA_FIELD)) {
-      configuration.set(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR,
-          readColNames + "," + HoodieRecord.RECORD_KEY_METADATA_FIELD);
-      configuration.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR,
-          readColIds + "," + HOODIE_RECORD_KEY_COL_POS);
-      LOG.info(String.format("Adding extra _hoodie_record_key column, to enable log merging cols (%s) ids (%s) ",
-          configuration.get(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR),
-          configuration.get(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR)));
-    }
+    String readColNamesPrefix = readColNames + ",";
+    if (readColNames == null || readColNames.isEmpty()) {
+      readColNamesPrefix = "";
+    }
+    String readColIdsPrefix = readColIds + ",";
+    if (readColIds == null || readColIds.isEmpty()) {
+      readColIdsPrefix = "";
+    }

-    if (!readColNames.contains(HoodieRecord.COMMIT_TIME_METADATA_FIELD)) {
-      configuration.set(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR,
-          readColNames + "," + HoodieRecord.COMMIT_TIME_METADATA_FIELD);
-      configuration.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR,
-          readColIds + "," + HOODIE_COMMIT_TIME_COL_POS);
-      LOG.info(String.format("Adding extra _hoodie_commit_time column, to enable log merging cols (%s) ids (%s) ",
-          configuration.get(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR),
-          configuration.get(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR)));
-    }
+    if (!readColNames.contains(fieldName)) {
+      // If not already in the list - then add it
+      conf.set(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR,
+          readColNamesPrefix + fieldName);
+      conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, readColIdsPrefix + fieldIndex);
+      if (LOG.isDebugEnabled()) {
+        LOG.debug(String.format("Adding extra column " + fieldName
+                + ", to enable log merging cols (%s) ids (%s) ",
+            conf.get(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR),
+            conf.get(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR)));
+      }
+    }

-    return configuration;
+    return conf;
   }

+  private static Configuration addRequiredProjectionFields(Configuration configuration) {
+    // Need this to do merge records in HoodieRealtimeRecordReader
+    configuration = addProjectionField(configuration, HoodieRecord.RECORD_KEY_METADATA_FIELD,
+        HOODIE_RECORD_KEY_COL_POS);
+    configuration = addProjectionField(configuration, HoodieRecord.COMMIT_TIME_METADATA_FIELD,
+        HOODIE_COMMIT_TIME_COL_POS);
+    configuration = addProjectionField(configuration,
+        HoodieRecord.PARTITION_PATH_METADATA_FIELD, HOODIE_PARTITION_PATH_COL_POS);
+    return configuration;
+  }
+
   @Override
   public RecordReader<Void, ArrayWritable> getRecordReader(final InputSplit split,
@@ -172,17 +197,17 @@ public class HoodieRealtimeInputFormat extends HoodieInputFormat implements Conf
     LOG.info("Creating record reader with readCols :" + job.get(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR));
     // sanity check
     Preconditions.checkArgument(split instanceof HoodieRealtimeFileSplit,
-        "HoodieRealtimeRecordReader can only work on HoodieRealtimeFileSplit");
+        "HoodieRealtimeRecordReader can only work on HoodieRealtimeFileSplit and not with " + split);
     return new HoodieRealtimeRecordReader((HoodieRealtimeFileSplit) split, job, super.getRecordReader(split, job, reporter));
   }

   @Override
   public void setConf(Configuration conf) {
-    this.conf = addExtraReadColsIfNeeded(conf);
+    this.conf = addRequiredProjectionFields(conf);
   }

   @Override
   public Configuration getConf() {
-    return addExtraReadColsIfNeeded(conf);
+    return conf;
   }
 }

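One detail worth calling out in the refactored `addProjectionField`: the old `addExtraReadColsIfNeeded` always appended `"," + field`, which produces a leading comma (and a phantom empty column) when nothing has been projected yet; the new prefix handling only inserts the separator when there is something to separate. A standalone, hedged sketch of just that string logic (the helper name is illustrative, not Hudi API):

```
public class ProjectionCsv {
  static String appendColumn(String existingCsv, String field) {
    // Old behavior: existingCsv + "," + field -> ",_hoodie_record_key" when empty.
    // Fixed behavior: only prepend a separator when the list is non-empty.
    String prefix = (existingCsv == null || existingCsv.isEmpty()) ? "" : existingCsv + ",";
    return prefix + field;
  }

  public static void main(String[] args) {
    System.out.println(appendColumn("", "_hoodie_record_key"));     // _hoodie_record_key
    System.out.println(appendColumn("col1", "_hoodie_record_key")); // col1,_hoodie_record_key
  }
}
```
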
@@ -18,20 +18,24 @@

 package com.uber.hoodie.hadoop.realtime;

+import com.google.common.collect.Lists;
 import com.uber.hoodie.common.model.HoodieAvroPayload;
 import com.uber.hoodie.common.model.HoodieRecord;
+import com.uber.hoodie.common.table.HoodieTimeline;
 import com.uber.hoodie.common.table.log.HoodieCompactedLogRecordScanner;
 import com.uber.hoodie.common.util.FSUtils;
-import com.uber.hoodie.common.util.ParquetUtils;
 import com.uber.hoodie.exception.HoodieException;
 import com.uber.hoodie.exception.HoodieIOException;
+import java.util.Set;
+import java.util.TreeMap;
+import java.util.stream.Collectors;
 import org.apache.avro.Schema;
 import org.apache.avro.generic.GenericArray;
 import org.apache.avro.generic.GenericFixed;
 import org.apache.avro.generic.GenericRecord;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
 import org.apache.hadoop.hive.serde2.io.DoubleWritable;
 import org.apache.hadoop.io.ArrayWritable;
@@ -45,18 +49,15 @@ import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapred.RecordReader;
-import org.apache.parquet.avro.AvroSchemaConverter;
-import org.apache.parquet.schema.MessageType;

 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.HashMap;
 import java.util.Iterator;
 import java.util.List;
-import java.util.Set;
-import java.util.TreeMap;
-import java.util.stream.Collectors;
+import parquet.avro.AvroSchemaConverter;
+import parquet.hadoop.ParquetFileReader;
+import parquet.schema.MessageType;

 /**
  * Record Reader implementation to merge fresh avro data with base parquet data, to support real time
@@ -83,37 +84,54 @@ public class HoodieRealtimeRecordReader implements RecordReader<Void, ArrayWrita

     LOG.info("cfg ==> " + job.get(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR));
     try {
-      baseFileSchema = ParquetUtils.readSchema(split.getPath());
+      baseFileSchema = readSchema(jobConf, split.getPath());
       readAndCompactLog();
     } catch (IOException e) {
-      throw new HoodieIOException("Could not create HoodieRealtimeRecordReader on path " + this.split.getPath(), e);
+      throw new HoodieIOException(
+          "Could not create HoodieRealtimeRecordReader on path " + this.split.getPath(), e);
     }
   }

+  /**
+   * Reads the schema from the parquet file. This is different from ParquetUtils as it uses the
+   * twitter parquet to support hive 1.1.0
+   */
+  private static MessageType readSchema(Configuration conf, Path parquetFilePath) {
+    try {
+      return ParquetFileReader.readFooter(conf, parquetFilePath).getFileMetaData()
+          .getSchema();
+    } catch (IOException e) {
+      throw new HoodieIOException("Failed to read footer for parquet " + parquetFilePath,
+          e);
+    }
+  }
+
   /**
    * Goes through the log files and populates a map with latest version of each key logged, since the base split was written.
    */
   private void readAndCompactLog() throws IOException {
     Schema writerSchema = new AvroSchemaConverter().convert(baseFileSchema);
     List<String> projectionFields = orderFields(
-        jobConf.get(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR),
-        jobConf.get(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR),
-        jobConf.get("partition_columns"));
+        jobConf.get(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR),
+        jobConf.get(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR),
+        jobConf.get("partition_columns", ""));
     // TODO(vc): In the future, the reader schema should be updated based on log files & be able to null out fields not present before
     Schema readerSchema = generateProjectionSchema(writerSchema, projectionFields);

-    LOG.info(String.format("About to read compacted logs %s for base split %s, projecting cols %s",
+    LOG.info(
+        String.format("About to read compacted logs %s for base split %s, projecting cols %s",
             split.getDeltaFilePaths(), split.getPath(), projectionFields));

     HoodieCompactedLogRecordScanner compactedLogRecordScanner =
-        new HoodieCompactedLogRecordScanner(FSUtils.getFs(), split.getDeltaFilePaths(), readerSchema);
-    Iterator<HoodieRecord<HoodieAvroPayload>> itr = compactedLogRecordScanner.iterator();
+        new HoodieCompactedLogRecordScanner(FSUtils.getFs(), split.getDeltaFilePaths(),
+            readerSchema);

     // NOTE: HoodieCompactedLogRecordScanner will not return records for an in-flight commit
     // but can return records for completed commits > the commit we are trying to read (if using readCommit() API)
-    while(itr.hasNext()) {
-      HoodieRecord<HoodieAvroPayload> hoodieRecord = itr.next();
-      GenericRecord rec = (GenericRecord) hoodieRecord.getData().getInsertValue(readerSchema).get();
+    for (HoodieRecord<HoodieAvroPayload> hoodieRecord : compactedLogRecordScanner) {
+      GenericRecord rec = (GenericRecord) hoodieRecord.getData().getInsertValue(readerSchema)
+          .get();
       String key = hoodieRecord.getRecordKey();
       // we assume, a later safe record in the log, is newer than what we have in the map & replace it.
       ArrayWritable aWritable = (ArrayWritable) avroToArrayWritable(rec, writerSchema);
@@ -146,22 +164,27 @@ public class HoodieRealtimeRecordReader implements RecordReader<Void, ArrayWrita
    * @param fieldOrderCsv
    * @return
    */
-  public static List<String> orderFields(String fieldNameCsv, String fieldOrderCsv, String partitioningFieldsCsv) {
+  public static List<String> orderFields(String fieldNameCsv, String fieldOrderCsv,
+      String partitioningFieldsCsv) {

     String[] fieldOrders = fieldOrderCsv.split(",");
-    Set<String> partitioningFields = Arrays.stream(partitioningFieldsCsv.split(",")).collect(Collectors.toSet());
-    List<String> fieldNames = Arrays.stream(fieldNameCsv.split(",")).filter(fn -> !partitioningFields.contains(fn)).collect(Collectors.toList());
+    Set<String> partitioningFields = Arrays.stream(partitioningFieldsCsv.split(","))
+        .collect(Collectors.toSet());
+    List<String> fieldNames = Arrays.stream(fieldNameCsv.split(","))
+        .filter(fn -> !partitioningFields.contains(fn)).collect(Collectors.toList());

     // Hive does not provide ids for partitioning fields, so check for lengths excluding that.
     if (fieldNames.size() != fieldOrders.length) {
-      throw new HoodieException(String.format("Error ordering fields for storage read. #fieldNames: %d, #fieldPositions: %d",
-          fieldNames.size(), fieldOrders.length));
+      throw new HoodieException(String.format(
+          "Error ordering fields for storage read. #fieldNames: %d, #fieldPositions: %d",
+          fieldNames.size(), fieldOrders.length));
     }
     TreeMap<Integer, String> orderedFieldMap = new TreeMap<>();
-    for (int ox=0; ox < fieldOrders.length; ox++) {
+    for (int ox = 0; ox < fieldOrders.length; ox++) {
       orderedFieldMap.put(Integer.parseInt(fieldOrders[ox]), fieldNames.get(ox));
     }
-    return orderedFieldMap.values().stream().collect(Collectors.toList());
+    return new ArrayList<>(orderedFieldMap.values());
   }

   /**
@@ -235,6 +258,7 @@ public class HoodieRealtimeRecordReader implements RecordReader<Void, ArrayWrita
       return new ArrayWritable(Writable.class, values2);
     case MAP:
+      // TODO(vc): Need to add support for complex types
       return NullWritable.get();
     case UNION:
       List<Schema> types = schema.getTypes();
       if (types.size() != 2) {
@@ -271,7 +295,10 @@ public class HoodieRealtimeRecordReader implements RecordReader<Void, ArrayWrita
           key, arrayWritableToString(arrayWritable), arrayWritableToString(deltaRecordMap.get(key))));
     }
     if (deltaRecordMap.containsKey(key)) {
-      arrayWritable.set(deltaRecordMap.get(key).get());
+      Writable[] replaceValue = deltaRecordMap.get(key).get();
+      Writable[] originalValue = arrayWritable.get();
+      System.arraycopy(replaceValue, 0, originalValue, 0, originalValue.length);
+      arrayWritable.set(originalValue);
     }
     return true;
   }

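The `System.arraycopy` change in `next()` is the subtle one: swapping in the delta record's `Writable[]` wholesale can hand Hive an array of a different width than the projected schema it was initialized with, whereas copying in place preserves the original length. A hedged, self-contained sketch of the idea (illustrative values, not Hudi API):

```
import org.apache.hadoop.io.ArrayWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;

public class InPlaceMerge {
  public static void main(String[] args) {
    ArrayWritable base = new ArrayWritable(Writable.class,
        new Writable[]{new Text("old1"), new Text("old2")});
    // The delta record may carry more columns than this reader projects.
    Writable[] replaceValue =
        {new Text("new1"), new Text("new2"), new Text("extra")};

    Writable[] originalValue = base.get();
    // Copy only as many fields as the base row holds; the width stays 2.
    System.arraycopy(replaceValue, 0, originalValue, 0, originalValue.length);
    base.set(originalValue);

    System.out.println(base.get().length); // 2
  }
}
```
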
@@ -167,7 +167,7 @@
           </descriptors>
           <archive>
             <manifest>
-              <mainClass>com.uber.hoodie.hive.example.HoodieHiveSyncExample</mainClass>
+              <mainClass>com.uber.hoodie.hive.HiveSyncTool</mainClass>
             </manifest>
           </archive>

@@ -49,7 +49,7 @@ public class HiveSyncConfig implements Serializable {
       "--base-path"}, description = "Basepath of hoodie dataset to sync", required = true)
   public String basePath;

-  @Parameter(names = "--partitioned-by", description = "Fields in the schema partitioned by")
+  @Parameter(names = "--partitioned-by", description = "Fields in the schema partitioned by", required = true)
   public List<String> partitionFields = new ArrayList<>();

   @Parameter(names = "-partition-value-extractor", description = "Class which implements PartitionValueExtractor to extract the partition values from HDFS path")

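Making `--partitioned-by` required means a missing flag now fails fast at argument parsing instead of surfacing later as an empty partition-field list. A hedged sketch of the behavior, assuming `@Parameter` here is JCommander's (class name and messages illustrative):

```
import com.beust.jcommander.JCommander;
import com.beust.jcommander.Parameter;
import com.beust.jcommander.ParameterException;
import java.util.ArrayList;
import java.util.List;

public class RequiredFlagDemo {
  @Parameter(names = "--partitioned-by", description = "Fields in the schema partitioned by", required = true)
  public List<String> partitionFields = new ArrayList<>();

  public static void main(String[] args) {
    RequiredFlagDemo cfg = new RequiredFlagDemo();
    JCommander jc = new JCommander(cfg);
    try {
      jc.parse(args); // throws if --partitioned-by is absent
      System.out.println("partitioned by: " + cfg.partitionFields);
    } catch (ParameterException e) {
      // e.g. "The following option is required: --partitioned-by" (wording varies by version)
      System.err.println(e.getMessage());
      jc.usage();
    }
  }
}
```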