1
0

[HUDI-91][HUDI-12] Migrate to Spark 2.4.4, migrate to spark-avro library instead of databricks-avro, add support for Decimal/Date types

- Upgrade Spark to 2.4.4, Parquet to 1.10.1, Avro to 1.8.2
- Remove spark-avro from hudi-spark-bundle. Users need to provide --packages org.apache.spark:spark-avro_2.11:2.4.4 when running spark-shell or spark-submit.
- Replace com.databricks:spark-avro with org.apache.spark:spark-avro
- Shade Avro in hudi-hadoop-mr-bundle to make sure it does not conflict with Hive's Avro version.
This commit is contained in:
Udit Mehrotra
2020-01-12 15:03:11 -08:00
committed by Balaji Varadarajan
parent d9675c4ec0
commit ad50008a59
11 changed files with 128 additions and 79 deletions

View File

@@ -107,7 +107,7 @@ public abstract class ITTestBase {
.append(" --master local[2] --driver-class-path ").append(HADOOP_CONF_DIR)
.append(
" --conf spark.sql.hive.convertMetastoreParquet=false --deploy-mode client --driver-memory 1G --executor-memory 1G --num-executors 1 ")
.append(" --packages com.databricks:spark-avro_2.11:4.0.0 ").append(" -i ").append(commandFile).toString();
.append(" --packages org.apache.spark:spark-avro_2.11:2.4.4 ").append(" -i ").append(commandFile).toString();
}
static String getPrestoConsoleCommand(String commandFile) {