From 3251d62bd3c740b25139029a1913d1cf5a57173f Mon Sep 17 00:00:00 2001
From: Wenning Ding
Date: Wed, 23 Oct 2019 13:53:57 -0700
Subject: [PATCH] [HUDI-313] Fix select count star error when querying a realtime table

---
 .../HoodieParquetRealtimeInputFormat.java | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieParquetRealtimeInputFormat.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieParquetRealtimeInputFormat.java
index d37ae2ab9..3e4272479 100644
--- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieParquetRealtimeInputFormat.java
+++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieParquetRealtimeInputFormat.java
@@ -197,10 +197,27 @@ public class HoodieParquetRealtimeInputFormat extends HoodieParquetInputFormat i
     return configuration;
   }
 
+  /**
+   * Hive appends the ids of the columns it reads to the already configured column ids during getRecordReader. For
+   * queries such as SELECT COUNT(*) that list is an empty string, so combining it with the Hoodie required projection
+   * ids yields e.g. ",2,0,3" and causes an error. This strips that leading comma; an unset value is left untouched.
+   */
+  private static synchronized Configuration cleanProjectionColumnIds(Configuration conf) {
+    String columnIds = conf.get(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR);
+    if (columnIds != null && columnIds.startsWith(",")) {
+      conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, columnIds.substring(1));
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("The projection Ids: {" + columnIds + "} start with ','. First comma is removed");
+      }
+    }
+    return conf;
+  }
+
   @Override
   public RecordReader getRecordReader(final InputSplit split, final JobConf job, final Reporter reporter)
       throws IOException {
 
+    this.conf = cleanProjectionColumnIds(job);
     LOG.info("Before adding Hoodie columns, Projections :" + job.get(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR)
         + ", Ids :" + job.get(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR));