[HUDI-242] Support for RFC-12/Bootstrapping of external datasets to hudi (#1876)
- [HUDI-418] Bootstrap Index Implementation using HFile with unit-test
- [HUDI-421] FileSystem View Changes to support Bootstrap with unit-tests
- [HUDI-424] Implement Query Side Integration for querying tables containing bootstrap file slices
- [HUDI-423] Implement upsert functionality for handling updates to these bootstrap file slices
- [HUDI-421] Bootstrap Write Client with tests
- [HUDI-425] Added HoodieDeltaStreamer support
- [HUDI-899] Add a knob to change partition-path style while performing metadata bootstrap
- [HUDI-900] Metadata Bootstrap Key Generator needs to handle complex keys correctly
- [HUDI-424] Simplify Record reader implementation
- [HUDI-423] Implement upsert functionality for handling updates to these bootstrap file slices
- [HUDI-420] Hoodie Demo working with hive and sparkSQL. Also, Hoodie CLI working with bootstrap tables

Co-authored-by: Mehrotra <uditme@amazon.com>
Co-authored-by: Vinoth Chandar <vinoth@apache.org>
Co-authored-by: Balaji Varadarajan <varadarb@uber.com>
This commit is contained in:
@@ -477,8 +477,8 @@ public class TestHoodieClientOnCopyOnWriteStorage extends HoodieClientTestBase {
|
||||
.withBloomIndexUpdatePartitionPath(true)
|
||||
.withGlobalSimpleIndexUpdatePartitionPath(true)
|
||||
.build()).withTimelineLayoutVersion(VERSION_0).build();
|
||||
HoodieTableMetaClient.initTableType(metaClient.getHadoopConf(), metaClient.getBasePath(), metaClient.getTableType(),
|
||||
metaClient.getTableConfig().getTableName(), metaClient.getArchivePath(),
|
||||
HoodieTableMetaClient.initTableType(metaClient.getHadoopConf(), metaClient.getBasePath(),
|
||||
metaClient.getTableType(), metaClient.getTableConfig().getTableName(), metaClient.getArchivePath(),
|
||||
metaClient.getTableConfig().getPayloadClass(), VERSION_0);
|
||||
HoodieWriteClient client = getHoodieWriteClient(hoodieWriteConfig, false);
|
||||
|
||||
|
||||
@@ -122,7 +122,7 @@ public class TestUpdateSchemaEvolution extends HoodieClientTestHarness {
|
||||
HoodieMergeHandle mergeHandle = new HoodieMergeHandle(config2, "101", table2,
|
||||
updateRecords.iterator(), record1.getPartitionPath(), fileId, supplier);
|
||||
Configuration conf = new Configuration();
|
||||
AvroReadSupport.setAvroReadSchema(conf, mergeHandle.getWriterSchema());
|
||||
AvroReadSupport.setAvroReadSchema(conf, mergeHandle.getWriterSchemaWithMetafields());
|
||||
List<GenericRecord> oldRecords = ParquetUtils.readAvroRecords(conf,
|
||||
new Path(config2.getBasePath() + "/" + insertResult.getStat().getPath()));
|
||||
for (GenericRecord rec : oldRecords) {
|
||||
|
||||
@@ -0,0 +1,75 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi.client.bootstrap;
|
||||
|
||||
import org.apache.hudi.avro.model.HoodieFileStatus;
|
||||
import org.apache.hudi.client.bootstrap.selector.BootstrapRegexModeSelector;
|
||||
import org.apache.hudi.common.util.collection.Pair;
|
||||
import org.apache.hudi.config.HoodieBootstrapConfig;
|
||||
import org.apache.hudi.config.HoodieWriteConfig;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
|
||||
public class TestBootstrapRegexModeSelector {
|
||||
|
||||
private HoodieWriteConfig getConfig(String regex, BootstrapMode selectedMode) {
|
||||
return HoodieWriteConfig.newBuilder().withPath("")
|
||||
.withBootstrapConfig(HoodieBootstrapConfig.newBuilder()
|
||||
.withBootstrapModeSelectorRegex(regex)
|
||||
.withBootstrapModeForRegexMatch(selectedMode).build())
|
||||
.forTable("test-trip-table").build();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testModeSelector() {
|
||||
List<String> partitionPaths = Arrays.asList("2020/05/01", "2020/05/02", "2020/05/10", "2020/05/11");
|
||||
List<Pair<String, List<HoodieFileStatus>>> input = partitionPaths.stream()
|
||||
.map(p -> Pair.<String, List<HoodieFileStatus>>of(p, new ArrayList<>())).collect(Collectors.toList());
|
||||
String regex = "2020/05/1[0-9]";
|
||||
BootstrapRegexModeSelector regexModeSelector = new BootstrapRegexModeSelector(getConfig(regex,
|
||||
BootstrapMode.FULL_RECORD));
|
||||
|
||||
Map<BootstrapMode, List<String>> result = regexModeSelector.select(input);
|
||||
assertTrue(result.get(BootstrapMode.METADATA_ONLY).contains("2020/05/01"));
|
||||
assertTrue(result.get(BootstrapMode.METADATA_ONLY).contains("2020/05/02"));
|
||||
assertTrue(result.get(BootstrapMode.FULL_RECORD).contains("2020/05/10"));
|
||||
assertTrue(result.get(BootstrapMode.FULL_RECORD).contains("2020/05/11"));
|
||||
assertEquals(2, result.get(BootstrapMode.METADATA_ONLY).size());
|
||||
assertEquals(2, result.get(BootstrapMode.FULL_RECORD).size());
|
||||
|
||||
regexModeSelector = new BootstrapRegexModeSelector(getConfig(regex,
|
||||
BootstrapMode.METADATA_ONLY));
|
||||
result = regexModeSelector.select(input);
|
||||
assertTrue(result.get(BootstrapMode.FULL_RECORD).contains("2020/05/01"));
|
||||
assertTrue(result.get(BootstrapMode.FULL_RECORD).contains("2020/05/02"));
|
||||
assertTrue(result.get(BootstrapMode.METADATA_ONLY).contains("2020/05/10"));
|
||||
assertTrue(result.get(BootstrapMode.METADATA_ONLY).contains("2020/05/11"));
|
||||
assertEquals(2, result.get(BootstrapMode.METADATA_ONLY).size());
|
||||
assertEquals(2, result.get(BootstrapMode.FULL_RECORD).size());
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,66 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi.client.bootstrap;
|
||||
|
||||
import org.apache.hudi.avro.model.HoodieFileStatus;
|
||||
import org.apache.hudi.client.bootstrap.selector.FullRecordBootstrapModeSelector;
|
||||
import org.apache.hudi.client.bootstrap.selector.MetadataOnlyBootstrapModeSelector;
|
||||
import org.apache.hudi.client.bootstrap.selector.UniformBootstrapModeSelector;
|
||||
import org.apache.hudi.common.util.collection.Pair;
|
||||
import org.apache.hudi.config.HoodieWriteConfig;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
|
||||
public class TestUniformBootstrapModeSelector {
|
||||
|
||||
@Test
|
||||
public void testFullBootstrapModeSelector() {
|
||||
|
||||
FullRecordBootstrapModeSelector modeSelector = new FullRecordBootstrapModeSelector(
|
||||
HoodieWriteConfig.newBuilder().withPath("").build());
|
||||
testModeSelector(modeSelector, BootstrapMode.FULL_RECORD);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMetadataOnlyBootstrapModeSelector() {
|
||||
MetadataOnlyBootstrapModeSelector modeSelector = new MetadataOnlyBootstrapModeSelector(
|
||||
HoodieWriteConfig.newBuilder().withPath("").build());
|
||||
testModeSelector(modeSelector, BootstrapMode.METADATA_ONLY);
|
||||
}
|
||||
|
||||
private void testModeSelector(UniformBootstrapModeSelector modeSelector, BootstrapMode mode) {
|
||||
List<String> partitionPaths = Arrays.asList("2020/05/01", "2020/05/02", "2020/05/10", "2020/05/11");
|
||||
List<Pair<String, List<HoodieFileStatus>>> input = partitionPaths.stream()
|
||||
.map(p -> Pair.<String, List<HoodieFileStatus>>of(p, new ArrayList<>())).collect(Collectors.toList());
|
||||
Map<BootstrapMode, List<String>> result = modeSelector.select(input);
|
||||
assertTrue(result.get(mode).contains("2020/05/01"));
|
||||
assertTrue(result.get(mode).contains("2020/05/02"));
|
||||
assertTrue(result.get(mode).contains("2020/05/10"));
|
||||
assertTrue(result.get(mode).contains("2020/05/11"));
|
||||
assertEquals(4, result.get(mode).size());
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,85 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi.table.action.bootstrap;
|
||||
|
||||
import org.apache.hudi.avro.model.HoodieFileStatus;
|
||||
import org.apache.hudi.common.util.collection.Pair;
|
||||
import org.apache.hudi.exception.HoodieException;
|
||||
import org.apache.hudi.testutils.HoodieClientTestBase;
|
||||
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
|
||||
public class TestBootstrapUtils extends HoodieClientTestBase {
|
||||
|
||||
@Test
|
||||
public void testAllLeafFoldersWithFiles() throws IOException {
|
||||
// All directories including marker dirs.
|
||||
List<String> folders = Arrays.asList("2016/04/15", "2016/05/16", "2016/05/17");
|
||||
folders.forEach(f -> {
|
||||
try {
|
||||
metaClient.getFs().mkdirs(new Path(new Path(basePath), f));
|
||||
} catch (IOException e) {
|
||||
throw new HoodieException(e);
|
||||
}
|
||||
});
|
||||
|
||||
// Files inside partitions and marker directories
|
||||
List<String> files = Arrays.asList(
|
||||
"2016/04/15/1_1-0-1_20190528120000.parquet",
|
||||
"2016/04/15/2_1-0-1_20190528120000.parquet",
|
||||
"2016/05/16/3_1-0-1_20190528120000.parquet",
|
||||
"2016/05/16/4_1-0-1_20190528120000.parquet",
|
||||
"2016/04/17/5_1-0-1_20190528120000.parquet",
|
||||
"2016/04/17/6_1-0-1_20190528120000.parquet");
|
||||
|
||||
files.forEach(f -> {
|
||||
try {
|
||||
metaClient.getFs().create(new Path(new Path(basePath), f));
|
||||
} catch (IOException e) {
|
||||
throw new HoodieException(e);
|
||||
}
|
||||
});
|
||||
|
||||
List<Pair<String, List<HoodieFileStatus>>> collected =
|
||||
BootstrapUtils.getAllLeafFoldersWithFiles(metaClient.getFs(), basePath, (status) -> {
|
||||
return true;
|
||||
});
|
||||
assertEquals(3, collected.size());
|
||||
collected.stream().forEach(k -> {
|
||||
assertEquals(2, k.getRight().size());
|
||||
});
|
||||
|
||||
// Simulate reading from un-partitioned dataset
|
||||
collected =
|
||||
BootstrapUtils.getAllLeafFoldersWithFiles(metaClient.getFs(), basePath + "/" + folders.get(0), (status) -> {
|
||||
return true;
|
||||
});
|
||||
assertEquals(1, collected.size());
|
||||
collected.stream().forEach(k -> {
|
||||
assertEquals(2, k.getRight().size());
|
||||
});
|
||||
}
|
||||
}
|
||||
@@ -19,6 +19,7 @@
|
||||
package org.apache.hudi.table.action.compact.strategy;
|
||||
|
||||
import org.apache.hudi.avro.model.HoodieCompactionOperation;
|
||||
import org.apache.hudi.common.model.BaseFile;
|
||||
import org.apache.hudi.common.model.HoodieBaseFile;
|
||||
import org.apache.hudi.common.model.HoodieLogFile;
|
||||
import org.apache.hudi.common.util.Option;
|
||||
@@ -259,7 +260,9 @@ public class TestHoodieCompactionStrategy {
|
||||
operations.add(new HoodieCompactionOperation(df.getCommitTime(),
|
||||
logFiles.stream().map(s -> s.getPath().toString()).collect(Collectors.toList()), df.getPath(), df.getFileId(),
|
||||
partitionPath,
|
||||
config.getCompactionStrategy().captureMetrics(config, Option.of(df), partitionPath, logFiles)));
|
||||
config.getCompactionStrategy().captureMetrics(config, Option.of(df), partitionPath, logFiles),
|
||||
df.getBootstrapBaseFile().map(BaseFile::getPath).orElse(null))
|
||||
);
|
||||
});
|
||||
return operations;
|
||||
}
|
||||
|
||||
@@ -22,14 +22,17 @@ import org.apache.hudi.avro.HoodieAvroUtils;
|
||||
import org.apache.hudi.common.table.HoodieTableMetaClient;
|
||||
import org.apache.hudi.common.testutils.HoodieTestDataGenerator;
|
||||
import org.apache.hudi.common.testutils.HoodieTestUtils;
|
||||
import org.apache.hudi.common.util.collection.Pair;
|
||||
import org.apache.hudi.hadoop.utils.HoodieInputFormatUtils;
|
||||
|
||||
import org.apache.avro.Schema;
|
||||
import org.apache.avro.Schema.Field;
|
||||
import org.apache.avro.generic.GenericRecord;
|
||||
import org.apache.avro.generic.GenericRecordBuilder;
|
||||
import org.apache.hadoop.conf.Configurable;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
|
||||
import org.apache.hadoop.hive.ql.io.IOConstants;
|
||||
import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
|
||||
import org.apache.hadoop.io.ArrayWritable;
|
||||
import org.apache.hadoop.io.Writable;
|
||||
@@ -48,22 +51,39 @@ import java.util.stream.Collectors;
|
||||
* Utility methods to aid in testing MergeOnRead (workaround for HoodieReadClient for MOR).
|
||||
*/
|
||||
public class HoodieMergeOnReadTestUtils {
|
||||
|
||||
/**
 * Convenience overload: delegates to the five-argument
 * {@code getRecordsUsingInputFormat(conf, inputPaths, basePath, jobConf, realtime)} with a new
 * {@code JobConf} built from {@code conf} and {@code realtime} set to {@code true}.
 *
 * @param conf configuration passed to the delegate and used to build the {@code JobConf}
 * @param inputPaths input paths passed to the delegate
 * @param basePath base path passed to the delegate
 * @return the records returned by the delegate
 */
public static List<GenericRecord> getRecordsUsingInputFormat(Configuration conf, List<String> inputPaths,
|
||||
String basePath) {
|
||||
return getRecordsUsingInputFormat(conf, inputPaths, basePath, new JobConf(conf), true);
|
||||
}
|
||||
|
||||
public static List<GenericRecord> getRecordsUsingInputFormat(Configuration conf, List<String> inputPaths,
|
||||
String basePath,
|
||||
JobConf jobConf,
|
||||
boolean realtime) {
|
||||
HoodieTableMetaClient metaClient = new HoodieTableMetaClient(conf, basePath);
|
||||
FileInputFormat inputFormat = HoodieInputFormatUtils.getInputFormat(metaClient.getTableConfig().getBaseFileFormat(),
|
||||
realtime, jobConf);
|
||||
String basePath, JobConf jobConf, boolean realtime) {
|
||||
Schema schema = new Schema.Parser().parse(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA);
|
||||
return getRecordsUsingInputFormat(conf, inputPaths, basePath, jobConf, realtime, schema,
|
||||
HoodieTestDataGenerator.TRIP_HIVE_COLUMN_TYPES, false, new ArrayList<>());
|
||||
}
|
||||
|
||||
public static List<GenericRecord> getRecordsUsingInputFormat(Configuration conf, List<String> inputPaths, String basePath, JobConf jobConf, boolean realtime, Schema rawSchema,
|
||||
String rawHiveColumnTypes, boolean projectCols, List<String> projectedColumns) {
|
||||
|
||||
HoodieTableMetaClient metaClient = new HoodieTableMetaClient(conf, basePath);
|
||||
FileInputFormat inputFormat = HoodieInputFormatUtils.getInputFormat(metaClient.getTableConfig().getBaseFileFormat(), realtime, jobConf);
|
||||
|
||||
Schema schema = HoodieAvroUtils.addMetadataFields(rawSchema);
|
||||
String hiveColumnTypes = HoodieAvroUtils.addMetadataColumnTypes(rawHiveColumnTypes);
|
||||
setPropsForInputFormat(inputFormat, jobConf, schema, hiveColumnTypes, projectCols, projectedColumns);
|
||||
final List<Field> fields;
|
||||
if (projectCols) {
|
||||
fields = schema.getFields().stream().filter(f -> projectedColumns.contains(f.name()))
|
||||
.collect(Collectors.toList());
|
||||
} else {
|
||||
fields = schema.getFields();
|
||||
}
|
||||
final Schema projectedSchema = Schema.createRecord(fields.stream()
|
||||
.map(f -> new Schema.Field(f.name(), f.schema(), f.doc(), f.defaultVal()))
|
||||
.collect(Collectors.toList()));
|
||||
|
||||
Schema schema = HoodieAvroUtils.addMetadataFields(
|
||||
new Schema.Parser().parse(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA));
|
||||
setPropsForInputFormat(inputFormat, jobConf, schema, basePath);
|
||||
return inputPaths.stream().map(path -> {
|
||||
setInputPath(jobConf, path);
|
||||
List<GenericRecord> records = new ArrayList<>();
|
||||
@@ -71,17 +91,18 @@ public class HoodieMergeOnReadTestUtils {
|
||||
List<InputSplit> splits = Arrays.asList(inputFormat.getSplits(jobConf, 1));
|
||||
for (InputSplit split : splits) {
|
||||
RecordReader recordReader = inputFormat.getRecordReader(split, jobConf, null);
|
||||
Void key = (Void) recordReader.createKey();
|
||||
Object key = recordReader.createKey();
|
||||
ArrayWritable writable = (ArrayWritable) recordReader.createValue();
|
||||
while (recordReader.next(key, writable)) {
|
||||
GenericRecordBuilder newRecord = new GenericRecordBuilder(schema);
|
||||
GenericRecordBuilder newRecord = new GenericRecordBuilder(projectedSchema);
|
||||
// writable returns an array with [field1, field2, _hoodie_commit_time,
|
||||
// _hoodie_commit_seqno]
|
||||
Writable[] values = writable.get();
|
||||
assert schema.getFields().size() <= values.length;
|
||||
schema.getFields().forEach(field -> {
|
||||
newRecord.set(field, values[field.pos()]);
|
||||
});
|
||||
schema.getFields().stream()
|
||||
.filter(f -> !projectCols || projectedColumns.contains(f.name()))
|
||||
.map(f -> Pair.of(projectedSchema.getFields().stream()
|
||||
.filter(p -> f.name().equals(p.name())).findFirst().get(), f))
|
||||
.forEach(fieldsPair -> newRecord.set(fieldsPair.getKey(), values[fieldsPair.getValue().pos()]));
|
||||
records.add(newRecord.build());
|
||||
}
|
||||
}
|
||||
@@ -95,29 +116,40 @@ public class HoodieMergeOnReadTestUtils {
|
||||
}).orElse(new ArrayList<>());
|
||||
}
|
||||
|
||||
private static void setPropsForInputFormat(FileInputFormat inputFormat, JobConf jobConf, Schema schema,
|
||||
String basePath) {
|
||||
private static void setPropsForInputFormat(FileInputFormat inputFormat, JobConf jobConf, Schema schema, String hiveColumnTypes, boolean projectCols, List<String> projectedCols) {
|
||||
List<Schema.Field> fields = schema.getFields();
|
||||
String names = fields.stream().map(f -> f.name().toString()).collect(Collectors.joining(","));
|
||||
String postions = fields.stream().map(f -> String.valueOf(f.pos())).collect(Collectors.joining(","));
|
||||
Configuration conf = HoodieTestUtils.getDefaultHadoopConf();
|
||||
final List<String> projectedColNames;
|
||||
if (!projectCols) {
|
||||
projectedColNames = fields.stream().map(Field::name).collect(Collectors.toList());
|
||||
} else {
|
||||
projectedColNames = projectedCols;
|
||||
}
|
||||
|
||||
String hiveColumnNames = fields.stream().filter(field -> !field.name().equalsIgnoreCase("datestr"))
|
||||
String names = fields.stream()
|
||||
.filter(f -> projectedColNames.contains(f.name()))
|
||||
.map(f -> f.name()).collect(Collectors.joining(","));
|
||||
String positions = fields.stream()
|
||||
.filter(f -> projectedColNames.contains(f.name()))
|
||||
.map(f -> String.valueOf(f.pos())).collect(Collectors.joining(","));
|
||||
String hiveColumnNames = fields.stream()
|
||||
.filter(field -> !field.name().equalsIgnoreCase("datestr"))
|
||||
.map(Schema.Field::name).collect(Collectors.joining(","));
|
||||
hiveColumnNames = hiveColumnNames + ",datestr";
|
||||
|
||||
String hiveColumnTypes = HoodieAvroUtils.addMetadataColumnTypes(HoodieTestDataGenerator.TRIP_HIVE_COLUMN_TYPES);
|
||||
hiveColumnTypes = hiveColumnTypes + ",string";
|
||||
Configuration conf = HoodieTestUtils.getDefaultHadoopConf();
|
||||
String hiveColumnTypesWithDatestr = hiveColumnTypes + ",string";
|
||||
jobConf.set(hive_metastoreConstants.META_TABLE_COLUMNS, hiveColumnNames);
|
||||
jobConf.set(hive_metastoreConstants.META_TABLE_COLUMN_TYPES, hiveColumnTypes);
|
||||
jobConf.set(hive_metastoreConstants.META_TABLE_COLUMN_TYPES, hiveColumnTypesWithDatestr);
|
||||
jobConf.set(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR, names);
|
||||
jobConf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, postions);
|
||||
jobConf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, positions);
|
||||
jobConf.set(hive_metastoreConstants.META_TABLE_PARTITION_COLUMNS, "datestr");
|
||||
conf.set(hive_metastoreConstants.META_TABLE_COLUMNS, hiveColumnNames);
|
||||
conf.set(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR, names);
|
||||
conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, postions);
|
||||
conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, positions);
|
||||
conf.set(hive_metastoreConstants.META_TABLE_PARTITION_COLUMNS, "datestr");
|
||||
conf.set(hive_metastoreConstants.META_TABLE_COLUMN_TYPES, hiveColumnTypes);
|
||||
conf.set(hive_metastoreConstants.META_TABLE_COLUMN_TYPES, hiveColumnTypesWithDatestr);
|
||||
conf.set(IOConstants.COLUMNS, hiveColumnNames);
|
||||
conf.get(IOConstants.COLUMNS_TYPES, hiveColumnTypesWithDatestr);
|
||||
|
||||
// Hoodie Input formats are also configurable
|
||||
Configurable configurable = (Configurable)inputFormat;
|
||||
|
||||
@@ -17,6 +17,7 @@
|
||||
###
|
||||
log4j.rootLogger=WARN, CONSOLE
|
||||
log4j.logger.org.apache.hudi=DEBUG
|
||||
log4j.logger.org.apache.hadoop.hbase=ERROR
|
||||
|
||||
# CONSOLE is set to be a ConsoleAppender.
|
||||
log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender
|
||||
|
||||
@@ -18,6 +18,7 @@
|
||||
log4j.rootLogger=WARN, CONSOLE
|
||||
log4j.logger.org.apache=INFO
|
||||
log4j.logger.org.apache.hudi=DEBUG
|
||||
log4j.logger.org.apache.hadoop.hbase=ERROR
|
||||
|
||||
# CONSOLE is set to be a ConsoleAppender.
|
||||
log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender
|
||||
|
||||
Reference in New Issue
Block a user