[HUDI-285] Implement HoodieStorageWriter based on actual file type (#936)
This commit is contained in:
@@ -18,6 +18,9 @@
|
|||||||
|
|
||||||
package org.apache.hudi.io.storage;
|
package org.apache.hudi.io.storage;
|
||||||
|
|
||||||
|
import static org.apache.hudi.common.model.HoodieFileFormat.HOODIE_LOG;
|
||||||
|
import static org.apache.hudi.common.model.HoodieFileFormat.PARQUET;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import org.apache.avro.Schema;
|
import org.apache.avro.Schema;
|
||||||
import org.apache.avro.generic.IndexedRecord;
|
import org.apache.avro.generic.IndexedRecord;
|
||||||
@@ -25,6 +28,7 @@ import org.apache.hadoop.fs.Path;
|
|||||||
import org.apache.hudi.avro.HoodieAvroWriteSupport;
|
import org.apache.hudi.avro.HoodieAvroWriteSupport;
|
||||||
import org.apache.hudi.common.BloomFilter;
|
import org.apache.hudi.common.BloomFilter;
|
||||||
import org.apache.hudi.common.model.HoodieRecordPayload;
|
import org.apache.hudi.common.model.HoodieRecordPayload;
|
||||||
|
import org.apache.hudi.common.util.FSUtils;
|
||||||
import org.apache.hudi.config.HoodieWriteConfig;
|
import org.apache.hudi.config.HoodieWriteConfig;
|
||||||
import org.apache.hudi.table.HoodieTable;
|
import org.apache.hudi.table.HoodieTable;
|
||||||
import org.apache.parquet.avro.AvroSchemaConverter;
|
import org.apache.parquet.avro.AvroSchemaConverter;
|
||||||
@@ -34,9 +38,12 @@ public class HoodieStorageWriterFactory {
|
|||||||
public static <T extends HoodieRecordPayload, R extends IndexedRecord> HoodieStorageWriter<R> getStorageWriter(
|
public static <T extends HoodieRecordPayload, R extends IndexedRecord> HoodieStorageWriter<R> getStorageWriter(
|
||||||
String commitTime, Path path, HoodieTable<T> hoodieTable,
|
String commitTime, Path path, HoodieTable<T> hoodieTable,
|
||||||
HoodieWriteConfig config, Schema schema) throws IOException {
|
HoodieWriteConfig config, Schema schema) throws IOException {
|
||||||
//TODO - based on the metadata choose the implementation of HoodieStorageWriter
|
final String name = path.getName();
|
||||||
// Currently only parquet is supported
|
final String extension = FSUtils.isLogFile(path) ? HOODIE_LOG.getFileExtension() : FSUtils.getFileExtension(name);
|
||||||
return newParquetStorageWriter(commitTime, path, config, schema, hoodieTable);
|
if (PARQUET.getFileExtension().equals(extension)) {
|
||||||
|
return newParquetStorageWriter(commitTime, path, config, schema, hoodieTable);
|
||||||
|
}
|
||||||
|
throw new UnsupportedOperationException(extension + " format not supported yet.");
|
||||||
}
|
}
|
||||||
|
|
||||||
private static <T extends HoodieRecordPayload,
|
private static <T extends HoodieRecordPayload,
|
||||||
|
|||||||
@@ -0,0 +1,62 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hudi.io.storage;
|
||||||
|
|
||||||
|
import static org.junit.Assert.fail;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import org.apache.avro.generic.IndexedRecord;
|
||||||
|
import org.apache.hadoop.fs.Path;
|
||||||
|
import org.apache.hudi.TestHoodieClientBase;
|
||||||
|
import org.apache.hudi.common.HoodieTestDataGenerator;
|
||||||
|
import org.apache.hudi.config.HoodieWriteConfig;
|
||||||
|
import org.apache.hudi.table.HoodieTable;
|
||||||
|
import org.junit.Assert;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tests for {@link HoodieStorageWriterFactory}.
|
||||||
|
*/
|
||||||
|
public class TestHoodieStorageWriterFactory extends TestHoodieClientBase {
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testGetStorageWriter() throws IOException {
|
||||||
|
// Parquet file format: the factory is expected to return a HoodieParquetWriter.
|
||||||
|
final String commitTime = "100";
|
||||||
|
final Path parquetPath = new Path(basePath + "/partition/path/f1_1-0-1_000.parquet");
|
||||||
|
final HoodieWriteConfig cfg = getConfig();
|
||||||
|
HoodieTable table = HoodieTable.getHoodieTable(metaClient, cfg, jsc);
|
||||||
|
HoodieStorageWriter<IndexedRecord> parquetWriter =
|
||||||
|
HoodieStorageWriterFactory.getStorageWriter(
|
||||||
|
commitTime, parquetPath, table, cfg, HoodieTestDataGenerator.avroSchema);
|
||||||
|
Assert.assertTrue(parquetWriter instanceof HoodieParquetWriter);
|
||||||
|
|
||||||
|
// Any other file format (here, a log file path) is expected to raise UnsupportedOperationException.
|
||||||
|
final Path logPath = new Path(basePath + "/partition/path/f.b51192a8-574b-4a85-b246-bcfec03ac8bf_100.log.2_1-0-1");
|
||||||
|
try {
|
||||||
|
HoodieStorageWriter<IndexedRecord> logWriter =
|
||||||
|
HoodieStorageWriterFactory.getStorageWriter(
|
||||||
|
commitTime, logPath, table, cfg, HoodieTestDataGenerator.avroSchema);
|
||||||
|
fail("should fail since log storage writer is not supported yet.");
|
||||||
|
} catch (Exception e) {
|
||||||
|
Assert.assertTrue(e instanceof UnsupportedOperationException);
|
||||||
|
Assert.assertTrue(e.getMessage().contains("format not supported yet."));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user