1
0

[HUDI-624]: Split some of the code from PR for HUDI-479 (#1344)

This commit is contained in:
Suneel Marthi
2020-02-21 01:22:21 -05:00
committed by GitHub
parent 185ff646ad
commit 078d4825d9
31 changed files with 130 additions and 141 deletions

View File

@@ -38,8 +38,6 @@ import org.apache.hudi.config.HoodieCompactionConfig;
import org.apache.hudi.config.HoodieMemoryConfig; import org.apache.hudi.config.HoodieMemoryConfig;
import org.apache.hudi.hive.util.SchemaUtil; import org.apache.hudi.hive.util.SchemaUtil;
import com.google.common.base.Preconditions;
import com.google.common.collect.Maps;
import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.avro.Schema; import org.apache.avro.Schema;
@@ -59,6 +57,7 @@ import java.util.Arrays;
import java.util.HashMap; import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Objects;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import scala.Tuple2; import scala.Tuple2;
@@ -85,14 +84,14 @@ public class HoodieLogFileCommand implements CommandMarker {
List<String> logFilePaths = Arrays.stream(fs.globStatus(new Path(logFilePathPattern))) List<String> logFilePaths = Arrays.stream(fs.globStatus(new Path(logFilePathPattern)))
.map(status -> status.getPath().toString()).collect(Collectors.toList()); .map(status -> status.getPath().toString()).collect(Collectors.toList());
Map<String, List<Tuple3<HoodieLogBlockType, Tuple2<Map<HeaderMetadataType, String>, Map<HeaderMetadataType, String>>, Integer>>> commitCountAndMetadata = Map<String, List<Tuple3<HoodieLogBlockType, Tuple2<Map<HeaderMetadataType, String>, Map<HeaderMetadataType, String>>, Integer>>> commitCountAndMetadata =
Maps.newHashMap(); new HashMap<>();
int numCorruptBlocks = 0; int numCorruptBlocks = 0;
int dummyInstantTimeCount = 0; int dummyInstantTimeCount = 0;
for (String logFilePath : logFilePaths) { for (String logFilePath : logFilePaths) {
FileStatus[] fsStatus = fs.listStatus(new Path(logFilePath)); FileStatus[] fsStatus = fs.listStatus(new Path(logFilePath));
Schema writerSchema = new AvroSchemaConverter() Schema writerSchema = new AvroSchemaConverter()
.convert(Preconditions.checkNotNull(SchemaUtil.readSchemaFromLogFile(fs, new Path(logFilePath)))); .convert(Objects.requireNonNull(SchemaUtil.readSchemaFromLogFile(fs, new Path(logFilePath))));
Reader reader = HoodieLogFormat.newReader(fs, new HoodieLogFile(fsStatus[0].getPath()), writerSchema); Reader reader = HoodieLogFormat.newReader(fs, new HoodieLogFile(fsStatus[0].getPath()), writerSchema);
// read the avro blocks // read the avro blocks
@@ -181,7 +180,7 @@ public class HoodieLogFileCommand implements CommandMarker {
AvroSchemaConverter converter = new AvroSchemaConverter(); AvroSchemaConverter converter = new AvroSchemaConverter();
// get schema from last log file // get schema from last log file
Schema readerSchema = Schema readerSchema =
converter.convert(Preconditions.checkNotNull(SchemaUtil.readSchemaFromLogFile(fs, new Path(logFilePaths.get(logFilePaths.size() - 1))))); converter.convert(Objects.requireNonNull(SchemaUtil.readSchemaFromLogFile(fs, new Path(logFilePaths.get(logFilePaths.size() - 1)))));
List<IndexedRecord> allRecords = new ArrayList<>(); List<IndexedRecord> allRecords = new ArrayList<>();
@@ -204,7 +203,7 @@ public class HoodieLogFileCommand implements CommandMarker {
} else { } else {
for (String logFile : logFilePaths) { for (String logFile : logFilePaths) {
Schema writerSchema = new AvroSchemaConverter() Schema writerSchema = new AvroSchemaConverter()
.convert(Preconditions.checkNotNull(SchemaUtil.readSchemaFromLogFile(client.getFs(), new Path(logFile)))); .convert(Objects.requireNonNull(SchemaUtil.readSchemaFromLogFile(client.getFs(), new Path(logFile))));
HoodieLogFormat.Reader reader = HoodieLogFormat.Reader reader =
HoodieLogFormat.newReader(fs, new HoodieLogFile(new Path(logFile)), writerSchema); HoodieLogFormat.newReader(fs, new HoodieLogFile(new Path(logFile)), writerSchema);
// read the avro blocks // read the avro blocks

View File

@@ -18,11 +18,11 @@
package org.apache.hudi.cli.commands; package org.apache.hudi.cli.commands;
import com.google.common.base.Strings;
import org.apache.hudi.HoodieWriteClient; import org.apache.hudi.HoodieWriteClient;
import org.apache.hudi.cli.DedupeSparkJob; import org.apache.hudi.cli.DedupeSparkJob;
import org.apache.hudi.cli.utils.SparkUtil; import org.apache.hudi.cli.utils.SparkUtil;
import org.apache.hudi.common.util.FSUtils; import org.apache.hudi.common.util.FSUtils;
import org.apache.hudi.common.util.StringUtils;
import org.apache.hudi.config.HoodieIndexConfig; import org.apache.hudi.config.HoodieIndexConfig;
import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.index.HoodieIndex; import org.apache.hudi.index.HoodieIndex;
@@ -81,7 +81,7 @@ public class SparkMain {
case UPSERT: case UPSERT:
assert (args.length >= 12); assert (args.length >= 12);
String propsFilePath = null; String propsFilePath = null;
if (!Strings.isNullOrEmpty(args[11])) { if (!StringUtils.isNullOrEmpty(args[11])) {
propsFilePath = args[11]; propsFilePath = args[11];
} }
List<String> configs = new ArrayList<>(); List<String> configs = new ArrayList<>();
@@ -94,7 +94,7 @@ public class SparkMain {
case COMPACT_RUN: case COMPACT_RUN:
assert (args.length >= 9); assert (args.length >= 9);
propsFilePath = null; propsFilePath = null;
if (!Strings.isNullOrEmpty(args[8])) { if (!StringUtils.isNullOrEmpty(args[8])) {
propsFilePath = args[8]; propsFilePath = args[8];
} }
configs = new ArrayList<>(); configs = new ArrayList<>();
@@ -107,7 +107,7 @@ public class SparkMain {
case COMPACT_SCHEDULE: case COMPACT_SCHEDULE:
assert (args.length >= 6); assert (args.length >= 6);
propsFilePath = null; propsFilePath = null;
if (!Strings.isNullOrEmpty(args[5])) { if (!StringUtils.isNullOrEmpty(args[5])) {
propsFilePath = args[5]; propsFilePath = args[5];
} }
configs = new ArrayList<>(); configs = new ArrayList<>();
@@ -142,7 +142,7 @@ public class SparkMain {
case CLEAN: case CLEAN:
assert (args.length >= 5); assert (args.length >= 5);
propsFilePath = null; propsFilePath = null;
if (!Strings.isNullOrEmpty(args[3])) { if (!StringUtils.isNullOrEmpty(args[3])) {
propsFilePath = args[3]; propsFilePath = args[3];
} }
configs = new ArrayList<>(); configs = new ArrayList<>();

View File

@@ -24,8 +24,6 @@ import org.apache.hudi.cli.commands.SparkMain;
import org.apache.hudi.common.util.FSUtils; import org.apache.hudi.common.util.FSUtils;
import org.apache.hudi.common.util.StringUtils; import org.apache.hudi.common.util.StringUtils;
import com.google.common.base.Preconditions;
import org.apache.spark.SparkConf; import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.launcher.SparkLauncher; import org.apache.spark.launcher.SparkLauncher;
@@ -33,6 +31,7 @@ import org.apache.spark.launcher.SparkLauncher;
import java.io.File; import java.io.File;
import java.net.URISyntaxException; import java.net.URISyntaxException;
import java.util.Map; import java.util.Map;
import java.util.Objects;
/** /**
* Utility functions dealing with Spark. * Utility functions dealing with Spark.
@@ -55,7 +54,7 @@ public class SparkUtil {
sparkLauncher.setPropertiesFile(propertiesFile); sparkLauncher.setPropertiesFile(propertiesFile);
} }
File libDirectory = new File(new File(currentJar).getParent(), "lib"); File libDirectory = new File(new File(currentJar).getParent(), "lib");
for (String library : Preconditions.checkNotNull(libDirectory.list())) { for (String library : Objects.requireNonNull(libDirectory.list())) {
sparkLauncher.addJar(new File(libDirectory, library).getAbsolutePath()); sparkLauncher.addJar(new File(libDirectory, library).getAbsolutePath());
} }
return sparkLauncher; return sparkLauncher;

View File

@@ -29,7 +29,6 @@ import org.apache.hudi.index.HoodieIndex;
import org.apache.hudi.io.compact.strategy.CompactionStrategy; import org.apache.hudi.io.compact.strategy.CompactionStrategy;
import org.apache.hudi.metrics.MetricsReporterType; import org.apache.hudi.metrics.MetricsReporterType;
import com.google.common.base.Preconditions;
import org.apache.parquet.hadoop.metadata.CompressionCodecName; import org.apache.parquet.hadoop.metadata.CompressionCodecName;
import org.apache.spark.storage.StorageLevel; import org.apache.spark.storage.StorageLevel;
@@ -40,6 +39,7 @@ import java.io.FileReader;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.util.Map; import java.util.Map;
import java.util.Objects;
import java.util.Properties; import java.util.Properties;
/** /**
@@ -764,7 +764,7 @@ public class HoodieWriteConfig extends DefaultHoodieConfig {
// Build WriteConfig at the end // Build WriteConfig at the end
HoodieWriteConfig config = new HoodieWriteConfig(props); HoodieWriteConfig config = new HoodieWriteConfig(props);
Preconditions.checkArgument(config.getBasePath() != null); Objects.requireNonNull(config.getBasePath());
return config; return config;
} }
} }

View File

@@ -33,7 +33,7 @@ import java.util.Iterator;
*/ */
public abstract class LazyIterableIterator<I, O> implements Iterable<O>, Iterator<O> { public abstract class LazyIterableIterator<I, O> implements Iterable<O>, Iterator<O> {
protected Iterator<I> inputItr = null; protected Iterator<I> inputItr;
private boolean consumed = false; private boolean consumed = false;
private boolean startCalled = false; private boolean startCalled = false;
private boolean endCalled = false; private boolean endCalled = false;

View File

@@ -18,9 +18,8 @@
package org.apache.hudi.index.bloom; package org.apache.hudi.index.bloom;
import com.google.common.base.Objects;
import java.io.Serializable; import java.io.Serializable;
import java.util.Objects;
/** /**
* Metadata about a given file group, useful for index lookup. * Metadata about a given file group, useful for index lookup.
@@ -80,14 +79,14 @@ public class BloomIndexFileInfo implements Serializable {
} }
BloomIndexFileInfo that = (BloomIndexFileInfo) o; BloomIndexFileInfo that = (BloomIndexFileInfo) o;
return Objects.equal(that.fileId, fileId) && Objects.equal(that.minRecordKey, minRecordKey) return Objects.equals(that.fileId, fileId) && Objects.equals(that.minRecordKey, minRecordKey)
&& Objects.equal(that.maxRecordKey, maxRecordKey); && Objects.equals(that.maxRecordKey, maxRecordKey);
} }
@Override @Override
public int hashCode() { public int hashCode() {
return Objects.hashCode(fileId, minRecordKey, maxRecordKey); return Objects.hash(fileId, minRecordKey, maxRecordKey);
} }
@Override @Override

View File

@@ -44,7 +44,6 @@ import org.apache.hudi.exception.HoodieAppendException;
import org.apache.hudi.exception.HoodieUpsertException; import org.apache.hudi.exception.HoodieUpsertException;
import org.apache.hudi.table.HoodieTable; import org.apache.hudi.table.HoodieTable;
import com.google.common.collect.Maps;
import org.apache.avro.generic.GenericRecord; import org.apache.avro.generic.GenericRecord;
import org.apache.avro.generic.IndexedRecord; import org.apache.avro.generic.IndexedRecord;
import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.Path;
@@ -56,6 +55,7 @@ import org.apache.spark.util.SizeEstimator;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Iterator; import java.util.Iterator;
import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.atomic.AtomicLong;
@@ -97,7 +97,7 @@ public class HoodieAppendHandle<T extends HoodieRecordPayload> extends HoodieWri
// Max block size to limit to for a log block // Max block size to limit to for a log block
private int maxBlockSize = config.getLogFileDataBlockMaxSize(); private int maxBlockSize = config.getLogFileDataBlockMaxSize();
// Header metadata for a log block // Header metadata for a log block
private Map<HeaderMetadataType, String> header = Maps.newHashMap(); private Map<HeaderMetadataType, String> header = new HashMap<>();
// Total number of new records inserted into the delta file // Total number of new records inserted into the delta file
private long insertRecordsWritten = 0; private long insertRecordsWritten = 0;

View File

@@ -49,8 +49,6 @@ import org.apache.hudi.table.HoodieTable;
import com.fasterxml.jackson.databind.DeserializationFeature; import com.fasterxml.jackson.databind.DeserializationFeature;
import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import org.apache.avro.Schema; import org.apache.avro.Schema;
import org.apache.avro.generic.IndexedRecord; import org.apache.avro.generic.IndexedRecord;
import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.Path;
@@ -61,7 +59,9 @@ import org.apache.spark.api.java.JavaSparkContext;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collection; import java.util.Collection;
import java.util.Collections;
import java.util.Comparator; import java.util.Comparator;
import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.stream.Collectors; import java.util.stream.Collectors;
@@ -145,7 +145,7 @@ public class HoodieCommitArchiveLog {
// TODO: Handle ROLLBACK_ACTION in future // TODO: Handle ROLLBACK_ACTION in future
// ROLLBACK_ACTION is currently not defined in HoodieActiveTimeline // ROLLBACK_ACTION is currently not defined in HoodieActiveTimeline
HoodieTimeline cleanAndRollbackTimeline = table.getActiveTimeline() HoodieTimeline cleanAndRollbackTimeline = table.getActiveTimeline()
.getTimelineOfActions(Sets.newHashSet(HoodieTimeline.CLEAN_ACTION)).filterCompletedInstants(); .getTimelineOfActions(Collections.singleton(HoodieTimeline.CLEAN_ACTION)).filterCompletedInstants();
Stream<HoodieInstant> instants = cleanAndRollbackTimeline.getInstants() Stream<HoodieInstant> instants = cleanAndRollbackTimeline.getInstants()
.collect(Collectors.groupingBy(HoodieInstant::getAction)).values().stream().map(hoodieInstants -> { .collect(Collectors.groupingBy(HoodieInstant::getAction)).values().stream().map(hoodieInstants -> {
if (hoodieInstants.size() > maxCommitsToKeep) { if (hoodieInstants.size() > maxCommitsToKeep) {
@@ -270,7 +270,7 @@ public class HoodieCommitArchiveLog {
private void writeToFile(Schema wrapperSchema, List<IndexedRecord> records) throws Exception { private void writeToFile(Schema wrapperSchema, List<IndexedRecord> records) throws Exception {
if (records.size() > 0) { if (records.size() > 0) {
Map<HeaderMetadataType, String> header = Maps.newHashMap(); Map<HeaderMetadataType, String> header = new HashMap<>();
header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, wrapperSchema.toString()); header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, wrapperSchema.toString());
HoodieAvroDataBlock block = new HoodieAvroDataBlock(records, header); HoodieAvroDataBlock block = new HoodieAvroDataBlock(records, header);
this.writer = writer.appendBlock(block); this.writer = writer.appendBlock(block);

View File

@@ -22,8 +22,7 @@ import org.apache.hudi.avro.model.HoodieCompactionOperation;
import org.apache.hudi.avro.model.HoodieCompactionPlan; import org.apache.hudi.avro.model.HoodieCompactionPlan;
import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.config.HoodieWriteConfig;
import com.google.common.collect.Lists; import java.util.ArrayList;
import java.util.List; import java.util.List;
/** /**
@@ -40,7 +39,7 @@ public class BoundedIOCompactionStrategy extends CompactionStrategy {
// Iterate through the operations in order and accept operations as long as we are within the // Iterate through the operations in order and accept operations as long as we are within the
// IO limit // IO limit
// Preserves the original ordering of compactions // Preserves the original ordering of compactions
List<HoodieCompactionOperation> finalOperations = Lists.newArrayList(); List<HoodieCompactionOperation> finalOperations = new ArrayList<>();
long targetIORemaining = writeConfig.getTargetIOPerCompactionInMB(); long targetIORemaining = writeConfig.getTargetIOPerCompactionInMB();
for (HoodieCompactionOperation op : operations) { for (HoodieCompactionOperation op : operations) {
long opIo = op.getMetrics().get(TOTAL_IO_MB).longValue(); long opIo = op.getMetrics().get(TOTAL_IO_MB).longValue();

View File

@@ -28,9 +28,8 @@ import org.apache.hudi.common.util.Option;
import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.io.compact.HoodieMergeOnReadTableCompactor; import org.apache.hudi.io.compact.HoodieMergeOnReadTableCompactor;
import com.google.common.collect.Maps;
import java.io.Serializable; import java.io.Serializable;
import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
@@ -61,7 +60,7 @@ public abstract class CompactionStrategy implements Serializable {
*/ */
public Map<String, Double> captureMetrics(HoodieWriteConfig writeConfig, Option<HoodieBaseFile> dataFile, public Map<String, Double> captureMetrics(HoodieWriteConfig writeConfig, Option<HoodieBaseFile> dataFile,
String partitionPath, List<HoodieLogFile> logFiles) { String partitionPath, List<HoodieLogFile> logFiles) {
Map<String, Double> metrics = Maps.newHashMap(); Map<String, Double> metrics = new HashMap<>();
long defaultMaxParquetFileSize = writeConfig.getParquetMaxFileSize(); long defaultMaxParquetFileSize = writeConfig.getParquetMaxFileSize();
// Total size of all the log files // Total size of all the log files
Long totalLogFileSize = logFiles.stream().map(HoodieLogFile::getFileSize).filter(size -> size >= 0) Long totalLogFileSize = logFiles.stream().map(HoodieLogFile::getFileSize).filter(size -> size >= 0)

View File

@@ -21,7 +21,6 @@ package org.apache.hudi.metrics;
import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.exception.HoodieException; import org.apache.hudi.exception.HoodieException;
import com.google.common.base.Preconditions;
import org.apache.log4j.LogManager; import org.apache.log4j.LogManager;
import org.apache.log4j.Logger; import org.apache.log4j.Logger;
@@ -32,6 +31,7 @@ import javax.management.remote.JMXServiceURL;
import java.io.Closeable; import java.io.Closeable;
import java.lang.management.ManagementFactory; import java.lang.management.ManagementFactory;
import java.rmi.registry.LocateRegistry; import java.rmi.registry.LocateRegistry;
import java.util.Objects;
/** /**
* Implementation of Jmx reporter, which used to report jmx metric. * Implementation of Jmx reporter, which used to report jmx metric.
@@ -67,7 +67,7 @@ public class JmxMetricsReporter extends MetricsReporter {
@Override @Override
public void start() { public void start() {
try { try {
Preconditions.checkNotNull(connector, "Cannot start as the jmxReporter is null."); Objects.requireNonNull(connector, "Cannot start as the jmxReporter is null.");
connector.start(); connector.start();
} catch (Exception e) { } catch (Exception e) {
throw new HoodieException(e); throw new HoodieException(e);

View File

@@ -32,7 +32,6 @@ import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.exception.HoodieRollbackException; import org.apache.hudi.exception.HoodieRollbackException;
import com.google.common.base.Preconditions; import com.google.common.base.Preconditions;
import com.google.common.collect.Maps;
import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.PathFilter; import org.apache.hadoop.fs.PathFilter;
@@ -47,6 +46,7 @@ import java.util.ArrayList;
import java.util.HashMap; import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Objects;
import scala.Tuple2; import scala.Tuple2;
@@ -129,7 +129,7 @@ public class RollbackExecutor implements Serializable {
// getFileStatus would reflect correct stats and FileNotFoundException is not thrown in // getFileStatus would reflect correct stats and FileNotFoundException is not thrown in
// cloud-storage : HUDI-168 // cloud-storage : HUDI-168
Map<FileStatus, Long> filesToNumBlocksRollback = new HashMap<>(); Map<FileStatus, Long> filesToNumBlocksRollback = new HashMap<>();
filesToNumBlocksRollback.put(metaClient.getFs().getFileStatus(Preconditions.checkNotNull(writer).getLogFile().getPath()), 1L); filesToNumBlocksRollback.put(metaClient.getFs().getFileStatus(Objects.requireNonNull(writer).getLogFile().getPath()), 1L);
return new Tuple2<>(rollbackRequest.getPartitionPath(), return new Tuple2<>(rollbackRequest.getPartitionPath(),
HoodieRollbackStat.newBuilder().withPartitionPath(rollbackRequest.getPartitionPath()) HoodieRollbackStat.newBuilder().withPartitionPath(rollbackRequest.getPartitionPath())
.withRollbackBlockAppendResults(filesToNumBlocksRollback).build()); .withRollbackBlockAppendResults(filesToNumBlocksRollback).build());
@@ -215,7 +215,7 @@ public class RollbackExecutor implements Serializable {
private Map<HeaderMetadataType, String> generateHeader(String commit) { private Map<HeaderMetadataType, String> generateHeader(String commit) {
// generate metadata // generate metadata
Map<HeaderMetadataType, String> header = Maps.newHashMap(); Map<HeaderMetadataType, String> header = new HashMap<>();
header.put(HeaderMetadataType.INSTANT_TIME, metaClient.getActiveTimeline().lastInstant().get().getTimestamp()); header.put(HeaderMetadataType.INSTANT_TIME, metaClient.getActiveTimeline().lastInstant().get().getTimestamp());
header.put(HeaderMetadataType.TARGET_INSTANT_TIME, commit); header.put(HeaderMetadataType.TARGET_INSTANT_TIME, commit);
header.put(HeaderMetadataType.COMMAND_BLOCK_TYPE, header.put(HeaderMetadataType.COMMAND_BLOCK_TYPE,

View File

@@ -32,7 +32,6 @@ import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.collection.Pair; import org.apache.hudi.common.util.collection.Pair;
import org.apache.hudi.exception.HoodieException; import org.apache.hudi.exception.HoodieException;
import org.apache.hudi.exception.HoodieIOException; import org.apache.hudi.exception.HoodieIOException;
import org.apache.hudi.func.OperationResult; import org.apache.hudi.func.OperationResult;
import org.apache.log4j.LogManager; import org.apache.log4j.LogManager;
import org.apache.log4j.Logger; import org.apache.log4j.Logger;

View File

@@ -20,13 +20,13 @@ package org.apache.hudi.config;
import org.apache.hudi.config.HoodieWriteConfig.Builder; import org.apache.hudi.config.HoodieWriteConfig.Builder;
import com.google.common.collect.Maps;
import org.junit.Test; import org.junit.Test;
import java.io.ByteArrayInputStream; import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream; import java.io.ByteArrayOutputStream;
import java.io.IOException; import java.io.IOException;
import java.util.Date; import java.util.Date;
import java.util.HashMap;
import java.util.Map; import java.util.Map;
import java.util.Properties; import java.util.Properties;
@@ -37,7 +37,7 @@ public class TestHoodieWriteConfig {
@Test @Test
public void testPropertyLoading() throws IOException { public void testPropertyLoading() throws IOException {
Builder builder = HoodieWriteConfig.newBuilder().withPath("/tmp"); Builder builder = HoodieWriteConfig.newBuilder().withPath("/tmp");
Map<String, String> params = Maps.newHashMap(); Map<String, String> params = new HashMap<>();
params.put(HoodieCompactionConfig.CLEANER_COMMITS_RETAINED_PROP, "1"); params.put(HoodieCompactionConfig.CLEANER_COMMITS_RETAINED_PROP, "1");
params.put(HoodieCompactionConfig.MAX_COMMITS_TO_KEEP_PROP, "5"); params.put(HoodieCompactionConfig.MAX_COMMITS_TO_KEEP_PROP, "5");
params.put(HoodieCompactionConfig.MIN_COMMITS_TO_KEEP_PROP, "2"); params.put(HoodieCompactionConfig.MIN_COMMITS_TO_KEEP_PROP, "2");

View File

@@ -37,7 +37,6 @@ import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.io.HoodieKeyLookupHandle; import org.apache.hudi.io.HoodieKeyLookupHandle;
import org.apache.hudi.table.HoodieTable; import org.apache.hudi.table.HoodieTable;
import com.google.common.collect.Lists;
import org.apache.avro.Schema; import org.apache.avro.Schema;
import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.Path;
import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaPairRDD;
@@ -50,6 +49,7 @@ import org.junit.runners.Parameterized;
import java.io.File; import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.Collection; import java.util.Collection;
import java.util.Collections; import java.util.Collections;
@@ -149,11 +149,11 @@ public class TestHoodieBloomIndex extends HoodieClientTestHarness {
HoodieRecord record4 = HoodieRecord record4 =
new HoodieRecord(new HoodieKey(rowChange4.getRowKey(), rowChange4.getPartitionPath()), rowChange4); new HoodieRecord(new HoodieKey(rowChange4.getRowKey(), rowChange4.getPartitionPath()), rowChange4);
HoodieClientTestUtils.writeParquetFile(basePath, "2016/04/01", "2_0_20160401010101.parquet", Lists.newArrayList(), HoodieClientTestUtils.writeParquetFile(basePath, "2016/04/01", "2_0_20160401010101.parquet", new ArrayList<>(),
schema, null, false); schema, null, false);
HoodieClientTestUtils.writeParquetFile(basePath, "2015/03/12", "1_0_20150312101010.parquet", Lists.newArrayList(), HoodieClientTestUtils.writeParquetFile(basePath, "2015/03/12", "1_0_20150312101010.parquet", new ArrayList<>(),
schema, null, false); schema, null, false);
HoodieClientTestUtils.writeParquetFile(basePath, "2015/03/12", "3_0_20150312101010.parquet", Arrays.asList(record1), HoodieClientTestUtils.writeParquetFile(basePath, "2015/03/12", "3_0_20150312101010.parquet", Collections.singletonList(record1),
schema, null, false); schema, null, false);
HoodieClientTestUtils.writeParquetFile(basePath, "2015/03/12", "4_0_20150312101010.parquet", HoodieClientTestUtils.writeParquetFile(basePath, "2015/03/12", "4_0_20150312101010.parquet",
Arrays.asList(record2, record3, record4), schema, null, false); Arrays.asList(record2, record3, record4), schema, null, false);
@@ -337,11 +337,11 @@ public class TestHoodieBloomIndex extends HoodieClientTestHarness {
// We create three parquet file, each having one record. (two different partitions) // We create three parquet file, each having one record. (two different partitions)
String filename1 = String filename1 =
HoodieClientTestUtils.writeParquetFile(basePath, "2016/01/31", Arrays.asList(record1), schema, null, true); HoodieClientTestUtils.writeParquetFile(basePath, "2016/01/31", Collections.singletonList(record1), schema, null, true);
String filename2 = String filename2 =
HoodieClientTestUtils.writeParquetFile(basePath, "2016/01/31", Arrays.asList(record2), schema, null, true); HoodieClientTestUtils.writeParquetFile(basePath, "2016/01/31", Collections.singletonList(record2), schema, null, true);
String filename3 = String filename3 =
HoodieClientTestUtils.writeParquetFile(basePath, "2015/01/31", Arrays.asList(record4), schema, null, true); HoodieClientTestUtils.writeParquetFile(basePath, "2015/01/31", Collections.singletonList(record4), schema, null, true);
// We do the tag again // We do the tag again
metaClient = HoodieTableMetaClient.reload(metaClient); metaClient = HoodieTableMetaClient.reload(metaClient);
@@ -353,14 +353,14 @@ public class TestHoodieBloomIndex extends HoodieClientTestHarness {
for (HoodieRecord record : taggedRecordRDD.collect()) { for (HoodieRecord record : taggedRecordRDD.collect()) {
if (record.getRecordKey().equals(rowKey1)) { if (record.getRecordKey().equals(rowKey1)) {
if (record.getPartitionPath().equals("2015/01/31")) { if (record.getPartitionPath().equals("2015/01/31")) {
assertTrue(record.getCurrentLocation().getFileId().equals(FSUtils.getFileId(filename3))); assertEquals(record.getCurrentLocation().getFileId(), FSUtils.getFileId(filename3));
} else { } else {
assertTrue(record.getCurrentLocation().getFileId().equals(FSUtils.getFileId(filename1))); assertEquals(record.getCurrentLocation().getFileId(), FSUtils.getFileId(filename1));
} }
} else if (record.getRecordKey().equals(rowKey2)) { } else if (record.getRecordKey().equals(rowKey2)) {
assertTrue(record.getCurrentLocation().getFileId().equals(FSUtils.getFileId(filename2))); assertEquals(record.getCurrentLocation().getFileId(), FSUtils.getFileId(filename2));
} else if (record.getRecordKey().equals(rowKey3)) { } else if (record.getRecordKey().equals(rowKey3)) {
assertTrue(!record.isCurrentLocationKnown()); assertFalse(record.isCurrentLocationKnown());
} }
} }
} }

View File

@@ -33,7 +33,6 @@ import org.apache.hudi.config.HoodieIndexConfig;
import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.table.HoodieTable; import org.apache.hudi.table.HoodieTable;
import com.google.common.collect.Lists;
import org.apache.avro.Schema; import org.apache.avro.Schema;
import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaRDD;
@@ -43,6 +42,7 @@ import org.junit.Test;
import java.io.File; import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.Collections; import java.util.Collections;
import java.util.HashMap; import java.util.HashMap;
@@ -117,11 +117,11 @@ public class TestHoodieGlobalBloomIndex extends HoodieClientTestHarness {
HoodieRecord record4 = HoodieRecord record4 =
new HoodieRecord(new HoodieKey(rowChange4.getRowKey(), rowChange4.getPartitionPath()), rowChange4); new HoodieRecord(new HoodieKey(rowChange4.getRowKey(), rowChange4.getPartitionPath()), rowChange4);
HoodieClientTestUtils.writeParquetFile(basePath, "2016/04/01", "2_0_20160401010101.parquet", Lists.newArrayList(), HoodieClientTestUtils.writeParquetFile(basePath, "2016/04/01", "2_0_20160401010101.parquet", new ArrayList<>(),
schema, null, false); schema, null, false);
HoodieClientTestUtils.writeParquetFile(basePath, "2015/03/12", "1_0_20150312101010.parquet", Lists.newArrayList(), HoodieClientTestUtils.writeParquetFile(basePath, "2015/03/12", "1_0_20150312101010.parquet", new ArrayList<>(),
schema, null, false); schema, null, false);
HoodieClientTestUtils.writeParquetFile(basePath, "2015/03/12", "3_0_20150312101010.parquet", Arrays.asList(record1), HoodieClientTestUtils.writeParquetFile(basePath, "2015/03/12", "3_0_20150312101010.parquet", Collections.singletonList(record1),
schema, null, false); schema, null, false);
HoodieClientTestUtils.writeParquetFile(basePath, "2015/03/12", "4_0_20150312101010.parquet", HoodieClientTestUtils.writeParquetFile(basePath, "2015/03/12", "4_0_20150312101010.parquet",
Arrays.asList(record2, record3, record4), schema, null, false); Arrays.asList(record2, record3, record4), schema, null, false);
@@ -252,7 +252,7 @@ public class TestHoodieGlobalBloomIndex extends HoodieClientTestHarness {
HoodieClientTestUtils.writeParquetFile(basePath, "2016/04/01", Collections.singletonList(record1), HoodieClientTestUtils.writeParquetFile(basePath, "2016/04/01", Collections.singletonList(record1),
schema, null, false); schema, null, false);
String filename1 = String filename1 =
HoodieClientTestUtils.writeParquetFile(basePath, "2015/03/12", Lists.newArrayList(), HoodieClientTestUtils.writeParquetFile(basePath, "2015/03/12", new ArrayList<>(),
schema, null, false); schema, null, false);
String filename2 = String filename2 =
HoodieClientTestUtils.writeParquetFile(basePath, "2015/03/12", Collections.singletonList(record2), HoodieClientTestUtils.writeParquetFile(basePath, "2015/03/12", Collections.singletonList(record2),

View File

@@ -18,9 +18,8 @@
package org.apache.hudi.common.model; package org.apache.hudi.common.model;
import com.google.common.base.Objects;
import java.io.Serializable; import java.io.Serializable;
import java.util.Objects;
/** /**
* HoodieKey consists of * HoodieKey consists of
@@ -58,12 +57,12 @@ public class HoodieKey implements Serializable {
return false; return false;
} }
HoodieKey otherKey = (HoodieKey) o; HoodieKey otherKey = (HoodieKey) o;
return Objects.equal(recordKey, otherKey.recordKey) && Objects.equal(partitionPath, otherKey.partitionPath); return Objects.equals(recordKey, otherKey.recordKey) && Objects.equals(partitionPath, otherKey.partitionPath);
} }
@Override @Override
public int hashCode() { public int hashCode() {
return Objects.hashCode(recordKey, partitionPath); return Objects.hash(recordKey, partitionPath);
} }
@Override @Override

View File

@@ -20,11 +20,11 @@ package org.apache.hudi.common.model;
import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.Option;
import com.google.common.base.Objects;
import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableList;
import java.io.Serializable; import java.io.Serializable;
import java.util.List; import java.util.List;
import java.util.Objects;
/** /**
* A Single Record managed by Hoodie. * A Single Record managed by Hoodie.
@@ -141,13 +141,13 @@ public class HoodieRecord<T extends HoodieRecordPayload> implements Serializable
return false; return false;
} }
HoodieRecord that = (HoodieRecord) o; HoodieRecord that = (HoodieRecord) o;
return Objects.equal(key, that.key) && Objects.equal(data, that.data) return Objects.equals(key, that.key) && Objects.equals(data, that.data)
&& Objects.equal(currentLocation, that.currentLocation) && Objects.equal(newLocation, that.newLocation); && Objects.equals(currentLocation, that.currentLocation) && Objects.equals(newLocation, that.newLocation);
} }
@Override @Override
public int hashCode() { public int hashCode() {
return Objects.hashCode(key, data, currentLocation, newLocation); return Objects.hash(key, data, currentLocation, newLocation);
} }
@Override @Override

View File

@@ -18,9 +18,8 @@
package org.apache.hudi.common.model; package org.apache.hudi.common.model;
import com.google.common.base.Objects;
import java.io.Serializable; import java.io.Serializable;
import java.util.Objects;
/** /**
* Location of a HoodieRecord within the partition it belongs to. Ultimately, this points to an actual file on disk * Location of a HoodieRecord within the partition it belongs to. Ultimately, this points to an actual file on disk
@@ -44,12 +43,12 @@ public class HoodieRecordLocation implements Serializable {
return false; return false;
} }
HoodieRecordLocation otherLoc = (HoodieRecordLocation) o; HoodieRecordLocation otherLoc = (HoodieRecordLocation) o;
return Objects.equal(instantTime, otherLoc.instantTime) && Objects.equal(fileId, otherLoc.fileId); return Objects.equals(instantTime, otherLoc.instantTime) && Objects.equals(fileId, otherLoc.fileId);
} }
@Override @Override
public int hashCode() { public int hashCode() {
return Objects.hashCode(instantTime, fileId); return Objects.hash(instantTime, fileId);
} }
@Override @Override

View File

@@ -268,11 +268,7 @@ public final class BufferedRandomAccessFile extends RandomAccessFile {
this.seek(this.currentPosition); this.seek(this.currentPosition);
// if currentPosition is at start, EOF has been reached // if currentPosition is at start, EOF has been reached
if (this.currentPosition == this.validLastPosition) { return this.currentPosition != this.validLastPosition;
return false;
}
return true;
} }
/** /**

View File

@@ -47,6 +47,7 @@ import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.LinkedList; import java.util.LinkedList;
import java.util.List; import java.util.List;
import java.util.Objects;
import java.util.Map.Entry; import java.util.Map.Entry;
import java.util.UUID; import java.util.UUID;
import java.util.function.Function; import java.util.function.Function;
@@ -248,7 +249,7 @@ public class FSUtils {
} }
public static String getFileExtension(String fullName) { public static String getFileExtension(String fullName) {
Preconditions.checkNotNull(fullName); Objects.requireNonNull(fullName);
String fileName = (new File(fullName)).getName(); String fileName = (new File(fullName)).getName();
int dotIndex = fileName.indexOf('.'); int dotIndex = fileName.indexOf('.');
return dotIndex == -1 ? "" : fileName.substring(dotIndex); return dotIndex == -1 ? "" : fileName.substring(dotIndex);

View File

@@ -16,12 +16,6 @@
package org.apache.hudi.common.util; package org.apache.hudi.common.util;
import com.google.common.base.Preconditions;
import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;
import com.google.common.collect.Sets;
import java.lang.management.ManagementFactory; import java.lang.management.ManagementFactory;
import java.lang.management.MemoryPoolMXBean; import java.lang.management.MemoryPoolMXBean;
import java.lang.reflect.Array; import java.lang.reflect.Array;
@@ -29,9 +23,13 @@ import java.lang.reflect.Field;
import java.lang.reflect.Modifier; import java.lang.reflect.Modifier;
import java.util.ArrayDeque; import java.util.ArrayDeque;
import java.util.Arrays; import java.util.Arrays;
import java.util.Collections;
import java.util.Deque; import java.util.Deque;
import java.util.IdentityHashMap;
import java.util.LinkedList; import java.util.LinkedList;
import java.util.List; import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set; import java.util.Set;
/** /**
@@ -121,16 +119,9 @@ public class ObjectSizeCalculator {
// added. // added.
private final int superclassFieldPadding; private final int superclassFieldPadding;
private final LoadingCache<Class<?>, ClassSizeInfo> classSizeInfos = private final Map<Class<?>, ClassSizeInfo> classSizeInfos = new IdentityHashMap<>();
CacheBuilder.newBuilder().build(new CacheLoader<Class<?>, ClassSizeInfo>() {
@Override
public ClassSizeInfo load(Class<?> clazz) {
return new ClassSizeInfo(clazz);
}
});
private final Set<Object> alreadyVisited = Collections.newSetFromMap(new IdentityHashMap<>());
private final Set<Object> alreadyVisited = Sets.newIdentityHashSet();
private final Deque<Object> pending = new ArrayDeque<>(16 * 1024); private final Deque<Object> pending = new ArrayDeque<>(16 * 1024);
private long size; private long size;
@@ -140,7 +131,7 @@ public class ObjectSizeCalculator {
* @param memoryLayoutSpecification a description of the JVM memory layout. * @param memoryLayoutSpecification a description of the JVM memory layout.
*/ */
public ObjectSizeCalculator(MemoryLayoutSpecification memoryLayoutSpecification) { public ObjectSizeCalculator(MemoryLayoutSpecification memoryLayoutSpecification) {
Preconditions.checkNotNull(memoryLayoutSpecification); Objects.requireNonNull(memoryLayoutSpecification);
arrayHeaderSize = memoryLayoutSpecification.getArrayHeaderSize(); arrayHeaderSize = memoryLayoutSpecification.getArrayHeaderSize();
objectHeaderSize = memoryLayoutSpecification.getObjectHeaderSize(); objectHeaderSize = memoryLayoutSpecification.getObjectHeaderSize();
objectPadding = memoryLayoutSpecification.getObjectPadding(); objectPadding = memoryLayoutSpecification.getObjectPadding();
@@ -175,6 +166,15 @@ public class ObjectSizeCalculator {
} }
} }
private ClassSizeInfo getClassSizeInfo(final Class<?> clazz) {
ClassSizeInfo csi = classSizeInfos.get(clazz);
if (csi == null) {
csi = new ClassSizeInfo(clazz);
classSizeInfos.put(clazz, csi);
}
return csi;
}
private void visit(Object obj) { private void visit(Object obj) {
if (alreadyVisited.contains(obj)) { if (alreadyVisited.contains(obj)) {
return; return;
@@ -187,7 +187,7 @@ public class ObjectSizeCalculator {
if (clazz.isArray()) { if (clazz.isArray()) {
visitArray(obj); visitArray(obj);
} else { } else {
classSizeInfos.getUnchecked(clazz).visit(obj, this); getClassSizeInfo(clazz).visit(obj, this);
} }
} }
} }
@@ -282,7 +282,7 @@ public class ObjectSizeCalculator {
} }
final Class<?> superClass = clazz.getSuperclass(); final Class<?> superClass = clazz.getSuperclass();
if (superClass != null) { if (superClass != null) {
final ClassSizeInfo superClassInfo = classSizeInfos.getUnchecked(superClass); final ClassSizeInfo superClassInfo = getClassSizeInfo(superClass);
fieldsSize += roundTo(superClassInfo.fieldsSize, superclassFieldPadding); fieldsSize += roundTo(superClassInfo.fieldsSize, superclassFieldPadding);
referenceFields.addAll(Arrays.asList(superClassInfo.referenceFields)); referenceFields.addAll(Arrays.asList(superClassInfo.referenceFields));
} }

View File

@@ -25,7 +25,6 @@ import org.apache.hudi.common.table.log.block.HoodieAvroDataBlock;
import org.apache.hudi.common.table.log.block.HoodieLogBlock; import org.apache.hudi.common.table.log.block.HoodieLogBlock;
import org.apache.hudi.common.util.SchemaTestUtil; import org.apache.hudi.common.util.SchemaTestUtil;
import com.google.common.collect.Maps;
import org.apache.avro.generic.IndexedRecord; import org.apache.avro.generic.IndexedRecord;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileSystem;
@@ -45,6 +44,7 @@ import org.junit.Test;
import java.io.File; import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.net.URISyntaxException; import java.net.URISyntaxException;
import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.UUID; import java.util.UUID;
@@ -97,7 +97,7 @@ public class TestHoodieLogFormatAppendFailure {
// Some data & append. // Some data & append.
List<IndexedRecord> records = SchemaTestUtil.generateTestRecords(0, 10); List<IndexedRecord> records = SchemaTestUtil.generateTestRecords(0, 10);
Map<HoodieLogBlock.HeaderMetadataType, String> header = Maps.newHashMap(); Map<HoodieLogBlock.HeaderMetadataType, String> header = new HashMap<>(2);
header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "100"); header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "100");
header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, getSimpleSchema().toString()); header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, getSimpleSchema().toString());
HoodieAvroDataBlock dataBlock = new HoodieAvroDataBlock(records, header); HoodieAvroDataBlock dataBlock = new HoodieAvroDataBlock(records, header);

View File

@@ -199,7 +199,6 @@ public class TestHoodieActiveTimeline extends HoodieCommonTestHarness {
checkTimeline.accept(timeline.getCommitsAndCompactionTimeline(), checkTimeline.accept(timeline.getCommitsAndCompactionTimeline(),
Sets.newHashSet(HoodieTimeline.COMMIT_ACTION, HoodieTimeline.DELTA_COMMIT_ACTION, HoodieTimeline.COMPACTION_ACTION)); Sets.newHashSet(HoodieTimeline.COMMIT_ACTION, HoodieTimeline.DELTA_COMMIT_ACTION, HoodieTimeline.COMPACTION_ACTION));
checkTimeline.accept(timeline.getCommitTimeline(), Collections.singleton(HoodieTimeline.COMMIT_ACTION)); checkTimeline.accept(timeline.getCommitTimeline(), Collections.singleton(HoodieTimeline.COMMIT_ACTION));
checkTimeline.accept(timeline.getDeltaCommitTimeline(), Collections.singleton(HoodieTimeline.DELTA_COMMIT_ACTION)); checkTimeline.accept(timeline.getDeltaCommitTimeline(), Collections.singleton(HoodieTimeline.DELTA_COMMIT_ACTION));
checkTimeline.accept(timeline.getCleanerTimeline(), Collections.singleton(HoodieTimeline.CLEAN_ACTION)); checkTimeline.accept(timeline.getCleanerTimeline(), Collections.singleton(HoodieTimeline.CLEAN_ACTION));
checkTimeline.accept(timeline.getRollbackTimeline(), Collections.singleton(HoodieTimeline.ROLLBACK_ACTION)); checkTimeline.accept(timeline.getRollbackTimeline(), Collections.singleton(HoodieTimeline.ROLLBACK_ACTION));

View File

@@ -40,7 +40,6 @@ import org.apache.hudi.common.util.FSUtils;
import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.collection.Pair; import org.apache.hudi.common.util.collection.Pair;
import com.google.common.collect.Lists;
import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.Path;
import org.apache.log4j.LogManager; import org.apache.log4j.LogManager;
@@ -814,7 +813,7 @@ public class TestHoodieTableFileSystemView extends HoodieCommonTestHarness {
roView.getAllBaseFiles("2016/05/01/"); roView.getAllBaseFiles("2016/05/01/");
List<HoodieBaseFile> dataFiles = List<HoodieBaseFile> dataFiles =
roView.getLatestBaseFilesInRange(Lists.newArrayList(commitTime2, commitTime3)).collect(Collectors.toList()); roView.getLatestBaseFilesInRange(Arrays.asList(commitTime2, commitTime3)).collect(Collectors.toList());
assertEquals(isLatestFileSliceOnly ? 2 : 3, dataFiles.size()); assertEquals(isLatestFileSliceOnly ? 2 : 3, dataFiles.size());
Set<String> filenames = new HashSet<>(); Set<String> filenames = new HashSet<>();
for (HoodieBaseFile status : dataFiles) { for (HoodieBaseFile status : dataFiles) {
@@ -828,7 +827,7 @@ public class TestHoodieTableFileSystemView extends HoodieCommonTestHarness {
} }
List<FileSlice> slices = List<FileSlice> slices =
rtView.getLatestFileSliceInRange(Lists.newArrayList(commitTime3, commitTime4)).collect(Collectors.toList()); rtView.getLatestFileSliceInRange(Arrays.asList(commitTime3, commitTime4)).collect(Collectors.toList());
assertEquals(3, slices.size()); assertEquals(3, slices.size());
for (FileSlice slice : slices) { for (FileSlice slice : slices) {
if (slice.getFileId().equals(fileId1)) { if (slice.getFileId().equals(fileId1)) {

View File

@@ -159,13 +159,12 @@ public class TestCompactionUtils extends HoodieCommonTestHarness {
Pair<List<Pair<String, FileSlice>>, HoodieCompactionPlan> inputAndPlan = buildCompactionPlan(); Pair<List<Pair<String, FileSlice>>, HoodieCompactionPlan> inputAndPlan = buildCompactionPlan();
HoodieCompactionPlan plan = inputAndPlan.getRight(); HoodieCompactionPlan plan = inputAndPlan.getRight();
List<HoodieCompactionOperation> originalOps = plan.getOperations(); List<HoodieCompactionOperation> originalOps = plan.getOperations();
List<HoodieCompactionOperation> regeneratedOps = originalOps.stream().map(op -> {
// Convert to CompactionOperation // Convert to CompactionOperation
return CompactionUtils.buildCompactionOperation(op);
}).map(op2 -> {
// Convert back to HoodieCompactionOperation and check for equality // Convert back to HoodieCompactionOperation and check for equality
return CompactionUtils.buildHoodieCompactionOperation(op2); List<HoodieCompactionOperation> regeneratedOps = originalOps.stream()
}).collect(Collectors.toList()); .map(CompactionUtils::buildCompactionOperation)
.map(CompactionUtils::buildHoodieCompactionOperation)
.collect(Collectors.toList());
Assert.assertTrue("Transformation did get tested", originalOps.size() > 0); Assert.assertTrue("Transformation did get tested", originalOps.size() > 0);
Assert.assertEquals("All fields set correctly in transformations", originalOps, regeneratedOps); Assert.assertEquals("All fields set correctly in transformations", originalOps, regeneratedOps);
} }
@@ -247,11 +246,9 @@ public class TestCompactionUtils extends HoodieCommonTestHarness {
op.getDataFilePath()); op.getDataFilePath());
} }
List<String> paths = slice.getLogFiles().map(l -> l.getPath().toString()).collect(Collectors.toList()); List<String> paths = slice.getLogFiles().map(l -> l.getPath().toString()).collect(Collectors.toList());
IntStream.range(0, paths.size()).boxed().forEach(idx -> { IntStream.range(0, paths.size()).boxed().forEach(idx -> Assert.assertEquals("Log File Index " + idx,
Assert.assertEquals("Log File Index " + idx,
version == COMPACTION_METADATA_VERSION_1 ? paths.get(idx) : new Path(paths.get(idx)).getName(), version == COMPACTION_METADATA_VERSION_1 ? paths.get(idx) : new Path(paths.get(idx)).getName(),
op.getDeltaFilePaths().get(idx)); op.getDeltaFilePaths().get(idx)));
});
Assert.assertEquals("Metrics set", METRICS, op.getMetrics()); Assert.assertEquals("Metrics set", METRICS, op.getMetrics());
} }

View File

@@ -18,15 +18,14 @@
package org.apache.hudi.hive; package org.apache.hudi.hive;
import com.google.common.base.Objects;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import org.apache.parquet.schema.MessageType; import org.apache.parquet.schema.MessageType;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap; import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.StringJoiner;
/** /**
* Represents the schema difference between the storage schema and hive table schema. * Represents the schema difference between the storage schema and hive table schema.
@@ -43,9 +42,9 @@ public class SchemaDifference {
Map<String, String> updateColumnTypes, Map<String, String> addColumnTypes) { Map<String, String> updateColumnTypes, Map<String, String> addColumnTypes) {
this.storageSchema = storageSchema; this.storageSchema = storageSchema;
this.tableSchema = tableSchema; this.tableSchema = tableSchema;
this.deleteColumns = ImmutableList.copyOf(deleteColumns); this.deleteColumns = Collections.unmodifiableList(deleteColumns);
this.updateColumnTypes = ImmutableMap.copyOf(updateColumnTypes); this.updateColumnTypes = Collections.unmodifiableMap(updateColumnTypes);
this.addColumnTypes = ImmutableMap.copyOf(addColumnTypes); this.addColumnTypes = Collections.unmodifiableMap(addColumnTypes);
} }
public List<String> getDeleteColumns() { public List<String> getDeleteColumns() {
@@ -60,12 +59,6 @@ public class SchemaDifference {
return addColumnTypes; return addColumnTypes;
} }
@Override
public String toString() {
return Objects.toStringHelper(this).add("deleteColumns", deleteColumns).add("updateColumnTypes", updateColumnTypes)
.add("addColumnTypes", addColumnTypes).toString();
}
public static Builder newBuilder(MessageType storageSchema, Map<String, String> tableSchema) { public static Builder newBuilder(MessageType storageSchema, Map<String, String> tableSchema) {
return new Builder(storageSchema, tableSchema); return new Builder(storageSchema, tableSchema);
} }
@@ -74,6 +67,17 @@ public class SchemaDifference {
return deleteColumns.isEmpty() && updateColumnTypes.isEmpty() && addColumnTypes.isEmpty(); return deleteColumns.isEmpty() && updateColumnTypes.isEmpty() && addColumnTypes.isEmpty();
} }
@Override
public String toString() {
return new StringJoiner(", ", SchemaDifference.class.getSimpleName() + "[", "]")
.add("storageSchema=" + storageSchema)
.add("tableSchema=" + tableSchema)
.add("deleteColumns=" + deleteColumns)
.add("updateColumnTypes=" + updateColumnTypes)
.add("addColumnTypes=" + addColumnTypes)
.toString();
}
public static class Builder { public static class Builder {
private final MessageType storageSchema; private final MessageType storageSchema;

View File

@@ -18,8 +18,6 @@
package org.apache.hudi.hive.util; package org.apache.hudi.hive.util;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.Path;
import org.apache.hudi.common.model.HoodieLogFile; import org.apache.hudi.common.model.HoodieLogFile;
@@ -42,6 +40,8 @@ import org.apache.parquet.schema.Type;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Set; import java.util.Set;
@@ -67,7 +67,7 @@ public class SchemaUtil {
} }
LOG.info("Getting schema difference for " + tableSchema + "\r\n\r\n" + newTableSchema); LOG.info("Getting schema difference for " + tableSchema + "\r\n\r\n" + newTableSchema);
SchemaDifference.Builder schemaDiffBuilder = SchemaDifference.newBuilder(storageSchema, tableSchema); SchemaDifference.Builder schemaDiffBuilder = SchemaDifference.newBuilder(storageSchema, tableSchema);
Set<String> tableColumns = Sets.newHashSet(); Set<String> tableColumns = new HashSet<>();
for (Map.Entry<String, String> field : tableSchema.entrySet()) { for (Map.Entry<String, String> field : tableSchema.entrySet()) {
String fieldName = field.getKey().toLowerCase(); String fieldName = field.getKey().toLowerCase();
@@ -140,7 +140,7 @@ public class SchemaUtil {
* @return : Hive Table schema read from parquet file MAP[String,String] * @return : Hive Table schema read from parquet file MAP[String,String]
*/ */
public static Map<String, String> convertParquetSchemaToHiveSchema(MessageType messageType) throws IOException { public static Map<String, String> convertParquetSchemaToHiveSchema(MessageType messageType) throws IOException {
Map<String, String> schema = Maps.newLinkedHashMap(); Map<String, String> schema = new LinkedHashMap<>();
List<Type> parquetFields = messageType.getFields(); List<Type> parquetFields = messageType.getFields();
for (Type parquetType : parquetFields) { for (Type parquetType : parquetFields) {
StringBuilder result = new StringBuilder(); StringBuilder result = new StringBuilder();

View File

@@ -24,7 +24,6 @@ import org.apache.hudi.hive.HoodieHiveClient.PartitionEvent;
import org.apache.hudi.hive.HoodieHiveClient.PartitionEvent.PartitionEventType; import org.apache.hudi.hive.HoodieHiveClient.PartitionEvent.PartitionEventType;
import org.apache.hudi.hive.util.SchemaUtil; import org.apache.hudi.hive.util.SchemaUtil;
import com.google.common.collect.Lists;
import org.apache.hadoop.hive.metastore.api.Partition; import org.apache.hadoop.hive.metastore.api.Partition;
import org.apache.parquet.schema.MessageType; import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.OriginalType; import org.apache.parquet.schema.OriginalType;
@@ -345,7 +344,7 @@ public class TestHiveSyncTool {
HiveSyncConfig hiveSyncConfig = HiveSyncConfig.copy(TestUtil.hiveSyncConfig); HiveSyncConfig hiveSyncConfig = HiveSyncConfig.copy(TestUtil.hiveSyncConfig);
hiveSyncConfig.partitionValueExtractorClass = MultiPartKeysValueExtractor.class.getCanonicalName(); hiveSyncConfig.partitionValueExtractorClass = MultiPartKeysValueExtractor.class.getCanonicalName();
hiveSyncConfig.tableName = "multi_part_key"; hiveSyncConfig.tableName = "multi_part_key";
hiveSyncConfig.partitionFields = Lists.newArrayList("year", "month", "day"); hiveSyncConfig.partitionFields = Arrays.asList("year", "month", "day");
TestUtil.getCreatedTablesSet().add(hiveSyncConfig.databaseName + "." + hiveSyncConfig.tableName); TestUtil.getCreatedTablesSet().add(hiveSyncConfig.databaseName + "." + hiveSyncConfig.tableName);
HoodieHiveClient hiveClient = new HoodieHiveClient(hiveSyncConfig, TestUtil.getHiveConf(), TestUtil.fileSystem); HoodieHiveClient hiveClient = new HoodieHiveClient(hiveSyncConfig, TestUtil.getHiveConf(), TestUtil.fileSystem);

View File

@@ -43,9 +43,6 @@ import org.apache.hudi.common.util.FileIOUtils;
import org.apache.hudi.common.util.SchemaTestUtil; import org.apache.hudi.common.util.SchemaTestUtil;
import org.apache.hudi.hive.util.HiveTestService; import org.apache.hudi.hive.util.HiveTestService;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import org.apache.avro.Schema; import org.apache.avro.Schema;
import org.apache.avro.generic.IndexedRecord; import org.apache.avro.generic.IndexedRecord;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
@@ -68,6 +65,10 @@ import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.net.URISyntaxException; import java.net.URISyntaxException;
import java.nio.charset.StandardCharsets; import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Map.Entry; import java.util.Map.Entry;
@@ -86,7 +87,7 @@ public class TestUtil {
static HiveSyncConfig hiveSyncConfig; static HiveSyncConfig hiveSyncConfig;
private static DateTimeFormatter dtfOut; private static DateTimeFormatter dtfOut;
static FileSystem fileSystem; static FileSystem fileSystem;
private static Set<String> createdTablesSet = Sets.newHashSet(); private static Set<String> createdTablesSet = new HashSet<>();
public static void setUp() throws IOException, InterruptedException { public static void setUp() throws IOException, InterruptedException {
if (dfsCluster == null) { if (dfsCluster == null) {
@@ -114,7 +115,7 @@ public class TestUtil {
hiveSyncConfig.basePath = "/tmp/hdfs/TestHiveSyncTool/"; hiveSyncConfig.basePath = "/tmp/hdfs/TestHiveSyncTool/";
hiveSyncConfig.assumeDatePartitioning = true; hiveSyncConfig.assumeDatePartitioning = true;
hiveSyncConfig.usePreApacheInputFormat = false; hiveSyncConfig.usePreApacheInputFormat = false;
hiveSyncConfig.partitionFields = Lists.newArrayList("datestr"); hiveSyncConfig.partitionFields = Collections.singletonList("datestr");
dtfOut = DateTimeFormat.forPattern("yyyy/MM/dd"); dtfOut = DateTimeFormat.forPattern("yyyy/MM/dd");
@@ -249,7 +250,7 @@ public class TestUtil {
private static List<HoodieWriteStat> createTestData(Path partPath, boolean isParquetSchemaSimple, String commitTime) private static List<HoodieWriteStat> createTestData(Path partPath, boolean isParquetSchemaSimple, String commitTime)
throws IOException, URISyntaxException { throws IOException, URISyntaxException {
List<HoodieWriteStat> writeStats = Lists.newArrayList(); List<HoodieWriteStat> writeStats = new ArrayList<>();
for (int i = 0; i < 5; i++) { for (int i = 0; i < 5; i++) {
// Create 5 files // Create 5 files
String fileId = UUID.randomUUID().toString(); String fileId = UUID.randomUUID().toString();
@@ -297,7 +298,7 @@ public class TestUtil {
.overBaseCommit(dataFile.getCommitTime()).withFs(fileSystem).build(); .overBaseCommit(dataFile.getCommitTime()).withFs(fileSystem).build();
List<IndexedRecord> records = (isLogSchemaSimple ? SchemaTestUtil.generateTestRecords(0, 100) List<IndexedRecord> records = (isLogSchemaSimple ? SchemaTestUtil.generateTestRecords(0, 100)
: SchemaTestUtil.generateEvolvedTestRecords(100, 100)); : SchemaTestUtil.generateEvolvedTestRecords(100, 100));
Map<HeaderMetadataType, String> header = Maps.newHashMap(); Map<HeaderMetadataType, String> header = new HashMap<>(2);
header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, dataFile.getCommitTime()); header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, dataFile.getCommitTime());
header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, schema.toString()); header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, schema.toString());
HoodieAvroDataBlock dataBlock = new HoodieAvroDataBlock(records, header); HoodieAvroDataBlock dataBlock = new HoodieAvroDataBlock(records, header);

View File

@@ -68,11 +68,12 @@ import java.sql.SQLException;
import java.sql.DriverManager; import java.sql.DriverManager;
import java.sql.Driver; import java.sql.Driver;
import java.util.Arrays; import java.util.Arrays;
import java.util.Enumeration;
import java.util.HashMap; import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Objects;
import java.util.Properties; import java.util.Properties;
import java.util.Enumeration;
/** /**
* Bunch of helper methods. * Bunch of helper methods.
@@ -279,14 +280,14 @@ public class UtilHelpers {
} }
} }
Preconditions.checkNotNull(driver, String.format("Did not find registered driver with class %s", driverClass)); Objects.requireNonNull(driver, String.format("Did not find registered driver with class %s", driverClass));
Properties properties = new Properties(); Properties properties = new Properties();
properties.putAll(options); properties.putAll(options);
Connection connect = null; Connection connect;
String url = options.get(JDBCOptions.JDBC_URL()); String url = options.get(JDBCOptions.JDBC_URL());
connect = driver.connect(url, properties); connect = driver.connect(url, properties);
Preconditions.checkNotNull(connect, String.format("The driver could not open a JDBC connection. Check the URL: %s", url)); Objects.requireNonNull(connect, String.format("The driver could not open a JDBC connection. Check the URL: %s", url));
return connect; return connect;
} }