[HUDI-846][HUDI-848] Enable Incremental cleaning and embedded timeline-server by default (#1634)
This commit is contained in:
Committed by: GitHub
Parent: f802d4400b
Commit: 74ecc27e92
@@ -96,7 +96,7 @@ public class HoodieCompactionConfig extends DefaultHoodieConfig {
|
|||||||
private static final String DEFAULT_CLEANER_POLICY = HoodieCleaningPolicy.KEEP_LATEST_COMMITS.name();
|
private static final String DEFAULT_CLEANER_POLICY = HoodieCleaningPolicy.KEEP_LATEST_COMMITS.name();
|
||||||
private static final String DEFAULT_AUTO_CLEAN = "true";
|
private static final String DEFAULT_AUTO_CLEAN = "true";
|
||||||
private static final String DEFAULT_INLINE_COMPACT = "false";
|
private static final String DEFAULT_INLINE_COMPACT = "false";
|
||||||
private static final String DEFAULT_INCREMENTAL_CLEANER = "false";
|
private static final String DEFAULT_INCREMENTAL_CLEANER = "true";
|
||||||
private static final String DEFAULT_INLINE_COMPACT_NUM_DELTA_COMMITS = "1";
|
private static final String DEFAULT_INLINE_COMPACT_NUM_DELTA_COMMITS = "1";
|
||||||
private static final String DEFAULT_CLEANER_FILE_VERSIONS_RETAINED = "3";
|
private static final String DEFAULT_CLEANER_FILE_VERSIONS_RETAINED = "3";
|
||||||
private static final String DEFAULT_CLEANER_COMMITS_RETAINED = "10";
|
private static final String DEFAULT_CLEANER_COMMITS_RETAINED = "10";
|
||||||
|
|||||||
@@ -82,7 +82,7 @@ public class HoodieWriteConfig extends DefaultHoodieConfig {
|
|||||||
private static final String DEFAULT_FINALIZE_WRITE_PARALLELISM = DEFAULT_PARALLELISM;
|
private static final String DEFAULT_FINALIZE_WRITE_PARALLELISM = DEFAULT_PARALLELISM;
|
||||||
|
|
||||||
private static final String EMBEDDED_TIMELINE_SERVER_ENABLED = "hoodie.embed.timeline.server";
|
private static final String EMBEDDED_TIMELINE_SERVER_ENABLED = "hoodie.embed.timeline.server";
|
||||||
private static final String DEFAULT_EMBEDDED_TIMELINE_SERVER_ENABLED = "false";
|
private static final String DEFAULT_EMBEDDED_TIMELINE_SERVER_ENABLED = "true";
|
||||||
|
|
||||||
private static final String FAIL_ON_TIMELINE_ARCHIVING_ENABLED_PROP = "hoodie.fail.on.timeline.archiving";
|
private static final String FAIL_ON_TIMELINE_ARCHIVING_ENABLED_PROP = "hoodie.fail.on.timeline.archiving";
|
||||||
private static final String DEFAULT_FAIL_ON_TIMELINE_ARCHIVING_ENABLED = "true";
|
private static final String DEFAULT_FAIL_ON_TIMELINE_ARCHIVING_ENABLED = "true";
|
||||||
|
|||||||
@@ -30,6 +30,9 @@ import org.apache.hudi.common.model.HoodieTableType;
|
|||||||
import org.apache.hudi.common.model.HoodieTestUtils;
|
import org.apache.hudi.common.model.HoodieTestUtils;
|
||||||
import org.apache.hudi.common.table.HoodieTableMetaClient;
|
import org.apache.hudi.common.table.HoodieTableMetaClient;
|
||||||
import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
|
import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
|
||||||
|
import org.apache.hudi.common.table.timeline.HoodieInstant;
|
||||||
|
import org.apache.hudi.common.table.timeline.HoodieInstant.State;
|
||||||
|
import org.apache.hudi.common.table.timeline.HoodieTimeline;
|
||||||
import org.apache.hudi.common.util.Option;
|
import org.apache.hudi.common.util.Option;
|
||||||
import org.apache.hudi.config.HoodieCompactionConfig;
|
import org.apache.hudi.config.HoodieCompactionConfig;
|
||||||
import org.apache.hudi.config.HoodieIndexConfig;
|
import org.apache.hudi.config.HoodieIndexConfig;
|
||||||
@@ -152,9 +155,13 @@ public class TestHoodieCompactor extends HoodieClientTestHarness {
|
|||||||
HoodieIndex index = new HoodieBloomIndex<>(config);
|
HoodieIndex index = new HoodieBloomIndex<>(config);
|
||||||
updatedRecords = index.tagLocation(updatedRecordsRDD, jsc, table).collect();
|
updatedRecords = index.tagLocation(updatedRecordsRDD, jsc, table).collect();
|
||||||
|
|
||||||
// Write them to corresponding avro logfiles
|
// Write them to corresponding avro logfiles. Also, set the state transition properly.
|
||||||
HoodieTestUtils.writeRecordsToLogFiles(fs, metaClient.getBasePath(),
|
HoodieTestUtils.writeRecordsToLogFiles(fs, metaClient.getBasePath(),
|
||||||
HoodieTestDataGenerator.AVRO_SCHEMA_WITH_METADATA_FIELDS, updatedRecords);
|
HoodieTestDataGenerator.AVRO_SCHEMA_WITH_METADATA_FIELDS, updatedRecords);
|
||||||
|
metaClient.getActiveTimeline().transitionRequestedToInflight(new HoodieInstant(State.REQUESTED,
|
||||||
|
HoodieTimeline.DELTA_COMMIT_ACTION, newCommitTime), Option.empty());
|
||||||
|
writeClient.commit(newCommitTime, jsc.emptyRDD(), Option.empty());
|
||||||
|
metaClient.reloadActiveTimeline();
|
||||||
|
|
||||||
// Verify that every data file has one log file
|
// Verify that every data file has one log file
|
||||||
table = HoodieTable.create(config, hadoopConf);
|
table = HoodieTable.create(config, hadoopConf);
|
||||||
|
|||||||
@@ -171,12 +171,6 @@
|
|||||||
<scope>test</scope>
|
<scope>test</scope>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.eclipse.jetty.aggregate</groupId>
|
|
||||||
<artifactId>jetty-all</artifactId>
|
|
||||||
<scope>test</scope>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<!-- Hadoop - Test -->
|
<!-- Hadoop - Test -->
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.hadoop</groupId>
|
<groupId>org.apache.hadoop</groupId>
|
||||||
|
|||||||
@@ -105,6 +105,7 @@ public class HiveTestService {
|
|||||||
executorService = Executors.newSingleThreadExecutor();
|
executorService = Executors.newSingleThreadExecutor();
|
||||||
tServer = startMetaStore(bindIP, metastorePort, serverConf);
|
tServer = startMetaStore(bindIP, metastorePort, serverConf);
|
||||||
|
|
||||||
|
serverConf.set("hive.in.test", "true");
|
||||||
hiveServer = startHiveServer(serverConf);
|
hiveServer = startHiveServer(serverConf);
|
||||||
|
|
||||||
String serverHostname;
|
String serverHostname;
|
||||||
|
|||||||
@@ -225,6 +225,12 @@
|
|||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.spark</groupId>
|
<groupId>org.apache.spark</groupId>
|
||||||
<artifactId>spark-core_${scala.binary.version}</artifactId>
|
<artifactId>spark-core_${scala.binary.version}</artifactId>
|
||||||
|
<exclusions>
|
||||||
|
<exclusion>
|
||||||
|
<groupId>javax.servlet</groupId>
|
||||||
|
<artifactId>*</artifactId>
|
||||||
|
</exclusion>
|
||||||
|
</exclusions>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.spark</groupId>
|
<groupId>org.apache.spark</groupId>
|
||||||
@@ -253,7 +259,17 @@
|
|||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.hadoop</groupId>
|
<groupId>org.apache.hadoop</groupId>
|
||||||
<artifactId>hadoop-common</artifactId>
|
<artifactId>hadoop-common</artifactId>
|
||||||
<scope>provided</scope>
|
<exclusions>
|
||||||
|
<exclusion>
|
||||||
|
<groupId>javax.servlet</groupId>
|
||||||
|
<artifactId>*</artifactId>
|
||||||
|
</exclusion>
|
||||||
|
<exclusion>
|
||||||
|
<groupId>javax.servlet.jsp</groupId>
|
||||||
|
<artifactId>*</artifactId>
|
||||||
|
</exclusion>
|
||||||
|
</exclusions>
|
||||||
|
<scope>provided</scope>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<!-- Hive -->
|
<!-- Hive -->
|
||||||
@@ -277,16 +293,42 @@
|
|||||||
<groupId>${hive.groupid}</groupId>
|
<groupId>${hive.groupid}</groupId>
|
||||||
<artifactId>hive-jdbc</artifactId>
|
<artifactId>hive-jdbc</artifactId>
|
||||||
<version>${hive.version}</version>
|
<version>${hive.version}</version>
|
||||||
|
<exclusions>
|
||||||
|
<exclusion>
|
||||||
|
<groupId>javax.servlet</groupId>
|
||||||
|
<artifactId>*</artifactId>
|
||||||
|
</exclusion>
|
||||||
|
<exclusion>
|
||||||
|
<groupId>javax.servlet.jsp</groupId>
|
||||||
|
<artifactId>*</artifactId>
|
||||||
|
</exclusion>
|
||||||
|
</exclusions>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>${hive.groupid}</groupId>
|
<groupId>${hive.groupid}</groupId>
|
||||||
<artifactId>hive-metastore</artifactId>
|
<artifactId>hive-metastore</artifactId>
|
||||||
<version>${hive.version}</version>
|
<version>${hive.version}</version>
|
||||||
|
<exclusions>
|
||||||
|
<exclusion>
|
||||||
|
<groupId>javax.servlet</groupId>
|
||||||
|
<artifactId>*</artifactId>
|
||||||
|
</exclusion>
|
||||||
|
<exclusion>
|
||||||
|
<groupId>javax.servlet.jsp</groupId>
|
||||||
|
<artifactId>*</artifactId>
|
||||||
|
</exclusion>
|
||||||
|
</exclusions>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>${hive.groupid}</groupId>
|
<groupId>${hive.groupid}</groupId>
|
||||||
<artifactId>hive-common</artifactId>
|
<artifactId>hive-common</artifactId>
|
||||||
<version>${hive.version}</version>
|
<version>${hive.version}</version>
|
||||||
|
<exclusions>
|
||||||
|
<exclusion>
|
||||||
|
<groupId>org.eclipse.jetty.orbit</groupId>
|
||||||
|
<artifactId>javax.servlet</artifactId>
|
||||||
|
</exclusion>
|
||||||
|
</exclusions>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<!-- Hoodie - Test -->
|
<!-- Hoodie - Test -->
|
||||||
|
|||||||
@@ -86,13 +86,8 @@
|
|||||||
<!-- Needs to be at the top to ensure we get the correct dependency versions for jetty-server -->
|
<!-- Needs to be at the top to ensure we get the correct dependency versions for jetty-server -->
|
||||||
<groupId>org.eclipse.jetty.aggregate</groupId>
|
<groupId>org.eclipse.jetty.aggregate</groupId>
|
||||||
<artifactId>jetty-all</artifactId>
|
<artifactId>jetty-all</artifactId>
|
||||||
<scope>test</scope>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.eclipse.jetty</groupId>
|
|
||||||
<artifactId>jetty-server</artifactId>
|
|
||||||
<version>${jetty.version}</version>
|
<version>${jetty.version}</version>
|
||||||
|
<classifier>uber</classifier>
|
||||||
<scope>test</scope>
|
<scope>test</scope>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
|
|||||||
pom.xml (9 changed lines)
@@ -102,7 +102,7 @@
|
|||||||
<scalatest.version>3.0.1</scalatest.version>
|
<scalatest.version>3.0.1</scalatest.version>
|
||||||
<surefire-log4j.file>file://${project.basedir}/src/test/resources/log4j-surefire.properties</surefire-log4j.file>
|
<surefire-log4j.file>file://${project.basedir}/src/test/resources/log4j-surefire.properties</surefire-log4j.file>
|
||||||
<thrift.version>0.12.0</thrift.version>
|
<thrift.version>0.12.0</thrift.version>
|
||||||
<jetty.version>7.6.0.v20120127</jetty.version>
|
<jetty.version>9.4.15.v20190215</jetty.version>
|
||||||
<hbase.version>1.2.3</hbase.version>
|
<hbase.version>1.2.3</hbase.version>
|
||||||
<codehaus-jackson.version>1.9.13</codehaus-jackson.version>
|
<codehaus-jackson.version>1.9.13</codehaus-jackson.version>
|
||||||
<h2.version>1.4.199</h2.version>
|
<h2.version>1.4.199</h2.version>
|
||||||
@@ -463,13 +463,6 @@
|
|||||||
<artifactId>jersey-container-servlet-core</artifactId>
|
<artifactId>jersey-container-servlet-core</artifactId>
|
||||||
<version>${glassfish.version}</version>
|
<version>${glassfish.version}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<!-- Needed for running HiveServer for Tests -->
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.eclipse.jetty.aggregate</groupId>
|
|
||||||
<artifactId>jetty-all</artifactId>
|
|
||||||
<scope>test</scope>
|
|
||||||
<version>${jetty.version}</version>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<!-- Avro -->
|
<!-- Avro -->
|
||||||
<dependency>
|
<dependency>
|
||||||
|
|||||||
Reference in New Issue
Block a user