1
0

[HUDI-846][HUDI-848] Enable Incremental cleaning and embedded timeline-server by default (#1634)

This commit is contained in:
Balaji Varadarajan
2020-05-20 05:29:43 -07:00
committed by GitHub
parent f802d4400b
commit 74ecc27e92
8 changed files with 56 additions and 24 deletions

View File

@@ -96,7 +96,7 @@ public class HoodieCompactionConfig extends DefaultHoodieConfig {
private static final String DEFAULT_CLEANER_POLICY = HoodieCleaningPolicy.KEEP_LATEST_COMMITS.name();
private static final String DEFAULT_AUTO_CLEAN = "true";
private static final String DEFAULT_INLINE_COMPACT = "false";
private static final String DEFAULT_INCREMENTAL_CLEANER = "false";
private static final String DEFAULT_INCREMENTAL_CLEANER = "true";
private static final String DEFAULT_INLINE_COMPACT_NUM_DELTA_COMMITS = "1";
private static final String DEFAULT_CLEANER_FILE_VERSIONS_RETAINED = "3";
private static final String DEFAULT_CLEANER_COMMITS_RETAINED = "10";

View File

@@ -82,7 +82,7 @@ public class HoodieWriteConfig extends DefaultHoodieConfig {
private static final String DEFAULT_FINALIZE_WRITE_PARALLELISM = DEFAULT_PARALLELISM;
private static final String EMBEDDED_TIMELINE_SERVER_ENABLED = "hoodie.embed.timeline.server";
private static final String DEFAULT_EMBEDDED_TIMELINE_SERVER_ENABLED = "false";
private static final String DEFAULT_EMBEDDED_TIMELINE_SERVER_ENABLED = "true";
private static final String FAIL_ON_TIMELINE_ARCHIVING_ENABLED_PROP = "hoodie.fail.on.timeline.archiving";
private static final String DEFAULT_FAIL_ON_TIMELINE_ARCHIVING_ENABLED = "true";

View File

@@ -30,6 +30,9 @@ import org.apache.hudi.common.model.HoodieTableType;
import org.apache.hudi.common.model.HoodieTestUtils;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.HoodieInstant.State;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.config.HoodieCompactionConfig;
import org.apache.hudi.config.HoodieIndexConfig;
@@ -152,9 +155,13 @@ public class TestHoodieCompactor extends HoodieClientTestHarness {
HoodieIndex index = new HoodieBloomIndex<>(config);
updatedRecords = index.tagLocation(updatedRecordsRDD, jsc, table).collect();
// Write them to corresponding avro logfiles
// Write them to corresponding avro logfiles. Also, set the state transition properly.
HoodieTestUtils.writeRecordsToLogFiles(fs, metaClient.getBasePath(),
HoodieTestDataGenerator.AVRO_SCHEMA_WITH_METADATA_FIELDS, updatedRecords);
metaClient.getActiveTimeline().transitionRequestedToInflight(new HoodieInstant(State.REQUESTED,
HoodieTimeline.DELTA_COMMIT_ACTION, newCommitTime), Option.empty());
writeClient.commit(newCommitTime, jsc.emptyRDD(), Option.empty());
metaClient.reloadActiveTimeline();
// Verify that all data file has one log file
table = HoodieTable.create(config, hadoopConf);

View File

@@ -171,12 +171,6 @@
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.eclipse.jetty.aggregate</groupId>
<artifactId>jetty-all</artifactId>
<scope>test</scope>
</dependency>
<!-- Hadoop - Test -->
<dependency>
<groupId>org.apache.hadoop</groupId>

View File

@@ -105,6 +105,7 @@ public class HiveTestService {
executorService = Executors.newSingleThreadExecutor();
tServer = startMetaStore(bindIP, metastorePort, serverConf);
serverConf.set("hive.in.test", "true");
hiveServer = startHiveServer(serverConf);
String serverHostname;

View File

@@ -225,6 +225,12 @@
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_${scala.binary.version}</artifactId>
<exclusions>
<exclusion>
<groupId>javax.servlet</groupId>
<artifactId>*</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
@@ -253,7 +259,17 @@
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<scope>provided</scope>
<exclusions>
<exclusion>
<groupId>javax.servlet</groupId>
<artifactId>*</artifactId>
</exclusion>
<exclusion>
<groupId>javax.servlet.jsp</groupId>
<artifactId>*</artifactId>
</exclusion>
</exclusions>
<scope>provided</scope>
</dependency>
<!-- Hive -->
@@ -277,16 +293,42 @@
<groupId>${hive.groupid}</groupId>
<artifactId>hive-jdbc</artifactId>
<version>${hive.version}</version>
<exclusions>
<exclusion>
<groupId>javax.servlet</groupId>
<artifactId>*</artifactId>
</exclusion>
<exclusion>
<groupId>javax.servlet.jsp</groupId>
<artifactId>*</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>${hive.groupid}</groupId>
<artifactId>hive-metastore</artifactId>
<version>${hive.version}</version>
<exclusions>
<exclusion>
<groupId>javax.servlet</groupId>
<artifactId>*</artifactId>
</exclusion>
<exclusion>
<groupId>javax.servlet.jsp</groupId>
<artifactId>*</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>${hive.groupid}</groupId>
<artifactId>hive-common</artifactId>
<version>${hive.version}</version>
<exclusions>
<exclusion>
<groupId>org.eclipse.jetty.orbit</groupId>
<artifactId>javax.servlet</artifactId>
</exclusion>
</exclusions>
</dependency>
<!-- Hoodie - Test -->

View File

@@ -86,13 +86,8 @@
<!-- Needs to be at the top to ensure we get the correct dependency versions for jetty-server -->
<groupId>org.eclipse.jetty.aggregate</groupId>
<artifactId>jetty-all</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-server</artifactId>
<version>${jetty.version}</version>
<classifier>uber</classifier>
<scope>test</scope>
</dependency>

View File

@@ -102,7 +102,7 @@
<scalatest.version>3.0.1</scalatest.version>
<surefire-log4j.file>file://${project.basedir}/src/test/resources/log4j-surefire.properties</surefire-log4j.file>
<thrift.version>0.12.0</thrift.version>
<jetty.version>7.6.0.v20120127</jetty.version>
<jetty.version>9.4.15.v20190215</jetty.version>
<hbase.version>1.2.3</hbase.version>
<codehaus-jackson.version>1.9.13</codehaus-jackson.version>
<h2.version>1.4.199</h2.version>
@@ -463,13 +463,6 @@
<artifactId>jersey-container-servlet-core</artifactId>
<version>${glassfish.version}</version>
</dependency>
<!-- Needed for running HiveServer for Tests -->
<dependency>
<groupId>org.eclipse.jetty.aggregate</groupId>
<artifactId>jetty-all</artifactId>
<scope>test</scope>
<version>${jetty.version}</version>
</dependency>
<!-- Avro -->
<dependency>