/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.uber.hoodie.func; import com.uber.hoodie.WriteStatus; import com.uber.hoodie.common.model.HoodieRecord; import com.uber.hoodie.common.model.HoodieRecordPayload; import com.uber.hoodie.common.util.queue.BoundedInMemoryExecutor; import com.uber.hoodie.common.util.queue.BoundedInMemoryQueueConsumer; import com.uber.hoodie.config.HoodieWriteConfig; import com.uber.hoodie.exception.HoodieException; import com.uber.hoodie.io.HoodieCreateHandle; import com.uber.hoodie.io.HoodieIOHandle; import com.uber.hoodie.table.HoodieTable; import java.util.ArrayList; import java.util.Iterator; import java.util.List; import java.util.Optional; import java.util.function.Function; import org.apache.avro.Schema; import org.apache.avro.generic.IndexedRecord; /** * Lazy Iterable, that writes a stream of HoodieRecords sorted by the partitionPath, into new * files. */ public class CopyOnWriteLazyInsertIterable extends LazyIterableIterator, List> { protected final HoodieWriteConfig hoodieConfig; protected final String commitTime; protected final HoodieTable hoodieTable; protected final String idPrefix; protected int numFilesWritten; public CopyOnWriteLazyInsertIterable(Iterator> sortedRecordItr, HoodieWriteConfig config, String commitTime, HoodieTable hoodieTable, String idPrefix) { super(sortedRecordItr); this.hoodieConfig = config; this.commitTime = commitTime; this.hoodieTable = hoodieTable; this.idPrefix = idPrefix; this.numFilesWritten = 0; } // Used for caching HoodieRecord along with insertValue. We need this to offload computation work to buffering thread. static class HoodieInsertValueGenResult { public T record; public Optional insertValue; // It caches the exception seen while fetching insert value. public Optional exception = Optional.empty(); public HoodieInsertValueGenResult(T record, Schema schema) { this.record = record; try { this.insertValue = record.getData().getInsertValue(schema); } catch (Exception e) { this.exception = Optional.of(e); } } } /** * Transformer function to help transform a HoodieRecord. This transformer is used by BufferedIterator to offload some * expensive operations of transformation to the reader thread. */ static Function, HoodieInsertValueGenResult> getTransformFunction(Schema schema) { return hoodieRecord -> new HoodieInsertValueGenResult(hoodieRecord, schema); } @Override protected void start() { } @Override protected List computeNext() { // Executor service used for launching writer thread. BoundedInMemoryExecutor, HoodieInsertValueGenResult, List> bufferedIteratorExecutor = null; try { final Schema schema = new Schema.Parser().parse(hoodieConfig.getSchema()); bufferedIteratorExecutor = new SparkBoundedInMemoryExecutor<>(hoodieConfig, inputItr, getInsertHandler(), getTransformFunction(schema)); final List result = bufferedIteratorExecutor.execute(); assert result != null && !result.isEmpty() && !bufferedIteratorExecutor.isRemaining(); return result; } catch (Exception e) { throw new HoodieException(e); } finally { if (null != bufferedIteratorExecutor) { bufferedIteratorExecutor.shutdownNow(); } } } @Override protected void end() { } protected String getNextFileId(String idPfx) { return String.format("%s-%d", idPfx, numFilesWritten++); } protected CopyOnWriteInsertHandler getInsertHandler() { return new CopyOnWriteInsertHandler(); } /** * Consumes stream of hoodie records from in-memory queue and * writes to one or more create-handles */ protected class CopyOnWriteInsertHandler extends BoundedInMemoryQueueConsumer, List> { protected final List statuses = new ArrayList<>(); protected HoodieIOHandle handle; @Override protected void consumeOneRecord(HoodieInsertValueGenResult payload) { final HoodieRecord insertPayload = payload.record; // lazily initialize the handle, for the first time if (handle == null) { handle = new HoodieCreateHandle(hoodieConfig, commitTime, hoodieTable, insertPayload.getPartitionPath(), getNextFileId(idPrefix)); } if (handle.canWrite(payload.record)) { // write the payload, if the handle has capacity handle.write(insertPayload, payload.insertValue, payload.exception); } else { // handle is full. statuses.add(handle.close()); // Need to handle the rejected payload & open new handle handle = new HoodieCreateHandle(hoodieConfig, commitTime, hoodieTable, insertPayload.getPartitionPath(), getNextFileId(idPrefix)); handle.write(insertPayload, payload.insertValue, payload.exception); // we should be able to write 1 payload. } } @Override protected void finish() { if (handle != null) { statuses.add(handle.close()); } handle = null; assert statuses.size() > 0; } @Override protected List getResult() { return statuses; } } }