|
| 1 | +/* |
| 2 | + * Copyright (c) Facebook, Inc. and its affiliates. |
| 3 | + * |
| 4 | + * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | + * you may not use this file except in compliance with the License. |
| 6 | + * You may obtain a copy of the License at |
| 7 | + * |
| 8 | + * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | + * |
| 10 | + * Unless required by applicable law or agreed to in writing, software |
| 11 | + * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | + * See the License for the specific language governing permissions and |
| 14 | + * limitations under the License. |
| 15 | + */ |
| 16 | + |
| 17 | +#pragma once |
| 18 | + |
| 19 | +#include "velox/connectors/hive/HiveDataSink.h" |
| 20 | + |
| 21 | +namespace facebook::velox::connector::hive::iceberg { |
| 22 | + |
| 23 | +/// Represents an Iceberg write request; derives from HiveInsertTableHandle. |
| 24 | +class IcebergInsertTableHandle final : public HiveInsertTableHandle { |
| 25 | + public: |
| 26 | + /// @param inputColumns Columns from the table schema to write. The input |
| 27 | + /// RowVector must have the same number of columns and matching types in the |
| 28 | + /// same order. Column names in the RowVector may differ from those in |
| 29 | + /// inputColumns; only position and type must align. All columns present in |
| 30 | + /// the input data must be included; mismatches can lead to write failure. |
| 31 | + /// @param locationHandle Contains the target location information including: |
| 32 | + /// - Base directory path where data files will be written. |
| 33 | + /// - File naming scheme and temporary directory paths. |
| 34 | + /// @param tableStorageFormat Storage file format of the table being written. |
| 35 | + /// @param compressionKind Optional compression to apply to data files. |
| 36 | + /// @param serdeParameters Additional serialization/deserialization parameters |
| 37 | + /// for the file format. |
| 38 | + IcebergInsertTableHandle( |
| 39 | + std::vector<HiveColumnHandlePtr> inputColumns, |
| 40 | + LocationHandlePtr locationHandle, |
| 41 | + dwio::common::FileFormat tableStorageFormat, |
| 42 | + std::optional<common::CompressionKind> compressionKind = {}, |
| 43 | + const std::unordered_map<std::string, std::string>& serdeParameters = {}); |
| 44 | +}; |
| 45 | + |
| 46 | +using IcebergInsertTableHandlePtr = |
| 47 | + std::shared_ptr<const IcebergInsertTableHandle>; |
| 48 | + |
| 49 | +class IcebergDataSink : public HiveDataSink { |
| 50 | + public: |
| 51 | + IcebergDataSink( |
| 52 | + RowTypePtr inputType, |
| 53 | + IcebergInsertTableHandlePtr insertTableHandle, |
| 54 | + const ConnectorQueryCtx* connectorQueryCtx, |
| 55 | + CommitStrategy commitStrategy, |
| 56 | + const std::shared_ptr<const HiveConfig>& hiveConfig); |
| 57 | + |
| 58 | + /// Overrides the base-class commitMessage() to generate Iceberg-specific |
| 59 | + /// commit messages with metadata about the written files: one JSON object |
| 60 | + /// per writer, in the format expected by Presto and Spark for Iceberg tables. |
| 61 | + /// |
| 62 | + /// Each commit message contains: |
| 63 | + /// - path: full file path where data was written. |
| 64 | + /// - fileSizeInBytes: raw bytes written to disk. |
| 65 | + /// - metrics: object with recordCount (number of rows written). |
| 66 | + /// - partitionSpecJson: partition specification. |
| 67 | + /// - fileFormat: storage format (e.g., "PARQUET"). |
| 68 | + /// - content: file content type ("DATA" for data files). |
| 69 | + /// |
| 70 | + /// See Presto's CommitTaskData for the corresponding Java definition: |
| 71 | + /// https://github.com/prestodb/presto/blob/master/presto-iceberg/src/main/java/com/facebook/presto/iceberg/CommitTaskData.java |
| 72 | + /// |
| 73 | + /// Note: Complete Iceberg metrics are not yet implemented, so the resulting |
| 74 | + /// manifest files are incomplete and may lead to suboptimal query planning. |
| 75 | + /// |
| 76 | + /// @return Vector of JSON strings, one per writer, formatted according to |
| 77 | + /// the Presto and Spark Iceberg commit protocol. |
| 78 | + std::vector<std::string> commitMessage() const override; |
| 79 | +}; |
| 80 | + |
| 81 | +} // namespace facebook::velox::connector::hive::iceberg |
0 commit comments