Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/iceberg/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ set(ICEBERG_SOURCES
json_internal.cc
manifest/manifest_adapter.cc
manifest/manifest_entry.cc
manifest/manifest_group.cc
manifest/manifest_list.cc
manifest/manifest_reader.cc
manifest/manifest_writer.cc
Expand Down
12 changes: 12 additions & 0 deletions src/iceberg/constants.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,24 @@

#pragma once

/// \file iceberg/constants.h
/// This file defines constants used commonly and shared across multiple
/// source files. It is mostly useful to add constants that are used as
/// default values in the class definitions in the header files without
/// including other headers just for the constant definitions.

#include <cstdint>
#include <string_view>

namespace iceberg {

constexpr std::string_view kParquetFieldIdKey = "PARQUET:field_id";
constexpr int64_t kInvalidSnapshotId = -1;
/// \brief Stand-in for the current sequence number that will be assigned when the commit
/// is successful. This is replaced when writing a manifest list by the ManifestFile
/// adapter.
constexpr int64_t kUnassignedSequenceNumber = -1;

// TODO(gangwu): move other commonly used constants here.

} // namespace iceberg
36 changes: 15 additions & 21 deletions src/iceberg/delete_file_index.cc
Original file line number Diff line number Diff line change
Expand Up @@ -453,34 +453,32 @@ Result<std::shared_ptr<DataFile>> DeleteFileIndex::FindDV(
}

Result<DeleteFileIndex::Builder> DeleteFileIndex::BuilderFor(
std::shared_ptr<FileIO> io, std::vector<ManifestFile> delete_manifests) {
std::shared_ptr<FileIO> io, std::shared_ptr<Schema> schema,
std::unordered_map<int32_t, std::shared_ptr<PartitionSpec>> specs_by_id,
std::vector<ManifestFile> delete_manifests) {
ICEBERG_PRECHECK(io != nullptr, "FileIO cannot be null");
return Builder(std::move(io), std::move(delete_manifests));
ICEBERG_PRECHECK(schema != nullptr, "Schema cannot be null");
ICEBERG_PRECHECK(!specs_by_id.empty(), "Partition specs cannot be empty");
return Builder(std::move(io), std::move(schema), std::move(specs_by_id),
std::move(delete_manifests));
}

// Builder implementation

DeleteFileIndex::Builder::Builder(std::shared_ptr<FileIO> io,
std::vector<ManifestFile> delete_manifests)
: io_(std::move(io)), delete_manifests_(std::move(delete_manifests)) {}
DeleteFileIndex::Builder::Builder(
std::shared_ptr<FileIO> io, std::shared_ptr<Schema> schema,
std::unordered_map<int32_t, std::shared_ptr<PartitionSpec>> specs_by_id,
std::vector<ManifestFile> delete_manifests)
: io_(std::move(io)),
schema_(std::move(schema)),
specs_by_id_(std::move(specs_by_id)),
delete_manifests_(std::move(delete_manifests)) {}

DeleteFileIndex::Builder::~Builder() = default;
DeleteFileIndex::Builder::Builder(Builder&&) noexcept = default;
DeleteFileIndex::Builder& DeleteFileIndex::Builder::operator=(Builder&&) noexcept =
default;

DeleteFileIndex::Builder& DeleteFileIndex::Builder::SpecsById(
std::unordered_map<int32_t, std::shared_ptr<PartitionSpec>> specs_by_id) {
specs_by_id_ = std::move(specs_by_id);
return *this;
}

DeleteFileIndex::Builder& DeleteFileIndex::Builder::WithSchema(
std::shared_ptr<Schema> schema) {
schema_ = std::move(schema);
return *this;
}

DeleteFileIndex::Builder& DeleteFileIndex::Builder::AfterSequenceNumber(int64_t seq) {
min_sequence_number_ = seq;
return *this;
Expand Down Expand Up @@ -721,10 +719,6 @@ Status DeleteFileIndex::Builder::AddEqualityDelete(

Result<std::unique_ptr<DeleteFileIndex>> DeleteFileIndex::Builder::Build() {
ICEBERG_RETURN_UNEXPECTED(CheckErrors());
ICEBERG_PRECHECK(io_ != nullptr, "FileIO is required to load delete files");
ICEBERG_PRECHECK(schema_ != nullptr, "Schema is required to load delete files");
ICEBERG_PRECHECK(!specs_by_id_.empty(),
"Partition specs are required to load delete files");

std::vector<ManifestEntry> entries;
if (!delete_manifests_.empty()) {
Expand Down
25 changes: 11 additions & 14 deletions src/iceberg/delete_file_index.h
Original file line number Diff line number Diff line change
Expand Up @@ -268,10 +268,14 @@ class ICEBERG_EXPORT DeleteFileIndex {
/// \brief Create a builder for constructing a DeleteFileIndex from manifest files.
///
/// \param io The FileIO to use for reading manifests
/// \param schema Current table schema
/// \param specs_by_id Partition specs by their IDs
/// \param delete_manifests The delete manifests to index
/// \return A Builder instance
static Result<Builder> BuilderFor(std::shared_ptr<FileIO> io,
std::vector<ManifestFile> delete_manifests);
static Result<Builder> BuilderFor(
std::shared_ptr<FileIO> io, std::shared_ptr<Schema> schema,
std::unordered_map<int32_t, std::shared_ptr<PartitionSpec>> specs_by_id,
std::vector<ManifestFile> delete_manifests);

private:
friend class Builder;
Expand Down Expand Up @@ -318,7 +322,9 @@ class ICEBERG_EXPORT DeleteFileIndex {
class ICEBERG_EXPORT DeleteFileIndex::Builder : public ErrorCollector {
public:
/// \brief Construct a builder from manifest files.
Builder(std::shared_ptr<FileIO> io, std::vector<ManifestFile> delete_manifests);
Builder(std::shared_ptr<FileIO> io, std::shared_ptr<Schema> schema,
std::unordered_map<int32_t, std::shared_ptr<PartitionSpec>> specs_by_id,
std::vector<ManifestFile> delete_manifests);

~Builder() override;

Expand All @@ -327,15 +333,6 @@ class ICEBERG_EXPORT DeleteFileIndex::Builder : public ErrorCollector {
Builder(const Builder&) = delete;
Builder& operator=(const Builder&) = delete;

/// \brief Set the partition specs by ID.
Builder& SpecsById(
std::unordered_map<int32_t, std::shared_ptr<PartitionSpec>> specs_by_id);

/// \brief Set the table schema.
///
/// Required for filtering and expression evaluation.
Builder& WithSchema(std::shared_ptr<Schema> schema);

/// \brief Set the minimum sequence number for delete files.
///
/// Only delete files with sequence number > min_sequence_number will be included.
Expand Down Expand Up @@ -384,10 +381,10 @@ class ICEBERG_EXPORT DeleteFileIndex::Builder : public ErrorCollector {
ManifestEntry&& entry);

std::shared_ptr<FileIO> io_;
std::shared_ptr<Schema> schema_;
std::unordered_map<int32_t, std::shared_ptr<PartitionSpec>> specs_by_id_;
std::vector<ManifestFile> delete_manifests_;
int64_t min_sequence_number_ = 0;
std::unordered_map<int32_t, std::shared_ptr<PartitionSpec>> specs_by_id_;
std::shared_ptr<Schema> schema_;
std::shared_ptr<Expression> data_filter_;
std::shared_ptr<Expression> partition_filter_;
std::shared_ptr<PartitionSet> partition_set_;
Expand Down
Loading
Loading