diff --git a/BUCK b/BUCK index a28484df232e..c485a8d40971 100644 --- a/BUCK +++ b/BUCK @@ -202,6 +202,7 @@ cpp_library_wrapper(name="rocksdb_lib", srcs=[ "table/block_based/block_cache.cc", "table/block_based/block_prefetcher.cc", "table/block_based/block_prefix_index.cc", + "table/block_based/builtin_index_factory.cc", "table/block_based/data_block_footer.cc", "table/block_based/data_block_hash_index.cc", "table/block_based/filter_block_reader_common.cc", @@ -4598,6 +4599,12 @@ cpp_unittest_wrapper(name="bloom_test", extra_compiler_flags=[]) +cpp_unittest_wrapper(name="builtin_index_factory_test", + srcs=["table/block_based/builtin_index_factory_test.cc"], + deps=[":rocksdb_test_lib"], + extra_compiler_flags=[]) + + cpp_unittest_wrapper(name="cache_reservation_manager_test", srcs=["cache/cache_reservation_manager_test.cc"], deps=[":rocksdb_test_lib"], diff --git a/CMakeLists.txt b/CMakeLists.txt index 4f744e2ba484..577c2018c9cc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -874,6 +874,7 @@ set(SOURCES table/block_based/filter_block_reader_common.cc table/block_based/filter_policy.cc table/block_based/flush_block_policy.cc + table/block_based/builtin_index_factory.cc table/block_based/full_filter_block.cc table/block_based/hash_index_reader.cc table/block_based/index_builder.cc @@ -1511,6 +1512,7 @@ if(WITH_TESTS) options/options_test.cc table/block_based/block_based_table_reader_test.cc table/block_based/block_test.cc + table/block_based/builtin_index_factory_test.cc table/block_based/data_block_hash_index_test.cc table/block_based/full_filter_block_test.cc table/block_based/partitioned_filter_block_test.cc diff --git a/Makefile b/Makefile index 7c878e45d2b4..e4e022b0141b 100644 --- a/Makefile +++ b/Makefile @@ -1775,6 +1775,9 @@ block_fetcher_test: table/block_fetcher_test.o $(TEST_LIBRARY) $(LIBRARY) block_test: $(OBJ_DIR)/table/block_based/block_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) +builtin_index_factory_test: 
$(OBJ_DIR)/table/block_based/builtin_index_factory_test.o $(TEST_LIBRARY) $(LIBRARY) + $(AM_LINK) + data_block_hash_index_test: $(OBJ_DIR)/table/block_based/data_block_hash_index_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) diff --git a/db/wide/db_wide_blob_direct_write_test.cc b/db/wide/db_wide_blob_direct_write_test.cc index 5d0e92ef9807..0e7087509bbb 100644 --- a/db/wide/db_wide_blob_direct_write_test.cc +++ b/db/wide/db_wide_blob_direct_write_test.cc @@ -180,7 +180,7 @@ class DBWideBlobDirectWriteTest : public DBTestBase { trie_index::TrieIndexFactory trie_index_factory; if (scenario.use_trie_index) { - coalescing_ro.table_index_factory = &trie_index_factory; + coalescing_ro.read_index = ReadOptions::ReadIndex::kCustom; } ReadOptions control_ro = coalescing_ro; @@ -714,7 +714,7 @@ TEST_F(DBWideBlobDirectWriteTest, read_options.snapshot = snapshot; trie_index::TrieIndexFactory trie_index_factory; - read_options.table_index_factory = &trie_index_factory; + read_options.read_index = ReadOptions::ReadIndex::kCustom; std::vector cfhs{handles_[1], handles_[0]}; std::unique_ptr coalescing = @@ -845,7 +845,7 @@ TEST_F(DBWideBlobDirectWriteTest, trie_index::TrieIndexFactory trie_index_factory; if (test_case.use_trie_index) { - read_options.table_index_factory = &trie_index_factory; + read_options.read_index = ReadOptions::ReadIndex::kCustom; } const auto verify_snapshot_reads = [&]() { diff --git a/db_stress_tool/db_stress_common.h b/db_stress_tool/db_stress_common.h index 6395ecee2d62..d8492a3dfd5f 100644 --- a/db_stress_tool/db_stress_common.h +++ b/db_stress_tool/db_stress_common.h @@ -184,7 +184,7 @@ DECLARE_int32(data_block_index_type); DECLARE_int32(index_block_search_type); DECLARE_double(uniform_cv_threshold); DECLARE_bool(use_trie_index); -DECLARE_bool(use_udi_as_primary_index); +DECLARE_int32(index_mode); DECLARE_bool(test_backward_scan); DECLARE_string(db); DECLARE_string(secondaries_base); diff --git a/db_stress_tool/db_stress_gflags.cc 
b/db_stress_tool/db_stress_gflags.cc index 881d223bc4ca..365fc9d7222c 100644 --- a/db_stress_tool/db_stress_gflags.cc +++ b/db_stress_tool/db_stress_gflags.cc @@ -670,15 +670,13 @@ DEFINE_bool(use_trie_index, false, "Use trie-based user defined index (UDI) for SST files. " "Compatible with all operation types (Put, Delete, Merge, etc.) " "and all iteration directions (forward and reverse). " - "Combined with use_udi_as_primary_index to control whether the " - "UDI is the primary or secondary index."); - -DEFINE_bool(use_udi_as_primary_index, false, - "When use_trie_index is enabled, use the UDI as the primary " - "index. All reads automatically go through the UDI (both " - "the standard index and UDI are always built). When false, " - "the UDI is a secondary index and reads require " - "ReadOptions::table_index_factory to be set."); + "Combined with index_mode to control the UDI role."); + +DEFINE_int32(index_mode, 0, + "Controls how the custom IndexFactory interacts with the " + "built-in index. 0=kStandardOnly, 1=kStandardDefault, " + "2=kCustomDefault, 3=kCustomOnly. 
Requires use_trie_index " + "for modes >= 1."); DEFINE_bool(test_backward_scan, true, "Test backward iteration (Prev, SeekForPrev) in stress tests."); diff --git a/db_stress_tool/db_stress_test_base.cc b/db_stress_tool/db_stress_test_base.cc index dbfd4e0d2f40..b32cee5a41fe 100644 --- a/db_stress_tool/db_stress_test_base.cc +++ b/db_stress_tool/db_stress_test_base.cc @@ -1003,8 +1003,13 @@ void StressTest::OperateDb(ThreadState* thread) { read_opts.allow_unprepared_value = FLAGS_allow_unprepared_value; read_opts.auto_refresh_iterator_with_snapshot = FLAGS_auto_refresh_iterator_with_snapshot; - if (FLAGS_use_trie_index && !FLAGS_use_udi_as_primary_index && udi_factory_) { - read_opts.table_index_factory = udi_factory_.get(); + if (FLAGS_use_trie_index && udi_factory_) { + if (FLAGS_index_mode == 1) { + // kStandardDefault: custom index is secondary, select explicitly per-read + read_opts.read_index = ReadOptions::ReadIndex::kCustom; + } + // kCustomDefault/kCustomOnly: custom index is default, no override needed + // kStandardOnly: custom index not built, don't select it } WriteOptions write_opts; if (FLAGS_rate_limit_auto_wal_flush) { @@ -1054,8 +1059,8 @@ void StressTest::OperateDb(ThreadState* thread) { } // Commenting this out as we don't want to reset stats on each open. 
// thread->stats.Start(); - if (FLAGS_use_trie_index && udi_factory_) { - read_opts.table_index_factory = udi_factory_.get(); + if (FLAGS_use_trie_index && FLAGS_index_mode == 1 && udi_factory_) { + read_opts.read_index = ReadOptions::ReadIndex::kCustom; } } @@ -2163,7 +2168,7 @@ void StressTest::DumpIteratorDivergenceDiagnostics( "selected_cf_count=%zu\n", seek_key.ToString(/*hex=*/true).c_str(), cmp_cfh->GetName().c_str(), static_cast(options_.prefix_extractor != nullptr), - static_cast(ro.table_index_factory != nullptr), + static_cast(ro.read_index != ReadOptions::ReadIndex::kDefault), static_cast(FLAGS_use_multi_cf_iterator), rand_column_families.size()); @@ -2224,13 +2229,13 @@ void StressTest::DumpIteratorDivergenceDiagnostics( }; ReadOptions standard_ro = ro; - standard_ro.table_index_factory = nullptr; + standard_ro.read_index = ReadOptions::ReadIndex::kDefault; dump_debug_iter("Debug standard direct", standard_ro, /*use_multi_cf_iter=*/false); if (udi_factory_) { ReadOptions trie_ro = ro; - trie_ro.table_index_factory = udi_factory_.get(); + trie_ro.read_index = ReadOptions::ReadIndex::kCustom; dump_debug_iter("Debug trie direct", trie_ro, /*use_multi_cf_iter=*/false); } @@ -2240,7 +2245,7 @@ void StressTest::DumpIteratorDivergenceDiagnostics( /*use_multi_cf_iter=*/true); if (udi_factory_) { ReadOptions trie_ro = ro; - trie_ro.table_index_factory = udi_factory_.get(); + trie_ro.read_index = ReadOptions::ReadIndex::kCustom; dump_debug_iter("Debug trie coalescing", trie_ro, /*use_multi_cf_iter=*/true); } @@ -4431,8 +4436,7 @@ bool InitializeOptionsFromFile(Options& options) { void InitializeOptionsFromFlags( const std::shared_ptr& cache, const std::shared_ptr& filter_policy, - const std::shared_ptr& udi_factory, - Options& options) { + const std::shared_ptr& udi_factory, Options& options) { BlockBasedTableOptions block_based_options; block_based_options.decouple_partitioned_filters = FLAGS_decouple_partitioned_filters; @@ -4517,18 +4521,32 @@ void 
InitializeOptionsFromFlags( fLU64::FLAGS_super_block_alignment_space_overhead_ratio; if (udi_factory) { block_based_options.user_defined_index_factory = udi_factory; - if (FLAGS_use_udi_as_primary_index) { - block_based_options.use_udi_as_primary_index = true; - } - // Write fault injection can corrupt the UDI meta block during SST - // creation. In primary mode all reads route through the UDI, so a - // corrupted UDI block causes the reader to fail, making compaction - // read zero keys from the affected SST and triggering a false - // positive in record count verification. In secondary mode this is - // not an issue because reads fall back to the standard index. - if (FLAGS_use_udi_as_primary_index && - (FLAGS_write_fault_one_in > 0 || - FLAGS_metadata_write_fault_one_in > 0)) { + if (FLAGS_index_mode < 0 || FLAGS_index_mode > 3) { + fprintf(stderr, "Invalid --index_mode=%d (must be 0-3)\n", + FLAGS_index_mode); + abort(); + } + block_based_options.index_mode = + static_cast(FLAGS_index_mode); + // Disable compaction record count verification when write fault + // injection is active in custom index modes (kCustomDefault/kCustomOnly). + // + // The custom index is stored as a meta block in the SST. Write fault + // injection (metadata_write_fault_one_in, write_fault_one_in) can + // corrupt this meta block during SST creation. In kCustomOnly, a + // corrupted custom index causes the compaction iterator to read zero + // keys (no standard index fallback). In kCustomDefault, the SST open + // returns an error on corrupted custom index. Either way, the + // compaction record count check produces a false positive. + // + // Without fault injection, all modes (including kCustomOnly) pass + // the compaction record count check correctly. + // + // Non-UDI modes (kStandardOnly, kStandardDefault) are not affected + // because the standard index is written as a main block (not a + // meta block), so write faults do not corrupt it. 
+ if (FLAGS_index_mode >= 2 && (FLAGS_write_fault_one_in > 0 || + FLAGS_metadata_write_fault_one_in > 0)) { options.compaction_verify_record_count = false; } } diff --git a/db_stress_tool/db_stress_test_base.h b/db_stress_tool/db_stress_test_base.h index 7474c5b0a81e..42b0f16b79ec 100644 --- a/db_stress_tool/db_stress_test_base.h +++ b/db_stress_tool/db_stress_test_base.h @@ -434,7 +434,7 @@ class StressTest { std::vector options_index_; std::atomic db_preload_finished_; std::shared_ptr sqfc_factory_; - std::shared_ptr udi_factory_; + std::shared_ptr udi_factory_; std::unique_ptr secondary_db_; std::vector secondary_cfhs_; @@ -451,8 +451,7 @@ bool InitializeOptionsFromFile(Options& options); void InitializeOptionsFromFlags( const std::shared_ptr& cache, const std::shared_ptr& filter_policy, - const std::shared_ptr& udi_factory, - Options& options); + const std::shared_ptr& udi_factory, Options& options); // Initialize `options` on which `InitializeOptionsFromFile()` and // `InitializeOptionsFromFlags()` have both been called already. 
diff --git a/db_stress_tool/no_batched_ops_stress.cc b/db_stress_tool/no_batched_ops_stress.cc index db07077131d6..0085b6481bcf 100644 --- a/db_stress_tool/no_batched_ops_stress.cc +++ b/db_stress_tool/no_batched_ops_stress.cc @@ -2571,7 +2571,7 @@ class NonBatchedOpsStressTest : public StressTest { static_cast(ro.auto_refresh_iterator_with_snapshot), static_cast(ro.snapshot != nullptr), static_cast(FLAGS_use_multi_cf_iterator), - static_cast(ro.table_index_factory != nullptr), + static_cast(ro.read_index != ReadOptions::ReadIndex::kDefault), static_cast(FLAGS_use_trie_index)); fprintf(stderr, "Iterator value: %s\n", iter->value().ToString(true).c_str()); @@ -2628,7 +2628,7 @@ class NonBatchedOpsStressTest : public StressTest { }; ReadOptions standard_ro = ro; - standard_ro.table_index_factory = nullptr; + standard_ro.read_index = ReadOptions::ReadIndex::kDefault; dump_debug_iter("Debug standard direct", standard_ro, /*use_multi_cf_iter=*/false, /*replay_from_mid=*/false); diff --git a/include/rocksdb/compression_type.h b/include/rocksdb/compression_type.h index 2261a44439b9..908bb6869a80 100644 --- a/include/rocksdb/compression_type.h +++ b/include/rocksdb/compression_type.h @@ -233,7 +233,7 @@ struct CompressionOptions { // // This option is valid only when BlockBasedTable is used and is disabled // (sanitized to 1) with any of these: - // * User-defined index (UserDefinedIndexFactory) + // * User-defined index (IndexFactory) // * partition_filters == true && decouple_partitioned_filters == false // // When parallel compression is enabled, SST size file sizes might be diff --git a/include/rocksdb/index_factory.h b/include/rocksdb/index_factory.h new file mode 100644 index 000000000000..f5d9be0c03d1 --- /dev/null +++ b/include/rocksdb/index_factory.h @@ -0,0 +1,378 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. 
+// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// ***************************************************************** +// * EXPERIMENTAL: This interface is part of the RocksDB User * +// * Defined Index (UDI) framework and may change at any time * +// * without notice. It is not yet considered part of the stable * +// * public API. * +// ***************************************************************** + +#pragma once + +#include +#include +#include +#include + +#include "rocksdb/advanced_iterator.h" +#include "rocksdb/customizable.h" +#include "rocksdb/iterator.h" +#include "rocksdb/options.h" +#include "rocksdb/rocksdb_namespace.h" +#include "rocksdb/slice.h" +#include "rocksdb/status.h" + +namespace ROCKSDB_NAMESPACE { + +struct ReadOptions; +class Comparator; +class PartitionCoordinator; + +// Prefix for meta block keys used by custom index implementations. +inline constexpr const char* kIndexFactoryMetaPrefix = "rocksdb.index_factory."; + +// ============================================================================ +// IndexFactory: pluggable index for BlockBasedTable SST files. +// +// The IndexFactory interface allows custom index implementations (e.g., trie, +// learned index, etc.) to coexist alongside the built-in standard index. +// In most modes, both indexes are built and stored in each SST file: +// - The built-in standard index (present in kStandardDefault/kCustomDefault) +// - The custom index (present when an IndexFactory is configured) +// In kCustomOnly mode, only the custom index is built. +// +// Read routing: +// - By default (index_mode=kStandardOnly), reads use the built-in standard +// index. +// - When index_mode is kCustomDefault or kCustomOnly in +// BlockBasedTableOptions, all reads (including internal operations) route +// through the custom index. 
+// - Per-read override: set ReadOptions::read_index to select the +// custom index for a specific read (relevant in kStandardDefault mode). +// +// This follows the FilterPolicy model: the built-in standard index is +// analogous to the default data block format, while custom IndexFactory +// implementations are analogous to custom FilterPolicy implementations. +// +// Single-index mode (kCustomOnly): the built-in standard index is +// not built. Only the custom IndexFactory produces an index, stored as a +// meta block in the SST. A minimal empty index block is written to +// satisfy the SST footer format. +// ============================================================================ + +// --------------------------------------------------------------------------- +// IndexFactoryBuilder: builds a custom index during SST construction. +// +// Called by BlockBasedTableBuilder for every key and every data block +// boundary. The builder accumulates index entries and serializes them +// into a meta block stored in the SST. +// +// Thread safety: all methods except EstimatedSize() are called from a +// single thread (the emit thread in BlockBasedTableBuilder). Custom +// IndexFactory implementations can support parallel compression by +// overriding SupportsParallelAddEntry(), PrepareAddEntry(), and +// FinishAddEntry(). When not overridden, the default single-threaded +// AddIndexEntry() path is used. +// --------------------------------------------------------------------------- +class IndexFactoryBuilder { + public: + // Simple block handle used by the public interface. + // Equivalent to the internal BlockHandle but without encoding/decoding. + struct BlockHandle { + uint64_t offset; + uint64_t size; + }; + + // Context passed to AddIndexEntry describing the internal key tags + // (packed sequence number + value type) for the last key in the + // current block and the first key in the next block. 
These enable + // custom indexes that need sequence-number-aware separator selection + // (e.g., for correct Seek when the same user key spans multiple + // blocks with different sequence numbers). + struct IndexEntryContext { + uint64_t last_key_tag = 0; + uint64_t first_key_tag = 0; + }; + + // Value type categories for OnKeyAdded. + enum ValueType : uint8_t { + kValue = 0, + kDelete = 1, + kMerge = 2, + kOther = 3, + }; + + virtual ~IndexFactoryBuilder() = default; + + // Called once for each data block boundary. The implementation should + // record the association between the separator key and the block handle. + // + // @param last_key_in_current_block User key of the last entry in the + // current data block. + // @param first_key_in_next_block User key of the first entry in the + // next data block. nullptr for the + // last block in the SST. + // @param block_handle Location and size of the data block. + // @param separator_scratch Scratch space for computing the + // separator. The returned Slice may + // reference this string. + // @param context Packed sequence+type tags for the + // boundary keys. + // @return The separator key actually stored. + virtual Slice AddIndexEntry(const Slice& last_key_in_current_block, + const Slice* first_key_in_next_block, + const BlockHandle& block_handle, + std::string* separator_scratch, + const IndexEntryContext& context) = 0; + + // Called for every key added to the SST. This provides the custom + // index with per-key visibility (e.g., for building a filter or + // maintaining statistics). The default implementation is a no-op. + // + // @param key User key (no internal key trailer). + // @param type Value type category. + // @param value The user value associated with this key. + virtual void OnKeyAdded(const Slice& /*key*/, ValueType /*type*/, + const Slice& /*value*/) {} + + // Serialize the index into a byte buffer. The memory backing the + // returned Slice must remain valid until this builder is destroyed. 
+  virtual Status Finish(Slice* index_contents) = 0; + + // Returns the estimated size in bytes of the index built so far. + // Used by BlockBasedTableBuilder for SST file size estimation. + // Thread safety: for built-in indexes, may be called concurrently from + // the emit thread. Custom IndexFactory implementations that do not + // override SupportsParallelAddEntry() run single-threaded, so + // concurrency is not a concern for them; implementations that opt in + // to the parallel protocol must tolerate concurrent calls. + virtual uint64_t EstimatedSize() const = 0; + + // ========================================================================= + // Optional protocols. Default implementations are provided. + // Built-in index factories override these for full functionality. + // Custom index implementations typically do NOT need to override these. + // ========================================================================= + + // --- Write protocol: IndexBlockWriter callback for Finish --- + // + // Some indexes (e.g., partitioned) need to write multiple blocks during + // Finish. The IndexBlockWriter callback allows the builder to drive the + // write loop internally. + class IndexBlockWriter { + public: + virtual ~IndexBlockWriter() = default; + // Write a block to the SST file. The handle is populated on success. + // @param compress If true, the block may be compressed per SST options. + virtual Status WriteBlock(const Slice& contents, BlockHandle* handle, + bool compress) = 0; + // Register a meta block by name and handle with the meta index builder. + // Called during FinishAndWrite for indexes that produce auxiliary meta + // blocks (e.g., hash index prefix blocks). + virtual void AddMetaBlock(const std::string& name, + const BlockHandle& handle) = 0; + }; + + // Finish the index and write all blocks via the writer callback. + // The final_handle receives the handle of the top-level index block. + // Default: calls Finish(Slice*) and writes a single block.
+ virtual Status FinishAndWrite(IndexBlockWriter* writer, + BlockHandle* final_handle, bool compress) { + Slice contents; + Status s = Finish(&contents); + if (!s.ok()) return s; + return writer->WriteBlock(contents, final_handle, compress); + } + + // --- Parallel compression protocol --- + // + // Splits AddIndexEntry into two phases for concurrent block compression: + // Phase 1 (emit thread): PrepareAddEntry — records keys + separator + // Phase 2 (write thread): FinishAddEntry — records the block handle + // + // Custom implementations that don't support parallel compression leave + // SupportsParallelAddEntry() returning false. The table builder will + // use single-threaded AddIndexEntry instead. + + struct PreparedAddEntry { + virtual ~PreparedAddEntry() = default; + }; + + virtual bool SupportsParallelAddEntry() const { return false; } + + virtual std::unique_ptr CreatePreparedAddEntry() { + return nullptr; + } + + // Phase 1: called on the emit thread. Records the separator keys. + // The block handle is not yet known. + virtual void PrepareAddEntry(const Slice& /*last_key_in_current_block*/, + const Slice* /*first_key_in_next_block*/, + const IndexEntryContext& /*context*/, + PreparedAddEntry* /*out*/) {} + + // Phase 2: called on the write thread. Records the block handle. + // skip_delta_encoding is true when block alignment padding causes + // non-sequential offsets. + virtual void FinishAddEntry(const BlockHandle& /*block_handle*/, + PreparedAddEntry* /*entry*/, + std::string* /*separator_scratch*/, + bool /*skip_delta_encoding*/) {} + + // --- Metadata queries for table properties --- + // + // These are queried after Finish() to populate SST table properties. + // Default values are appropriate for simple (non-partitioned) indexes. + + // Whether index separators include sequence numbers (internal key format). + // true = separators are full internal keys (user_key + seq + type). + // false = separators are user keys only. 
+ virtual bool separator_is_key_plus_seq() const { return true; } + + // Number of uniform-sized index blocks (0 if not applicable). + virtual uint64_t NumUniformIndexBlocks() const { return 0; } + + // Total serialized index size (after Finish). + virtual size_t IndexSize() const { return 0; } + + // --- Partitioned index metadata (0 for non-partitioned) --- + + virtual uint64_t NumPartitions() const { return 0; } + + virtual uint64_t TopLevelIndexSize(uint64_t /*offset*/) const { return 0; } + + // --- Filter coordination --- + // + // Returns a PartitionCoordinator for filter↔index partition alignment. + // nullptr if this builder doesn't support partitioned coordination. + // The returned pointer is valid for the lifetime of this builder. + virtual PartitionCoordinator* GetPartitionCoordinator() { return nullptr; } +}; + +// --------------------------------------------------------------------------- +// IndexFactoryIterator: iterates over index entries in a custom index. +// +// Returned by IndexFactoryReader::NewIterator. Each position in the +// iterator corresponds to a data block in the SST file. +// +// The iterator returns user keys (not internal keys) as separator keys, +// and simple BlockHandle values (offset + size). The BlockBasedTable +// reader adapts these to the internal InternalIteratorBase +// interface automatically. +// --------------------------------------------------------------------------- +class IndexFactoryIterator { + public: + virtual ~IndexFactoryIterator() = default; + + // Hint for upcoming scan ranges. Implementations may use this for + // prefetching or bounding. + // @param scan_opts Array of scan range descriptors. + // @param num_opts Number of elements in scan_opts. + virtual void Prepare(const ScanOptions scan_opts[], size_t num_opts) = 0; + + // Context for Seek, carrying the packed sequence+type tag of the + // target key. 
Used by indexes that need sequence-number-aware block + // selection (e.g., when the same user key spans multiple blocks). + struct SeekContext { + uint64_t target_tag = 0; + }; + + // Position at the first entry >= target and populate result. + virtual Status SeekAndGetResult(const Slice& target, IterateResult* result, + const SeekContext& context) = 0; + + // Advance to the next entry and populate result. + virtual Status NextAndGetResult(IterateResult* result) = 0; + + // Position at the first entry. + // Default: seeks with an empty key (works for bytewise comparator). + virtual Status SeekToFirstAndGetResult(IterateResult* result) { + return SeekAndGetResult(Slice(), result, SeekContext{}); + } + + // Position at the last entry. Optional — reverse iteration support. + // Default: returns NotSupported. + virtual Status SeekToLastAndGetResult(IterateResult* result) { + (void)result; + return Status::NotSupported("SeekToLast not supported by this index"); + } + + // Move to the previous entry. Optional — reverse iteration support. + // Default: returns NotSupported. + virtual Status PrevAndGetResult(IterateResult* result) { + (void)result; + return Status::NotSupported("Prev not supported by this index"); + } + + // Returns the block handle for the current position. + virtual IndexFactoryBuilder::BlockHandle value() = 0; +}; + +// NOTE: The IndexFactory API is intentionally asymmetric between build +// and read. Built-in and custom indexes share the factory abstraction +// for SST construction, but built-in index reads continue to use the +// internal BlockBasedTable::IndexReader path. That internal reader +// contract carries table-local behaviors such as cache/prefetch/pinning +// and iterator reuse that are not part of this public SPI. Custom +// IndexFactoryReader implementations are adapted to the internal reader +// contract via IndexFactoryReaderWrapper. 
+ +// --------------------------------------------------------------------------- +// IndexFactoryReader: reads a custom index from a serialized SST block. +// --------------------------------------------------------------------------- +class IndexFactoryReader { + public: + virtual ~IndexFactoryReader() = default; + + // Create an iterator over the index. + virtual std::unique_ptr NewIterator( + const ReadOptions& read_options) = 0; + + // Approximate heap memory used by this reader (excluding the raw + // index block contents, which are tracked separately by the block + // cache or table reader). + virtual size_t ApproximateMemoryUsage() const = 0; +}; + +// --------------------------------------------------------------------------- +// IndexFactoryOptions: configuration passed to NewBuilder / NewReader. +// --------------------------------------------------------------------------- +struct IndexFactoryOptions { + // The user comparator for this column family. + const Comparator* comparator = nullptr; +}; + +// --------------------------------------------------------------------------- +// IndexFactory: the top-level factory that creates builders and readers. +// +// Extends Customizable for string-based construction (CreateFromString), +// options serialization, and Name()-based identification. +// --------------------------------------------------------------------------- +class IndexFactory : public Customizable { + public: + ~IndexFactory() override = default; + + static const char* Type() { return "IndexFactory"; } + + // Create an IndexFactory from a string identifier (e.g., "trie"). + static Status CreateFromString(const ConfigOptions& config_options, + const std::string& value, + std::shared_ptr* factory); + + // Create a builder for constructing the index during SST creation. + virtual Status NewBuilder( + const IndexFactoryOptions& options, + std::unique_ptr& builder) const = 0; + + // Create a reader for an existing serialized index block. 
+ // @param options Configuration (comparator, etc.) + // @param index_contents Raw bytes of the serialized index. The Slice + // must remain valid for the lifetime of the reader. + virtual Status NewReader( + const IndexFactoryOptions& options, Slice& index_contents, + std::unique_ptr& reader) const = 0; +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/include/rocksdb/options.h b/include/rocksdb/options.h index 758b5628c2c4..32da49889192 100644 --- a/include/rocksdb/options.h +++ b/include/rocksdb/options.h @@ -57,7 +57,7 @@ class Statistics; class InternalKeyComparator; class WalFilter; class FileSystem; -class UserDefinedIndexFactory; + class IODispatcher; struct Options; @@ -2300,22 +2300,34 @@ struct ReadOptions { // EXPERIMENTAL // - // Specify an alternate index to use in the SST files instead of the native - // block based table index. The table_factory used for the column family - // must support building/reading this index. - // - // The UDI framework supports all iterator operations: forward scans - // (SeekToFirst, Seek, Next), reverse scans (SeekToLast, SeekForPrev, Prev), - // and point lookups (Get). Concrete UDI implementations may impose their - // own restrictions -- check the specific implementation's documentation. - // - // When BlockBasedTableOptions::use_udi_as_primary_index is true, this field - // does not need to be set -- all reads automatically use the UDI. If set - // while use_udi_as_primary_index is true, the UDI from - // BlockBasedTableOptions takes precedence. This field is only needed when - // the UDI is a secondary index and you want to explicitly select it for - // reads. - const UserDefinedIndexFactory* table_index_factory = nullptr; + // Per-read index selection. Overrides the default read routing determined + // by BlockBasedTableOptions::index_mode. + // + // kDefault: use whatever index_mode says. + // kStandardOnly/kStandardDefault → built-in standard index. + // kCustomDefault/kCustomOnly → custom IndexFactory index. 
+ // + // kBuiltin: force the built-in standard index for this read. + // Useful for debugging, comparing results between indexes, or + // temporary fallback. In kCustomOnly mode, the built-in index + // is a minimal stub — reads will return no useful results. + // + // kCustom: select the custom IndexFactory index for this read. + // In kStandardDefault mode, this is how you select the custom + // index for individual reads. If no custom index is available + // for a given SST, the read falls back to the standard index. + // ReadIndex is a two-way selector because each SST has exactly two + // potential read targets: the standard index (selected by + // BlockBasedTableOptions::index_type) and at most one custom index + // (from user_defined_index_factory). If the selected target is not + // available for a given SST (e.g., kCustom on an SST without a custom + // index), the read silently falls back to the available index. + enum class ReadIndex : uint8_t { + kDefault = 0, + kBuiltin = 1, + kCustom = 2, + }; + ReadIndex read_index = ReadIndex::kDefault; // *** END options only relevant to iterators or scans *** diff --git a/include/rocksdb/table.h b/include/rocksdb/table.h index 060c88520b50..698faefd5572 100644 --- a/include/rocksdb/table.h +++ b/include/rocksdb/table.h @@ -44,7 +44,7 @@ class TableReader; class WritableFileWriter; struct ConfigOptions; struct EnvOptions; -class UserDefinedIndexFactory; +class IndexFactory; // Types of checksums to use for checking integrity of logical blocks within // files. All checksums currently use 32 bits of checking power (1 in 4B @@ -533,67 +533,65 @@ struct BlockBasedTableOptions { // This allows users to define their own index format and build the index // during table building. // - // NOTE: UserDefinedIndexFactory currently disables parallel compression + // NOTE: IndexFactory currently disables parallel compression // (CompressionOptions::parallel_threads sanitized to 1). 
- std::shared_ptr user_defined_index_factory = nullptr; + std::shared_ptr user_defined_index_factory = nullptr; // EXPERIMENTAL // - // When true and user_defined_index_factory is set, the UDI becomes the - // primary index for reads. All reads (including internal operations like - // compaction and VerifyChecksum) automatically route through the UDI - // without needing ReadOptions::table_index_factory. - // - // Both the standard binary search index and the UDI are always fully - // built. The standard index serves as a safety fallback (e.g., for - // backup/restore or rollback to a non-UDI configuration). A future - // refactor will extract the index abstraction to allow skipping the - // standard index build when the UDI is primary. - // - // When the UDI is primary: - // - All reads automatically use the UDI (ReadOptions::table_index_factory - // does not need to be set) - // - Partitioned index (kTwoLevelIndexSearch) and partitioned filters are - // incompatible with this option - // - fail_if_no_udi_on_open is automatically enforced to prevent silent - // data loss if these SSTs are opened without UDI support + // Controls how the custom IndexFactory interacts with the built-in + // standard index. Requires user_defined_index_factory to be set + // for any mode other than kStandardOnly. + // + // kStandardOnly (default): + // Only the built-in standard index is used. + // user_defined_index_factory is ignored if set. + // + // kStandardDefault: + // Both indexes are built. Reads use the built-in index by default. + // The custom index is accessible via ReadOptions::read_index + // for per-read override. When opening SSTs that lack the custom + // index block, falls back to the standard index with a warning + // (not a hard error). + // + // kCustomDefault: + // Both indexes are built. All reads (including internal operations + // like compaction and VerifyChecksum) route through the custom + // index. 
The built-in index serves as a safety fallback for + // backup/restore and rollback. + // + // kCustomOnly: + // Only the custom index is built. The built-in index is not + // populated (a minimal stub satisfies the SST footer format). + // Maximum efficiency but no fallback — rollback requires + // compacting with a mode that builds the standard index. // // Recommended migration path: - // - // 1. Deploy with user_defined_index_factory set but - // use_udi_as_primary_index=false (secondary mode). New SSTs are written - // with both indexes. Reads use the standard index by default. - // - // 2. Validate reads through the UDI by setting - // ReadOptions::table_index_factory on a subset of reads. - // - // 3. Compact the entire DB to rewrite all pre-existing SSTs with both - // indexes. All SSTs must have a UDI block before proceeding. - // - // 4. Enable use_udi_as_primary_index=true. All reads use the UDI. - // - // Rollback: set use_udi_as_primary_index=false. Since the standard index - // is always fully populated, SSTs are immediately readable through the - // standard index. No compaction is required. All reads immediately - // revert to the standard index path. - // - // Backup/restore: the user_defined_index_factory is a shared_ptr that - // cannot survive Options serialization (e.g., GetStringFromDBOptions). - // Since the standard index is always fully populated, a restored DB can - // be opened and read without the factory (reads fall back to the standard - // index). Set the factory when opening the restored DB to resume using - // the UDI. - // - // Default: false (UDI is built alongside the standard index as a secondary) - bool use_udi_as_primary_index = false; - - // EXPERIMENTAL - // - // Return an error Status if a user_defined_index_factory is configured, - // but there's no corresponding UDI block in the SST file being opened. 
- // When use_udi_as_primary_index is true, this check is automatically - // enforced (a missing UDI block is always an error in primary mode). - bool fail_if_no_udi_on_open = false; + // kStandardOnly → kStandardDefault → kCustomDefault → kCustomOnly + // + // Rollback: + // From kCustomDefault: switch to kStandardDefault or kStandardOnly. + // The standard index is fully populated, so SSTs are immediately + // readable. + // From kCustomOnly: switch to kCustomDefault and compact to rewrite + // all SSTs with both indexes before downgrading further. + // + // Backup/restore: user_defined_index_factory (shared_ptr) does not + // survive Options serialization. In kStandardDefault/kCustomDefault, + // the restored DB falls back to the standard index. In kCustomOnly, + // the factory must be explicitly set after restore. + // + // Incompatible with: + // - Partitioned index (kTwoLevelIndexSearch) in kCustomDefault/kCustomOnly + // - Partitioned filters in kCustomDefault/kCustomOnly + // - Parallel compression in any mode that uses a custom index + enum class IndexMode { + kStandardOnly = 0, + kStandardDefault = 1, + kCustomDefault = 2, + kCustomOnly = 3, + }; + IndexMode index_mode = IndexMode::kStandardOnly; // If true, place whole keys in the filter (not just prefixes). // This must generally be true for gets to be efficient. diff --git a/include/rocksdb/user_defined_index.h b/include/rocksdb/user_defined_index.h index 1f8c5eb2a243..9183f5b57cef 100644 --- a/include/rocksdb/user_defined_index.h +++ b/include/rocksdb/user_defined_index.h @@ -6,275 +6,24 @@ // ***************************************************************** // EXPERIMENTAL - subject to change while under development // ***************************************************************** +// +// DEPRECATED: This header is a backward-compatibility shim. 
+// New code should #include "rocksdb/index_factory.h" directly and +// use the IndexFactory / IndexFactoryBuilder / IndexFactoryReader / +// IndexFactoryIterator / IndexFactoryOptions names instead. #pragma once -#include - -#include "rocksdb/advanced_iterator.h" -#include "rocksdb/customizable.h" -#include "rocksdb/options.h" -#include "rocksdb/slice.h" -#include "rocksdb/status.h" -#include "rocksdb/types.h" +#include "rocksdb/index_factory.h" namespace ROCKSDB_NAMESPACE { -// Prefix for user-defined index block names -inline constexpr const char* kUserDefinedIndexPrefix = - "rocksdb.user_defined_index."; - -// This is a public API for user-defined index builders. -// It allows users to define their own index format and build custom -// indexes during table building. Currently, only a monolithic index -// block is supported (no partitioned index). - -// The interface for building user-defined index. -class UserDefinedIndexBuilder { - public: - // Indicates the type of key-value entry being added via OnKeyAdded(). - // UDI builders that only use AddIndexEntry() (e.g., trie-based indexes) - // can safely ignore this. - enum ValueType : uint8_t { - kValue = 0, // Put: the value is the full user value. - kDelete = 1, // Deletion (Delete, SingleDelete, or DeleteWithTimestamp): - // the value is typically empty. - kMerge = 2, // Merge operand: the value is a partial update. - kOther = 3, // Other types (e.g., blob reference, wide-column entity). - // The value format is type-specific and may not be the - // actual user data. - kTypeMax, // Sentinel — must be last. Value may change across releases. - }; - - // File offset and size of the data block - struct BlockHandle { - uint64_t offset; - uint64_t size; - }; - - // Optional context for AddIndexEntry providing sequence numbers at block - // boundaries. Passed as a struct for forward-compatible extensibility - // (new fields can be added without breaking existing implementations). 
- struct IndexEntryContext { - // Tag (packed sequence number and type) of last_key_in_current_block: - // (sequence_number << 8) | value_type - // This is the same format used by InternalKeyComparator for ordering. - // UDI implementations that encode sequence numbers should store this - // tag (not just the sequence number) to ensure correct block - // selection when the same user key spans multiple blocks. - uint64_t last_key_tag = 0; - // Tag (packed sequence number and type) of first_key_in_next_block (valid - // only when first_key_in_next_block != nullptr). - uint64_t first_key_tag = 0; - }; - - virtual ~UserDefinedIndexBuilder() = default; - - // Add a new index entry for a data block boundary. - // - // The keys are user keys (without the 8-byte tag). - // - // The UDI is free to compute a separator between the two user keys and - // store it along with the block handle. The separator must satisfy: - // last_key_in_current_block <= separator < first_key_in_next_block - // in user-key order (ignoring sequence numbers). - // - // Called before the OnKeyAdded() call for first_key_in_next_block. - // @last_key_in_current_block: The last user key in the current data block - // @first_key_in_next_block: First user key in the next data block, or - // nullptr if this is the last block - // @block_handle: offset/size of the data block - // @separator_scratch: scratch buffer for a computed separator - // @context: sequence number context for block boundaries. The sequence - // numbers are needed when the same user key spans a data block boundary - // (e.g., when snapshots keep multiple versions of a key). Without - // sequence numbers, the UDI cannot produce a separator that distinguishes - // the two blocks. This mirrors the internal index's behavior of switching - // to full internal-key separators (see - // ShortenedIndexBuilder::must_use_separator_with_seq_). - // Implementations that don't need sequence numbers can ignore the context. 
- // @return: the separator stored in the index - virtual Slice AddIndexEntry(const Slice& last_key_in_current_block, - const Slice* first_key_in_next_block, - const BlockHandle& block_handle, - std::string* separator_scratch, - const IndexEntryContext& context) = 0; - - // Called for every key-value pair added to the SST file. UDI builders may - // override this to collect per-key information (e.g., for secondary - // indexes). Builders that only use separator keys from AddIndexEntry() - // (e.g., trie-based indexes) can leave this as a no-op. - // - // @key: The user key (without sequence number or type suffix). - // @type: The entry type — kValue (Put), kDelete, kMerge, or kOther. - // For kDelete entries, the value may be empty. For kOther, the - // value format is type-specific and may not be actual user data. - // @value: The associated value (may be empty for deletions). - // - // NOTE: In SST files produced by flush or compaction, there may be multiple - // entries for the same user key with different sequence numbers (e.g., when - // snapshots are active). UDI builders that use OnKeyAdded() should be - // prepared for this. - // - // Thread safety: For a given builder instance, OnKeyAdded() and - // AddIndexEntry() are always called from a single thread. Builders do - // not need internal synchronization. - virtual void OnKeyAdded(const Slice& /*key*/, ValueType /*type*/, - const Slice& /*value*/) {} - - // Finish building the index. - // Returns a Status and the serialized index contents. - // The memory backing the contents should not be freed until this builder - // object is destructed. - virtual Status Finish(Slice* index_contents) = 0; - - // Returns an estimate of the current serialized index size in bytes. - virtual uint64_t EstimatedSize() const = 0; -}; - -// The interface for iterating the user defined index. This will be -// instantiated and used by a scan to iterate through the index entries -// covered by the scan. 
-class UserDefinedIndexIterator { - public: - virtual ~UserDefinedIndexIterator() = default; - - // Prepare the iterator for a series of scans. The iterator should use - // this as an opportunity to do any prefetching and buffering of results. - virtual void Prepare(const ScanOptions scan_opts[], size_t num_opts) = 0; - - // Optional context for SeekAndGetResult providing the target sequence - // number. Passed as a struct for forward-compatible extensibility. - struct SeekContext { - // Tag (packed sequence number and type) of the target key: - // (sequence_number << 8) | value_type - // Used by UDI implementations that encode sequence numbers (when the - // same user key spans multiple data blocks) to locate the correct block. - // Must match the format stored in - // IndexEntryContext::last_key_tag. - uint64_t target_tag = 0; - }; - - // Position the index iterator at the very first index entry. The result - // must be populated the same way as SeekAndGetResult. - // - // The default implementation calls SeekAndGetResult with an empty key, - // which works for BytewiseComparator (empty string is the smallest key). - // Implementations should override this if they can reach the first entry - // more efficiently or if they use a comparator where empty is not smallest. - virtual Status SeekToFirstAndGetResult(IterateResult* result) { - return SeekAndGetResult(Slice(), result, SeekContext{}); - } - - // Position the index iterator at the very last index entry. The result - // must be populated the same way as SeekAndGetResult. - // - // The default implementation returns NotSupported. Concrete UDI - // implementations must override this to support reverse iteration - // (SeekToLast, Prev), which is required for full iterator functionality. - virtual Status SeekToLastAndGetResult(IterateResult* result) { - (void)result; - return Status::NotSupported("SeekToLast not supported by this UDI"); - } - - // Move to the previous index entry. 
The result must be populated the - // same way as SeekAndGetResult. - // - // The default implementation returns NotSupported. Concrete UDI - // implementations must override this to support reverse iteration - // (SeekToLast, Prev), which is required for full iterator functionality. - virtual Status PrevAndGetResult(IterateResult* result) { - (void)result; - return Status::NotSupported("Prev not supported by this UDI"); - } - - // Given the target key, position the index iterator at the index entry - // for the data block that may contain the target. - // - // The target is a user key. - // - // The result must be updated with the index key and bound_check_result. - // bound_check_result should be kOutOfBound if no block satisfies the - // target, kInbound if the data block is definitely within bounds, or - // kUnknown if partially within bounds. - // - // The UDI implementation needs to be careful about returning kOutOfBound. - // If a limit key is specified in ScanOptions, an implementation that - // does not store the first key in the block for the corresponding index - // entry cannot reliably determine if the block is out of bounds. It must - // compare against the previous index key to determine if the current block - // is out of bounds w.r.t the limit. Other termination criteria (specified - // in property_bag) may cause the scan to terminate earlier, in which case - // kOutOfBound can be returned earlier. - // - // @context: sequence number context for the seek. The sequence number is - // needed when the same user key spans multiple data blocks with different - // sequence numbers. Without it, the UDI cannot distinguish which block to - // return for a given (user_key, seqno) target. Implementations that don't - // need sequence numbers can ignore the context. - virtual Status SeekAndGetResult(const Slice& target, IterateResult* result, - const SeekContext& context) = 0; - - // Advance to the next index entry. 
The result must be populated similar
-  // to SeekAndGetResult.
-  virtual Status NextAndGetResult(IterateResult* result) = 0;
-
-  // Return the BlockHandle in the current index entry
-  virtual UserDefinedIndexBuilder::BlockHandle value() = 0;
-};
-
-// A reader interface for the user defined index
-class UserDefinedIndexReader {
- public:
-  virtual ~UserDefinedIndexReader() = default;
-
-  // Allocate an iterator that will be used by RocksDB to perform scans
-  virtual std::unique_ptr<UserDefinedIndexIterator> NewIterator(
-      const ReadOptions& read_options) = 0;
-
-  // The memory usage of the index, including the size of the raw contents and
-  // any other heap data structures allocated by the reader
-  virtual size_t ApproximateMemoryUsage() const = 0;
-};
-
-// Options for user defined index
-struct UserDefinedIndexOption {
-  const Comparator* comparator = BytewiseComparator();
-};
-
-// Factory for creating user-defined index builders.
-class UserDefinedIndexFactory : public Customizable {
- public:
-  ~UserDefinedIndexFactory() override = default;
-
-  static const char* Type() { return "UserDefinedIndexFactory"; }
-
-  static Status CreateFromString(
-      const ConfigOptions& config_options, const std::string& value,
-      std::shared_ptr<UserDefinedIndexFactory>* factory);
-
-  // Create a new builder for user-defined index.
-  virtual UserDefinedIndexBuilder* NewBuilder() const = 0;
-
-  // Create a new user defined index reader given the contents of the index
-  // block
-  virtual std::unique_ptr<UserDefinedIndexReader> NewReader(
-      Slice& index_block) const = 0;
-
-  // New API for allowing customized comparator
-  virtual Status NewBuilder(
-      const UserDefinedIndexOption& /*option*/,
-      std::unique_ptr<UserDefinedIndexBuilder>& builder) const {
-    builder.reset(NewBuilder());
-    return Status::OK();
-  }
+using UserDefinedIndexBuilder = IndexFactoryBuilder;
+using UserDefinedIndexIterator = IndexFactoryIterator;
+using UserDefinedIndexReader = IndexFactoryReader;
+using UserDefinedIndexFactory = IndexFactory;
+using UserDefinedIndexOption = IndexFactoryOptions;
 
-  virtual Status NewReader(
-      const UserDefinedIndexOption& /*option*/, Slice& index_block,
-      std::unique_ptr<UserDefinedIndexReader>& reader) const {
-    reader = NewReader(index_block);
-    return Status::OK();
-  }
-};
+inline constexpr const char* kUserDefinedIndexPrefix = kIndexFactoryMetaPrefix;
 
 }  // namespace ROCKSDB_NAMESPACE
diff --git a/options/options_settable_test.cc b/options/options_settable_test.cc
index ff24369ad517..fcb0d0b76af1 100644
--- a/options/options_settable_test.cc
+++ b/options/options_settable_test.cc
@@ -130,7 +130,7 @@ TEST_F(OptionsSettableTest, BlockBasedTableOptionsAllFieldsSettable) {
       {offsetof(struct BlockBasedTableOptions, filter_policy),
       sizeof(std::shared_ptr<const FilterPolicy>)},
       {offsetof(struct BlockBasedTableOptions, user_defined_index_factory),
-       sizeof(std::shared_ptr<UserDefinedIndexFactory>)},
+       sizeof(std::shared_ptr<IndexFactory>)},
   };
 
   // In this test, we catch a new option of BlockBasedTableOptions that is not
@@ -207,8 +207,7 @@
       "prepopulate_block_cache=kDisable;"
       "initial_auto_readahead_size=0;"
       "num_file_reads_for_auto_readahead=0;"
-      "fail_if_no_udi_on_open=true;"
-      "use_udi_as_primary_index=true;"
+      "index_mode=kCustomDefault;"
       "separate_key_value_in_data_block=true;"
       "uniform_cv_threshold=0.2",
       new_bbto));
diff --git a/src.mk b/src.mk
index
6b5f539b2218..c74adfd1d159 100644 --- a/src.mk +++ b/src.mk @@ -197,6 +197,7 @@ LIB_SOURCES = \ table/block_based/filter_block_reader_common.cc \ table/block_based/filter_policy.cc \ table/block_based/flush_block_policy.cc \ + table/block_based/builtin_index_factory.cc \ table/block_based/full_filter_block.cc \ table/block_based/hash_index_reader.cc \ table/block_based/index_builder.cc \ @@ -603,6 +604,7 @@ TEST_MAIN_SOURCES = \ options/options_test.cc \ table/block_based/block_based_table_reader_test.cc \ table/block_based/block_test.cc \ + table/block_based/builtin_index_factory_test.cc \ table/block_based/data_block_hash_index_test.cc \ table/block_based/full_filter_block_test.cc \ table/block_based/partitioned_filter_block_test.cc \ diff --git a/table/block_based/block_based_table_builder.cc b/table/block_based/block_based_table_builder.cc index 03d87e552091..06c001f7d245 100644 --- a/table/block_based/block_based_table_builder.cc +++ b/table/block_based/block_based_table_builder.cc @@ -21,12 +21,14 @@ #include #include "block_cache.h" +#include "builtin_index_factory.h" #include "cache/cache_entry_roles.h" #include "cache/cache_helpers.h" #include "cache/cache_key.h" #include "cache/cache_reservation_manager.h" #include "db/dbformat.h" -#include "index_builder.h" +// NOTE: index_builder.h is included indirectly through builtin_index_factory.h. +// All index operations go through the IndexFactoryBuilder interface. 
#include "logging/logging.h" #include "memory/memory_allocator_impl.h" #include "options/options_helper.h" @@ -45,6 +47,7 @@ #include "table/block_based/filter_block.h" #include "table/block_based/filter_policy_internal.h" #include "table/block_based/full_filter_block.h" +#include "table/block_based/partition_coordinator.h" #include "table/block_based/partitioned_filter_block.h" #include "table/block_based/user_defined_index_wrapper.h" #include "table/format.h" @@ -73,7 +76,7 @@ FilterBlockBuilder* CreateFilterBlockBuilder( const ImmutableCFOptions& /*opt*/, const MutableCFOptions& mopt, const FilterBuildingContext& context, const bool use_delta_encoding_for_index_values, - PartitionedIndexBuilder* const p_index_builder, size_t ts_sz, + PartitionCoordinator* const partition_coordinator, size_t ts_sz, const bool persist_user_defined_timestamps) { const BlockBasedTableOptions& table_opt = context.table_options; assert(table_opt.filter_policy); // precondition @@ -84,7 +87,7 @@ FilterBlockBuilder* CreateFilterBlockBuilder( return nullptr; } else { if (table_opt.partition_filters) { - assert(p_index_builder != nullptr); + assert(partition_coordinator != nullptr); // Since after partition cut request from filter builder it takes time // until index builder actully cuts the partition, until the end of a // data block potentially with many keys, we take the lower bound as @@ -99,8 +102,8 @@ FilterBlockBuilder* CreateFilterBlockBuilder( return new PartitionedFilterBlockBuilder( mopt.prefix_extractor.get(), table_opt.whole_key_filtering, filter_bits_builder, table_opt.index_block_restart_interval, - use_delta_encoding_for_index_values, p_index_builder, partition_size, - ts_sz, persist_user_defined_timestamps, + use_delta_encoding_for_index_values, partition_coordinator, + partition_size, ts_sz, persist_user_defined_timestamps, table_opt.decouple_partitioned_filters); } else { return new FullFilterBlockBuilder(mopt.prefix_extractor.get(), @@ -279,7 +282,8 @@ struct 
BlockBasedTableBuilder::ParallelCompressionRep { std::string uncompressed; GrowableBuffer compressed; CompressionType compression_type = kNoCompression; - std::unique_ptr prepared_index_entry; + std::unique_ptr prepared_index_entry; + bool index_entry_prepared = false; }; // Ring buffer of emitted blocks that may or may not yet be compressed. @@ -843,9 +847,137 @@ struct BlockBasedTableBuilder::Rep { BlockBuilder range_del_block; InternalKeySliceTransform internal_prefix_transform; - std::unique_ptr index_builder; + std::unique_ptr index_builder; std::string index_separator_scratch; - PartitionedIndexBuilder* p_index_builder_ = nullptr; + + // Custom indexes built alongside the built-in index. Each entry is + // (factory_name, builder). These are managed directly by the table + // builder — not wrapped around the built-in index_builder. + struct CustomIndex { + // Factory name (from IndexFactory::Name()), used as the suffix in the + // meta block key: kIndexFactoryMetaPrefix + name. + std::string name; + std::unique_ptr builder; + // Persistent scratch buffer for AddIndexEntry separator results. + // Each custom index needs its own scratch so that a Slice returned + // by one builder's AddIndexEntry (which may reference this buffer) + // is not invalidated by a subsequent call. + std::string separator_scratch; + }; + std::vector custom_indexes; + + // Forwards OnKeyAdded to ALL index builders (built-in + custom). + // The built-in builder receives the full internal key via + // OnKeyAddedInternal() (needed for kBinarySearchWithFirstKey). + // Custom builders receive user keys via the public OnKeyAdded(). + void ForwardOnKeyAddedToAll(const Slice& internal_key, + const std::optional& value) { + // Forward to the built-in builder with the full internal key + // (needed for kBinarySearchWithFirstKey tracking). + // When index_mode=kCustomOnly, index_builder is null — skip. 
+ if (index_builder) { + static_cast(index_builder.get()) + ->OnKeyAddedInternal(internal_key, value); + } + + // Forward to custom builders with user keys. + if (custom_indexes.empty()) { + return; + } + ParsedInternalKey pkey; + Status parse_s = ParseInternalKey(internal_key, &pkey, false); + assert(parse_s.ok()); + if (!parse_s.ok()) { + return; // Defensive: should never happen + } + IndexFactoryBuilder::ValueType vt; + switch (pkey.type) { + case kTypeValue: + case kTypeValuePreferredSeqno: + vt = IndexFactoryBuilder::kValue; + break; + case kTypeDeletion: + case kTypeSingleDeletion: + case kTypeDeletionWithTimestamp: + vt = IndexFactoryBuilder::kDelete; + break; + case kTypeMerge: + vt = IndexFactoryBuilder::kMerge; + break; + default: + vt = IndexFactoryBuilder::kOther; + } + // kTypeValuePreferredSeqno stores the user value and a preferred + // sequence number in a packed format. Extract just the user value + // portion — custom index builders should not see internal encoding. + Slice user_value; + if (value.has_value()) { + user_value = (pkey.type == kTypeValuePreferredSeqno) + ? ParsePackedValueForValue(*value) + : *value; + } + for (auto& ci : custom_indexes) { + ci.builder->OnKeyAdded(pkey.user_key, vt, user_value); + } + } + + // Forwards AddIndexEntry to ALL index builders (built-in + custom), + // translating internal keys to user keys + seqno context. The built-in + // builder reconstructs internal keys internally from the user keys + + // context tags. + void ForwardAddIndexEntryToAll(const Slice& last_internal_key, + const Slice* first_internal_key_next, + const BlockHandle& handle, + bool skip_delta_encoding = false) { + // Fast path: no custom indexes — pass internal keys directly to the + // built-in builder, avoiding ParseInternalKey + key reconstruction + // overhead. 
+ if (custom_indexes.empty() && index_builder) { + static_cast(index_builder.get()) + ->AddIndexEntryDirect(last_internal_key, first_internal_key_next, + handle, &index_separator_scratch, + skip_delta_encoding); + return; + } + + // Slow path: parse internal keys to user keys for custom builders. + ParsedInternalKey last_pkey; + Status parse_s = ParseInternalKey(last_internal_key, &last_pkey, false); + assert(parse_s.ok()); + if (!parse_s.ok()) { + return; // Defensive: should never happen + } + IndexFactoryBuilder::IndexEntryContext ctx; + ctx.last_key_tag = PackSequenceAndType(last_pkey.sequence, last_pkey.type); + ParsedInternalKey next_pkey; + const Slice* next_user = nullptr; + if (first_internal_key_next != nullptr && + ParseInternalKey(*first_internal_key_next, &next_pkey, false).ok()) { + next_user = &next_pkey.user_key; + ctx.first_key_tag = + PackSequenceAndType(next_pkey.sequence, next_pkey.type); + } + IndexFactoryBuilder::BlockHandle pub{handle.offset(), handle.size()}; + // skip_delta_encoding is passed via SetSkipDeltaEncoding() rather than + // as an AddIndexEntry parameter because the public IndexFactoryBuilder + // API is designed for custom indexes that don't use delta encoding. + // Only the built-in index needs this flag (when block alignment padding + // causes non-contiguous block offsets). + // Call built-in builder (null when index_mode=kCustomOnly). 
+ if (index_builder) { + if (skip_delta_encoding) { + static_cast(index_builder.get()) + ->SetSkipDeltaEncoding(true); + } + index_builder->AddIndexEntry(last_pkey.user_key, next_user, pub, + &index_separator_scratch, ctx); + } + // Call custom builders + for (auto& ci : custom_indexes) { + ci.builder->AddIndexEntry(last_pkey.user_key, next_user, pub, + &ci.separator_scratch, ctx); + } + } std::string last_ikey; // Internal key or empty (unset) bool uses_explicit_compression_manager = false; @@ -1259,65 +1391,122 @@ struct BlockBasedTableBuilder::Rep { compression_dict_buffer_cache_res_mgr = nullptr; } - if (table_options.index_type == - BlockBasedTableOptions::kTwoLevelIndexSearch) { - p_index_builder_ = PartitionedIndexBuilder::CreateIndexBuilder( - &internal_comparator, use_delta_encoding_for_index_values, - table_options, ts_sz, persist_user_defined_timestamps, - ioptions.stats); - index_builder.reset(p_index_builder_); - } else { - index_builder.reset(IndexBuilder::CreateIndexBuilder( - table_options.index_type, &internal_comparator, - &this->internal_prefix_transform, use_delta_encoding_for_index_values, - table_options, ts_sz, persist_user_defined_timestamps, - ioptions.stats)); - } - - // If user_defined_index_factory is provided, wrap the index builder with - // UserDefinedIndexWrapper - if (table_options.use_udi_as_primary_index && + // Create the built-in index through the IndexFactory interface. + // The factory stores all internal configuration needed by the builder. 
+ // --- Validate custom index options --- + if (table_options.index_mode >= + BlockBasedTableOptions::IndexMode::kStandardDefault && table_options.user_defined_index_factory == nullptr) { - SetStatus(Status::InvalidArgument( - "use_udi_as_primary_index requires user_defined_index_factory to " - "be set")); + SetStatus( + Status::InvalidArgument("index_mode >= kStandardDefault requires " + "user_defined_index_factory to be set")); } - if (table_options.user_defined_index_factory != nullptr) { + if (table_options.index_mode >= + BlockBasedTableOptions::IndexMode::kStandardDefault && + table_options.user_defined_index_factory != nullptr) { if (tbo.moptions.compression_opts.parallel_threads > 1 || tbo.moptions.bottommost_compression_opts.parallel_threads > 1) { - SetStatus( - Status::InvalidArgument("user_defined_index_factory not supported " - "with parallel compression")); - } else if (table_options.use_udi_as_primary_index && + // Custom index builders use the single-threaded AddIndexEntry + // protocol and cannot participate in the parallel compression + // PrepareIndexEntry/FinishIndexEntry protocol. 
+ SetStatus(Status::InvalidArgument( + "user_defined_index_factory is not supported with parallel " + "compression")); + } else if (table_options.index_mode >= + BlockBasedTableOptions::IndexMode::kCustomDefault && table_options.index_type == BlockBasedTableOptions::kTwoLevelIndexSearch) { SetStatus(Status::InvalidArgument( - "use_udi_as_primary_index is incompatible with partitioned index " - "(kTwoLevelIndexSearch)")); - } else if (table_options.use_udi_as_primary_index && + "index_mode kCustomDefault/kCustomOnly is incompatible with " + "partitioned index (kTwoLevelIndexSearch)")); + } else if (table_options.index_mode >= + BlockBasedTableOptions::IndexMode::kCustomDefault && table_options.partition_filters) { SetStatus(Status::InvalidArgument( - "use_udi_as_primary_index is incompatible with partitioned " - "filters")); - } else { - std::unique_ptr user_defined_index_builder; - UserDefinedIndexOption udi_options; - udi_options.comparator = internal_comparator.user_comparator(); - auto s = table_options.user_defined_index_factory->NewBuilder( - udi_options, user_defined_index_builder); - if (!s.ok()) { - SetStatus(s); - } else { - if (user_defined_index_builder != nullptr) { - index_builder = std::make_unique( - std::string(table_options.user_defined_index_factory->Name()), - std::move(index_builder), std::move(user_defined_index_builder), - &internal_comparator, ts_sz, persist_user_defined_timestamps); + "index_mode kCustomDefault/kCustomOnly is incompatible with " + "partitioned filters")); + } + } + + // --- Create the built-in index builder --- + // When index_mode=kCustomOnly, the built-in index is NOT created. + // The custom IndexFactory is the sole index. A minimal empty index + // block is still written to satisfy the SST footer format. 
+ const bool build_standard_index = + table_options.index_mode != + BlockBasedTableOptions::IndexMode::kCustomOnly; + if (build_standard_index) { + BuiltinIndexFactoryConfig builtin_config; + builtin_config.internal_comparator = &internal_comparator; + builtin_config.internal_prefix_transform = + &this->internal_prefix_transform; + builtin_config.use_delta_encoding_for_index_values = + use_delta_encoding_for_index_values; + builtin_config.table_options = &table_options; + builtin_config.ts_sz = ts_sz; + builtin_config.persist_user_defined_timestamps = + persist_user_defined_timestamps; + builtin_config.stats = ioptions.stats; + + // Use stack-local factory objects to avoid shared_ptr heap allocation. + // The factory is only needed for the duration of NewBuilder(). + IndexFactoryOptions builtin_opts; + builtin_opts.comparator = internal_comparator.user_comparator(); + switch (table_options.index_type) { + case BlockBasedTableOptions::kBinarySearch: { + BinarySearchIndexFactory factory(/*with_first_key=*/false, + builtin_config); + Status s = factory.NewBuilder(builtin_opts, index_builder); + if (!s.ok()) { + SetStatus(s); + } + break; + } + case BlockBasedTableOptions::kBinarySearchWithFirstKey: { + BinarySearchIndexFactory factory(/*with_first_key=*/true, + builtin_config); + Status s = factory.NewBuilder(builtin_opts, index_builder); + if (!s.ok()) { + SetStatus(s); + } + break; + } + case BlockBasedTableOptions::kHashSearch: { + HashIndexFactory factory(builtin_config); + Status s = factory.NewBuilder(builtin_opts, index_builder); + if (!s.ok()) { + SetStatus(s); } + break; + } + case BlockBasedTableOptions::kTwoLevelIndexSearch: { + PartitionedIndexFactory factory(builtin_config); + Status s = factory.NewBuilder(builtin_opts, index_builder); + if (!s.ok()) { + SetStatus(s); + } + break; } } } + // --- Create the custom index builder --- + if (table_options.index_mode >= + BlockBasedTableOptions::IndexMode::kStandardDefault && + 
table_options.user_defined_index_factory != nullptr) { + IndexFactoryOptions custom_opts; + custom_opts.comparator = internal_comparator.user_comparator(); + CustomIndex ci; + ci.name = table_options.user_defined_index_factory->Name(); + auto s = table_options.user_defined_index_factory->NewBuilder(custom_opts, + ci.builder); + if (!s.ok()) { + SetStatus(s); + } else if (ci.builder != nullptr) { + custom_indexes.push_back(std::move(ci)); + } + } + if (ioptions.optimize_filters_for_hits && tbo.is_bottommost) { // Apply optimize_filters_for_hits setting here when applicable by // skipping filter generation @@ -1328,8 +1517,9 @@ struct BlockBasedTableBuilder::Rep { } else { filter_builder.reset(CreateFilterBlockBuilder( ioptions, tbo.moptions, filter_context, - use_delta_encoding_for_index_values, p_index_builder_, ts_sz, - persist_user_defined_timestamps)); + use_delta_encoding_for_index_values, + index_builder ? index_builder->GetPartitionCoordinator() : nullptr, + ts_sz, persist_user_defined_timestamps)); } assert(tbo.internal_tbl_prop_coll_factories); @@ -1608,7 +1798,7 @@ void BlockBasedTableBuilder::Add(const Slice& ikey, const Slice& value) { // Buffered keys will be replayed from data_block_buffers during // `Finish()` once compression dictionary has been finalized. } else { - r->index_builder->OnKeyAdded(ikey, value); + r->ForwardOnKeyAddedToAll(ikey, value); } // TODO offset passed in is not accurate for parallel compression case NotifyCollectTableCollectorsOnAdd(ikey, value, r->get_offset(), @@ -1774,9 +1964,30 @@ void BlockBasedTableBuilder::EmitBlockForParallel( pc_rep.estimated_inflight_size.FetchAddRelaxed(uncompressed.size() + kBlockTrailerSize); std::swap(uncompressed, block_rep->uncompressed); - r->index_builder->PrepareIndexEntry(last_key_in_current_block, - first_key_in_next_block, - block_rep->prepared_index_entry.get()); + // Translate internal keys to user keys + context for the + // IndexFactoryBuilder parallel compression protocol. 
+ // Guard: index_builder is null when index_mode=kCustomOnly. Parallel + // compression is rejected with custom indexes, so this should be + // unreachable, but guard defensively. + if (r->index_builder) { + ParsedInternalKey last_pkey; + if (ParseInternalKey(last_key_in_current_block, &last_pkey, false).ok()) { + IndexFactoryBuilder::IndexEntryContext ctx; + ctx.last_key_tag = + PackSequenceAndType(last_pkey.sequence, last_pkey.type); + const Slice* next_user = nullptr; + ParsedInternalKey next_pkey; + if (first_key_in_next_block != nullptr && + ParseInternalKey(*first_key_in_next_block, &next_pkey, false).ok()) { + next_user = &next_pkey.user_key; + ctx.first_key_tag = + PackSequenceAndType(next_pkey.sequence, next_pkey.type); + } + r->index_builder->PrepareAddEntry(last_pkey.user_key, next_user, ctx, + block_rep->prepared_index_entry.get()); + block_rep->index_entry_prepared = true; + } + } block_rep->compressed.Reset(); block_rep->compression_type = kNoCompression; @@ -1837,9 +2048,9 @@ void BlockBasedTableBuilder::EmitBlock(std::string& uncompressed, // "the r" as the key for the index block entry since it is >= all // entries in the first block and < all entries in subsequent // blocks. - r->index_builder->AddIndexEntry( - last_key_in_current_block, first_key_in_next_block, r->pending_handle, - &r->index_separator_scratch, skip_delta_encoding); + r->ForwardAddIndexEntryToAll(last_key_in_current_block, + first_key_in_next_block, r->pending_handle, + skip_delta_encoding); } } @@ -1924,9 +2135,16 @@ void BlockBasedTableBuilder::BGWorker(WorkingAreaPair& working_area) { rep_->props.data_size = rep_->get_offset(); rep_->props.uncompressed_data_size += block_rep->uncompressed.size(); - rep_->index_builder->FinishIndexEntry( - rep_->pending_handle, block_rep->prepared_index_entry.get(), - skip_delta_encoding); + // Guard: index_builder is null when index_mode=kCustomOnly. 
+ // Parallel compression is rejected with custom indexes, so this + // should be unreachable, but guard defensively. + if (rep_->index_builder && block_rep->index_entry_prepared) { + IndexFactoryBuilder::BlockHandle pub_handle{ + rep_->pending_handle.offset(), rep_->pending_handle.size()}; + rep_->index_builder->FinishAddEntry( + pub_handle, block_rep->prepared_index_entry.get(), + &rep_->index_separator_scratch, skip_delta_encoding); + } } }; switch (thread_state) { @@ -2263,9 +2481,14 @@ void BlockBasedTableBuilder::MaybeStartParallelCompression() { rep_->pc_rep = std::make_unique( rep_->compression_parallel_threads); auto& pc_rep = *rep_->pc_rep; - for (uint32_t i = 0; i <= pc_rep.ring_buffer_mask; i++) { - pc_rep.ring_buffer[i].prepared_index_entry = - rep_->index_builder->CreatePreparedIndexEntry(); + // Guard: index_builder is null when index_mode=kCustomOnly. Parallel + // compression is rejected with custom indexes, so this should be + // unreachable, but guard defensively. + if (rep_->index_builder) { + for (uint32_t i = 0; i <= pc_rep.ring_buffer_mask; i++) { + pc_rep.ring_buffer[i].prepared_index_entry = + rep_->index_builder->CreatePreparedAddEntry(); + } } pc_rep.worker_threads.reserve(pc_rep.num_worker_threads); pc_rep.working_areas.resize(pc_rep.num_worker_threads); @@ -2419,80 +2642,124 @@ void BlockBasedTableBuilder::WriteIndexBlock( if (UNLIKELY(!ok())) { return; } - IndexBuilder::IndexBlocks index_blocks; - auto index_builder_status = rep_->index_builder->Finish(&index_blocks); - if (LIKELY(ok()) && !index_builder_status.ok() && - !index_builder_status.IsIncomplete()) { - // If the index builder failed for non-Incomplete errors, we should - // mark the entire builder as having failed wit that status. However, - // If the index builder failed with an incomplete error, we should - // continue writing out any meta blocks that may have been generated. 
- rep_->SetStatus(index_builder_status); - } - if (LIKELY(ok())) { - for (const auto& item : index_blocks.meta_blocks) { - BlockHandle block_handle; - if (item.second.first == BlockType::kIndex) { - WriteBlock(item.second.second, &block_handle, item.second.first); + // Use the FinishAndWrite protocol which handles: + // - Multi-partition writes for partitioned indexes + // - Auxiliary meta blocks (e.g., hash index prefix blocks) + // - Single-block writes for simple indexes + // The IndexBlockWriter callback adapts between the public + // IndexFactoryBuilder::BlockHandle and the internal BlockHandle. + bool compress = rep_->table_options.enable_index_compression; + + class IndexBlockWriterImpl : public IndexFactoryBuilder::IndexBlockWriter { + public: + IndexBlockWriterImpl(BlockBasedTableBuilder* builder, + MetaIndexBuilder* meta_builder, bool compress) + : builder_(builder), meta_builder_(meta_builder), compress_(compress) {} + + Status WriteBlock(const Slice& contents, + IndexFactoryBuilder::BlockHandle* handle, + bool compress_this) override { + BlockHandle internal_handle; + // Two-level compression control: compress_ is the SST-level setting + // (enable_index_compression), while compress_this is per-block + // (callers may request uncompressed writes for auxiliary blocks). 
+ bool should_compress = compress_ && compress_this; + if (should_compress) { + builder_->WriteBlock(contents, &internal_handle, BlockType::kIndex); } else { - assert(item.second.first == BlockType::kUserDefinedIndex); - WriteMaybeCompressedBlock(item.second.second, kNoCompression, - &block_handle, item.second.first); + builder_->WriteMaybeCompressedBlock( + contents, kNoCompression, &internal_handle, BlockType::kIndex); } - if (UNLIKELY(!ok())) { - break; + if (!builder_->ok()) { + return builder_->status(); } - meta_index_builder->Add(item.first, block_handle); + handle->offset = internal_handle.offset(); + handle->size = internal_handle.size(); + return Status::OK(); } - } - if (LIKELY(ok())) { - if (rep_->table_options.enable_index_compression) { - WriteBlock(index_blocks.index_block_contents, index_block_handle, - BlockType::kIndex); - } else { - WriteMaybeCompressedBlock(index_blocks.index_block_contents, - kNoCompression, index_block_handle, - BlockType::kIndex); - } - } - // If there are more index partitions, finish them and write them out - if (index_builder_status.IsIncomplete()) { - bool index_building_finished = false; - while (LIKELY(ok()) && !index_building_finished) { - Status s = - rep_->index_builder->Finish(&index_blocks, *index_block_handle); - if (s.ok()) { - index_building_finished = true; - } else if (s.IsIncomplete()) { - // More partitioned index after this one - assert(!index_building_finished); - } else { - // Error - rep_->SetStatus(s); - return; - } - if (rep_->table_options.enable_index_compression) { - WriteBlock(index_blocks.index_block_contents, index_block_handle, - BlockType::kIndex); - } else { - WriteMaybeCompressedBlock(index_blocks.index_block_contents, - kNoCompression, index_block_handle, - BlockType::kIndex); - } - // The last index_block_handle will be for the partition index block + void AddMetaBlock(const std::string& name, + const IndexFactoryBuilder::BlockHandle& handle) override { + BlockHandle internal_handle; + 
internal_handle.set_offset(handle.offset); + internal_handle.set_size(handle.size); + meta_builder_->Add(name, internal_handle); } - } - if (LIKELY(ok())) { - rep_->props.num_uniform_blocks = - rep_->index_builder->NumUniformIndexBlocks(); + + private: + BlockBasedTableBuilder* builder_; + MetaIndexBuilder* meta_builder_; + bool compress_; + }; + + if (rep_->index_builder) { + // Normal path: built-in index is present. + IndexBlockWriterImpl writer(this, meta_index_builder, compress); + IndexFactoryBuilder::BlockHandle final_handle{0, 0}; + Status s = + rep_->index_builder->FinishAndWrite(&writer, &final_handle, compress); + if (!s.ok()) { + rep_->SetStatus(s); + return; + } + // Convert the public handle back to internal handle + index_block_handle->set_offset(final_handle.offset); + index_block_handle->set_size(final_handle.size); + + if (LIKELY(ok())) { + rep_->props.num_uniform_blocks = + rep_->index_builder->NumUniformIndexBlocks(); + } + } else { + // index_mode=kCustomOnly: no built-in index builder. + // Write a minimal empty index block to satisfy the SST footer format. + // The empty stub has no entries — set properties to reflect that. + BlockBuilder empty_index_block(1 /* block_restart_interval */, + false /* use_delta_encoding */, + false /* use_value_delta_encoding */); + Slice empty_contents = empty_index_block.Finish(); + WriteMaybeCompressedBlock(empty_contents, kNoCompression, + index_block_handle, BlockType::kIndex); + // The empty stub has no entries — set properties to reflect that. + // index_key_is_user_key=1: no separators with sequence numbers. + // index_value_is_delta_encoded=0: no entries to delta-encode. + rep_->props.index_key_is_user_key = 1; + rep_->props.index_value_is_delta_encoded = 0; + rep_->props.num_uniform_blocks = 0; } // If success and need to record in metaindex rather than footer... 
if (LIKELY(ok()) && !FormatVersionUsesIndexHandleInFooter( rep_->table_options.format_version)) { meta_index_builder->Add(kIndexBlockName, *index_block_handle); } + + // Finish and write custom index blocks (e.g., trie index). + // Each custom builder produces a serialized block that's stored as a + // meta block alongside the built-in index. The meta block key is + // kIndexFactoryMetaPrefix + factory_name. + for (auto& ci : rep_->custom_indexes) { + if (UNLIKELY(!ok())) { + break; + } + Slice custom_contents; + Status cs = ci.builder->Finish(&custom_contents); + if (!cs.ok()) { + rep_->SetStatus(cs); + break; + } + // Custom index blocks are written uncompressed. The custom IndexFactory + // controls its own serialization format and may not benefit from (or + // be incompatible with) RocksDB's block compression. The factory can + // apply its own compression internally if desired. + BlockHandle custom_handle; + WriteMaybeCompressedBlock(custom_contents, kNoCompression, &custom_handle, + BlockType::kUserDefinedIndex); + if (LIKELY(ok())) { + meta_index_builder->Add(std::string(kIndexFactoryMetaPrefix) + ci.name, + custom_handle); + } + } } void BlockBasedTableBuilder::WritePropertiesBlock( @@ -2505,7 +2772,9 @@ void BlockBasedTableBuilder::WritePropertiesBlock( ? rep_->table_options.filter_policy->Name() : ""; rep_->props.index_size = - rep_->index_builder->IndexSize() + kBlockTrailerSize; + rep_->index_builder ? rep_->index_builder->IndexSize() + + kBlockTrailerSize // via IndexFactoryBuilder + : 0; rep_->props.comparator_name = rep_->ioptions.user_comparator != nullptr ? 
rep_->ioptions.user_comparator->Name() : "nullptr"; @@ -2529,23 +2798,34 @@ void BlockBasedTableBuilder::WritePropertiesBlock( rep_->PostPopulateCompressionProperties(); - if (rep_->table_options.index_type == - BlockBasedTableOptions::kTwoLevelIndexSearch) { - assert(rep_->p_index_builder_ != nullptr); - rep_->props.index_partitions = rep_->p_index_builder_->NumPartitions(); - rep_->props.top_level_index_size = - rep_->p_index_builder_->TopLevelIndexSize(rep_->offset.LoadRelaxed()); - } - rep_->props.index_key_is_user_key = - !rep_->index_builder->separator_is_key_plus_seq(); - if (rep_->table_options.use_udi_as_primary_index && - rep_->table_options.user_defined_index_factory != nullptr) { + if (rep_->index_builder) { + // Normal path: built-in index is present. + if (rep_->table_options.index_type == + BlockBasedTableOptions::kTwoLevelIndexSearch) { + rep_->props.index_partitions = rep_->index_builder->NumPartitions(); + rep_->props.top_level_index_size = + rep_->index_builder->TopLevelIndexSize(rep_->offset.LoadRelaxed()); + } + rep_->props.index_key_is_user_key = + !rep_->index_builder->separator_is_key_plus_seq(); + if (rep_->table_options.index_mode >= + BlockBasedTableOptions::IndexMode::kCustomDefault && + rep_->table_options.user_defined_index_factory != nullptr) { + rep_->props.udi_is_primary_index = 1; + } + // The standard index is always fully populated (even in primary mode), + // so delta encoding applies normally. + rep_->props.index_value_is_delta_encoded = + rep_->use_delta_encoding_for_index_values; + } else { + // index_mode=kCustomOnly: no built-in index builder. + // The empty stub has no entries — set properties to reflect that. 
+ rep_->props.index_key_is_user_key = 1; + rep_->props.index_value_is_delta_encoded = 0; + rep_->props.index_partitions = 0; + rep_->props.top_level_index_size = 0; rep_->props.udi_is_primary_index = 1; } - // The standard index is always fully populated (even in primary mode), - // so delta encoding applies normally. - rep_->props.index_value_is_delta_encoded = - rep_->use_delta_encoding_for_index_values; if (rep_->sampled_input_data_bytes.LoadRelaxed() > 0) { rep_->props.slow_compression_estimated_data_size = static_cast( static_cast( @@ -2821,7 +3101,7 @@ void BlockBasedTableBuilder::MaybeEnterUnbuffered( // unbuffered operation. r->filter_builder->Add(ExtractUserKeyAndStripTimestamp(key, r->ts_sz)); } - r->index_builder->OnKeyAdded(key, iter->value()); + r->ForwardOnKeyAddedToAll(key, iter->value()); } Slice first_key_in_loop_next_block; @@ -2978,14 +3258,12 @@ uint64_t BlockBasedTableBuilder::EstimatedFileSize() const { uint64_t BlockBasedTableBuilder::EstimatedTailSize() const { uint64_t estimated_tail_size = 0; - // 1. Estimate index size - if (rep_->table_options.index_type == - BlockBasedTableOptions::kTwoLevelIndexSearch) { - assert(rep_->p_index_builder_); - estimated_tail_size += rep_->p_index_builder_->CurrentIndexSizeEstimate(); - } else { - assert(rep_->index_builder); - estimated_tail_size += rep_->index_builder->CurrentIndexSizeEstimate(); + // 1. Estimate index size (built-in + custom indexes) + if (rep_->index_builder) { + estimated_tail_size += rep_->index_builder->EstimatedSize(); + } + for (const auto& ci : rep_->custom_indexes) { + estimated_tail_size += ci.builder->EstimatedSize(); } // 2. 
Estimate filter size diff --git a/table/block_based/block_based_table_factory.cc b/table/block_based/block_based_table_factory.cc index 230b88cb4879..decf0c78a6d5 100644 --- a/table/block_based/block_based_table_factory.cc +++ b/table/block_based/block_based_table_factory.cc @@ -24,9 +24,9 @@ #include "rocksdb/convenience.h" #include "rocksdb/filter_policy.h" #include "rocksdb/flush_block_policy.h" +#include "rocksdb/index_factory.h" #include "rocksdb/rocksdb_namespace.h" #include "rocksdb/table.h" -#include "rocksdb/user_defined_index.h" #include "rocksdb/utilities/customizable_util.h" #include "rocksdb/utilities/options_type.h" #include "table/block_based/block_based_table_builder.h" @@ -226,6 +226,14 @@ static std::unordered_map offsetof(struct MetadataCacheOptions, unpartitioned_pinning), &pinning_tier_type_string_map)}}; +static const std::unordered_map + block_base_table_index_mode_string_map = { + {"kStandardOnly", BlockBasedTableOptions::IndexMode::kStandardOnly}, + {"kStandardDefault", + BlockBasedTableOptions::IndexMode::kStandardDefault}, + {"kCustomDefault", BlockBasedTableOptions::IndexMode::kCustomDefault}, + {"kCustomOnly", BlockBasedTableOptions::IndexMode::kCustomOnly}}; + static std::unordered_map block_base_table_prepopulate_block_cache_string_map = { @@ -329,7 +337,7 @@ static struct BlockBasedTableTypeInfo { offsetof(struct BlockBasedTableOptions, filter_policy), OptionVerificationType::kByNameAllowFromNull)}, {"user_defined_index_factory", - OptionTypeInfo::AsCustomSharedPtr( + OptionTypeInfo::AsCustomSharedPtr( offsetof(struct BlockBasedTableOptions, user_defined_index_factory), OptionVerificationType::kByNameAllowFromNull)}, @@ -427,12 +435,16 @@ static struct BlockBasedTableTypeInfo { {offsetof(struct BlockBasedTableOptions, num_file_reads_for_auto_readahead), OptionType::kUInt64T, OptionVerificationType::kNormal}}, + {"index_mode", OptionTypeInfo::Enum( + offsetof(struct BlockBasedTableOptions, index_mode), + 
&block_base_table_index_mode_string_map)}, + // Old boolean names are accepted as deprecated aliases. {"fail_if_no_udi_on_open", - {offsetof(struct BlockBasedTableOptions, fail_if_no_udi_on_open), - OptionType::kBoolean, OptionVerificationType::kNormal}}, + {0, OptionType::kBoolean, OptionVerificationType::kDeprecated}}, {"use_udi_as_primary_index", - {offsetof(struct BlockBasedTableOptions, use_udi_as_primary_index), - OptionType::kBoolean, OptionVerificationType::kNormal}}, + {0, OptionType::kBoolean, OptionVerificationType::kDeprecated}}, + {"skip_standard_index", + {0, OptionType::kBoolean, OptionVerificationType::kDeprecated}}, }; } } block_based_table_type_info; @@ -760,30 +772,31 @@ Status BlockBasedTableFactory::ValidateOptions( "data_block_hash_table_util_ratio should be greater than 0 when " "data_block_index_type is set to kDataBlockBinaryAndHash"); } - if (table_options_.user_defined_index_factory) { + if (table_options_.index_mode >= + BlockBasedTableOptions::IndexMode::kStandardDefault) { + if (!table_options_.user_defined_index_factory) { + return Status::InvalidArgument( + "index_mode >= kStandardDefault requires user_defined_index_factory"); + } if (cf_opts.compression_opts.parallel_threads > 1 || cf_opts.bottommost_compression_opts.parallel_threads > 1) { return Status::InvalidArgument( "user_defined_index_factory not supported with parallel compression"); } - if (table_options_.use_udi_as_primary_index) { + if (table_options_.index_mode >= + BlockBasedTableOptions::IndexMode::kCustomDefault) { if (table_options_.index_type == BlockBasedTableOptions::kTwoLevelIndexSearch) { return Status::InvalidArgument( - "use_udi_as_primary_index is incompatible with partitioned index " - "(kTwoLevelIndexSearch). 
The UDI wrapper currently only supports " - "flat (single-level) index builders."); + "index_mode kCustomDefault/kCustomOnly is incompatible with " + "partitioned index (kTwoLevelIndexSearch)."); } if (table_options_.partition_filters) { return Status::InvalidArgument( - "use_udi_as_primary_index is incompatible with partitioned " - "filters. The UDI wrapper does not support the partitioned " - "index/filter layout."); + "index_mode kCustomDefault/kCustomOnly is incompatible with " + "partitioned filters."); } } - } else if (table_options_.use_udi_as_primary_index) { - return Status::InvalidArgument( - "use_udi_as_primary_index requires user_defined_index_factory"); } if (db_opts.unordered_write && cf_opts.max_successive_merges > 0) { // TODO(myabandeh): support it @@ -961,11 +974,8 @@ std::string BlockBasedTableFactory::GetPrintableOptions() const { ? "nullptr" : table_options_.user_defined_index_factory->Name()); ret.append(buffer); - snprintf(buffer, kBufferSize, " use_udi_as_primary_index: %d\n", - table_options_.use_udi_as_primary_index); - ret.append(buffer); - snprintf(buffer, kBufferSize, " fail_if_no_udi_on_open: %d\n", - table_options_.fail_if_no_udi_on_open); + snprintf(buffer, kBufferSize, " index_mode: %d\n", + static_cast(table_options_.index_mode)); ret.append(buffer); snprintf(buffer, kBufferSize, " whole_key_filtering: %d\n", table_options_.whole_key_filtering); @@ -1123,11 +1133,10 @@ TableFactory* NewBlockBasedTableFactory( return new BlockBasedTableFactory(_table_options); } -Status UserDefinedIndexFactory::CreateFromString( - const ConfigOptions& config_options, const std::string& value, - std::shared_ptr* factory) { - return LoadSharedObject(config_options, value, - factory); +Status IndexFactory::CreateFromString(const ConfigOptions& config_options, + const std::string& value, + std::shared_ptr* factory) { + return LoadSharedObject(config_options, value, factory); } const std::string BlockBasedTablePropertyNames::kIndexType = diff --git 
a/table/block_based/block_based_table_reader.cc b/table/block_based/block_based_table_reader.cc index 0226e02faf27..edb8d7492991 100644 --- a/table/block_based/block_based_table_reader.cc +++ b/table/block_based/block_based_table_reader.cc @@ -38,6 +38,7 @@ #include "rocksdb/env.h" #include "rocksdb/file_system.h" #include "rocksdb/filter_policy.h" +#include "rocksdb/index_factory.h" #include "rocksdb/iterator.h" #include "rocksdb/options.h" #include "rocksdb/snapshot.h" @@ -46,7 +47,6 @@ #include "rocksdb/table.h" #include "rocksdb/table_properties.h" #include "rocksdb/trace_record.h" -#include "rocksdb/user_defined_index.h" #include "table/block_based/binary_search_index_reader.h" #include "table/block_based/block.h" #include "table/block_based/block_based_table_factory.h" @@ -1317,15 +1317,17 @@ Status BlockBasedTable::PrefetchIndexAndFilterBlocks( BlockHandle udi_block_handle; // Use FindMetaBlock (not FindOptionalMetaBlock) so we get a non-OK status - // when the block is missing, allowing the fail_if_no_udi_on_open logic - // below to decide whether to error or warn. - s = FindMetaBlock(meta_iter, kUserDefinedIndexPrefix + udi_name, + // when the block is missing, allowing the index_mode logic below to + // decide whether to error or warn. + s = FindMetaBlock(meta_iter, kIndexFactoryMetaPrefix + udi_name, &udi_block_handle); if (!s.ok()) { RecordTick(rep_->ioptions.statistics.get(), SST_USER_DEFINED_INDEX_LOAD_FAIL_COUNT); - if (table_options.fail_if_no_udi_on_open || - table_options.use_udi_as_primary_index) { + if (table_options.index_mode >= + BlockBasedTableOptions::IndexMode::kCustomDefault) { + // kCustomDefault and kCustomOnly route all reads through the UDI, + // so a missing UDI block is a hard error. 
ROCKS_LOG_ERROR(rep_->ioptions.logger, "Failed to find the UDI block %s in file %s; %s", udi_name.c_str(), rep_->file->file_name().c_str(), @@ -1334,7 +1336,9 @@ Status BlockBasedTable::PrefetchIndexAndFilterBlocks( s = Status::Corruption(s.ToString(), rep_->file->file_name()); return s; } else { - // Emit a warning, but ignore the error status + // kStandardDefault or kStandardOnly: the UDI is optional. When the + // block is absent the standard index handles all reads. Log a warning + // so operators know they have pre-UDI SSTs that need compaction. ROCKS_LOG_WARN(rep_->ioptions.logger, "Failed to find the UDI block %s in file %s; %s", udi_name.c_str(), rep_->file->file_name().c_str(), @@ -1359,8 +1363,8 @@ Status BlockBasedTable::PrefetchIndexAndFilterBlocks( if (s.ok()) { assert(!rep_->udi_block.IsEmpty()); - std::unique_ptr udi_reader; - UserDefinedIndexOption udi_option; + std::unique_ptr udi_reader; + IndexFactoryOptions udi_option; udi_option.comparator = rep_->internal_comparator.user_comparator(); s = table_options.user_defined_index_factory->NewReader( udi_option, rep_->udi_block.GetValue()->data, udi_reader); @@ -1369,12 +1373,13 @@ Status BlockBasedTable::PrefetchIndexAndFilterBlocks( // Primary UDI mode is purely config-driven. The // udi_is_primary_index table property is informational only // (for diagnostics / sst_dump) and does not affect routing. - // This keeps rollback simple: setting - // use_udi_as_primary_index=false immediately reverts all SSTs - // to standard-index reads without needing compaction. - index_reader = std::make_unique( + // This keeps rollback simple: changing index_mode immediately + // reverts all SSTs to standard-index reads without needing + // compaction. 
+ index_reader = std::make_unique( udi_name, std::move(index_reader), std::move(udi_reader), - table_options.use_udi_as_primary_index); + table_options.index_mode >= + BlockBasedTableOptions::IndexMode::kCustomDefault); } else { s = Status::Corruption("Failed to create UDI reader for " + udi_name + " in file " + @@ -2725,8 +2730,8 @@ Status BlockBasedTable::Prefetch(const ReadOptions& read_options, const bool is_user_key = !rep_->index_key_includes_seq; if (end && ((!is_user_key && comparator.Compare(iiter->key(), *end) >= 0) || - (is_user_key && - user_comparator.Compare(iiter->key(), ExtractUserKey(*end)) >= 0))) { + (is_user_key && user_comparator.Compare(iiter->user_key(), + ExtractUserKey(*end)) >= 0))) { if (prefetching_boundary_page) { break; } @@ -2869,7 +2874,7 @@ BlockType BlockBasedTable::GetBlockTypeForMetaBlockByName( return BlockType::kIndex; } - if (meta_block_name.starts_with(kUserDefinedIndexPrefix)) { + if (meta_block_name.starts_with(kIndexFactoryMetaPrefix)) { return BlockType::kUserDefinedIndex; } diff --git a/table/block_based/builtin_index_factory.cc b/table/block_based/builtin_index_factory.cc new file mode 100644 index 000000000000..6304880e9e36 --- /dev/null +++ b/table/block_based/builtin_index_factory.cc @@ -0,0 +1,533 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
+ +#include "table/block_based/builtin_index_factory.h" + +#include +#include +#include + +#include "db/dbformat.h" +#include "rocksdb/comparator.h" +#include "rocksdb/index_factory.h" +#include "rocksdb/slice.h" +#include "rocksdb/status.h" +#include "table/block_based/index_builder.h" + +namespace ROCKSDB_NAMESPACE { + +// Wrapper around the internal IndexBuilder::PreparedIndexEntry, adapting it +// to the public IndexFactoryBuilder::PreparedAddEntry interface for parallel +// compression support. +struct BuiltinPreparedAddEntry : public IndexFactoryBuilder::PreparedAddEntry { + std::unique_ptr internal_entry; + explicit BuiltinPreparedAddEntry( + std::unique_ptr e) + : internal_entry(std::move(e)) {} +}; + +// ============================================================================ +// BuiltinIndexFactoryBuilder method definitions. +// +// The class is declared in builtin_index_factory.h. This file provides +// the implementations. The builder adapts internal IndexBuilder to the +// public IndexFactoryBuilder interface, translating user keys (public +// interface) → internal keys (IndexBuilder) on AddIndexEntry and +// PrepareAddEntry. 
+// ============================================================================ + +BuiltinIndexFactoryBuilder::BuiltinIndexFactoryBuilder( + std::unique_ptr icmp, + const BlockBasedTableOptions* table_opts) + : icmp_(std::move(icmp)), table_opts_(table_opts) {} + +BuiltinIndexFactoryBuilder::~BuiltinIndexFactoryBuilder() = default; + +void BuiltinIndexFactoryBuilder::SetInternalBuilder( + std::unique_ptr builder) { + internal_builder_ = std::move(builder); +} + +const InternalKeyComparator* BuiltinIndexFactoryBuilder::GetComparator() const { + return icmp_.get(); +} + +const BlockBasedTableOptions& BuiltinIndexFactoryBuilder::GetTableOptions() + const { + return *table_opts_; +} + +void BuiltinIndexFactoryBuilder::ReconstructInternalKeys( + const Slice& last_user_key, const Slice* next_user_key, + const IndexEntryContext& ctx) { + last_internal_key_.clear(); + last_internal_key_.append(last_user_key.data(), last_user_key.size()); + PutFixed64(&last_internal_key_, ctx.last_key_tag); + + if (next_user_key != nullptr) { + next_internal_key_.clear(); + next_internal_key_.append(next_user_key->data(), next_user_key->size()); + PutFixed64(&next_internal_key_, ctx.first_key_tag); + } +} + +Slice BuiltinIndexFactoryBuilder::AddIndexEntry( + const Slice& last_key_in_current_block, + const Slice* first_key_in_next_block, const BlockHandle& block_handle, + std::string* separator_scratch, const IndexEntryContext& context) { + // Reconstruct internal keys from user keys + packed tags. + // The internal IndexBuilder expects full internal keys: + // [user_key | packed_seq_and_type (8 bytes)] + ReconstructInternalKeys(last_key_in_current_block, first_key_in_next_block, + context); + Slice last_ik(last_internal_key_); + + Slice next_ik; + const Slice* next_ik_ptr = nullptr; + if (first_key_in_next_block != nullptr) { + next_ik = Slice(next_internal_key_); + next_ik_ptr = &next_ik; + } + + // Convert the public BlockHandle to the internal BlockHandle. 
+ ROCKSDB_NAMESPACE::BlockHandle internal_handle(block_handle.offset, + block_handle.size); + + bool skip = skip_delta_encoding_; + skip_delta_encoding_ = false; // Reset after use + return internal_builder_->AddIndexEntry(last_ik, next_ik_ptr, internal_handle, + separator_scratch, skip); +} + +void BuiltinIndexFactoryBuilder::OnKeyAdded(const Slice& /*key*/, + ValueType /*type*/, + const Slice& /*value*/) { + // The public OnKeyAdded receives user keys. The internal + // ShortenedIndexBuilder::OnKeyAdded needs the full internal key + // (to record first_internal_key for kBinarySearchWithFirstKey). + // This no-op is intentional — the table builder calls + // OnKeyAddedInternal() separately with the full internal key. +} + +void BuiltinIndexFactoryBuilder::OnKeyAddedInternal( + const Slice& internal_key, const std::optional& value) { + internal_builder_->OnKeyAdded(internal_key, value); +} + +Status BuiltinIndexFactoryBuilder::Finish(Slice* index_contents) { + IndexBuilder::IndexBlocks index_blocks; + Status s = internal_builder_->Finish(&index_blocks); + if (!s.ok()) { + return s; + } + // Store the contents — the internal builder's memory backs this Slice. + *index_contents = index_blocks.index_block_contents; + return Status::OK(); +} + +uint64_t BuiltinIndexFactoryBuilder::EstimatedSize() const { + return internal_builder_->CurrentIndexSizeEstimate(); +} + +Status BuiltinIndexFactoryBuilder::FinishAndWrite(IndexBlockWriter* writer, + BlockHandle* final_handle, + bool compress) { + IndexBuilder::IndexBlocks index_blocks; + Status s = internal_builder_->Finish(&index_blocks); + if (!s.ok() && !s.IsIncomplete()) { + return s; + } + + // Write any auxiliary meta blocks (e.g., hash index prefix blocks). + // The writer callback registers them with the meta index builder. 
+ for (const auto& item : index_blocks.meta_blocks) { + BlockHandle meta_bh{0, 0}; + Status ws = writer->WriteBlock(item.second.second, &meta_bh, compress); + if (!ws.ok()) { + return ws; + } + writer->AddMetaBlock(item.first, meta_bh); + } + + // Write the first (or only) index block. + BlockHandle handle{0, 0}; + Status ws = + writer->WriteBlock(index_blocks.index_block_contents, &handle, compress); + if (!ws.ok()) { + return ws; + } + + // For partitioned indexes, the internal builder returns + // Status::Incomplete() to signal more partitions remain. Each + // subsequent Finish() call receives the handle of the previously + // written partition so it can build the top-level index. + while (s.IsIncomplete()) { + // Convert public BlockHandle to internal BlockHandle for Finish. + ROCKSDB_NAMESPACE::BlockHandle internal_handle(handle.offset, handle.size); + s = internal_builder_->Finish(&index_blocks, internal_handle); + if (!s.ok() && !s.IsIncomplete()) { + return s; + } + ws = writer->WriteBlock(index_blocks.index_block_contents, &handle, + compress); + if (!ws.ok()) { + return ws; + } + } + + *final_handle = {handle.offset, handle.size}; + return Status::OK(); +} + +bool BuiltinIndexFactoryBuilder::SupportsParallelAddEntry() const { + return true; +} + +std::unique_ptr +BuiltinIndexFactoryBuilder::CreatePreparedAddEntry() { + return std::make_unique( + internal_builder_->CreatePreparedIndexEntry()); +} + +void BuiltinIndexFactoryBuilder::PrepareAddEntry(const Slice& last_key, + const Slice* next_key, + const IndexEntryContext& ctx, + PreparedAddEntry* out) { + auto* entry = static_cast(out); + + // Reconstruct internal keys from user keys + packed tags. 
+ ReconstructInternalKeys(last_key, next_key, ctx); + + Slice next_ik; + const Slice* next_ik_ptr = nullptr; + if (next_key) { + next_ik = Slice(next_internal_key_); + next_ik_ptr = &next_ik; + } + + internal_builder_->PrepareIndexEntry(Slice(last_internal_key_), next_ik_ptr, + entry->internal_entry.get()); +} + +void BuiltinIndexFactoryBuilder::FinishAddEntry( + const BlockHandle& handle, PreparedAddEntry* entry, + std::string* /*separator_scratch*/, bool skip_delta_encoding) { + auto* builtin_entry = static_cast(entry); + ROCKSDB_NAMESPACE::BlockHandle internal_handle(handle.offset, handle.size); + internal_builder_->FinishIndexEntry(internal_handle, + builtin_entry->internal_entry.get(), + skip_delta_encoding); +} + +bool BuiltinIndexFactoryBuilder::separator_is_key_plus_seq() const { + // The internal IndexBuilder::separator_is_key_plus_seq() is non-const + // but the underlying implementations use RelaxedAtomic loads, which + // are safe to call without mutation. const_cast is appropriate here. + return const_cast(internal_builder_.get()) + ->separator_is_key_plus_seq(); +} + +uint64_t BuiltinIndexFactoryBuilder::NumUniformIndexBlocks() const { + return internal_builder_->NumUniformIndexBlocks(); +} + +size_t BuiltinIndexFactoryBuilder::IndexSize() const { + return internal_builder_->IndexSize(); +} + +uint64_t BuiltinIndexFactoryBuilder::NumPartitions() const { + if (!IsPartitioned()) { + return 0; + } + return static_cast(internal_builder_.get()) + ->NumPartitions(); +} + +uint64_t BuiltinIndexFactoryBuilder::TopLevelIndexSize(uint64_t offset) const { + if (!IsPartitioned()) { + return 0; + } + return static_cast(internal_builder_.get()) + ->TopLevelIndexSize(offset); +} + +PartitionCoordinator* BuiltinIndexFactoryBuilder::GetPartitionCoordinator() { + if (!IsPartitioned()) { + return nullptr; + } + // PartitionedIndexBuilder implements PartitionCoordinator via + // multiple inheritance. 
static_cast is safe because IsPartitioned() + // checks the table_options index_type. + return static_cast(internal_builder_.get()); +} + +bool BuiltinIndexFactoryBuilder::IsPartitioned() const { + return table_opts_ && table_opts_->index_type == + BlockBasedTableOptions::kTwoLevelIndexSearch; +} + +IndexBuilder* BuiltinIndexFactoryBuilder::GetInternalBuilder() { + return internal_builder_.get(); +} + +Slice BuiltinIndexFactoryBuilder::AddIndexEntryDirect( + const Slice& last_internal_key, const Slice* first_internal_key_next, + const ::ROCKSDB_NAMESPACE::BlockHandle& handle, + std::string* separator_scratch, bool skip_delta_encoding) { + bool skip = skip_delta_encoding || skip_delta_encoding_; + skip_delta_encoding_ = false; // Reset after use + return internal_builder_->AddIndexEntry(last_internal_key, + first_internal_key_next, handle, + separator_scratch, skip); +} + +// ============================================================================ +// Factory implementations +// ============================================================================ + +// --- BinarySearchIndexFactory --- + +static const char* const kBinarySearchName = + "rocksdb.builtin.BinarySearchIndex"; +static const char* const kBinarySearchWithFirstKeyName = + "rocksdb.builtin.BinarySearchWithFirstKeyIndex"; + +BinarySearchIndexFactory::BinarySearchIndexFactory(bool with_first_key) + : with_first_key_(with_first_key) {} + +BinarySearchIndexFactory::BinarySearchIndexFactory( + bool with_first_key, const BuiltinIndexFactoryConfig& config) + : with_first_key_(with_first_key), has_config_(true), config_(config) {} + +const char* BinarySearchIndexFactory::Name() const { + return with_first_key_ ? 
kBinarySearchWithFirstKeyName : kBinarySearchName; +} + +const char* BinarySearchIndexFactory::kClassName() { return kBinarySearchName; } + +const char* BinarySearchIndexFactory::kClassNameWithFirstKey() { + return kBinarySearchWithFirstKeyName; +} + +Status BinarySearchIndexFactory::NewBuilder( + const IndexFactoryOptions& options, + std::unique_ptr& builder) const { + if (options.comparator == nullptr) { + return Status::InvalidArgument( + "BinarySearchIndexFactory::NewBuilder requires a comparator"); + } + + if (has_config_) { + // Full construction path: use stored internal params. + // The factory was created by the table builder with all internal + // configuration needed for proper index construction. + auto icmp = std::make_unique( + config_.internal_comparator->user_comparator()); + auto index_type = with_first_key_ + ? BlockBasedTableOptions::kBinarySearchWithFirstKey + : BlockBasedTableOptions::kBinarySearch; + // Pass the pointer to the Rep's table_options — the Rep outlives the + // builder, so the pointer remains valid. + auto wrapper = std::make_unique( + std::move(icmp), config_.table_options); + std::unique_ptr internal(IndexBuilder::CreateIndexBuilder( + index_type, wrapper->GetComparator(), config_.internal_prefix_transform, + config_.use_delta_encoding_for_index_values, wrapper->GetTableOptions(), + config_.ts_sz, config_.persist_user_defined_timestamps, config_.stats)); + wrapper->SetInternalBuilder(std::move(internal)); + builder = std::move(wrapper); + return Status::OK(); + } + + // Lightweight construction path: standalone / test usage with minimal + // default configuration. Uses only the comparator from options. + // This path still needs a locally-owned BlockBasedTableOptions because + // there is no Rep to borrow from. We allocate one on the heap and + // store it in a static thread_local or embed it. However, for test + // usage the simplest approach is to use a static default instance. 
+ auto icmp = std::make_unique(options.comparator); + auto index_type = with_first_key_ + ? BlockBasedTableOptions::kBinarySearchWithFirstKey + : BlockBasedTableOptions::kBinarySearch; + static const BlockBasedTableOptions kDefaultBinarySearchOpts = []() { + BlockBasedTableOptions opts; + // index_type is set per-query below via CreateIndexBuilder, so the + // static default doesn't need it. The wrapper's GetTableOptions() + // returns this, and the only consumer that reads index_type from it + // is the internal IndexBuilder which receives it as a separate param. + return opts; + }(); + // Create the wrapper first so that table_opts_ is stable. + // The internal builder references it by address. + auto wrapper = std::make_unique( + std::move(icmp), &kDefaultBinarySearchOpts); + std::unique_ptr internal(IndexBuilder::CreateIndexBuilder( + index_type, wrapper->GetComparator(), + /*int_key_slice_transform=*/nullptr, + /*use_value_delta_encoding=*/true, wrapper->GetTableOptions(), + /*ts_sz=*/0, /*persist_user_defined_timestamps=*/true)); + wrapper->SetInternalBuilder(std::move(internal)); + builder = std::move(wrapper); + return Status::OK(); +} + +Status BinarySearchIndexFactory::NewReader( + const IndexFactoryOptions& /*options*/, Slice& /*index_contents*/, + std::unique_ptr& /*reader*/) const { + // The built-in reader is created through BlockBasedTable::CreateIndexReader + // which uses the internal BinarySearchIndexReader::Create() path directly. + // This method exists to satisfy the IndexFactory interface but is not + // called for built-in indexes — they use the internal reader path. + return Status::NotSupported( + "BinarySearchIndexFactory::NewReader is not used directly. 
" + "The built-in reader is created through " + "BlockBasedTable::CreateIndexReader."); +} + +// --- HashIndexFactory --- + +static const char* const kHashIndexName = "rocksdb.builtin.HashIndex"; + +HashIndexFactory::HashIndexFactory(const BuiltinIndexFactoryConfig& config) + : has_config_(true), config_(config) {} + +const char* HashIndexFactory::Name() const { return kHashIndexName; } +const char* HashIndexFactory::kClassName() { return kHashIndexName; } + +// NOTE: OnKeyAdded is not forwarded to the internal HashIndexBuilder, so +// hash prefix metadata is not built through the public OnKeyAdded path. +// However, the FinishAndWrite protocol correctly writes and registers +// hash prefix meta blocks (prefix block and prefix metadata block) via +// the IndexBlockWriter callback. +Status HashIndexFactory::NewBuilder( + const IndexFactoryOptions& options, + std::unique_ptr& builder) const { + if (options.comparator == nullptr) { + return Status::InvalidArgument( + "HashIndexFactory::NewBuilder requires a comparator"); + } + + if (has_config_) { + // Full construction path with internal params. + auto icmp = std::make_unique( + config_.internal_comparator->user_comparator()); + // Pass the pointer to the Rep's table_options — the Rep outlives the + // builder, so the pointer remains valid. + auto wrapper = std::make_unique( + std::move(icmp), config_.table_options); + std::unique_ptr internal(IndexBuilder::CreateIndexBuilder( + BlockBasedTableOptions::kHashSearch, wrapper->GetComparator(), + config_.internal_prefix_transform, + config_.use_delta_encoding_for_index_values, wrapper->GetTableOptions(), + config_.ts_sz, config_.persist_user_defined_timestamps, config_.stats)); + wrapper->SetInternalBuilder(std::move(internal)); + builder = std::move(wrapper); + return Status::OK(); + } + + // Lightweight construction path for standalone / test usage. 
+  auto icmp = std::make_unique<InternalKeyComparator>(options.comparator);
+  static const BlockBasedTableOptions kDefaultHashOpts = []() {
+    BlockBasedTableOptions opts;
+    return opts;
+  }();
+  // Create the wrapper first so that table_opts_ is stable.
+  // The internal builder references it by address.
+  auto wrapper = std::make_unique<BuiltinIndexFactoryBuilder>(
+      std::move(icmp), &kDefaultHashOpts);
+  std::unique_ptr<IndexBuilder> internal(IndexBuilder::CreateIndexBuilder(
+      BlockBasedTableOptions::kHashSearch, wrapper->GetComparator(),
+      /*int_key_slice_transform=*/nullptr,
+      /*use_value_delta_encoding=*/true, wrapper->GetTableOptions(),
+      /*ts_sz=*/0, /*persist_user_defined_timestamps=*/true));
+  wrapper->SetInternalBuilder(std::move(internal));
+  builder = std::move(wrapper);
+  return Status::OK();
+}
+
+Status HashIndexFactory::NewReader(
+    const IndexFactoryOptions& /*options*/, Slice& /*index_contents*/,
+    // NOTE(review): reader's template argument was lost in extraction;
+    // name inferred from the IndexFactory interface — confirm.
+    std::unique_ptr<IndexFactoryReader>& /*reader*/) const {
+  return Status::NotSupported(
+      "HashIndexFactory::NewReader is not used directly.");
+}
+
+// --- PartitionedIndexFactory ---
+
+static const char* const kPartitionedIndexName =
+    "rocksdb.builtin.PartitionedIndex";
+
+PartitionedIndexFactory::PartitionedIndexFactory(
+    const BuiltinIndexFactoryConfig& config)
+    : has_config_(true), config_(config) {}
+
+const char* PartitionedIndexFactory::Name() const {
+  return kPartitionedIndexName;
+}
+const char* PartitionedIndexFactory::kClassName() {
+  return kPartitionedIndexName;
+}
+
+// The partitioned index uses a multi-call Finish protocol internally
+// (returning Status::Incomplete() for each partition). The single-call
+// Finish(Slice*) only returns the first partition block. For full
+// partitioned index construction, use FinishAndWrite() which drives
+// the multi-call protocol through the IndexBlockWriter callback.
+Status PartitionedIndexFactory::NewBuilder(
+    const IndexFactoryOptions& options,
+    std::unique_ptr<IndexFactoryBuilder>& builder) const {
+  if (options.comparator == nullptr) {
+    return Status::InvalidArgument(
+        "PartitionedIndexFactory::NewBuilder requires a comparator");
+  }
+
+  if (has_config_) {
+    // Full construction path with internal params.
+    auto icmp = std::make_unique<InternalKeyComparator>(
+        config_.internal_comparator->user_comparator());
+    // Pass the pointer to the Rep's table_options — the Rep outlives the
+    // builder, so the pointer remains valid.
+    auto wrapper = std::make_unique<BuiltinIndexFactoryBuilder>(
+        std::move(icmp), config_.table_options);
+    std::unique_ptr<IndexBuilder> internal(
+        PartitionedIndexBuilder::CreateIndexBuilder(
+            wrapper->GetComparator(),
+            config_.use_delta_encoding_for_index_values,
+            wrapper->GetTableOptions(), config_.ts_sz,
+            config_.persist_user_defined_timestamps, config_.stats));
+    wrapper->SetInternalBuilder(std::move(internal));
+    builder = std::move(wrapper);
+    return Status::OK();
+  }
+
+  // Lightweight construction path for standalone / test usage.
+  auto icmp = std::make_unique<InternalKeyComparator>(options.comparator);
+  static const BlockBasedTableOptions kDefaultPartitionedOpts = []() {
+    BlockBasedTableOptions opts;
+    return opts;
+  }();
+  // Create the wrapper first so that table_opts_ is stable.
+  // PartitionedIndexBuilder stores a const reference to
+  // table_opts_, so the member must be constructed before the builder.
+ auto wrapper = std::make_unique( + std::move(icmp), &kDefaultPartitionedOpts); + std::unique_ptr internal( + PartitionedIndexBuilder::CreateIndexBuilder( + wrapper->GetComparator(), /*use_value_delta_encoding=*/true, + wrapper->GetTableOptions(), + /*ts_sz=*/0, /*persist_user_defined_timestamps=*/true)); + wrapper->SetInternalBuilder(std::move(internal)); + builder = std::move(wrapper); + return Status::OK(); +} + +Status PartitionedIndexFactory::NewReader( + const IndexFactoryOptions& /*options*/, Slice& /*index_contents*/, + std::unique_ptr& /*reader*/) const { + return Status::NotSupported( + "PartitionedIndexFactory::NewReader is not used directly."); +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/table/block_based/builtin_index_factory.h b/table/block_based/builtin_index_factory.h new file mode 100644 index 000000000000..3b517b7f721d --- /dev/null +++ b/table/block_based/builtin_index_factory.h @@ -0,0 +1,267 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +#include +#include +#include + +#include "rocksdb/index_factory.h" +#include "rocksdb/table.h" + +namespace ROCKSDB_NAMESPACE { + +class InternalKeyComparator; +class InternalKeySliceTransform; +class Statistics; + +// ============================================================================ +// Built-in index factories. +// +// These wrap RocksDB's internal index builder/reader infrastructure behind +// the public IndexFactory interface. They are proper IndexFactory subclasses +// — at the same abstraction level as any custom IndexFactory implementation. +// +// The internal IndexBuilder/IndexReader classes remain as implementation +// details. 
The built-in factories delegate to them, translating between +// the public interface (user keys, simple BlockHandles) and the internal +// interface (internal keys, IndexValue with first_internal_key). +// +// Unlike custom IndexFactory implementations, these built-in factories store +// internal construction parameters (comparator, prefix transform, table +// options, etc.) that are set at factory creation time and used by +// NewBuilder(). This allows the table builder to create the built-in index +// through the same factory interface used for custom indexes. +// ============================================================================ + +// --------------------------------------------------------------------------- +// Internal construction parameters for built-in index factories. +// +// These are set at factory creation time (per-SST in the Rep constructor) +// and used by NewBuilder() to construct the internal IndexBuilder with +// the correct configuration. Custom IndexFactory implementations do NOT +// need these — they use only IndexFactoryOptions::comparator. +// --------------------------------------------------------------------------- +struct BuiltinIndexFactoryConfig { + const InternalKeyComparator* internal_comparator = nullptr; + const InternalKeySliceTransform* internal_prefix_transform = nullptr; + bool use_delta_encoding_for_index_values = true; + // Pointer to the Rep's table_options (which outlives the builder). + // Avoids copying the large BlockBasedTableOptions struct per-SST. + const BlockBasedTableOptions* table_options = nullptr; + size_t ts_sz = 0; + bool persist_user_defined_timestamps = true; + Statistics* stats = nullptr; +}; + +// --------------------------------------------------------------------------- +// BinarySearchIndexFactory: the default index for BlockBasedTable. +// +// Wraps ShortenedIndexBuilder (for building) and BinarySearchIndexReader +// (for reading). 
Supports kBinarySearch and kBinarySearchWithFirstKey +// index types. +// +// This factory is implicitly used when no custom IndexFactory is configured. +// It can also be explicitly set as a secondary index alongside a custom +// primary index. +// --------------------------------------------------------------------------- +class BinarySearchIndexFactory : public IndexFactory { + public: + // Lightweight constructor for standalone / test usage. + // NewBuilder will use minimal default configuration. + // @param with_first_key If true, creates kBinarySearchWithFirstKey + // indexes that store the first internal key per + // block for optimized point lookups. + explicit BinarySearchIndexFactory(bool with_first_key = false); + + // Full constructor for use by the table builder. + // The factory stores all internal params needed by NewBuilder(). + BinarySearchIndexFactory(bool with_first_key, + const BuiltinIndexFactoryConfig& config); + + ~BinarySearchIndexFactory() override = default; + + const char* Name() const override; + static const char* kClassName(); + static const char* kClassNameWithFirstKey(); + + Status NewBuilder( + const IndexFactoryOptions& options, + std::unique_ptr& builder) const override; + + Status NewReader(const IndexFactoryOptions& options, Slice& index_contents, + std::unique_ptr& reader) const override; + + private: + bool with_first_key_; + bool has_config_ = false; + BuiltinIndexFactoryConfig config_; +}; + +// --------------------------------------------------------------------------- +// HashIndexFactory: hash-based prefix index for BlockBasedTable. +// +// Wraps HashIndexBuilder (for building) and HashIndexReader (for reading). +// Requires a prefix_extractor to be configured. +// --------------------------------------------------------------------------- +class HashIndexFactory : public IndexFactory { + public: + // Lightweight constructor for standalone / test usage. 
+  HashIndexFactory() = default;
+
+  // Full constructor for use by the table builder.
+  explicit HashIndexFactory(const BuiltinIndexFactoryConfig& config);
+
+  ~HashIndexFactory() override = default;
+
+  const char* Name() const override;
+  static const char* kClassName();
+
+  Status NewBuilder(
+      const IndexFactoryOptions& options,
+      std::unique_ptr<IndexFactoryBuilder>& builder) const override;
+
+  // NOTE(review): reader's template argument was lost in extraction;
+  // name inferred from the IndexFactory interface — confirm.
+  Status NewReader(const IndexFactoryOptions& options, Slice& index_contents,
+                   std::unique_ptr<IndexFactoryReader>& reader) const override;
+
+ private:
+  bool has_config_ = false;
+  BuiltinIndexFactoryConfig config_;
+};
+
+// ---------------------------------------------------------------------------
+// PartitionedIndexFactory: two-level partitioned index for BlockBasedTable.
+//
+// Wraps PartitionedIndexBuilder (for building) and PartitionIndexReader
+// (for reading). Supports partitioned filters via the PartitionCoordinator
+// interface. The builder implements the full FinishAndWrite protocol for
+// multi-partition writes and exposes GetPartitionCoordinator() for
+// filter↔index partition alignment.
+// ---------------------------------------------------------------------------
+class PartitionedIndexFactory : public IndexFactory {
+ public:
+  // Lightweight constructor for standalone / test usage.
+  PartitionedIndexFactory() = default;
+
+  // Full constructor for use by the table builder.
+  explicit PartitionedIndexFactory(const BuiltinIndexFactoryConfig& config);
+
+  ~PartitionedIndexFactory() override = default;
+
+  const char* Name() const override;
+  static const char* kClassName();
+
+  Status NewBuilder(
+      const IndexFactoryOptions& options,
+      std::unique_ptr<IndexFactoryBuilder>& builder) const override;
+
+  // NOTE(review): reader's template argument was lost in extraction;
+  // name inferred from the IndexFactory interface — confirm.
+  Status NewReader(const IndexFactoryOptions& options, Slice& index_contents,
+                   std::unique_ptr<IndexFactoryReader>& reader) const override;
+
+ private:
+  bool has_config_ = false;
+  BuiltinIndexFactoryConfig config_;
+};
+
+// ---------------------------------------------------------------------------
+// BuiltinIndexFactoryBuilder: adapts the internal IndexBuilder behind the
+// public IndexFactoryBuilder interface. Declared here so the table builder
+// can access methods like OnKeyAddedInternal() and AddIndexEntryDirect()
+// for the fast path. Implementation is in builtin_index_factory.cc.
+// ---------------------------------------------------------------------------
+class BlockHandle;  // Internal BlockHandle from table/format.h
+class IndexBuilder;
+
+class BuiltinIndexFactoryBuilder : public IndexFactoryBuilder {
+ public:
+  BuiltinIndexFactoryBuilder(std::unique_ptr<InternalKeyComparator> icmp,
+                             const BlockBasedTableOptions* table_opts);
+  ~BuiltinIndexFactoryBuilder() override;
+
+  void SetInternalBuilder(std::unique_ptr<IndexBuilder> builder);
+
+  const InternalKeyComparator* GetComparator() const;
+  const BlockBasedTableOptions& GetTableOptions() const;
+
+  // Forward OnKeyAdded to the internal builder with the full internal key.
+  // Called by the table builder which has the internal key available.
+  // Needed for kBinarySearchWithFirstKey to track first_internal_key.
+ void OnKeyAddedInternal(const Slice& internal_key, + const std::optional& value); + + // --- IndexFactoryBuilder overrides --- + Slice AddIndexEntry(const Slice& last_key_in_current_block, + const Slice* first_key_in_next_block, + const BlockHandle& block_handle, + std::string* separator_scratch, + const IndexEntryContext& context) override; + + void OnKeyAdded(const Slice& key, ValueType type, + const Slice& value) override; + + Status Finish(Slice* index_contents) override; + uint64_t EstimatedSize() const override; + + Status FinishAndWrite(IndexBlockWriter* writer, BlockHandle* final_handle, + bool compress) override; + + bool SupportsParallelAddEntry() const override; + std::unique_ptr CreatePreparedAddEntry() override; + void PrepareAddEntry(const Slice& last_key, const Slice* next_key, + const IndexEntryContext& ctx, + PreparedAddEntry* out) override; + void FinishAddEntry(const BlockHandle& handle, PreparedAddEntry* entry, + std::string* separator_scratch, + bool skip_delta_encoding) override; + + bool separator_is_key_plus_seq() const override; + uint64_t NumUniformIndexBlocks() const override; + size_t IndexSize() const override; + uint64_t NumPartitions() const override; + uint64_t TopLevelIndexSize(uint64_t offset) const override; + PartitionCoordinator* GetPartitionCoordinator() override; + + IndexBuilder* GetInternalBuilder(); + + // Set whether the next AddIndexEntry should skip delta encoding. + // This is called by the table builder when block alignment padding + // causes non-contiguous block offsets, which breaks the delta + // encoding assumption. Must be called before AddIndexEntry. + void SetSkipDeltaEncoding(bool skip) { skip_delta_encoding_ = skip; } + + // Fast path for when no user-key translation is needed. Passes internal + // keys directly to the underlying IndexBuilder, avoiding the decompose- + // recompose overhead of the public AddIndexEntry (which converts user + // keys back to internal keys). 
Used by ForwardAddIndexEntryToAll when + // there are no custom indexes. + Slice AddIndexEntryDirect(const Slice& last_internal_key, + const Slice* first_internal_key_next, + const ::ROCKSDB_NAMESPACE::BlockHandle& handle, + std::string* separator_scratch, + bool skip_delta_encoding); + + private: + std::unique_ptr icmp_; + const BlockBasedTableOptions* table_opts_; + std::unique_ptr internal_builder_; + std::string last_internal_key_; + std::string next_internal_key_; + bool skip_delta_encoding_ = false; + + // Reconstruct full internal keys from user keys and packed tags. + // Writes into last_internal_key_ and (if next_user_key != nullptr) + // next_internal_key_ member buffers. + void ReconstructInternalKeys(const Slice& last_user_key, + const Slice* next_user_key, + const IndexEntryContext& ctx); + + // Returns true when the internal builder is a PartitionedIndexBuilder, + // determined by checking table_opts_->index_type. Used to safely + // static_cast for partition-specific methods without requiring RTTI. + bool IsPartitioned() const; +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/table/block_based/builtin_index_factory_test.cc b/table/block_based/builtin_index_factory_test.cc new file mode 100644 index 000000000000..095d4c31f71b --- /dev/null +++ b/table/block_based/builtin_index_factory_test.cc @@ -0,0 +1,440 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
+ +#include "table/block_based/builtin_index_factory.h" + +#include +#include +#include +#include +#include +#include + +#include "db/dbformat.h" +#include "rocksdb/comparator.h" +#include "rocksdb/index_factory.h" +#include "rocksdb/slice.h" +#include "rocksdb/user_defined_index.h" +#include "table/block_based/index_builder.h" +#include "test_util/testharness.h" + +namespace ROCKSDB_NAMESPACE { + +// ============================================================================ +// Backward compatibility aliases test +// ============================================================================ + +TEST(BackwardCompatTest, UserDefinedIndexAliasesCompile) { + // Verify backward-compatible type aliases in user_defined_index.h + static_assert(std::is_same_v); + static_assert(std::is_same_v); + static_assert(std::is_same_v); + static_assert(std::is_same_v); + static_assert(std::is_same_v); + ASSERT_STREQ(kUserDefinedIndexPrefix, kIndexFactoryMetaPrefix); +} + +// Helper to build IndexFactoryOptions with BytewiseComparator. +static IndexFactoryOptions MakeOptions() { + IndexFactoryOptions opts; + opts.comparator = BytewiseComparator(); + return opts; +} + +// The internal IndexBuilder uses delta encoding for block handles and expects +// consecutive blocks laid out as: offset_i = offset_{i-1} + size_{i-1} + +// kBlockTrailerSize (5 bytes: 1-byte compression type + 32-bit checksum). +static constexpr uint64_t kBlockTrailerSize = 5; +static constexpr uint64_t kBlockSize = 100; + +// Helper to add a few index entries to a builder. Simulates block boundaries +// for keys "aaa", "bbb", "ccc" at consecutive offsets. 
+static void AddSampleEntries(IndexFactoryBuilder* builder) {
+  std::string scratch;
+  IndexFactoryBuilder::IndexEntryContext ctx;
+  ctx.last_key_tag = 0;
+  ctx.first_key_tag = 0;
+
+  Slice key_a("aaa");
+  Slice key_b("bbb");
+  Slice key_c("ccc");
+  IndexFactoryBuilder::BlockHandle bh{0, 0};
+
+  // Block 0: [0, 100)
+  bh = {0, kBlockSize};
+  builder->AddIndexEntry(key_a, &key_b, bh, &scratch, ctx);
+
+  // Block 1: starts at 0 + 100 + 5 = 105
+  bh = {kBlockSize + kBlockTrailerSize, kBlockSize};
+  builder->AddIndexEntry(key_b, &key_c, bh, &scratch, ctx);
+
+  // Block 2 (last): starts at 105 + 100 + 5 = 210
+  bh = {2 * (kBlockSize + kBlockTrailerSize), kBlockSize};
+  builder->AddIndexEntry(key_c, nullptr, bh, &scratch, ctx);
+}
+
+// ============================================================================
+// BinarySearchIndexFactory tests
+// ============================================================================
+
+class BinarySearchIndexFactoryTest : public ::testing::Test {};
+
+TEST_F(BinarySearchIndexFactoryTest, Name) {
+  BinarySearchIndexFactory factory(/*with_first_key=*/false);
+  ASSERT_STREQ(factory.Name(), "rocksdb.builtin.BinarySearchIndex");
+}
+
+TEST_F(BinarySearchIndexFactoryTest, NameWithFirstKey) {
+  BinarySearchIndexFactory factory(/*with_first_key=*/true);
+  ASSERT_STREQ(factory.Name(), "rocksdb.builtin.BinarySearchWithFirstKeyIndex");
+}
+
+TEST_F(BinarySearchIndexFactoryTest, KClassName) {
+  ASSERT_STREQ(BinarySearchIndexFactory::kClassName(),
+               "rocksdb.builtin.BinarySearchIndex");
+}
+
+TEST_F(BinarySearchIndexFactoryTest, KClassNameWithFirstKey) {
+  ASSERT_STREQ(BinarySearchIndexFactory::kClassNameWithFirstKey(),
+               "rocksdb.builtin.BinarySearchWithFirstKeyIndex");
+}
+
+TEST_F(BinarySearchIndexFactoryTest, NewBuilderSucceeds) {
+  BinarySearchIndexFactory factory(/*with_first_key=*/false);
+  auto opts = MakeOptions();
+  std::unique_ptr<IndexFactoryBuilder> builder;
+  ASSERT_OK(factory.NewBuilder(opts, builder));
+  ASSERT_NE(builder, nullptr);
+}
+
+TEST_F(BinarySearchIndexFactoryTest, NewBuilderRequiresComparator) { + BinarySearchIndexFactory factory(/*with_first_key=*/false); + IndexFactoryOptions opts; // comparator is nullptr + std::unique_ptr builder; + Status s = factory.NewBuilder(opts, builder); + ASSERT_TRUE(s.IsInvalidArgument()); +} + +TEST_F(BinarySearchIndexFactoryTest, BuilderAddAndFinish) { + BinarySearchIndexFactory factory(/*with_first_key=*/false); + auto opts = MakeOptions(); + std::unique_ptr builder; + ASSERT_OK(factory.NewBuilder(opts, builder)); + + AddSampleEntries(builder.get()); + + ASSERT_GT(builder->EstimatedSize(), static_cast(0)); + + Slice contents; + ASSERT_OK(builder->Finish(&contents)); + ASSERT_GT(contents.size(), static_cast(0)); +} + +TEST_F(BinarySearchIndexFactoryTest, NewBuilderWithFirstKeySucceeds) { + BinarySearchIndexFactory factory(/*with_first_key=*/true); + auto opts = MakeOptions(); + std::unique_ptr builder; + ASSERT_OK(factory.NewBuilder(opts, builder)); + ASSERT_NE(builder, nullptr); +} + +TEST_F(BinarySearchIndexFactoryTest, NewReaderReturnsNotSupported) { + BinarySearchIndexFactory factory(/*with_first_key=*/false); + auto opts = MakeOptions(); + Slice dummy_contents("dummy"); + std::unique_ptr reader; + Status s = factory.NewReader(opts, dummy_contents, reader); + ASSERT_TRUE(s.IsNotSupported()); + ASSERT_EQ(reader, nullptr); +} + +// ============================================================================ +// HashIndexFactory tests +// ============================================================================ + +class HashIndexFactoryTest : public ::testing::Test {}; + +TEST_F(HashIndexFactoryTest, Name) { + HashIndexFactory factory; + ASSERT_STREQ(factory.Name(), "rocksdb.builtin.HashIndex"); +} + +TEST_F(HashIndexFactoryTest, KClassName) { + ASSERT_STREQ(HashIndexFactory::kClassName(), "rocksdb.builtin.HashIndex"); +} + +TEST_F(HashIndexFactoryTest, NewBuilderSucceeds) { + HashIndexFactory factory; + auto opts = MakeOptions(); + std::unique_ptr 
builder; + ASSERT_OK(factory.NewBuilder(opts, builder)); + ASSERT_NE(builder, nullptr); +} + +TEST_F(HashIndexFactoryTest, NewBuilderRequiresComparator) { + HashIndexFactory factory; + IndexFactoryOptions opts; // comparator is nullptr + std::unique_ptr builder; + Status s = factory.NewBuilder(opts, builder); + ASSERT_TRUE(s.IsInvalidArgument()); +} + +TEST_F(HashIndexFactoryTest, BuilderAddAndFinish) { + HashIndexFactory factory; + auto opts = MakeOptions(); + std::unique_ptr builder; + ASSERT_OK(factory.NewBuilder(opts, builder)); + + AddSampleEntries(builder.get()); + + // HashIndexBuilder::CurrentIndexSizeEstimate() always returns 0 by design. + // The hash builder tracks size differently from the binary search builder. + + Slice contents; + ASSERT_OK(builder->Finish(&contents)); + ASSERT_GT(contents.size(), static_cast(0)); +} + +TEST_F(HashIndexFactoryTest, NewReaderReturnsNotSupported) { + HashIndexFactory factory; + auto opts = MakeOptions(); + Slice dummy_contents("dummy"); + std::unique_ptr reader; + Status s = factory.NewReader(opts, dummy_contents, reader); + ASSERT_TRUE(s.IsNotSupported()); + ASSERT_EQ(reader, nullptr); +} + +// ============================================================================ +// PartitionedIndexFactory tests +// ============================================================================ + +class PartitionedIndexFactoryTest : public ::testing::Test {}; + +TEST_F(PartitionedIndexFactoryTest, Name) { + PartitionedIndexFactory factory; + ASSERT_STREQ(factory.Name(), "rocksdb.builtin.PartitionedIndex"); +} + +TEST_F(PartitionedIndexFactoryTest, KClassName) { + ASSERT_STREQ(PartitionedIndexFactory::kClassName(), + "rocksdb.builtin.PartitionedIndex"); +} + +TEST_F(PartitionedIndexFactoryTest, NewBuilderSucceeds) { + PartitionedIndexFactory factory; + auto opts = MakeOptions(); + std::unique_ptr builder; + ASSERT_OK(factory.NewBuilder(opts, builder)); + ASSERT_NE(builder, nullptr); +} + +TEST_F(PartitionedIndexFactoryTest, 
NewBuilderRequiresComparator) { + PartitionedIndexFactory factory; + IndexFactoryOptions opts; // comparator is nullptr + std::unique_ptr<IndexFactoryBuilder> builder; + Status s = factory.NewBuilder(opts, builder); + ASSERT_TRUE(s.IsInvalidArgument()); +} + +TEST_F(PartitionedIndexFactoryTest, BuilderAddEntries) { + PartitionedIndexFactory factory; + auto opts = MakeOptions(); + std::unique_ptr<IndexFactoryBuilder> builder; + ASSERT_OK(factory.NewBuilder(opts, builder)); + + // AddIndexEntry should succeed without crashing. + AddSampleEntries(builder.get()); + + // EstimatedSize should be non-zero after adding entries. + ASSERT_GT(builder->EstimatedSize(), static_cast<size_t>(0)); + + // Note: PartitionedIndexBuilder::Finish() requires a multi-step protocol + // with partition block handles provided by the table builder. Testing the + // full Finish flow requires integration with BlockBasedTableBuilder and is + // covered by higher-level tests (e.g., table_test). +} + +TEST_F(PartitionedIndexFactoryTest, NewReaderReturnsNotSupported) { + PartitionedIndexFactory factory; + auto opts = MakeOptions(); + Slice dummy_contents("dummy"); + std::unique_ptr<IndexFactoryReader> reader; + Status s = factory.NewReader(opts, dummy_contents, reader); + ASSERT_TRUE(s.IsNotSupported()); + ASSERT_EQ(reader, nullptr); +} + +// ============================================================================ +// InternalKeyReconstruction test: verify that BuiltinIndexFactoryBuilder's +// AddIndexEntry (which reconstructs internal keys from user keys + tags) +// produces the same output as calling the raw ShortenedIndexBuilder directly +// with pre-built internal keys. +// ============================================================================ + +TEST_F(BinarySearchIndexFactoryTest, InternalKeyReconstruction) { + // Create factory with full config so we exercise the has_config_ path. 
+ BuiltinIndexFactoryConfig config; + InternalKeyComparator icmp(BytewiseComparator()); + config.internal_comparator = &icmp; + config.use_delta_encoding_for_index_values = true; + BlockBasedTableOptions table_opts; + config.table_options = &table_opts; + + BinarySearchIndexFactory factory(/*with_first_key=*/false, config); + IndexFactoryOptions opts; + opts.comparator = BytewiseComparator(); + std::unique_ptr<IndexFactoryBuilder> builder; + ASSERT_OK(factory.NewBuilder(opts, builder)); + + // Add entries with known user keys and tags via the public interface. + IndexFactoryBuilder::BlockHandle h1{0, kBlockSize}; + IndexFactoryBuilder::BlockHandle h2{kBlockSize + kBlockTrailerSize, + kBlockSize}; + IndexFactoryBuilder::IndexEntryContext ctx1; + ctx1.last_key_tag = PackSequenceAndType(100, kTypeValue); + ctx1.first_key_tag = PackSequenceAndType(50, kTypeValue); + IndexFactoryBuilder::IndexEntryContext ctx2; + ctx2.last_key_tag = PackSequenceAndType(50, kTypeValue); + ctx2.first_key_tag = 0; + + std::string scratch; + Slice next1("bbb"); + builder->AddIndexEntry(Slice("aaa"), &next1, h1, &scratch, ctx1); + builder->AddIndexEntry(Slice("bbb"), nullptr, h2, &scratch, ctx2); + + // Verify Finish produces valid output. + Slice contents; + ASSERT_OK(builder->Finish(&contents)); + ASSERT_GT(contents.size(), static_cast<size_t>(0)); + + // Also create a raw ShortenedIndexBuilder with the same parameters and + // pass pre-built internal keys. The outputs should be identical. 
+ std::unique_ptr<IndexBuilder> raw_builder(IndexBuilder::CreateIndexBuilder( + BlockBasedTableOptions::kBinarySearch, &icmp, + /*int_key_slice_transform=*/nullptr, + /*use_value_delta_encoding=*/true, table_opts, + /*ts_sz=*/0, /*persist_user_defined_timestamps=*/true)); + + // Construct internal keys: user_key + PutFixed64(PackSequenceAndType(...)) + std::string ik1; + ik1.append("aaa"); + PutFixed64(&ik1, PackSequenceAndType(100, kTypeValue)); + std::string ik2; + ik2.append("bbb"); + PutFixed64(&ik2, PackSequenceAndType(50, kTypeValue)); + Slice ik1_slice(ik1); + Slice ik2_slice(ik2); + + BlockHandle bh1(0, kBlockSize); + BlockHandle bh2(kBlockSize + kBlockTrailerSize, kBlockSize); + raw_builder->AddIndexEntry(ik1_slice, &ik2_slice, bh1, &scratch, + /*skip_delta_encoding=*/false); + raw_builder->AddIndexEntry(ik2_slice, nullptr, bh2, &scratch, + /*skip_delta_encoding=*/false); + + IndexBuilder::IndexBlocks raw_blocks; + ASSERT_OK(raw_builder->Finish(&raw_blocks)); + + // Both should produce the same index block content. + ASSERT_EQ(contents.ToString(), raw_blocks.index_block_contents.ToString()); +} + +// ============================================================================ +// GetPartitionCoordinator test: partitioned builders return a non-null +// coordinator; non-partitioned builders return null. +// ============================================================================ + +TEST_F(PartitionedIndexFactoryTest, GetPartitionCoordinator) { + // Create factory with full config (needed for partitioned builder). 
+ BuiltinIndexFactoryConfig config; + InternalKeyComparator icmp(BytewiseComparator()); + config.internal_comparator = &icmp; + config.use_delta_encoding_for_index_values = true; + BlockBasedTableOptions table_opts; + table_opts.index_type = BlockBasedTableOptions::kTwoLevelIndexSearch; + table_opts.metadata_block_size = 4096; + config.table_options = &table_opts; + + PartitionedIndexFactory factory(config); + IndexFactoryOptions opts; + opts.comparator = BytewiseComparator(); + std::unique_ptr<IndexFactoryBuilder> builder; + ASSERT_OK(factory.NewBuilder(opts, builder)); + + // Partitioned builder should return a non-null coordinator. + auto* coord = builder->GetPartitionCoordinator(); + ASSERT_NE(coord, nullptr); + + // Non-partitioned builders should return null. + BinarySearchIndexFactory bs_factory(/*with_first_key=*/false); + std::unique_ptr<IndexFactoryBuilder> bs_builder; + ASSERT_OK(bs_factory.NewBuilder(opts, bs_builder)); + ASSERT_EQ(bs_builder->GetPartitionCoordinator(), nullptr); + + HashIndexFactory hash_factory; + std::unique_ptr<IndexFactoryBuilder> hash_builder; + ASSERT_OK(hash_factory.NewBuilder(opts, hash_builder)); + ASSERT_EQ(hash_builder->GetPartitionCoordinator(), nullptr); +} + +// ============================================================================ +// FinishAndWrite default implementation test: the default FinishAndWrite +// calls Finish() then WriteBlock() once. We verify with a mock writer. +// ============================================================================ + +// Mock IndexBlockWriter for testing the FinishAndWrite protocol. 
+class MockIndexBlockWriter : public IndexFactoryBuilder::IndexBlockWriter { + public: + Status WriteBlock(const Slice& contents, + IndexFactoryBuilder::BlockHandle* handle, + bool /*compress*/) override { + blocks_written.push_back(contents.ToString()); + handle->offset = next_offset; + handle->size = contents.size(); + next_offset += contents.size(); + return Status::OK(); + } + void AddMetaBlock(const std::string& name, + const IndexFactoryBuilder::BlockHandle& handle) override { + meta_blocks.emplace_back(name, handle); + } + + std::vector<std::string> blocks_written; + std::vector<std::pair<std::string, IndexFactoryBuilder::BlockHandle>> + meta_blocks; + uint64_t next_offset = 0; +}; + +TEST_F(BinarySearchIndexFactoryTest, FinishAndWriteDefaultImpl) { + // The default FinishAndWrite calls Finish() then WriteBlock() once. + auto factory = BinarySearchIndexFactory(/*with_first_key=*/false); + IndexFactoryOptions opts; + opts.comparator = BytewiseComparator(); + std::unique_ptr<IndexFactoryBuilder> builder; + ASSERT_OK(factory.NewBuilder(opts, builder)); + + // Add some entries. + AddSampleEntries(builder.get()); + + // FinishAndWrite through the default impl. + MockIndexBlockWriter writer; + IndexFactoryBuilder::BlockHandle final_handle{0, 0}; + ASSERT_OK(builder->FinishAndWrite(&writer, &final_handle, /*compress=*/true)); + + // Should have written exactly one block. 
+ ASSERT_EQ(writer.blocks_written.size(), static_cast<size_t>(1)); + ASSERT_GT(writer.blocks_written[0].size(), static_cast<size_t>(0)); + ASSERT_EQ(final_handle.offset, static_cast<uint64_t>(0)); + ASSERT_EQ(final_handle.size, writer.blocks_written[0].size()); +} + +} // namespace ROCKSDB_NAMESPACE + +int main(int argc, char** argv) { + ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/table/block_based/index_builder.h b/table/block_based/index_builder.h index 4d7df7607f1b..ff57a09d72e9 100644 --- a/table/block_based/index_builder.h +++ b/table/block_based/index_builder.h @@ -19,6 +19,7 @@ #include "table/block_based/block_based_table_factory.h" #include "table/block_based/block_builder.h" #include "table/block_based/flush_block_policy_impl.h" +#include "table/block_based/partition_coordinator.h" #include "table/format.h" #include "util/atomic.h" @@ -665,7 +666,8 @@ class HashIndexBuilder : public IndexBuilder { * containing a secondary index on the partitions, built using * ShortenedIndexBuilder. */ -class PartitionedIndexBuilder : public IndexBuilder { +class PartitionedIndexBuilder : public IndexBuilder, + public PartitionCoordinator { public: static PartitionedIndexBuilder* CreateIndexBuilder( const InternalKeyComparator* comparator, bool use_value_delta_encoding, @@ -710,7 +712,7 @@ class PartitionedIndexBuilder : public IndexBuilder { return estimated_index_size_.LoadRelaxed(); } - inline bool ShouldCutFilterBlock() { + inline bool ShouldCutFilterBlock() override { // Current policy is to align the partitions of index and filters if (cut_filter_block) { cut_filter_block = false; @@ -719,14 +721,14 @@ class PartitionedIndexBuilder : public IndexBuilder { return false; } - const std::string& GetPartitionKey() { + const std::string& GetPartitionKey() override { static const std::string kEmptyKey; return entries_.empty() ? 
kEmptyKey : entries_.back().key; } // Called when an external entity (such as filter partition builder) request // cutting the next partition - void RequestPartitionCut(); + void RequestPartitionCut() override; // This function must be thread safe because multiple worker threads might // update the index builder state during parallel compression. diff --git a/table/block_based/partition_coordinator.h b/table/block_based/partition_coordinator.h new file mode 100644 index 000000000000..51b8e9956744 --- /dev/null +++ b/table/block_based/partition_coordinator.h @@ -0,0 +1,51 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +#include + +#include "rocksdb/rocksdb_namespace.h" + +namespace ROCKSDB_NAMESPACE { + +// Interface for coordinating partition boundaries between the partitioned +// index builder and the partitioned filter builder. +// +// The partitioned filter builder needs to know: +// 1. When to cut a filter partition (aligned with index partitions) +// 2. What key to use as the partition boundary +// 3. Whether separators include sequence numbers (for choosing the +// correct top-level index block builder) +// +// This interface decouples the filter builder from the concrete +// PartitionedIndexBuilder type, allowing the index to be a pluggable +// abstraction without leaking its concrete type to the filter. +class PartitionCoordinator { + public: + virtual ~PartitionCoordinator() = default; + + // Request that the index builder cut a partition at the next opportunity. + // Called by the filter builder when it has accumulated enough keys. + // The actual cut happens asynchronously — the filter must poll + // ShouldCutFilterBlock() to check. 
+ virtual void RequestPartitionCut() = 0; + + // Returns true if the index builder has cut a partition since the last + // call. The filter builder should cut its own partition in response. + virtual bool ShouldCutFilterBlock() = 0; + + // Returns the partition boundary key from the index builder. + // Used as the separator key in the filter's top-level index. + virtual const std::string& GetPartitionKey() = 0; + + // Returns whether index separators include sequence numbers. + // Controls which top-level index block builder the filter uses: + // - true: separators are full internal keys (user_key + seq + type) + // - false: separators are user keys only + virtual bool separator_is_key_plus_seq() = 0; +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/table/block_based/partitioned_filter_block.cc b/table/block_based/partitioned_filter_block.cc index a553fd62e0a2..0a35702495de 100644 --- a/table/block_based/partitioned_filter_block.cc +++ b/table/block_based/partitioned_filter_block.cc @@ -25,13 +25,13 @@ PartitionedFilterBlockBuilder::PartitionedFilterBlockBuilder( const SliceTransform* _prefix_extractor, bool whole_key_filtering, FilterBitsBuilder* filter_bits_builder, int index_block_restart_interval, const bool use_value_delta_encoding, - PartitionedIndexBuilder* const p_index_builder, + PartitionCoordinator* const partition_coordinator, const uint32_t partition_size, size_t ts_sz, const bool persist_user_defined_timestamps, bool decouple_from_index_partitions) : FullFilterBlockBuilder(_prefix_extractor, whole_key_filtering, filter_bits_builder), - p_index_builder_(p_index_builder), + partition_coordinator_(partition_coordinator), ts_sz_(ts_sz), decouple_from_index_partitions_(decouple_from_index_partitions), index_on_filter_block_builder_( @@ -94,9 +94,9 @@ bool PartitionedFilterBlockBuilder::DecideCutAFilterBlock() { if (added >= keys_per_partition_) { // Currently only index builder is in charge of cutting a partition. 
We // keep requesting until it is granted. - p_index_builder_->RequestPartitionCut(); + partition_coordinator_->RequestPartitionCut(); } - return p_index_builder_->ShouldCutFilterBlock(); + return partition_coordinator_->ShouldCutFilterBlock(); } } @@ -142,7 +142,7 @@ void PartitionedFilterBlockBuilder::CutAFilterBlock(const Slice* next_key, } AppendInternalKeyFooter(&ikey, /*seqno*/ 0, ValueType::kTypeDeletion); } else { - ikey = p_index_builder_->GetPartitionKey(); + ikey = partition_coordinator_->GetPartitionKey(); } filters_.push_back({std::move(ikey), std::move(filter_data), filter}); completed_partitions_size_.FetchAddRelaxed(filter.size()); @@ -244,7 +244,7 @@ void PartitionedFilterBlockBuilder::UpdateFilterSizeEstimate( size_t filter_estimate = std::max(partitions_size, active_filter_estimate); // Estimate top-level partition index size - if (p_index_builder_->separator_is_key_plus_seq()) { + if (partition_coordinator_->separator_is_key_plus_seq()) { filter_estimate += index_on_filter_block_builder_.CurrentSizeEstimate(); } else { filter_estimate += @@ -294,7 +294,7 @@ Status PartitionedFilterBlockBuilder::Finish( // NOTE: WriteBatch guarantees keys < 4GB; handle values are also small index_on_filter_block_builder_.Add(e.ikey, handle_encoding, &handle_delta_encoding_slice); - if (!p_index_builder_->separator_is_key_plus_seq()) { + if (!partition_coordinator_->separator_is_key_plus_seq()) { index_on_filter_block_builder_without_seq_.Add( ExtractUserKey(e.ikey), handle_encoding, &handle_delta_encoding_slice); @@ -321,7 +321,7 @@ Status PartitionedFilterBlockBuilder::Finish( if (UNLIKELY(filters_.empty())) { if (!index_on_filter_block_builder_.empty()) { // Simplest to just add them all at the end - if (p_index_builder_->separator_is_key_plus_seq()) { + if (partition_coordinator_->separator_is_key_plus_seq()) { *filter = index_on_filter_block_builder_.Finish(); } else { *filter = index_on_filter_block_builder_without_seq_.Finish(); diff --git 
a/table/block_based/partitioned_filter_block.h b/table/block_based/partitioned_filter_block.h index 96f39dd4f01a..cc3c1de2c8ff 100644 --- a/table/block_based/partitioned_filter_block.h +++ b/table/block_based/partitioned_filter_block.h @@ -18,6 +18,7 @@ #include "table/block_based/filter_block_reader_common.h" #include "table/block_based/full_filter_block.h" #include "table/block_based/index_builder.h" +#include "table/block_based/partition_coordinator.h" #include "util/atomic.h" #include "util/autovector.h" #include "util/hash_containers.h" @@ -31,7 +32,7 @@ class PartitionedFilterBlockBuilder : public FullFilterBlockBuilder { const SliceTransform* prefix_extractor, bool whole_key_filtering, FilterBitsBuilder* filter_bits_builder, int index_block_restart_interval, const bool use_value_delta_encoding, - PartitionedIndexBuilder* const p_index_builder, + PartitionCoordinator* const partition_coordinator, const uint32_t partition_size, size_t ts_sz, const bool persist_user_defined_timestamps, bool decouple_from_index_partitions); @@ -87,8 +88,8 @@ class PartitionedFilterBlockBuilder : public FullFilterBlockBuilder { // Currently we keep the same number of partitions for filters and indexes. // This would allow for some potentioal optimizations in future. 
If such // optimizations did not realize we can use different number of partitions and - // eliminate p_index_builder_ - PartitionedIndexBuilder* const p_index_builder_; + // eliminate partition_coordinator_ + PartitionCoordinator* const partition_coordinator_; const size_t ts_sz_; const bool decouple_from_index_partitions_; diff --git a/table/block_based/user_defined_index_wrapper.h b/table/block_based/user_defined_index_wrapper.h index 0c2e7f3fd8aa..a0cb65476109 100644 --- a/table/block_based/user_defined_index_wrapper.h +++ b/table/block_based/user_defined_index_wrapper.h @@ -9,244 +9,38 @@ #include #include "db/seqno_to_time_mapping.h" +#include "rocksdb/index_factory.h" #include "rocksdb/slice.h" #include "rocksdb/status.h" -#include "rocksdb/user_defined_index.h" #include "table/block_based/block_based_table_reader.h" #include "table/block_based/block_type.h" #include "table/block_based/index_builder.h" namespace ROCKSDB_NAMESPACE { -// UserDefinedIndexBuilderWrapper wraps around the existing index types in block -// based table, and supports plugging in an additional user defined index. The -// wrapper class forwards calls to both the wrapped internal index, and a user -// defined index builder. -class UserDefinedIndexBuilderWrapper : public IndexBuilder { +// --------------------------------------------------------------------------- +// Adapter classes for custom IndexFactory implementations. +// +// IndexFactoryIteratorWrapper adapts IndexFactoryIterator (public interface, +// user keys) to InternalIteratorBase (internal interface, internal +// keys). This adapter is necessary because BlockBasedTableIterator expects +// the internal iterator interface. +// +// IndexFactoryReaderWrapper dispatches NewIterator calls to either the +// standard IndexReader or the custom IndexFactoryReader based on the +// index_mode config (kCustomDefault/kCustomOnly) or ReadOptions::read_index. 
+// --------------------------------------------------------------------------- + +// Forward declaration for the reader wrapper. +class IndexFactoryIteratorWrapper; + +class IndexFactoryIteratorWrapper : public InternalIteratorBase { public: - UserDefinedIndexBuilderWrapper( - const std::string& name, - std::unique_ptr internal_index_builder, - std::unique_ptr user_defined_index_builder, - const InternalKeyComparator* comparator, size_t ts_sz, - bool persist_user_defined_timestamps) - : IndexBuilder(comparator, ts_sz, persist_user_defined_timestamps), - name_(name), - internal_index_builder_(std::move(internal_index_builder)), - user_defined_index_builder_(std::move(user_defined_index_builder)) {} - - ~UserDefinedIndexBuilderWrapper() override = default; - - Slice AddIndexEntry(const Slice& last_key_in_current_block, - const Slice* first_key_in_next_block, - const BlockHandle& block_handle, - std::string* separator_scratch, - bool skip_delta_encoding) override { - UserDefinedIndexBuilder::BlockHandle handle{}; - handle.offset = block_handle.offset(); - handle.size = block_handle.size(); - // Forward the call to both index builders. - // Parse the internal keys to extract user keys and sequence numbers. - // There's no way to return an error here, so we remember the status and - // return it in Finish(). - ParsedInternalKey pkey_last; - ParsedInternalKey pkey_first; - if (status_.ok()) { - status_ = ParseInternalKey(last_key_in_current_block, &pkey_last, - /*log_err_key*/ false); - } - if (status_.ok() && first_key_in_next_block) { - status_ = ParseInternalKey(*first_key_in_next_block, &pkey_first, - /*log_err_key*/ false); - } - if (status_.ok()) { - // Pass both user keys AND sequence numbers to the UDI builder via - // the IndexEntryContext. The sequence numbers are needed when the - // same user key spans a data block boundary (e.g., due to snapshots - // keeping multiple versions). 
Without sequence numbers, the UDI - // cannot produce a separator that distinguishes the two blocks, - // causing incorrect Seek results. - UserDefinedIndexBuilder::IndexEntryContext ctx; - ctx.last_key_tag = - PackSequenceAndType(pkey_last.sequence, pkey_last.type); - ctx.first_key_tag = - first_key_in_next_block - ? PackSequenceAndType(pkey_first.sequence, pkey_first.type) - : 0; - user_defined_index_builder_->AddIndexEntry( - pkey_last.user_key, - first_key_in_next_block ? &pkey_first.user_key : nullptr, handle, - separator_scratch, ctx); - } - // Always forward to the standard index builder, even in primary mode. - // The standard index is fully populated alongside the UDI. In primary - // mode the UDI handles all reads, but the standard index serves as a - // safety fallback (e.g., backup/restore, rollback to non-UDI config) - // and its presence is required for correct internal RocksDB behavior. - // The write-path cost is the standard index block in the SST (~1-2% - // of SST size). Skipping the standard index is deferred to a future - // refactor that extracts the index abstraction to put the binary - // index and UDI at the same level (see PR #14547 discussion). - return internal_index_builder_->AddIndexEntry( - last_key_in_current_block, first_key_in_next_block, block_handle, - separator_scratch, skip_delta_encoding); - } - - // Parallel compression splits AddIndexEntry() into PrepareIndexEntry() (emit - // thread) and FinishIndexEntry() (worker thread). This wrapper does not - // implement that split yet, so parallel compression is rejected at option - // validation time (see BlockBasedTableFactory::ValidateOptions and the Rep - // constructor). These stubs exist only to satisfy the interface. 
- std::unique_ptr CreatePreparedIndexEntry() override { - return nullptr; - } - void PrepareIndexEntry(const Slice& last_key_in_current_block, - const Slice* first_key_in_next_block, - PreparedIndexEntry* out) override { - (void)last_key_in_current_block; - (void)first_key_in_next_block; - (void)out; - assert(false); - } - void FinishIndexEntry(const BlockHandle& block_handle, - PreparedIndexEntry* entry, - bool skip_delta_encoding) override { - (void)block_handle; - (void)entry; - (void)skip_delta_encoding; - assert(false); - } - - void OnKeyAdded(const Slice& key, - const std::optional& value) override { - // Always forward to the internal builder which needs OnKeyAdded for - // every key to maintain state (e.g., current_block_first_internal_key_). - // The standard index is always fully populated, even in primary mode. - internal_index_builder_->OnKeyAdded(key, value); - - ParsedInternalKey pkey; - if (status_.ok()) { - // Defensive: value should always be present since OnKeyAdded() is called - // on the main thread in Add() with the original value Slice. No current - // code path passes std::nullopt here. - if (!value.has_value()) { - assert(false); - status_ = Status::InvalidArgument( - "OnKeyAdded called without a value; UDI requires the value to " - "forward to the plugin builder"); - } else { - status_ = ParseInternalKey(key, &pkey, /*log_err_key*/ false); - } - } - if (!status_.ok()) { - return; - } - - // Pass the user key to the UDI with the mapped value type. In SST files - // produced by flush or compaction, there may be multiple entries for the - // same user key with different sequence numbers (e.g., when snapshots are - // active). UDI builders that use OnKeyAdded() should handle this; builders - // that only use AddIndexEntry() separator keys (e.g., trie) are unaffected. 
- Slice udi_value = value.value(); - if (pkey.type == kTypeValuePreferredSeqno) { - // Strip the packed preferred seqno suffix so the UDI plugin receives - // only the user value, consistent with the kValue contract. - udi_value = ParsePackedValueForValue(udi_value); - } - user_defined_index_builder_->OnKeyAdded( - pkey.user_key, MapToUDIValueType(pkey.type), udi_value); - } - - Status Finish(IndexBlocks* index_blocks, - const BlockHandle& last_partition_block_handle) override { - if (!status_.ok() && !status_.IsIncomplete()) { - return status_; - } - - if (!udi_finished_) { - // Finish the user defined index builder - Slice user_index_contents; - status_ = user_defined_index_builder_->Finish(&user_index_contents); - if (!status_.ok()) { - return status_; - } - - // Add the user defined index to the meta blocks - std::string block_name = kUserDefinedIndexPrefix + name_; - index_blocks->meta_blocks.insert( - {block_name, {BlockType::kUserDefinedIndex, user_index_contents}}); - udi_finished_ = true; - } - - // Finish the internal index builder. The standard index is always fully - // populated (even in primary mode), producing a real index block. - status_ = internal_index_builder_->Finish(index_blocks, - last_partition_block_handle); - if (!status_.ok()) { - return status_; - } - - index_size_ = internal_index_builder_->IndexSize(); - return status_; - } - - size_t IndexSize() const override { return index_size_; } - - uint64_t CurrentIndexSizeEstimate() const override { - // Only includes the standard index size. The UDI meta block size is - // not included because EstimatedSize() reads non-atomic fields that - // are written by AddIndexEntry, which would be a data race if - // parallel compression were enabled. The conservative tail-size - // estimates in BlockBasedTableBuilder (properties + meta-index) - // provide a rough buffer. A more accurate estimate would require - // making EstimatedSize() thread-safe. 
- return internal_index_builder_->CurrentIndexSizeEstimate(); - } - - bool separator_is_key_plus_seq() override { - return internal_index_builder_->separator_is_key_plus_seq(); - } - - private: - static UserDefinedIndexBuilder::ValueType MapToUDIValueType( - ROCKSDB_NAMESPACE::ValueType t) { - switch (t) { - case kTypeValue: - case kTypeValuePreferredSeqno: - return UserDefinedIndexBuilder::kValue; - case kTypeDeletion: - case kTypeSingleDeletion: - case kTypeDeletionWithTimestamp: - return UserDefinedIndexBuilder::kDelete; - case kTypeMerge: - return UserDefinedIndexBuilder::kMerge; - case kTypeBlobIndex: - case kTypeWideColumnEntity: - return UserDefinedIndexBuilder::kOther; - default: - // Any new type that reaches OnKeyAdded() should be explicitly mapped - // above. Falling through to kOther is a safe default but indicates a - // missing case that should be added. - assert(false); - return UserDefinedIndexBuilder::kOther; - } - } - - const std::string name_; - std::unique_ptr internal_index_builder_; - std::unique_ptr user_defined_index_builder_; - Status status_; - bool udi_finished_ = false; -}; - -class UserDefinedIndexIteratorWrapper - : public InternalIteratorBase { - public: - explicit UserDefinedIndexIteratorWrapper( - std::unique_ptr&& udi_iter) + explicit IndexFactoryIteratorWrapper( + std::unique_ptr&& udi_iter) : udi_iter_(std::move(udi_iter)), valid_(false) {} - ~UserDefinedIndexIteratorWrapper() override = default; + ~IndexFactoryIteratorWrapper() override = default; bool Valid() const override { return valid_; } @@ -269,7 +63,7 @@ class UserDefinedIndexIteratorWrapper // spans multiple data blocks with different sequence numbers (e.g., // due to snapshots). Without it, the UDI cannot distinguish which // block to return for a given (user_key, seqno) target. 
- UserDefinedIndexIterator::SeekContext ctx; + IndexFactoryIterator::SeekContext ctx; ctx.target_tag = PackSequenceAndType(pkey.sequence, pkey.type); status_ = udi_iter_->SeekAndGetResult(pkey.user_key, &result_, ctx); } @@ -295,10 +89,10 @@ class UserDefinedIndexIteratorWrapper } void SeekForPrev(const Slice& /*target*/) override { - // BlockBasedTableIterator never calls SeekForPrev on the index iterator. - // It uses Seek + FindKeyBackward(Prev) instead. The standard index's - // IndexBlockIter::SeekForPrevImpl is also assert(false). Keep this as - // NotSupported for safety. + // BlockBasedTableIterator never calls SeekForPrev on the index + // iterator. It uses Seek + FindKeyBackward(Prev) instead. The standard + // index's IndexBlockIter::SeekForPrevImpl is also assert(false). Keep + // this as NotSupported for safety. valid_ = false; status_ = Status::NotSupported("SeekForPrev not supported"); } @@ -326,10 +120,15 @@ class UserDefinedIndexIteratorWrapper } private: - // Common logic after every UDI positioning operation: check status, update - // valid_, and build the internal key + cache the IndexValue if valid. + // Common logic after every UDI positioning operation: check status, + // update valid_, and build the internal key + cache the IndexValue if + // valid. void UpdateValidAndKey() { if (status_.ok()) { + // IndexFactoryIterator implementations must set + // bound_check_result=kInbound when they have a valid result. + // kUnknown and kOutOfBound both mean no valid position (the + // iterator is exhausted or the key is outside bounds). 
valid_ = result_.bound_check_result == IterBoundCheck::kInbound; if (valid_) { SetInternalKeyFromUDIResult(); @@ -360,7 +159,7 @@ class UserDefinedIndexIteratorWrapper IndexValue(BlockHandle(handle.offset, handle.size), Slice()); } - std::unique_ptr udi_iter_; + std::unique_ptr udi_iter_; IterateResult result_; InternalKey ikey_; IndexValue cached_value_; @@ -368,15 +167,15 @@ class UserDefinedIndexIteratorWrapper bool valid_; }; -class UserDefinedIndexReaderWrapper : public BlockBasedTable::IndexReader { +class IndexFactoryReaderWrapper : public BlockBasedTable::IndexReader { public: // @udi_is_primary: use UDI for all reads (default dispatch), including // internal operations like compaction and VerifyChecksum that don't - // set ReadOptions::table_index_factory. - UserDefinedIndexReaderWrapper( + // set ReadOptions::read_index. + IndexFactoryReaderWrapper( const std::string& name, std::unique_ptr&& reader, - std::unique_ptr&& udi_reader, bool udi_is_primary) + std::unique_ptr&& udi_reader, bool udi_is_primary) : name_(name), reader_(std::move(reader)), udi_reader_(std::move(udi_reader)), @@ -387,27 +186,29 @@ class UserDefinedIndexReaderWrapper : public BlockBasedTable::IndexReader { IndexBlockIter* iter, GetContext* get_context, BlockCacheLookupContext* lookup_context) override { // Determine whether to use the UDI for this read: - // 1. UDI is primary -- always use it (standard index is present in the - // SST but not used for reads in this mode) - // 2. ReadOptions::table_index_factory is set -- use it (explicit request) - // 3. 
Neither -- fall through to the standard index - bool use_udi = udi_is_primary_; - if (!use_udi && read_options.table_index_factory) { - if (name_ == read_options.table_index_factory->Name()) { + // kDefault → udi_is_primary_ (kCustomDefault/kCustomOnly → custom, + // kStandardOnly/kStandardDefault → standard) + // kBuiltin → force standard index + // kCustom → force custom index + bool use_udi; + switch (read_options.read_index) { + case ReadOptions::ReadIndex::kBuiltin: + use_udi = false; + break; + case ReadOptions::ReadIndex::kCustom: use_udi = true; - } else { - return NewErrorInternalIterator(Status::InvalidArgument( - "Bad index name: " + - std::string(read_options.table_index_factory->Name()) + - ". Only supported UDI is " + name_)); - } + break; + case ReadOptions::ReadIndex::kDefault: + default: + use_udi = udi_is_primary_; + break; } if (use_udi) { - std::unique_ptr udi_iter = + std::unique_ptr udi_iter = udi_reader_->NewIterator(read_options); if (udi_iter) { - return new UserDefinedIndexIteratorWrapper(std::move(udi_iter)); + return new IndexFactoryIteratorWrapper(std::move(udi_iter)); } return NewErrorInternalIterator( Status::Corruption("Could not create UDI iterator")); @@ -419,7 +220,8 @@ class UserDefinedIndexReaderWrapper : public BlockBasedTable::IndexReader { Status CacheDependencies(const ReadOptions& ro, bool pin, FilePrefetchBuffer* tail_prefetch_buffer) override { - // The standard index is always fully populated, even in primary mode. + // The standard index is fully populated in kStandardDefault and + // kCustomDefault modes. In kCustomOnly, it is an empty stub. 
return reader_->CacheDependencies(ro, pin, tail_prefetch_buffer); } @@ -437,7 +239,8 @@ class UserDefinedIndexReaderWrapper : public BlockBasedTable::IndexReader { private: const std::string name_; std::unique_ptr reader_; - std::unique_ptr udi_reader_; + std::unique_ptr udi_reader_; const bool udi_is_primary_; }; + } // namespace ROCKSDB_NAMESPACE diff --git a/table/table_test.cc b/table/table_test.cc index 4d0f63564c0c..5cbb8129cec0 100644 --- a/table/table_test.cc +++ b/table/table_test.cc @@ -42,6 +42,7 @@ #include "rocksdb/file_checksum.h" #include "rocksdb/file_system.h" #include "rocksdb/filter_policy.h" +#include "rocksdb/index_factory.h" #include "rocksdb/iterator.h" #include "rocksdb/memtablerep.h" #include "rocksdb/options.h" @@ -52,7 +53,6 @@ #include "rocksdb/table_properties.h" #include "rocksdb/trace_record.h" #include "rocksdb/unique_id.h" -#include "rocksdb/user_defined_index.h" #include "rocksdb/utilities/object_registry.h" #include "rocksdb/write_buffer_manager.h" #include "table/block_based/block.h" @@ -7850,13 +7850,13 @@ class UserDefinedIndexTestBase : public BlockBasedTableTestBase { }; public: - class TestUserDefinedIndexFactory : public UserDefinedIndexFactory { + class TestIndexFactory : public IndexFactory { public: const char* Name() const override { return "test_index"; } Status NewBuilder( - const UserDefinedIndexOption& /*option*/, - std::unique_ptr& builder) const override { - auto b = std::make_unique(); + const IndexFactoryOptions& /*option*/, + std::unique_ptr& builder) const override { + auto b = std::make_unique(); b->skip_key_size_check_ = skip_key_size_check_; // Share the factory's key_type_log so tests can inspect after flush. b->shared_key_type_log_ = &key_type_log_; @@ -7869,8 +7869,7 @@ class UserDefinedIndexTestBase : public BlockBasedTableTestBase { // Accumulated log of (key, ValueType) pairs from all builders created // by this factory. Tests can inspect this after flush/compaction. 
- mutable std::vector< - std::pair> + mutable std::vector> key_type_log_; struct CustomizedMapComparator { @@ -7882,18 +7881,10 @@ class UserDefinedIndexTestBase : public BlockBasedTableTestBase { } }; - // Deprecated API - UserDefinedIndexBuilder* NewBuilder() const override { return nullptr; } - - std::unique_ptr NewReader( - Slice& /*index_block*/) const override { - return nullptr; - } - Status NewReader( - const UserDefinedIndexOption& option, Slice& index_block, - std::unique_ptr& reader) const override { - reader = std::make_unique( + const IndexFactoryOptions& option, Slice& index_block, + std::unique_ptr& reader) const override { + reader = std::make_unique( index_block, option.comparator, this); return Status::OK(); } @@ -7902,9 +7893,9 @@ class UserDefinedIndexTestBase : public BlockBasedTableTestBase { uint64_t next_error_count_ = 0; private: - class TestUserDefinedIndexBuilder : public UserDefinedIndexBuilder { + class TestIndexFactoryBuilder : public IndexFactoryBuilder { public: - TestUserDefinedIndexBuilder() : entries_added_(0), keys_added_(0) {} + TestIndexFactoryBuilder() : entries_added_(0), keys_added_(0) {} Slice AddIndexEntry(const Slice& last_key_in_current_block, const Slice* first_key_in_next_block, @@ -7951,7 +7942,7 @@ class UserDefinedIndexTestBase : public BlockBasedTableTestBase { keys_added_++; if (!skip_key_size_check_) { // For fixed-size key tests, add a dummy per-key entry that the - // TestUserDefinedIndexReader can parse alongside block-level entries. + // TestIndexFactoryReader can parse alongside block-level entries. 
PutFixed64(&index_data_[key.ToString()], 0); PutFixed64(&index_data_[key.ToString()], 0); PutFixed32(&index_data_[key.ToString()], 0); @@ -7993,11 +7984,11 @@ class UserDefinedIndexTestBase : public BlockBasedTableTestBase { std::string index_contents_data_; }; - class TestUserDefinedIndexReader : public UserDefinedIndexReader { + class TestIndexFactoryReader : public IndexFactoryReader { public: - explicit TestUserDefinedIndexReader( - Slice& index_block, const Comparator* comparator, - const TestUserDefinedIndexFactory* factory) + explicit TestIndexFactoryReader(Slice& index_block, + const Comparator* comparator, + const TestIndexFactory* factory) : factory_(factory), comparator_(comparator), index_data_(CustomizedMapComparator(comparator)) { @@ -8012,32 +8003,31 @@ class UserDefinedIndexTestBase : public BlockBasedTableTestBase { EXPECT_TRUE(GetFixed64(&block, &size)); EXPECT_TRUE(GetFixed32(&block, &num_keys)); - UserDefinedIndexBuilder::BlockHandle handle{0, 0}; + IndexFactoryBuilder::BlockHandle handle{0, 0}; handle.offset = offset; handle.size = size; index_data_[key.ToString()] = - std::make_pair( + std::make_pair( std::move(handle), std::move(num_keys)); } } - std::unique_ptr NewIterator( + std::unique_ptr NewIterator( const ReadOptions& /*ro*/) override { - return std::make_unique( - index_data_, factory_, comparator_); + return std::make_unique(index_data_, factory_, + comparator_); } size_t ApproximateMemoryUsage() const override { return 0; } private: - class TestUserDefinedIndexIterator : public UserDefinedIndexIterator { + class TestIndexFactoryIterator : public IndexFactoryIterator { public: - TestUserDefinedIndexIterator( + TestIndexFactoryIterator( std::map, + std::pair, CustomizedMapComparator>& index, - const TestUserDefinedIndexFactory* factory, - const Comparator* comparator) + const TestIndexFactory* factory, const Comparator* comparator) : index_(index), iter_(index_.end()), scan_opts_(nullptr), @@ -8153,8 +8143,8 @@ class 
UserDefinedIndexTestBase : public BlockBasedTableTestBase { return true; } - UserDefinedIndexBuilder::BlockHandle value() override { - UserDefinedIndexBuilder::BlockHandle handle{0, 0}; + IndexFactoryBuilder::BlockHandle value() override { + IndexFactoryBuilder::BlockHandle handle{0, 0}; handle.offset = iter_->second.first.offset; handle.size = iter_->second.first.size; return handle; @@ -8170,9 +8160,9 @@ class UserDefinedIndexTestBase : public BlockBasedTableTestBase { private: std::map, + std::pair, CustomizedMapComparator>& index_; - std::map>::iterator iter_; const ScanOptions* scan_opts_; size_t num_opts_{}; @@ -8183,10 +8173,10 @@ class UserDefinedIndexTestBase : public BlockBasedTableTestBase { const Comparator* comparator_; }; - const TestUserDefinedIndexFactory* factory_; + const TestIndexFactory* factory_; const Comparator* comparator_; std::map, + std::pair, CustomizedMapComparator> index_data_; }; @@ -8315,9 +8305,10 @@ void UserDefinedIndexTestBase::BasicTest(bool use_partitioned_index) { std::string ingest_file = dbname + "test.sst"; // Set up the user-defined index factory - auto user_defined_index_factory = - std::make_shared(); + auto user_defined_index_factory = std::make_shared(); table_options.user_defined_index_factory = user_defined_index_factory; + table_options.index_mode = + BlockBasedTableOptions::IndexMode::kStandardDefault; if (use_partitioned_index) { table_options.partition_filters = true; table_options.decouple_partitioned_filters = true; @@ -8356,7 +8347,7 @@ void UserDefinedIndexTestBase::BasicTest(bool use_partitioned_index) { /* tail_size */ 0, ioptions.persist_user_defined_timestamps); // Verify that the user-defined index was created std::string meta_block_name = - std::string(kUserDefinedIndexPrefix) + "test_index"; + std::string(kIndexFactoryMetaPrefix) + "test_index"; BlockHandle block_handle; uint64_t file_size = 0; std::unique_ptr file; @@ -8395,7 +8386,7 @@ void UserDefinedIndexTestBase::BasicTest(bool 
use_partitioned_index) { ASSERT_OK(iter->status()); iter.reset(); - ro.table_index_factory = user_defined_index_factory.get(); + ro.read_index = ReadOptions::ReadIndex::kCustom; iter.reset(reader->NewIterator(ro)); ASSERT_NE(iter, nullptr); @@ -8483,9 +8474,10 @@ TEST_P(UserDefinedIndexTest, InvalidArgumentTest1) { std::string ingest_file = dbname + "test.sst"; // Set up the user-defined index factory - auto user_defined_index_factory = - std::make_shared(); + auto user_defined_index_factory = std::make_shared(); table_options.user_defined_index_factory = user_defined_index_factory; + table_options.index_mode = + BlockBasedTableOptions::IndexMode::kStandardDefault; // Set up custom flush block policy that flushes every 3 keys table_options.flush_block_policy_factory = @@ -8511,9 +8503,10 @@ TEST_P(UserDefinedIndexTest, MergeWithUDI) { std::string dbname = test::PerThreadDBPath("user_defined_index_test"); std::string ingest_file = dbname + "test.sst"; - auto user_defined_index_factory = - std::make_shared(); + auto user_defined_index_factory = std::make_shared(); table_options.user_defined_index_factory = user_defined_index_factory; + table_options.index_mode = + BlockBasedTableOptions::IndexMode::kStandardDefault; table_options.flush_block_policy_factory = std::make_shared(); @@ -8524,7 +8517,7 @@ TEST_P(UserDefinedIndexTest, MergeWithUDI) { writer.reset(new SstFileWriter(EnvOptions(), options_)); ASSERT_OK(writer->Open(ingest_file)); - // Use 5-byte keys to match TestUserDefinedIndexBuilder expectations. + // Use 5-byte keys to match TestIndexFactoryBuilder expectations. 
ASSERT_OK(writer->Merge("key_a", "val_a")); ASSERT_OK(writer->Finish()); writer.reset(); @@ -8550,10 +8543,11 @@ TEST_P(UserDefinedIndexTest, DBFlushWithMixedOpsAndUDI) { ASSERT_OK(DestroyDB(dbname, options_)); BlockBasedTableOptions table_options; - auto user_defined_index_factory = - std::make_shared(); + auto user_defined_index_factory = std::make_shared(); user_defined_index_factory->skip_key_size_check_ = true; table_options.user_defined_index_factory = user_defined_index_factory; + table_options.index_mode = + BlockBasedTableOptions::IndexMode::kStandardDefault; table_options.flush_block_policy_factory = std::make_shared(); options_.table_factory.reset(NewBlockBasedTableFactory(table_options)); @@ -8604,7 +8598,7 @@ TEST_P(UserDefinedIndexTest, DBFlushWithMixedOpsAndUDI) { TEST_P(UserDefinedIndexTest, ValueTypeMappingViaDBFlush) { // Verify that MapToUDIValueType correctly maps internal ValueTypes to UDI // ValueTypes by writing various operation types via the DB API, flushing, - // and inspecting what the TestUserDefinedIndexBuilder received. + // and inspecting what the TestIndexFactoryBuilder received. if (is_reverse_comparator_) { // Skip for reverse comparator — the key ordering makes this test // unnecessarily complex and the mapping logic is comparator-independent. 
@@ -8615,10 +8609,11 @@ TEST_P(UserDefinedIndexTest, ValueTypeMappingViaDBFlush) { ASSERT_OK(DestroyDB(dbname, options_)); BlockBasedTableOptions table_options; - auto user_defined_index_factory = - std::make_shared(); + auto user_defined_index_factory = std::make_shared(); user_defined_index_factory->skip_key_size_check_ = true; table_options.user_defined_index_factory = user_defined_index_factory; + table_options.index_mode = + BlockBasedTableOptions::IndexMode::kStandardDefault; options_.table_factory.reset(NewBlockBasedTableFactory(table_options)); options_.merge_operator = MergeOperators::CreateStringAppendOperator(); options_.create_if_missing = true; @@ -8647,26 +8642,26 @@ TEST_P(UserDefinedIndexTest, ValueTypeMappingViaDBFlush) { ASSERT_FALSE(log.empty()); // Build a map from key to the ValueType received by OnKeyAdded. - std::map type_map; + std::map type_map; for (const auto& entry : log) { type_map[entry.first] = entry.second; } // Verify each mapping. ASSERT_EQ(type_map.count("key_01_put"), 1u); - EXPECT_EQ(type_map["key_01_put"], UserDefinedIndexBuilder::kValue); + EXPECT_EQ(type_map["key_01_put"], IndexFactoryBuilder::kValue); ASSERT_EQ(type_map.count("key_02_merge"), 1u); - EXPECT_EQ(type_map["key_02_merge"], UserDefinedIndexBuilder::kMerge); + EXPECT_EQ(type_map["key_02_merge"], IndexFactoryBuilder::kMerge); ASSERT_EQ(type_map.count("key_03_del"), 1u); - EXPECT_EQ(type_map["key_03_del"], UserDefinedIndexBuilder::kDelete); + EXPECT_EQ(type_map["key_03_del"], IndexFactoryBuilder::kDelete); ASSERT_EQ(type_map.count("key_04_sdel"), 1u); - EXPECT_EQ(type_map["key_04_sdel"], UserDefinedIndexBuilder::kDelete); + EXPECT_EQ(type_map["key_04_sdel"], IndexFactoryBuilder::kDelete); ASSERT_EQ(type_map.count("key_05_entity"), 1u); - EXPECT_EQ(type_map["key_05_entity"], UserDefinedIndexBuilder::kOther); + EXPECT_EQ(type_map["key_05_entity"], IndexFactoryBuilder::kOther); ASSERT_OK(db->Close()); ASSERT_OK(DestroyDB(dbname, options_)); @@ -8683,10 +8678,11 @@ 
TEST_P(UserDefinedIndexTest, CompactionWithSnapshotsAndUDI) { ASSERT_OK(DestroyDB(dbname, options_)); BlockBasedTableOptions table_options; - auto user_defined_index_factory = - std::make_shared(); + auto user_defined_index_factory = std::make_shared(); user_defined_index_factory->skip_key_size_check_ = true; table_options.user_defined_index_factory = user_defined_index_factory; + table_options.index_mode = + BlockBasedTableOptions::IndexMode::kStandardDefault; options_.table_factory.reset(NewBlockBasedTableFactory(table_options)); options_.create_if_missing = true; // Disable auto-compaction so we control when compaction runs. @@ -8780,9 +8776,10 @@ TEST_P(UserDefinedIndexTest, IngestTest) { std::string ingest_file = dbname + "test.sst"; // Set up the user-defined index factory - auto user_defined_index_factory = - std::make_shared(); + auto user_defined_index_factory = std::make_shared(); table_options.user_defined_index_factory = user_defined_index_factory; + table_options.index_mode = + BlockBasedTableOptions::IndexMode::kStandardDefault; // Set up custom flush block policy that flushes every 3 keys table_options.flush_block_policy_factory = @@ -8828,7 +8825,7 @@ TEST_P(UserDefinedIndexTest, IngestTest) { ASSERT_OK(iter->status()); iter.reset(); - ro.table_index_factory = user_defined_index_factory.get(); + ro.read_index = ReadOptions::ReadIndex::kCustom; iter.reset(db->NewIterator(ro, cfh)); ASSERT_NE(iter, nullptr); @@ -8866,9 +8863,10 @@ TEST_P(UserDefinedIndexTest, EmptyRangeTest) { std::string ingest_file = dbname + "test.sst"; // Set up the user-defined index factory - auto user_defined_index_factory = - std::make_shared(); + auto user_defined_index_factory = std::make_shared(); table_options.user_defined_index_factory = user_defined_index_factory; + table_options.index_mode = + BlockBasedTableOptions::IndexMode::kStandardDefault; // Set up custom flush block policy that flushes every 3 keys table_options.flush_block_policy_factory = @@ -8933,7 +8931,7 @@ 
TEST_P(UserDefinedIndexTest, EmptyRangeTest) { ASSERT_OK(iter->status()); iter.reset(); - ro.table_index_factory = user_defined_index_factory.get(); + ro.read_index = ReadOptions::ReadIndex::kCustom; std::vector key_counts; MultiScanArgs scan_opts(options_.comparator); std::unordered_map property_bag; @@ -8983,8 +8981,8 @@ TEST_P(UserDefinedIndexTest, EmptyRangeTest) { } // Verify that external file ingestion fails if we try to ingest an SST file -// without the UDI and a UDI factory is configured in BlockBasedTableOptions -// and fail_if_no_udi_on_open is true in BlockBasedTableOptions. +// without the UDI block when index_mode is kCustomDefault or kCustomOnly (which +// require all SSTs to have the custom index). TEST_P(UserDefinedIndexTest, IngestFailTest) { BlockBasedTableOptions table_options; std::string dbname = test::PerThreadDBPath("user_defined_index_test"); @@ -9008,10 +9006,9 @@ TEST_P(UserDefinedIndexTest, IngestFailTest) { writer.reset(); // Set up the user-defined index factory - auto user_defined_index_factory = - std::make_shared(); + auto user_defined_index_factory = std::make_shared(); table_options.user_defined_index_factory = user_defined_index_factory; - table_options.fail_if_no_udi_on_open = true; + table_options.index_mode = BlockBasedTableOptions::IndexMode::kCustomDefault; options_.table_factory.reset(NewBlockBasedTableFactory(table_options)); std::unique_ptr db; @@ -9026,8 +9023,9 @@ TEST_P(UserDefinedIndexTest, IngestFailTest) { s = db->IngestExternalFile(cfh, {ingest_file}, ifo); ASSERT_NOK(s); + // Downgrade to kStandardOnly to allow ingesting files without UDI. 
ASSERT_OK(db->SetOptions( - cfh, {{"block_based_table_factory", "{fail_if_no_udi_on_open=false;}"}})); + cfh, {{"block_based_table_factory", "{index_mode=kStandardOnly;}"}})); s = db->IngestExternalFile(cfh, {ingest_file}, ifo); ASSERT_OK(s); @@ -9043,9 +9041,10 @@ TEST_P(UserDefinedIndexTest, IngestEmptyUDI) { std::string ingest_file2 = dbname + "dummy.sst"; // Set up the user-defined index factory - auto user_defined_index_factory = - std::make_shared(); + auto user_defined_index_factory = std::make_shared(); table_options.user_defined_index_factory = user_defined_index_factory; + table_options.index_mode = + BlockBasedTableOptions::IndexMode::kStandardDefault; // Set up custom flush block policy that flushes every 3 keys table_options.flush_block_policy_factory = std::make_shared(); @@ -9068,7 +9067,8 @@ TEST_P(UserDefinedIndexTest, IngestEmptyUDI) { ASSERT_OK(writer->Finish()); writer.reset(); - table_options.fail_if_no_udi_on_open = true; + table_options.index_mode = + BlockBasedTableOptions::IndexMode::kStandardDefault; options_.table_factory.reset(NewBlockBasedTableFactory(table_options)); std::unique_ptr db; @@ -9098,9 +9098,10 @@ TEST_P(UserDefinedIndexTest, MultiScanFailureTest) { std::string ingest_file = dbname + "test.sst"; // Set up the user-defined index factory - auto user_defined_index_factory = - std::make_shared(); + auto user_defined_index_factory = std::make_shared(); table_options.user_defined_index_factory = user_defined_index_factory; + table_options.index_mode = + BlockBasedTableOptions::IndexMode::kStandardDefault; // Set up custom flush block policy that flushes every 3 keys table_options.flush_block_policy_factory = @@ -9134,7 +9135,7 @@ TEST_P(UserDefinedIndexTest, MultiScanFailureTest) { std::vector key_ranges({"key03", "key05", "key12", "key14"}); ReadOptions ro; - ro.table_index_factory = user_defined_index_factory.get(); + ro.read_index = ReadOptions::ReadIndex::kCustom; Slice ub; ro.iterate_upper_bound = &ub; std::unordered_map 
property_bag; @@ -9285,9 +9286,10 @@ TEST_P(UserDefinedIndexTest, ConfigTest) { std::string ingest_file = dbname + "test.sst"; // Set up the user-defined index factory - auto user_defined_index_factory = - std::make_shared(); + auto user_defined_index_factory = std::make_shared(); table_options.user_defined_index_factory = user_defined_index_factory; + table_options.index_mode = + BlockBasedTableOptions::IndexMode::kStandardDefault; // Set up custom flush block policy that flushes every 3 keys table_options.flush_block_policy_factory = @@ -9309,11 +9311,11 @@ TEST_P(UserDefinedIndexTest, ConfigTest) { table_options.user_defined_index_factory.reset(); options_.table_factory.reset(NewBlockBasedTableFactory(table_options)); // Set up the user-defined index factory - ObjectLibrary::Default().get()->AddFactory( - "test_index", [](const std::string& /* uri */, - std::unique_ptr* guard, - std::string* /* errmsg */) { - auto factory = new TestUserDefinedIndexFactory(); + ObjectLibrary::Default().get()->AddFactory( + "test_index", + [](const std::string& /* uri */, std::unique_ptr* guard, + std::string* /* errmsg */) { + auto factory = new TestIndexFactory(); guard->reset(factory); return guard->get(); }); @@ -9337,7 +9339,7 @@ TEST_P(UserDefinedIndexTest, ConfigTest) { ReadOptions ro; Slice ub; ro.iterate_upper_bound = &ub; - ro.table_index_factory = user_defined_index_factory.get(); + ro.read_index = ReadOptions::ReadIndex::kCustom; std::unique_ptr iter(db->NewIterator(ro, cfh)); ASSERT_NE(iter, nullptr); MultiScanArgs scan_opts(options_.comparator); @@ -9379,9 +9381,10 @@ TEST_P(UserDefinedIndexTest, RangeDelete) { std::string ingest_file = dbname + "test.sst"; // Set up the user-defined index factory - auto user_defined_index_factory = - std::make_shared(); + auto user_defined_index_factory = std::make_shared(); table_options.user_defined_index_factory = user_defined_index_factory; + table_options.index_mode = + BlockBasedTableOptions::IndexMode::kStandardDefault; // 
Set up custom flush block policy that flushes every 3 keys table_options.flush_block_policy_factory = @@ -9496,9 +9499,10 @@ TEST_P(UserDefinedIndexTest, QueryCrossTwoFiles) { std::string ingest_file = dbname + "test.sst"; // Set up the user-defined index factory - auto user_defined_index_factory = - std::make_shared(); + auto user_defined_index_factory = std::make_shared(); table_options.user_defined_index_factory = user_defined_index_factory; + table_options.index_mode = + BlockBasedTableOptions::IndexMode::kStandardDefault; // Set up custom flush block policy that flushes every 3 keys table_options.flush_block_policy_factory = @@ -9728,7 +9732,7 @@ class UserDefinedIndexStressTest bool enable_udi_{}; bool enable_compaction_with_sst_partitioner_{}; uint32_t rand_seed_{}; - std::shared_ptr user_defined_index_factory_; + std::shared_ptr user_defined_index_factory_; BlockBasedTableOptions table_options_; const Comparator* comparator_{}; bool is_reverse_comparator_{}; @@ -9756,9 +9760,10 @@ class UserDefinedIndexStressTest if (enable_udi_) { // Set up the user-defined index factory - user_defined_index_factory_ = - std::make_shared(); + user_defined_index_factory_ = std::make_shared(); table_options_.user_defined_index_factory = user_defined_index_factory_; + table_options_.index_mode = + BlockBasedTableOptions::IndexMode::kStandardDefault; } options_.table_factory.reset(NewBlockBasedTableFactory(table_options_)); @@ -9964,7 +9969,7 @@ class UserDefinedIndexStressTest // Query ingest CF with UDI if it is enabled if (enable_udi_) { - ro.table_index_factory = user_defined_index_factory_.get(); + ro.read_index = ReadOptions::ReadIndex::kCustom; } iter.reset(db_->NewIterator(ro, ingest_cfh_)); diff --git a/tools/db_bench_tool.cc b/tools/db_bench_tool.cc index b60f51bd84ef..36c3f76d3010 100644 --- a/tools/db_bench_tool.cc +++ b/tools/db_bench_tool.cc @@ -2923,7 +2923,7 @@ class Benchmark { int64_t max_num_range_tombstones_; ReadOptions read_options_; WriteOptions 
write_options_; - std::shared_ptr udi_factory_; + std::shared_ptr udi_factory_; Options open_options_; // keep options around to properly destroy db later TraceOptions trace_options_; TraceOptions block_cache_trace_options_; @@ -3689,7 +3689,7 @@ class Benchmark { read_options_.auto_refresh_iterator_with_snapshot = FLAGS_auto_refresh_iterator_with_snapshot; if (FLAGS_use_trie_index && udi_factory_) { - read_options_.table_index_factory = udi_factory_.get(); + read_options_.read_index = ReadOptions::ReadIndex::kCustom; } void (Benchmark::*method)(ThreadState*) = nullptr; @@ -4893,6 +4893,8 @@ class Benchmark { if (FLAGS_use_trie_index) { udi_factory_ = std::make_shared(); block_based_options.user_defined_index_factory = udi_factory_; + block_based_options.index_mode = + BlockBasedTableOptions::IndexMode::kStandardDefault; } options.table_factory.reset( diff --git a/tools/db_crashtest.py b/tools/db_crashtest.py index 50645f2efa1a..2b1bc621627f 100644 --- a/tools/db_crashtest.py +++ b/tools/db_crashtest.py @@ -248,10 +248,10 @@ def apply_random_seed_per_iteration(): # use_trie_index must be the same across invocations so that all SSTs # in a DB are opened with matching table options. "use_trie_index": random.choice([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]), - # use_udi_as_primary_index must be the same across invocations (like - # use_trie_index) so that SSTs written in primary mode can be read on - # reopen. - "use_udi_as_primary_index": random.choice([0, 0, 0, 1]), + # index_mode must be the same across invocations (like use_trie_index) + # so that SSTs written in primary mode can be read on reopen. 
+ # 0=kStandardOnly, 1=kStandardDefault, 2=kCustomDefault, 3=kCustomOnly + "index_mode": random.choice([0, 0, 1, 2, 3]), # use_put_entity_one_in has to be the same across invocations for verification to work, hence no lambda "use_put_entity_one_in": random.choice([0] * 7 + [1, 5, 10]), "use_attribute_group": lambda: random.randint(0, 1), @@ -1093,10 +1093,12 @@ def finalize_and_sanitize(src_params): dest_params["mmap_read"] = 0 # Parallel compression is incompatible with UDI dest_params["compression_parallel_threads"] = 1 - if dest_params.get("use_udi_as_primary_index") == 1: - # Primary UDI mode: the standard index is still fully populated, - # but partitioned index (kTwoLevelIndexSearch) and partitioned - # filters are not compatible with the UDI wrapper layout. + index_mode = dest_params.get("index_mode", 0) + if index_mode >= 2: + # kCustomDefault/kCustomOnly: the standard index is still fully + # populated (in kCustomOnly it is an empty stub), but partitioned index + # (kTwoLevelIndexSearch) and partitioned filters are not + # compatible with the UDI wrapper layout. dest_params["index_type"] = random.choice([0, 0, 3]) dest_params["partition_filters"] = 0 # Backup/restore serializes Options to strings, losing the @@ -1109,8 +1111,8 @@ def finalize_and_sanitize(src_params): # reads cannot be routed through the trie. dest_params["test_secondary"] = 0 else: - # use_udi_as_primary_index requires use_trie_index - dest_params["use_udi_as_primary_index"] = 0 + # index_mode >= kStandardDefault requires use_trie_index + dest_params["index_mode"] = 0 # Multi-key operations are not currently compatible with transactions or # timestamp. 
diff --git a/utilities/trie_index/louds_trie.h b/utilities/trie_index/louds_trie.h index 7c65bc8d36d2..0b3b485add6e 100644 --- a/utilities/trie_index/louds_trie.h +++ b/utilities/trie_index/louds_trie.h @@ -66,7 +66,7 @@ class LoudsTrie; // ============================================================================ // BlockHandle: offset and size of a data block in the SST file. -// Matches UserDefinedIndexBuilder::BlockHandle but defined locally to avoid +// Matches IndexFactoryBuilder::BlockHandle but defined locally to avoid // header dependencies in the core trie implementation. // ============================================================================ struct TrieBlockHandle { diff --git a/utilities/trie_index/trie_index_db_test.cc b/utilities/trie_index/trie_index_db_test.cc index 11dadc632a53..dae686b067be 100644 --- a/utilities/trie_index/trie_index_db_test.cc +++ b/utilities/trie_index/trie_index_db_test.cc @@ -146,11 +146,15 @@ MakeStressLikeSqfcFactory() { return factory; } -// Parameterized on UDI mode: false = secondary, true = primary. -// All tests run in both modes to ensure full coverage. -class TrieIndexDBTest : public testing::TestWithParam { +// Parameterized on IndexMode: kStandardDefault, kCustomDefault, and +// kCustomOnly. All tests run in all three modes to ensure full coverage. +class TrieIndexDBTest + : public testing::TestWithParam { protected: - bool IsPrimaryMode() const { return GetParam(); } + bool IsPrimaryMode() const { + return GetParam() >= BlockBasedTableOptions::IndexMode::kCustomDefault; + } + BlockBasedTableOptions::IndexMode GetIndexMode() const { return GetParam(); } void SetUp() override { trie_factory_ = std::make_shared(); @@ -168,24 +172,27 @@ class TrieIndexDBTest : public testing::TestWithParam { // Opens a DB using the parameterized UDI mode. 
Status OpenDB(int block_size = 0) { - return OpenDBImpl(block_size, IsPrimaryMode()); + return OpenDBImpl(block_size, GetIndexMode()); } // Explicitly opens as primary -- used by the backward compatibility test. Status OpenDBPrimary(int block_size = 0) { - return OpenDBImpl(block_size, /*udi_primary=*/true); + return OpenDBImpl(block_size, + BlockBasedTableOptions::IndexMode::kCustomDefault); } // Explicitly opens as secondary -- used by the backward compatibility test. Status OpenDBSecondary(int block_size = 0) { - return OpenDBImpl(block_size, /*udi_primary=*/false); + return OpenDBImpl(block_size, + BlockBasedTableOptions::IndexMode::kStandardDefault); } - Status OpenDBImpl(int block_size, bool udi_primary) { + Status OpenDBImpl(int block_size, + BlockBasedTableOptions::IndexMode index_mode) { options_.create_if_missing = true; BlockBasedTableOptions table_options; table_options.user_defined_index_factory = trie_factory_; - table_options.use_udi_as_primary_index = udi_primary; + table_options.index_mode = index_mode; if (block_size > 0) { table_options.block_size = block_size; } @@ -194,19 +201,24 @@ class TrieIndexDBTest : public testing::TestWithParam { return DB::Open(options_, dbname_, &db_); } - // Returns a ReadOptions for the standard index. In secondary mode, this - // is a bare ReadOptions (no table_index_factory). In primary mode, this - // also returns a bare ReadOptions -- which routes through the trie anyway, - // making the dual-index comparison a trie-vs-trie sanity check. - ReadOptions StandardIndexReadOptions() const { return ReadOptions(); } + // Returns a ReadOptions that routes reads through the built-in binary + // search index. In secondary mode, this is the default. In primary mode, + // kBuiltin overrides the custom index routing. 
+ ReadOptions StandardIndexReadOptions() const { + ReadOptions ro; + if (IsPrimaryMode()) { + ro.read_index = ReadOptions::ReadIndex::kBuiltin; + } + return ro; + } // Returns a ReadOptions that routes reads through the trie. In primary - // mode, a bare ReadOptions already uses the trie, so table_index_factory - // is not set. In secondary mode, table_index_factory is set explicitly. + // mode, a bare ReadOptions already uses the trie, so read_index + // is not set. In secondary mode, read_index is set explicitly. ReadOptions TrieIndexReadOptions() const { ReadOptions ro; if (!IsPrimaryMode()) { - ro.table_index_factory = trie_factory_.get(); + ro.read_index = ReadOptions::ReadIndex::kCustom; } return ro; } @@ -223,8 +235,13 @@ class TrieIndexDBTest : public testing::TestWithParam { return keys; } - // Collects all visible keys via forward scan using the standard index. + // Collects all visible keys via forward scan using the default index for + // the current mode: standard index for kStandardDefault/kCustomDefault, trie + // index for kCustomOnly (where the standard index is an empty stub). std::vector ScanAllKeys() { + if (GetIndexMode() == BlockBasedTableOptions::IndexMode::kCustomOnly) { + return ScanAllKeys(TrieIndexReadOptions()); + } return ScanAllKeys(StandardIndexReadOptions()); } @@ -244,8 +261,10 @@ class TrieIndexDBTest : public testing::TestWithParam { // Verifies that forward scan via SeekToFirst+Next AND reverse scan via // SeekToLast+Prev both produce the expected key set through both the // standard index and the trie index. + // In kCustomOnly mode, the standard index is an empty stub, so we skip + // the standard index comparison. 
void VerifyScanBothIndexes(const std::vector& expected_keys) { - { + if (GetIndexMode() != BlockBasedTableOptions::IndexMode::kCustomOnly) { SCOPED_TRACE("standard index forward"); ASSERT_EQ(ScanAllKeys(StandardIndexReadOptions()), expected_keys); } @@ -256,7 +275,7 @@ class TrieIndexDBTest : public testing::TestWithParam { // Reverse scan must produce the reversed key set. std::vector expected_reverse(expected_keys.rbegin(), expected_keys.rend()); - { + if (GetIndexMode() != BlockBasedTableOptions::IndexMode::kCustomOnly) { SCOPED_TRACE("standard index reverse"); ASSERT_EQ(ReverseScanAllKeys(StandardIndexReadOptions()), expected_reverse); @@ -272,7 +291,7 @@ class TrieIndexDBTest : public testing::TestWithParam { // both indexes. void VerifyScanBothIndexes( const std::vector>& expected_kvs) { - { + if (GetIndexMode() != BlockBasedTableOptions::IndexMode::kCustomOnly) { SCOPED_TRACE("standard index forward"); ASSERT_EQ(ScanAllKeyValues(StandardIndexReadOptions()), expected_kvs); } @@ -283,7 +302,7 @@ class TrieIndexDBTest : public testing::TestWithParam { // Reverse scan must produce the reversed pairs. std::vector> expected_reverse( expected_kvs.rbegin(), expected_kvs.rend()); - { + if (GetIndexMode() != BlockBasedTableOptions::IndexMode::kCustomOnly) { SCOPED_TRACE("standard index reverse"); ASSERT_EQ(ReverseScanAllKeyValues(StandardIndexReadOptions()), expected_reverse); @@ -295,12 +314,22 @@ class TrieIndexDBTest : public testing::TestWithParam { } } + // Returns the list of ReadOptions to test through both indexes, skipping the + // standard index in kCustomOnly mode (where it's an empty stub). + std::vector BothIndexReadOptions() const { + if (GetIndexMode() == BlockBasedTableOptions::IndexMode::kCustomOnly) { + return {TrieIndexReadOptions()}; + } + return {StandardIndexReadOptions(), TrieIndexReadOptions()}; + } + // Verifies that a point Get returns the expected value through both indexes. 
void VerifyGetBothIndexes(const std::string& key, const std::string& expected_value) { - for (const auto& ro : - {StandardIndexReadOptions(), TrieIndexReadOptions()}) { - SCOPED_TRACE(ro.table_index_factory ? "trie index" : "standard index"); + for (const auto& ro : BothIndexReadOptions()) { + SCOPED_TRACE(ro.read_index != ReadOptions::ReadIndex::kDefault + ? "custom index" + : "builtin index"); std::string value; ASSERT_OK(db_->Get(ro, key, &value)); ASSERT_EQ(value, expected_value); @@ -309,9 +338,10 @@ class TrieIndexDBTest : public testing::TestWithParam { // Verifies that a point Get returns NotFound through both indexes. void VerifyGetNotFoundBothIndexes(const std::string& key) { - for (const auto& ro : - {StandardIndexReadOptions(), TrieIndexReadOptions()}) { - SCOPED_TRACE(ro.table_index_factory ? "trie index" : "standard index"); + for (const auto& ro : BothIndexReadOptions()) { + SCOPED_TRACE(ro.read_index != ReadOptions::ReadIndex::kDefault + ? "custom index" + : "builtin index"); std::string value; ASSERT_TRUE(db_->Get(ro, key, &value).IsNotFound()); } @@ -320,9 +350,10 @@ class TrieIndexDBTest : public testing::TestWithParam { // Verifies Get with a snapshot through both indexes. void VerifyGetBothIndexes(const Snapshot* snap, const std::string& key, const std::string& expected_value) { - for (auto base_ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) { - SCOPED_TRACE(base_ro.table_index_factory ? "trie index" - : "standard index"); + for (auto base_ro : BothIndexReadOptions()) { + SCOPED_TRACE(base_ro.read_index != ReadOptions::ReadIndex::kDefault + ? "custom index" + : "builtin index"); base_ro.snapshot = snap; std::string value; ASSERT_OK(db_->Get(base_ro, key, &value)); @@ -333,9 +364,10 @@ class TrieIndexDBTest : public testing::TestWithParam { // Verifies Get returns NotFound with a snapshot through both indexes. 
void VerifyGetNotFoundBothIndexes(const Snapshot* snap, const std::string& key) { - for (auto base_ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) { - SCOPED_TRACE(base_ro.table_index_factory ? "trie index" - : "standard index"); + for (auto base_ro : BothIndexReadOptions()) { + SCOPED_TRACE(base_ro.read_index != ReadOptions::ReadIndex::kDefault + ? "custom index" + : "builtin index"); base_ro.snapshot = snap; std::string value; ASSERT_TRUE(db_->Get(base_ro, key, &value).IsNotFound()); @@ -347,18 +379,20 @@ class TrieIndexDBTest : public testing::TestWithParam { void VerifyScanBothIndexes( const Snapshot* snap, const std::vector>& expected_kvs) { - for (auto base_ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) { - SCOPED_TRACE(base_ro.table_index_factory ? "trie index forward" - : "standard index forward"); + for (auto base_ro : BothIndexReadOptions()) { + SCOPED_TRACE(base_ro.read_index != ReadOptions::ReadIndex::kDefault + ? "custom index forward" + : "builtin index forward"); base_ro.snapshot = snap; ASSERT_EQ(ScanAllKeyValues(base_ro), expected_kvs); } // Reverse scan at the same snapshot must produce reversed pairs. std::vector> expected_reverse( expected_kvs.rbegin(), expected_kvs.rend()); - for (auto base_ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) { - SCOPED_TRACE(base_ro.table_index_factory ? "trie index reverse" - : "standard index reverse"); + for (auto base_ro : BothIndexReadOptions()) { + SCOPED_TRACE(base_ro.read_index != ReadOptions::ReadIndex::kDefault + ? 
"custom index reverse" + : "builtin index reverse"); base_ro.snapshot = snap; ASSERT_EQ(ReverseScanAllKeyValues(base_ro), expected_reverse); } @@ -369,9 +403,10 @@ class TrieIndexDBTest : public testing::TestWithParam { void VerifySeekBothIndexes(const std::string& seek_key, const std::string& expected_key, const std::string& expected_value) { - for (const auto& ro : - {StandardIndexReadOptions(), TrieIndexReadOptions()}) { - SCOPED_TRACE(ro.table_index_factory ? "trie index" : "standard index"); + for (const auto& ro : BothIndexReadOptions()) { + SCOPED_TRACE(ro.read_index != ReadOptions::ReadIndex::kDefault + ? "custom index" + : "builtin index"); std::unique_ptr iter(db_->NewIterator(ro)); iter->Seek(seek_key); ASSERT_TRUE(iter->Valid()); @@ -386,9 +421,10 @@ class TrieIndexDBTest : public testing::TestWithParam { void VerifySeekBothIndexes(const Snapshot* snap, const std::string& seek_key, const std::string& expected_key, const std::string& expected_value) { - for (auto base_ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) { - SCOPED_TRACE(base_ro.table_index_factory ? "trie index" - : "standard index"); + for (auto base_ro : BothIndexReadOptions()) { + SCOPED_TRACE(base_ro.read_index != ReadOptions::ReadIndex::kDefault + ? "custom index" + : "builtin index"); base_ro.snapshot = snap; std::unique_ptr iter(db_->NewIterator(base_ro)); iter->Seek(seek_key); @@ -533,9 +569,10 @@ class TrieIndexDBTest : public testing::TestWithParam { void VerifySeekForPrevBothIndexes(const std::string& target, const std::string& expected_key, const std::string& expected_value) { - for (const auto& ro : - {StandardIndexReadOptions(), TrieIndexReadOptions()}) { - SCOPED_TRACE(ro.table_index_factory ? "trie index" : "standard index"); + for (const auto& ro : BothIndexReadOptions()) { + SCOPED_TRACE(ro.read_index != ReadOptions::ReadIndex::kDefault + ? 
"custom index" + : "builtin index"); std::unique_ptr iter(db_->NewIterator(ro)); iter->SeekForPrev(target); ASSERT_TRUE(iter->Valid()); @@ -547,9 +584,10 @@ class TrieIndexDBTest : public testing::TestWithParam { // Verifies SeekForPrev returns invalid (target before all keys). void VerifySeekForPrevNotFoundBothIndexes(const std::string& target) { - for (const auto& ro : - {StandardIndexReadOptions(), TrieIndexReadOptions()}) { - SCOPED_TRACE(ro.table_index_factory ? "trie index" : "standard index"); + for (const auto& ro : BothIndexReadOptions()) { + SCOPED_TRACE(ro.read_index != ReadOptions::ReadIndex::kDefault + ? "custom index" + : "builtin index"); std::unique_ptr iter(db_->NewIterator(ro)); iter->SeekForPrev(target); ASSERT_FALSE(iter->Valid()); @@ -985,8 +1023,10 @@ TEST_P(TrieIndexDBTest, ReverseIteration) { ASSERT_NO_FATAL_FAILURE(VerifySeekForPrevNotFoundBothIndexes("key_00")); // Prev from a Seek position in the middle of the range -- both indexes. - for (const auto& ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) { - SCOPED_TRACE(ro.table_index_factory ? "trie index" : "standard index"); + for (const auto& ro : BothIndexReadOptions()) { + SCOPED_TRACE(ro.read_index != ReadOptions::ReadIndex::kDefault + ? 
"custom index" + : "builtin index"); std::unique_ptr iter(db_->NewIterator(ro)); iter->Seek("key_05"); ASSERT_TRUE(iter->Valid()); @@ -1125,6 +1165,8 @@ TEST_P(TrieIndexDBTest, IngestExternalFileWithTrieUDI) { sst_options.merge_operator = MergeOperators::CreateStringAppendOperator(); BlockBasedTableOptions table_options; table_options.user_defined_index_factory = trie_factory_; + table_options.index_mode = + BlockBasedTableOptions::IndexMode::kStandardDefault; sst_options.table_factory.reset(NewBlockBasedTableFactory(table_options)); SstFileWriter writer(EnvOptions(), sst_options); @@ -1267,8 +1309,10 @@ TEST_P(TrieIndexDBTest, LargeMixedOperationsAcrossBlocks) { ASSERT_NO_FATAL_FAILURE(VerifyScanBothIndexes(expected_visible)); // Spot-check: Seek to every 10th visible key via both indexes. - for (const auto& ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) { - SCOPED_TRACE(ro.table_index_factory ? "trie index" : "standard index"); + for (const auto& ro : BothIndexReadOptions()) { + SCOPED_TRACE(ro.read_index != ReadOptions::ReadIndex::kDefault + ? 
"custom index" + : "builtin index"); std::unique_ptr iter(db_->NewIterator(ro)); for (size_t i = 0; i < expected_visible.size(); i += 10) { iter->Seek(expected_visible[i]); @@ -1471,18 +1515,17 @@ TEST_P(TrieIndexDBTest, const Slice lower_bound_slice(lower_bound); const Slice upper_bound_slice(upper_bound); - const auto build_read_options = - [&](const UserDefinedIndexFactory* table_index_factory) { - ReadOptions ro; - ro.snapshot = snapshot; - ro.auto_prefix_mode = true; - ro.allow_unprepared_value = true; - ro.auto_refresh_iterator_with_snapshot = true; - ro.iterate_lower_bound = &lower_bound_slice; - ro.iterate_upper_bound = &upper_bound_slice; - ro.table_index_factory = table_index_factory; - return ro; - }; + const auto build_read_options = [&](ReadOptions::ReadIndex read_index) { + ReadOptions ro; + ro.snapshot = snapshot; + ro.auto_prefix_mode = true; + ro.allow_unprepared_value = true; + ro.auto_refresh_iterator_with_snapshot = true; + ro.iterate_lower_bound = &lower_bound_slice; + ro.iterate_upper_bound = &upper_bound_slice; + ro.read_index = read_index; + return ro; + }; const std::vector> expected = { {before, large_value('a')}, @@ -1491,12 +1534,13 @@ TEST_P(TrieIndexDBTest, {expected_2, large_value('n')}, }; - const UserDefinedIndexFactory* table_index_factories[] = { - nullptr, trie_factory_.get()}; - for (const auto* table_index_factory : table_index_factories) { - SCOPED_TRACE(table_index_factory == nullptr ? "standard index" - : "trie index"); - const ReadOptions ro = build_read_options(table_index_factory); + const ReadOptions::ReadIndex read_indices[] = { + ReadOptions::ReadIndex::kDefault, ReadOptions::ReadIndex::kCustom}; + for (const auto read_index : read_indices) { + SCOPED_TRACE(read_index == ReadOptions::ReadIndex::kDefault + ? 
"builtin index" + : "custom index"); + const ReadOptions ro = build_read_options(read_index); ASSERT_EQ(ScanAllKeyValues(ro), expected); std::unique_ptr iter(db_->NewIterator(ro)); @@ -1520,6 +1564,7 @@ TEST_P(TrieIndexDBTest, AutoRefreshSnapshotNextAcrossSameUserKeyBoundaries) { BlockBasedTableOptions table_options; table_options.user_defined_index_factory = trie_factory_; + table_options.index_mode = GetIndexMode(); table_options.block_size = 64; table_options.separate_key_value_in_data_block = true; options_.table_factory.reset(NewBlockBasedTableFactory(table_options)); @@ -1552,7 +1597,7 @@ TEST_P(TrieIndexDBTest, AutoRefreshSnapshotNextAcrossSameUserKeyBoundaries) { std_ro.allow_unprepared_value = true; ReadOptions trie_ro = std_ro; - trie_ro.table_index_factory = trie_factory_.get(); + trie_ro.read_index = ReadOptions::ReadIndex::kCustom; std::unique_ptr std_iter(db_->NewIterator(std_ro)); std::unique_ptr trie_iter(db_->NewIterator(trie_ro)); @@ -1613,6 +1658,7 @@ TEST_P(TrieIndexDBTest, BlockBasedTableOptions table_options; table_options.user_defined_index_factory = trie_factory_; + table_options.index_mode = GetIndexMode(); table_options.block_size = 64; table_options.separate_key_value_in_data_block = true; options_.table_factory.reset(NewBlockBasedTableFactory(table_options)); @@ -1645,7 +1691,7 @@ TEST_P(TrieIndexDBTest, std_ro.allow_unprepared_value = true; ReadOptions trie_ro = std_ro; - trie_ro.table_index_factory = trie_factory_.get(); + trie_ro.read_index = ReadOptions::ReadIndex::kCustom; std::unique_ptr std_iter(db_->NewIterator(std_ro)); std::unique_ptr trie_iter(db_->NewIterator(trie_ro)); @@ -1704,6 +1750,7 @@ TEST_P(TrieIndexDBTest, BlockBasedTableOptions table_options; table_options.user_defined_index_factory = trie_factory_; + table_options.index_mode = GetIndexMode(); table_options.block_size = 128; table_options.separate_key_value_in_data_block = true; options_.table_factory.reset(NewBlockBasedTableFactory(table_options)); @@ -1742,7 
+1789,7 @@ TEST_P(TrieIndexDBTest, ro.auto_refresh_iterator_with_snapshot = true; ro.table_filter = sqfc_factory->GetTableFilterForRangeQuery(lb, ub); if (use_trie) { - ro.table_index_factory = trie_factory_.get(); + ro.read_index = ReadOptions::ReadIndex::kCustom; } if (use_coalescing) { return db_->NewCoalescingIterator(ro, {db_->DefaultColumnFamily()}); @@ -1827,8 +1874,10 @@ TEST_P(TrieIndexDBTest, MultiGetWithTrieUDI) { ASSERT_OK(db_->Flush(FlushOptions())); // MultiGet through both indexes. - for (const auto& ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) { - SCOPED_TRACE(ro.table_index_factory ? "trie index" : "standard index"); + for (const auto& ro : BothIndexReadOptions()) { + SCOPED_TRACE(ro.read_index != ReadOptions::ReadIndex::kDefault + ? "custom index" + : "builtin index"); std::vector mg_keys = {"key_01", "key_02", "key_03", "key_04", "key_05", "key_06", @@ -1912,6 +1961,7 @@ TEST_P(TrieIndexDBTest, MultipleColumnFamilies) { BlockBasedTableOptions table_options; table_options.user_defined_index_factory = trie_factory_; + table_options.index_mode = GetIndexMode(); options_.table_factory.reset(NewBlockBasedTableFactory(table_options)); last_options_ = options_; @@ -1938,16 +1988,20 @@ TEST_P(TrieIndexDBTest, MultipleColumnFamilies) { ASSERT_OK(db_->Flush(FlushOptions(), cf2)); // Verify default CF. - for (const auto& ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) { - SCOPED_TRACE(ro.table_index_factory ? "trie index" : "standard index"); + for (const auto& ro : BothIndexReadOptions()) { + SCOPED_TRACE(ro.read_index != ReadOptions::ReadIndex::kDefault + ? "custom index" + : "builtin index"); std::string value; ASSERT_OK(db_->Get(ro, "default_key", &value)); ASSERT_EQ(value, "default_val"); } // Verify cf_one through both indexes. - for (const auto& ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) { - SCOPED_TRACE(ro.table_index_factory ? 
"trie index" : "standard index"); + for (const auto& ro : BothIndexReadOptions()) { + SCOPED_TRACE(ro.read_index != ReadOptions::ReadIndex::kDefault + ? "custom index" + : "builtin index"); std::string value; ASSERT_OK(db_->Get(ro, cf1, "cf1_key_a", &value)); ASSERT_EQ(value, "cf1_val_a"); @@ -1956,8 +2010,10 @@ TEST_P(TrieIndexDBTest, MultipleColumnFamilies) { } // Verify cf_two through both indexes. - for (const auto& ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) { - SCOPED_TRACE(ro.table_index_factory ? "trie index" : "standard index"); + for (const auto& ro : BothIndexReadOptions()) { + SCOPED_TRACE(ro.read_index != ReadOptions::ReadIndex::kDefault + ? "custom index" + : "builtin index"); std::string value; ASSERT_OK(db_->Get(ro, cf2, "cf2_key_x", &value)); ASSERT_EQ(value, "cf2_val_x"); @@ -1967,8 +2023,10 @@ TEST_P(TrieIndexDBTest, MultipleColumnFamilies) { } // Forward scan on each CF via both indexes. - for (const auto& ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) { - SCOPED_TRACE(ro.table_index_factory ? "trie index" : "standard index"); + for (const auto& ro : BothIndexReadOptions()) { + SCOPED_TRACE(ro.read_index != ReadOptions::ReadIndex::kDefault + ? "custom index" + : "builtin index"); // cf_one scan. std::unique_ptr it1(db_->NewIterator(ro, cf1)); @@ -2068,12 +2126,13 @@ TEST_P(TrieIndexDBTest, BatchedPrefixScan) { ASSERT_OK(db_->Flush(FlushOptions())); // Phase 2: Prefix scan with both indexes. - for (int idx_type = 0; idx_type < 2; ++idx_type) { - ReadOptions base_ro = - idx_type == 0 ? StandardIndexReadOptions() : TrieIndexReadOptions(); - SCOPED_TRACE(idx_type == 0 ? "standard index" : "trie index"); + for (const auto& base_ro_template : BothIndexReadOptions()) { + SCOPED_TRACE(base_ro_template.read_index != ReadOptions::ReadIndex::kDefault + ? 
"trie index" + : "standard index"); const Snapshot* snap = db_->GetSnapshot(); + ReadOptions base_ro = base_ro_template; base_ro.snapshot = snap; uint64_t count = VerifyPrefixScanLockstep(base_ro, kNumPrefixes, @@ -2164,12 +2223,13 @@ TEST_P(TrieIndexDBTest, BatchedPrefixScanWithOverwrites) { } // Now verify with both indexes. - for (int idx_type = 0; idx_type < 2; ++idx_type) { - ReadOptions base_ro = - idx_type == 0 ? StandardIndexReadOptions() : TrieIndexReadOptions(); - SCOPED_TRACE(idx_type == 0 ? "standard index" : "trie index"); + for (const auto& base_ro_template : BothIndexReadOptions()) { + SCOPED_TRACE(base_ro_template.read_index != ReadOptions::ReadIndex::kDefault + ? "trie index" + : "standard index"); const Snapshot* snap = db_->GetSnapshot(); + ReadOptions base_ro = base_ro_template; base_ro.snapshot = snap; uint64_t count = VerifyPrefixScanLockstep(base_ro, kNumPrefixes, @@ -2293,37 +2353,42 @@ TEST_P(TrieIndexDBTest, PrefixIterationWithTrieIndex) { // Forward prefix scans. { std::vector expected = {"aaaa1", "aaaa2", "aaaa3", "aaaa4"}; - ASSERT_EQ(PrefixScan(StandardIndexReadOptions(), "aaaa"), expected); - ASSERT_EQ(PrefixScan(TrieIndexReadOptions(), "aaaa"), expected); + for (const auto& ro : BothIndexReadOptions()) { + ASSERT_EQ(PrefixScan(ro, "aaaa"), expected); + } } { std::vector expected = {"bbbb1", "bbbb2", "bbbb3"}; - ASSERT_EQ(PrefixScan(StandardIndexReadOptions(), "bbbb"), expected); - ASSERT_EQ(PrefixScan(TrieIndexReadOptions(), "bbbb"), expected); + for (const auto& ro : BothIndexReadOptions()) { + ASSERT_EQ(PrefixScan(ro, "bbbb"), expected); + } } { std::vector expected = {"cccc1"}; - ASSERT_EQ(PrefixScan(StandardIndexReadOptions(), "cccc"), expected); - ASSERT_EQ(PrefixScan(TrieIndexReadOptions(), "cccc"), expected); + for (const auto& ro : BothIndexReadOptions()) { + ASSERT_EQ(PrefixScan(ro, "cccc"), expected); + } } // Reverse prefix scans. 
{ std::vector expected = {"aaaa4", "aaaa3", "aaaa2", "aaaa1"}; - ASSERT_EQ(ReversePrefixScan(StandardIndexReadOptions(), "aaaa\xff"), - expected); - ASSERT_EQ(ReversePrefixScan(TrieIndexReadOptions(), "aaaa\xff"), expected); + for (const auto& ro : BothIndexReadOptions()) { + ASSERT_EQ(ReversePrefixScan(ro, "aaaa\xff"), expected); + } } { std::vector expected = {"bbbb3", "bbbb2", "bbbb1"}; - ASSERT_EQ(ReversePrefixScan(StandardIndexReadOptions(), "bbbb\xff"), - expected); - ASSERT_EQ(ReversePrefixScan(TrieIndexReadOptions(), "bbbb\xff"), expected); + for (const auto& ro : BothIndexReadOptions()) { + ASSERT_EQ(ReversePrefixScan(ro, "bbbb\xff"), expected); + } } // Direction switching within a prefix. - for (const auto& ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) { - SCOPED_TRACE(ro.table_index_factory ? "trie index" : "standard index"); + for (const auto& ro : BothIndexReadOptions()) { + SCOPED_TRACE(ro.read_index != ReadOptions::ReadIndex::kDefault + ? "custom index" + : "builtin index"); std::unique_ptr iter(db_->NewIterator(ro)); iter->Seek("aaaa2"); ASSERT_TRUE(iter->Valid()); @@ -2364,8 +2429,10 @@ TEST_P(TrieIndexDBTest, PrefixIterationWithUpperBound) { std::string upper = "aaaa0025"; Slice upper_bound(upper); - for (auto base_ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) { - SCOPED_TRACE(base_ro.table_index_factory ? "trie" : "standard"); + for (auto base_ro : BothIndexReadOptions()) { + SCOPED_TRACE(base_ro.read_index != ReadOptions::ReadIndex::kDefault + ? "custom" + : "builtin"); base_ro.iterate_upper_bound = &upper_bound; std::vector keys; std::unique_ptr iter(db_->NewIterator(base_ro)); @@ -2378,8 +2445,10 @@ TEST_P(TrieIndexDBTest, PrefixIterationWithUpperBound) { ASSERT_EQ(keys.back(), "aaaa0024"); } - for (auto base_ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) { - SCOPED_TRACE(base_ro.table_index_factory ? 
"trie" : "standard"); + for (auto base_ro : BothIndexReadOptions()) { + SCOPED_TRACE(base_ro.read_index != ReadOptions::ReadIndex::kDefault + ? "custom" + : "builtin"); base_ro.iterate_upper_bound = &upper_bound; std::unique_ptr iter(db_->NewIterator(base_ro)); iter->SeekToLast(); @@ -2388,8 +2457,10 @@ TEST_P(TrieIndexDBTest, PrefixIterationWithUpperBound) { ASSERT_OK(iter->status()); } - for (auto base_ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) { - SCOPED_TRACE(base_ro.table_index_factory ? "trie" : "standard"); + for (auto base_ro : BothIndexReadOptions()) { + SCOPED_TRACE(base_ro.read_index != ReadOptions::ReadIndex::kDefault + ? "custom" + : "builtin"); base_ro.iterate_upper_bound = &upper_bound; std::vector keys; std::unique_ptr iter(db_->NewIterator(base_ro)); @@ -2425,51 +2496,45 @@ TEST_P(TrieIndexDBTest, PrefixIterationDirectionSwitchStress) { for (const char* pfx : prefixes) { SCOPED_TRACE(pfx); - std::vector std_keys; - std::vector trie_keys; - { - auto ro = StandardIndexReadOptions(); + // Collect keys through each available index and verify consistency. 
+ std::vector ref_keys; + for (const auto& ro : BothIndexReadOptions()) { + std::vector cur_keys; std::unique_ptr iter(db_->NewIterator(ro)); for (iter->Seek(pfx); iter->Valid(); iter->Next()) { if (iter->key().ToString().substr(0, 3) != std::string(pfx)) { break; } - std_keys.push_back(iter->key().ToString()); + cur_keys.push_back(iter->key().ToString()); } ASSERT_OK(iter->status()); - } - { - auto ro = TrieIndexReadOptions(); - std::unique_ptr iter(db_->NewIterator(ro)); - for (iter->Seek(pfx); iter->Valid(); iter->Next()) { - if (iter->key().ToString().substr(0, 3) != std::string(pfx)) { - break; - } - trie_keys.push_back(iter->key().ToString()); + if (ref_keys.empty()) { + ref_keys = cur_keys; + } else { + ASSERT_EQ(ref_keys, cur_keys); } - ASSERT_OK(iter->status()); } - ASSERT_EQ(std_keys, trie_keys); - ASSERT_FALSE(std_keys.empty()); + ASSERT_FALSE(ref_keys.empty()); - size_t mid = std_keys.size() / 2; - for (const auto& ro : - {StandardIndexReadOptions(), TrieIndexReadOptions()}) { - SCOPED_TRACE(ro.table_index_factory ? "trie" : "standard"); + size_t mid = ref_keys.size() / 2; + for (const auto& ro : BothIndexReadOptions()) { + SCOPED_TRACE(ro.read_index != ReadOptions::ReadIndex::kDefault + ? 
"custom" + : "builtin"); std::unique_ptr iter(db_->NewIterator(ro)); - iter->Seek(std_keys[mid]); + iter->Seek(ref_keys[mid]); ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->key().ToString(), std_keys[mid]); - for (int i = 1; i <= 3 && mid + i < std_keys.size(); i++) { + ASSERT_EQ(iter->key().ToString(), ref_keys[mid]); + for (int i = 1; i <= 3 && mid + i < ref_keys.size(); i++) { iter->Next(); ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->key().ToString(), std_keys[mid + i]); + ASSERT_EQ(iter->key().ToString(), ref_keys[mid + i]); } - size_t pos = std::min(mid + 3, std_keys.size() - 1); + size_t pos = std::min(mid + 3, ref_keys.size() - 1); for (int i = 1; i <= 2 && pos >= 1; i++) { iter->Prev(); ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->key().ToString(), std_keys[pos - i]); + ASSERT_EQ(iter->key().ToString(), ref_keys[pos - i]); } ASSERT_OK(iter->status()); } @@ -2510,21 +2575,24 @@ TEST_P(TrieIndexDBTest, PrefixIterationWithDeletesAndMerges) { return result; }; - auto std_result = PrefixScan(StandardIndexReadOptions()); auto trie_result = PrefixScan(TrieIndexReadOptions()); - ASSERT_EQ(std_result, trie_result); - ASSERT_EQ(std_result.size(), 4u); - ASSERT_EQ(std_result[0].first, "aaa01"); - ASSERT_EQ(std_result[1], + ASSERT_EQ(trie_result.size(), 4u); + ASSERT_EQ(trie_result[0].first, "aaa01"); + ASSERT_EQ(trie_result[1], std::make_pair(std::string("aaa03"), std::string("v3,,m1"))); - ASSERT_EQ(std_result[2], + ASSERT_EQ(trie_result[2], std::make_pair(std::string("aaa05"), std::string("v5,,m2"))); - ASSERT_EQ(std_result[3].first, "aaa06"); + ASSERT_EQ(trie_result[3].first, "aaa06"); + if (GetIndexMode() != BlockBasedTableOptions::IndexMode::kCustomOnly) { + ASSERT_EQ(PrefixScan(StandardIndexReadOptions()), trie_result); + } - auto std_rev = ReversePrefixScanKeys(StandardIndexReadOptions(), "aaa"); auto trie_rev = ReversePrefixScanKeys(TrieIndexReadOptions(), "aaa"); - ASSERT_EQ(std_rev, trie_rev); - ASSERT_EQ(std_rev.size(), 4u); + ASSERT_EQ(trie_rev.size(), 
4u); + if (GetIndexMode() != BlockBasedTableOptions::IndexMode::kCustomOnly) { + ASSERT_EQ(ReversePrefixScanKeys(StandardIndexReadOptions(), "aaa"), + trie_rev); + } } TEST_P(TrieIndexDBTest, PrefixIterationAfterCompaction) { @@ -2551,8 +2619,10 @@ TEST_P(TrieIndexDBTest, PrefixIterationAfterCompaction) { for (const char* pfx : {"aaa", "bbb", "ccc"}) { SCOPED_TRACE(pfx); - ASSERT_EQ(PrefixScanKeys(StandardIndexReadOptions(), pfx), - PrefixScanKeys(TrieIndexReadOptions(), pfx)); + auto trie_keys = PrefixScanKeys(TrieIndexReadOptions(), pfx); + if (GetIndexMode() != BlockBasedTableOptions::IndexMode::kCustomOnly) { + ASSERT_EQ(PrefixScanKeys(StandardIndexReadOptions(), pfx), trie_keys); + } } } @@ -2591,8 +2661,9 @@ TEST_P(TrieIndexDBTest, PrefixIterationWithSnapshots) { return keys; }; - for (const auto& ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) { - SCOPED_TRACE(ro.table_index_factory ? "trie" : "standard"); + for (const auto& ro : BothIndexReadOptions()) { + SCOPED_TRACE(ro.read_index != ReadOptions::ReadIndex::kDefault ? "custom" + : "builtin"); ASSERT_EQ(PrefixScanAt(ro, snap1, "aaa"), (std::vector{"aaa01", "aaa02"})); ASSERT_EQ(PrefixScanAt(ro, snap2, "aaa"), @@ -2613,8 +2684,9 @@ TEST_P(TrieIndexDBTest, PrefixIterationEmptyPrefix) { ASSERT_OK(db_->Put(WriteOptions(), "ccc01", "v3")); ASSERT_OK(db_->Flush(FlushOptions())); - for (const auto& ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) { - SCOPED_TRACE(ro.table_index_factory ? "trie" : "standard"); + for (const auto& ro : BothIndexReadOptions()) { + SCOPED_TRACE(ro.read_index != ReadOptions::ReadIndex::kDefault ? "custom" + : "builtin"); std::unique_ptr iter(db_->NewIterator(ro)); iter->Seek("bbb"); if (iter->Valid()) { @@ -2646,8 +2718,10 @@ TEST_P(TrieIndexDBTest, PrefixIterationWithLowerBound) { std::string lower = "aaa0010"; Slice lower_bound(lower); - for (auto base_ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) { - SCOPED_TRACE(base_ro.table_index_factory ? 
"trie" : "standard"); + for (auto base_ro : BothIndexReadOptions()) { + SCOPED_TRACE(base_ro.read_index != ReadOptions::ReadIndex::kDefault + ? "custom" + : "builtin"); base_ro.iterate_lower_bound = &lower_bound; std::vector keys; std::unique_ptr iter(db_->NewIterator(base_ro)); @@ -2663,8 +2737,10 @@ TEST_P(TrieIndexDBTest, PrefixIterationWithLowerBound) { ASSERT_EQ(keys.back(), "aaa0019"); } - for (auto base_ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) { - SCOPED_TRACE(base_ro.table_index_factory ? "trie" : "standard"); + for (auto base_ro : BothIndexReadOptions()) { + SCOPED_TRACE(base_ro.read_index != ReadOptions::ReadIndex::kDefault + ? "custom" + : "builtin"); base_ro.iterate_lower_bound = &lower_bound; std::unique_ptr iter(db_->NewIterator(base_ro)); iter->SeekToFirst(); @@ -2692,13 +2768,17 @@ TEST_P(TrieIndexDBTest, PrefixIterationWithDeleteRange) { "aaa0005", "aaa0015")); ASSERT_OK(db_->Flush(FlushOptions())); - auto std_keys = PrefixScanKeys(StandardIndexReadOptions(), "aaa"); auto trie_keys = PrefixScanKeys(TrieIndexReadOptions(), "aaa"); - ASSERT_EQ(std_keys, trie_keys); - ASSERT_EQ(std_keys.size(), 10u); + ASSERT_EQ(trie_keys.size(), 10u); + if (GetIndexMode() != BlockBasedTableOptions::IndexMode::kCustomOnly) { + ASSERT_EQ(PrefixScanKeys(StandardIndexReadOptions(), "aaa"), trie_keys); + } - ASSERT_EQ(ReversePrefixScanKeys(StandardIndexReadOptions(), "aaa"), - ReversePrefixScanKeys(TrieIndexReadOptions(), "aaa")); + auto trie_rev = ReversePrefixScanKeys(TrieIndexReadOptions(), "aaa"); + if (GetIndexMode() != BlockBasedTableOptions::IndexMode::kCustomOnly) { + ASSERT_EQ(ReversePrefixScanKeys(StandardIndexReadOptions(), "aaa"), + trie_rev); + } } TEST_P(TrieIndexDBTest, PrefixIterationMemtablePlusSST) { @@ -2729,13 +2809,17 @@ TEST_P(TrieIndexDBTest, PrefixIterationMemtablePlusSST) { return result; }; - auto std_result = PrefixScan(StandardIndexReadOptions()); auto trie_result = PrefixScan(TrieIndexReadOptions()); - ASSERT_EQ(std_result, 
trie_result); - ASSERT_EQ(std_result.size(), 5u); + ASSERT_EQ(trie_result.size(), 5u); + if (GetIndexMode() != BlockBasedTableOptions::IndexMode::kCustomOnly) { + ASSERT_EQ(PrefixScan(StandardIndexReadOptions()), trie_result); + } - ASSERT_EQ(ReversePrefixScanKeys(StandardIndexReadOptions(), "aaa"), - ReversePrefixScanKeys(TrieIndexReadOptions(), "aaa")); + auto trie_rev = ReversePrefixScanKeys(TrieIndexReadOptions(), "aaa"); + if (GetIndexMode() != BlockBasedTableOptions::IndexMode::kCustomOnly) { + ASSERT_EQ(ReversePrefixScanKeys(StandardIndexReadOptions(), "aaa"), + trie_rev); + } } // --------------------------------------------------------------------------- @@ -2765,8 +2849,10 @@ TEST_P(TrieIndexDBTest, LastBlockSeparatorNotShortened) { // "not found". std::string seek_target = std::string("9\xff\xff\x01", 4); - for (const auto& ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) { - SCOPED_TRACE(ro.table_index_factory ? "trie index" : "standard index"); + for (const auto& ro : BothIndexReadOptions()) { + SCOPED_TRACE(ro.read_index != ReadOptions::ReadIndex::kDefault + ? "custom index" + : "builtin index"); std::unique_ptr iter(db_->NewIterator(ro)); iter->Seek(seek_target); @@ -2776,8 +2862,10 @@ TEST_P(TrieIndexDBTest, LastBlockSeparatorNotShortened) { } // Also verify the actual last key is still findable. - for (const auto& ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) { - SCOPED_TRACE(ro.table_index_factory ? "trie index" : "standard index"); + for (const auto& ro : BothIndexReadOptions()) { + SCOPED_TRACE(ro.read_index != ReadOptions::ReadIndex::kDefault + ? "custom index" + : "builtin index"); std::unique_ptr iter(db_->NewIterator(ro)); iter->Seek(std::string("9\xff\xff", 3)); @@ -2809,8 +2897,10 @@ TEST_P(TrieIndexDBTest, LastBlockSeparatorWithDeletes) { // Now seeking for the deleted key should yield "5bbb" or nothing, // depending on the seek target. Both indexes must agree. 
- for (const auto& ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) { - SCOPED_TRACE(ro.table_index_factory ? "trie index" : "standard index"); + for (const auto& ro : BothIndexReadOptions()) { + SCOPED_TRACE(ro.read_index != ReadOptions::ReadIndex::kDefault + ? "custom index" + : "builtin index"); std::unique_ptr iter(db_->NewIterator(ro)); // Seek to the deleted key -- should skip it and land on nothing (it was @@ -2834,8 +2924,10 @@ TEST_P(TrieIndexDBTest, LastBlockSeparatorWithDeletes) { // Compact to merge the tombstone, then verify again. ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - for (const auto& ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) { - SCOPED_TRACE(ro.table_index_factory ? "trie index" : "standard index"); + for (const auto& ro : BothIndexReadOptions()) { + SCOPED_TRACE(ro.read_index != ReadOptions::ReadIndex::kDefault + ? "custom index" + : "builtin index"); std::unique_ptr iter(db_->NewIterator(ro)); iter->SeekToFirst(); @@ -2873,8 +2965,10 @@ TEST_P(TrieIndexDBTest, SingleEntrySST) { ASSERT_NO_FATAL_FAILURE(VerifySeekBothIndexes("a", "only_key", "only_val")); // Seek past the key -- should be invalid. - for (const auto& ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) { - SCOPED_TRACE(ro.table_index_factory ? "trie index" : "standard index"); + for (const auto& ro : BothIndexReadOptions()) { + SCOPED_TRACE(ro.read_index != ReadOptions::ReadIndex::kDefault + ? "custom index" + : "builtin index"); std::unique_ptr iter(db_->NewIterator(ro)); iter->Seek("z"); ASSERT_FALSE(iter->Valid()); @@ -2960,8 +3054,9 @@ TEST_P(TrieIndexDBTest, EmptyDBOperations) { // Get / Seek / SeekToFirst on empty memtable (no SSTs yet). ASSERT_NO_FATAL_FAILURE(VerifyGetNotFoundBothIndexes("anything")); - for (const auto& ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) { - SCOPED_TRACE(ro.table_index_factory ? 
"trie" : "standard"); + for (const auto& ro : BothIndexReadOptions()) { + SCOPED_TRACE(ro.read_index != ReadOptions::ReadIndex::kDefault ? "custom" + : "builtin"); std::unique_ptr iter(db_->NewIterator(ro)); iter->SeekToFirst(); ASSERT_FALSE(iter->Valid()); @@ -2980,8 +3075,9 @@ TEST_P(TrieIndexDBTest, EmptyDBOperations) { ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); ASSERT_NO_FATAL_FAILURE(VerifyGetNotFoundBothIndexes("temp")); - for (const auto& ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) { - SCOPED_TRACE(ro.table_index_factory ? "trie" : "standard"); + for (const auto& ro : BothIndexReadOptions()) { + SCOPED_TRACE(ro.read_index != ReadOptions::ReadIndex::kDefault ? "custom" + : "builtin"); std::unique_ptr iter(db_->NewIterator(ro)); iter->SeekToFirst(); ASSERT_FALSE(iter->Valid()); @@ -3000,8 +3096,9 @@ TEST_P(TrieIndexDBTest, SeekEdgeCases) { } ASSERT_OK(db_->Flush(FlushOptions())); - for (const auto& ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) { - SCOPED_TRACE(ro.table_index_factory ? "trie" : "standard"); + for (const auto& ro : BothIndexReadOptions()) { + SCOPED_TRACE(ro.read_index != ReadOptions::ReadIndex::kDefault ? "custom" + : "builtin"); std::unique_ptr iter(db_->NewIterator(ro)); // Before first key. @@ -3057,8 +3154,9 @@ TEST_P(TrieIndexDBTest, GetEntityWithTrieUDI) { ASSERT_OK(db_->Put(WriteOptions(), "regular_key", "regular_val")); ASSERT_OK(db_->Flush(FlushOptions())); - for (const auto& ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) { - SCOPED_TRACE(ro.table_index_factory ? "trie" : "standard"); + for (const auto& ro : BothIndexReadOptions()) { + SCOPED_TRACE(ro.read_index != ReadOptions::ReadIndex::kDefault ? "custom" + : "builtin"); // GetEntity on a PutEntity key. PinnableWideColumns result; @@ -3119,8 +3217,9 @@ TEST_P(TrieIndexDBTest, OverlappingL0SSTs) { ASSERT_OK(db_->Flush(FlushOptions())); // Verify: latest writer wins for overlapping keys. 
- for (const auto& ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) { - SCOPED_TRACE(ro.table_index_factory ? "trie" : "standard"); + for (const auto& ro : BothIndexReadOptions()) { + SCOPED_TRACE(ro.read_index != ReadOptions::ReadIndex::kDefault ? "custom" + : "builtin"); auto kvs = ScanAllKeyValues(ro); ASSERT_EQ(kvs.size(), 100u); for (int i = 0; i < 100; i++) { @@ -3139,8 +3238,9 @@ TEST_P(TrieIndexDBTest, OverlappingL0SSTs) { // Compact all L0 → L1, re-verify. ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - for (const auto& ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) { - SCOPED_TRACE(ro.table_index_factory ? "trie" : "standard"); + for (const auto& ro : BothIndexReadOptions()) { + SCOPED_TRACE(ro.read_index != ReadOptions::ReadIndex::kDefault ? "custom" + : "builtin"); ASSERT_EQ(ScanAllKeyValues(ro).size(), 100u); } } @@ -3166,8 +3266,9 @@ TEST_P(TrieIndexDBTest, CompactRangeSubset) { ASSERT_OK(db_->CompactRange(cro, &begin_s, &end_s)); // All 26 keys should still be readable. - for (const auto& ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) { - SCOPED_TRACE(ro.table_index_factory ? "trie" : "standard"); + for (const auto& ro : BothIndexReadOptions()) { + SCOPED_TRACE(ro.read_index != ReadOptions::ReadIndex::kDefault ? "custom" + : "builtin"); ASSERT_EQ(ScanAllKeys(ro).size(), 26u); } ASSERT_NO_FATAL_FAILURE(VerifyGetBothIndexes("key_a", "val_0")); @@ -3194,15 +3295,17 @@ TEST_P(TrieIndexDBTest, AllKeysDeletedCompaction) { ASSERT_OK(db_->Flush(FlushOptions())); // Before compaction: tombstones hide all keys. - for (const auto& ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) { - SCOPED_TRACE(ro.table_index_factory ? "trie" : "standard"); + for (const auto& ro : BothIndexReadOptions()) { + SCOPED_TRACE(ro.read_index != ReadOptions::ReadIndex::kDefault ? "custom" + : "builtin"); ASSERT_EQ(ScanAllKeys(ro).size(), 0u); } // After compaction: all tombstones and data are gone. 
ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - for (const auto& ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) { - SCOPED_TRACE(ro.table_index_factory ? "trie" : "standard"); + for (const auto& ro : BothIndexReadOptions()) { + SCOPED_TRACE(ro.read_index != ReadOptions::ReadIndex::kDefault ? "custom" + : "builtin"); ASSERT_EQ(ScanAllKeys(ro).size(), 0u); } } @@ -3234,8 +3337,9 @@ TEST_P(TrieIndexDBTest, BinaryKeyEdgeCases) { ASSERT_OK(db_->Flush(FlushOptions())); // Forward scan: all keys in order through both indexes. - for (const auto& ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) { - SCOPED_TRACE(ro.table_index_factory ? "trie" : "standard"); + for (const auto& ro : BothIndexReadOptions()) { + SCOPED_TRACE(ro.read_index != ReadOptions::ReadIndex::kDefault ? "custom" + : "builtin"); auto actual = ScanAllKeyValues(ro); ASSERT_EQ(actual.size(), kvs.size()); for (size_t i = 0; i < kvs.size(); i++) { @@ -3297,8 +3401,9 @@ TEST_P(TrieIndexDBTest, CompressionZlib) { ASSERT_OK(db_->Flush(FlushOptions())); // Forward scan. - for (const auto& ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) { - SCOPED_TRACE(ro.table_index_factory ? "trie" : "standard"); + for (const auto& ro : BothIndexReadOptions()) { + SCOPED_TRACE(ro.read_index != ReadOptions::ReadIndex::kDefault ? "custom" + : "builtin"); ASSERT_EQ(ScanAllKeys(ro).size(), 100u); } @@ -3363,9 +3468,10 @@ TEST_P(TrieIndexDBTest, IteratorUpperBound) { } ASSERT_OK(db_->Flush(FlushOptions())); - for (const auto& base_ro : - {StandardIndexReadOptions(), TrieIndexReadOptions()}) { - SCOPED_TRACE(base_ro.table_index_factory ? "trie" : "standard"); + for (const auto& base_ro : BothIndexReadOptions()) { + SCOPED_TRACE(base_ro.read_index != ReadOptions::ReadIndex::kDefault + ? "custom" + : "builtin"); // Upper bound = "dd" → should see aa, bb, cc only. 
std::string ub_str = "dd"; @@ -3424,9 +3530,10 @@ TEST_P(TrieIndexDBTest, IteratorSnapshotAndUpperBound) { ASSERT_OK(db_->Put(WriteOptions(), "key_e", "new_e")); ASSERT_OK(db_->Flush(FlushOptions())); - for (const auto& base_ro : - {StandardIndexReadOptions(), TrieIndexReadOptions()}) { - SCOPED_TRACE(base_ro.table_index_factory ? "trie" : "standard"); + for (const auto& base_ro : BothIndexReadOptions()) { + SCOPED_TRACE(base_ro.read_index != ReadOptions::ReadIndex::kDefault + ? "custom" + : "builtin"); std::string ub_str = "key_d"; Slice ub(ub_str); @@ -3483,8 +3590,9 @@ TEST_P(TrieIndexDBTest, ManySmallSSTs) { } // Verify all 100 keys are readable. - for (const auto& ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) { - SCOPED_TRACE(ro.table_index_factory ? "trie" : "standard"); + for (const auto& ro : BothIndexReadOptions()) { + SCOPED_TRACE(ro.read_index != ReadOptions::ReadIndex::kDefault ? "custom" + : "builtin"); auto keys = ScanAllKeys(ro); ASSERT_EQ(keys.size(), 100u); } @@ -3495,8 +3603,9 @@ TEST_P(TrieIndexDBTest, ManySmallSSTs) { // Compact everything into one SST, re-verify. ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - for (const auto& ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) { - SCOPED_TRACE(ro.table_index_factory ? "trie" : "standard"); + for (const auto& ro : BothIndexReadOptions()) { + SCOPED_TRACE(ro.read_index != ReadOptions::ReadIndex::kDefault ? 
"custom" + : "builtin"); ASSERT_EQ(ScanAllKeys(ro).size(), 100u); } } @@ -3619,7 +3728,7 @@ TEST_P(TrieIndexDBTest, TransactionCommit) { options_.create_if_missing = true; BlockBasedTableOptions table_options; table_options.user_defined_index_factory = trie_factory_; - table_options.use_udi_as_primary_index = IsPrimaryMode(); + table_options.index_mode = GetIndexMode(); options_.table_factory.reset(NewBlockBasedTableFactory(table_options)); last_options_ = options_; @@ -3653,7 +3762,7 @@ TEST_P(TrieIndexDBTest, TransactionRollback) { options_.create_if_missing = true; BlockBasedTableOptions table_options; table_options.user_defined_index_factory = trie_factory_; - table_options.use_udi_as_primary_index = IsPrimaryMode(); + table_options.index_mode = GetIndexMode(); options_.table_factory.reset(NewBlockBasedTableFactory(table_options)); last_options_ = options_; @@ -3700,8 +3809,10 @@ TEST_P(TrieIndexDBTest, TotalOrderSeekWithPrefixExtractor) { ASSERT_OK(db_->Flush(FlushOptions())); // With total_order_seek=true, scan all keys across prefixes. - for (auto base_ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) { - SCOPED_TRACE(base_ro.table_index_factory ? "trie" : "standard"); + for (auto base_ro : BothIndexReadOptions()) { + SCOPED_TRACE(base_ro.read_index != ReadOptions::ReadIndex::kDefault + ? "custom" + : "builtin"); base_ro.total_order_seek = true; auto keys = ScanAllKeys(base_ro); ASSERT_EQ(keys.size(), 4u); @@ -3719,8 +3830,10 @@ TEST_P(TrieIndexDBTest, TotalOrderSeekWithPrefixExtractor) { } // auto_prefix_mode: let RocksDB decide per-seek. - for (auto base_ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) { - SCOPED_TRACE(base_ro.table_index_factory ? "trie" : "standard"); + for (auto base_ro : BothIndexReadOptions()) { + SCOPED_TRACE(base_ro.read_index != ReadOptions::ReadIndex::kDefault + ? 
"custom" + : "builtin"); base_ro.auto_prefix_mode = true; std::unique_ptr iter(db_->NewIterator(base_ro)); iter->Seek("bbb_1"); @@ -3765,12 +3878,16 @@ TEST_P(TrieIndexDBTest, MultiLevelDeleteRangeRandomized) { }; // Core correctness check: forward scan via both indexes must match. + // In kCustomOnly mode, the standard index is an empty stub, so we just + // verify that the trie scan succeeds (no crash, valid iteration). auto verify_scan_consistency = [&]() { - auto standard_kvs = ScanAllKeyValues(StandardIndexReadOptions()); auto trie_kvs = ScanAllKeyValues(TrieIndexReadOptions()); - ASSERT_EQ(standard_kvs, trie_kvs) - << "Scan mismatch: standard=" << standard_kvs.size() - << " trie=" << trie_kvs.size(); + if (GetIndexMode() != BlockBasedTableOptions::IndexMode::kCustomOnly) { + auto standard_kvs = ScanAllKeyValues(StandardIndexReadOptions()); + ASSERT_EQ(standard_kvs, trie_kvs) + << "Scan mismatch: standard=" << standard_kvs.size() + << " trie=" << trie_kvs.size(); + } }; // Phase 1: Populate bottommost level with baseline data. @@ -3832,7 +3949,10 @@ TEST_P(TrieIndexDBTest, MultiLevelDeleteRangeRandomized) { // Phase 3: Snapshot, then delete a large range. The snapshot must // preserve the pre-deletion state while current reads see the deletion. const Snapshot* snap = db_->GetSnapshot(); - auto snap_kvs = ScanAllKeyValues(StandardIndexReadOptions()); + auto snap_kvs = ScanAllKeyValues( + GetIndexMode() == BlockBasedTableOptions::IndexMode::kCustomOnly + ? TrieIndexReadOptions() + : StandardIndexReadOptions()); int big_start = rnd.Uniform(kMaxKey / 4); int big_end = big_start + kMaxKey / 3; @@ -3867,13 +3987,22 @@ TEST_P(TrieIndexDBTest, MultiLevelDeleteRangeRandomized) { // Phase 6: Point lookups for a sample of keys -- both indexes must agree. 
for (int i = 0; i < kMaxKey; i += 7) { std::string key = format_key(i); - std::string std_val; - std::string trie_val; - Status s1 = db_->Get(StandardIndexReadOptions(), key, &std_val); - Status s2 = db_->Get(TrieIndexReadOptions(), key, &trie_val); - ASSERT_EQ(s1.code(), s2.code()) << "Status mismatch for " << key; - if (s1.ok()) { - ASSERT_EQ(std_val, trie_val) << "Value mismatch for " << key; + if (GetIndexMode() != BlockBasedTableOptions::IndexMode::kCustomOnly) { + std::string std_val; + std::string trie_val; + Status s1 = db_->Get(StandardIndexReadOptions(), key, &std_val); + Status s2 = db_->Get(TrieIndexReadOptions(), key, &trie_val); + ASSERT_EQ(s1.code(), s2.code()) << "Status mismatch for " << key; + if (s1.ok()) { + ASSERT_EQ(std_val, trie_val) << "Value mismatch for " << key; + } + } else { + std::string trie_val; + // Just verify trie reads don't crash in kCustomOnly mode. + // The key may or may not exist (delete ranges active), so ignore + // NotFound but check for unexpected errors. + Status s = db_->Get(TrieIndexReadOptions(), key, &trie_val); + ASSERT_TRUE(s.ok() || s.IsNotFound()) << s.ToString(); } } } @@ -3974,8 +4103,10 @@ TEST_P(TrieIndexDBTest, PrevAfterSeekToFirstBothIndexes) { ASSERT_OK(db_->Put(WriteOptions(), "c", "3")); ASSERT_OK(db_->Flush(FlushOptions())); - for (const auto& ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) { - SCOPED_TRACE(ro.table_index_factory ? "trie index" : "standard index"); + for (const auto& ro : BothIndexReadOptions()) { + SCOPED_TRACE(ro.read_index != ReadOptions::ReadIndex::kDefault + ? "custom index" + : "builtin index"); std::unique_ptr iter(db_->NewIterator(ro)); iter->SeekToFirst(); ASSERT_TRUE(iter->Valid()); @@ -3994,8 +4125,10 @@ TEST_P(TrieIndexDBTest, ForwardThenReverseDirection) { WriteSequentialKeys(0, 50); ASSERT_OK(db_->Flush(FlushOptions())); - for (const auto& ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) { - SCOPED_TRACE(ro.table_index_factory ? 
"trie index" : "standard index"); + for (const auto& ro : BothIndexReadOptions()) { + SCOPED_TRACE(ro.read_index != ReadOptions::ReadIndex::kDefault + ? "custom index" + : "builtin index"); std::unique_ptr iter(db_->NewIterator(ro)); // Seek to middle, go forward a few, then reverse. @@ -4040,8 +4173,10 @@ TEST_P(TrieIndexDBTest, SeekToLastSingleEntry) { ASSERT_OK(db_->Put(WriteOptions(), "only_key", "only_val")); ASSERT_OK(db_->Flush(FlushOptions())); - for (const auto& ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) { - SCOPED_TRACE(ro.table_index_factory ? "trie index" : "standard index"); + for (const auto& ro : BothIndexReadOptions()) { + SCOPED_TRACE(ro.read_index != ReadOptions::ReadIndex::kDefault + ? "custom index" + : "builtin index"); std::unique_ptr iter(db_->NewIterator(ro)); iter->SeekToLast(); ASSERT_TRUE(iter->Valid()); @@ -4142,23 +4277,36 @@ TEST_P(TrieIndexDBTest, SeekForPrevVariableLengthKeys) { // SeekForPrev for every key -- trie must match standard index. for (const auto& key : keys) { - std::string std_result, trie_result; - { - std::unique_ptr iter( - db_->NewIterator(StandardIndexReadOptions())); + for (const auto& ro : BothIndexReadOptions()) { + SCOPED_TRACE(ro.read_index != ReadOptions::ReadIndex::kDefault + ? 
"custom index" + : "builtin index"); + std::unique_ptr iter(db_->NewIterator(ro)); iter->SeekForPrev(key); ASSERT_TRUE(iter->Valid()); - std_result = iter->key().ToString(); ASSERT_OK(iter->status()); } - { - std::unique_ptr iter(db_->NewIterator(TrieIndexReadOptions())); - iter->SeekForPrev(key); - ASSERT_TRUE(iter->Valid()); - trie_result = iter->key().ToString(); - ASSERT_OK(iter->status()); + if (GetIndexMode() != BlockBasedTableOptions::IndexMode::kCustomOnly) { + std::string std_result, trie_result; + { + std::unique_ptr iter( + db_->NewIterator(StandardIndexReadOptions())); + iter->SeekForPrev(key); + ASSERT_TRUE(iter->Valid()); + std_result = iter->key().ToString(); + ASSERT_OK(iter->status()); + } + { + std::unique_ptr iter( + db_->NewIterator(TrieIndexReadOptions())); + iter->SeekForPrev(key); + ASSERT_TRUE(iter->Valid()); + trie_result = iter->key().ToString(); + ASSERT_OK(iter->status()); + } + ASSERT_EQ(std_result, trie_result) + << "SeekForPrev(" << key << ") diverged"; } - ASSERT_EQ(std_result, trie_result) << "SeekForPrev(" << key << ") diverged"; } // SeekForPrev for keys between existing keys. @@ -4167,6 +4315,13 @@ TEST_P(TrieIndexDBTest, SeekForPrevVariableLengthKeys) { int len = snprintf(buf, sizeof(buf), "k%04da", i); std::string target(buf, len); + if (GetIndexMode() == BlockBasedTableOptions::IndexMode::kCustomOnly) { + // Just verify trie works in kCustomOnly mode. + std::unique_ptr iter(db_->NewIterator(TrieIndexReadOptions())); + iter->SeekForPrev(target); + ASSERT_OK(iter->status()); + continue; + } std::string std_result, trie_result; { std::unique_ptr iter( @@ -4198,7 +4353,7 @@ TEST_P(TrieIndexDBTest, SeekForPrevVariableLengthKeys) { TEST_P(TrieIndexDBTest, PrimaryUDIBackwardCompatibility) { // Verifies that SSTs written with UDI as secondary (both indexes present) // can be read correctly when the DB is reopened with - // use_udi_as_primary_index. This is the upgrade path: old SSTs have both + // index_mode=kCustomDefault. 
This is the upgrade path: old SSTs have both // indexes, new config says "use UDI as primary for all reads." ASSERT_OK(OpenDBSecondary(/*block_size=*/128)); @@ -4214,7 +4369,7 @@ TEST_P(TrieIndexDBTest, PrimaryUDIBackwardCompatibility) { db_.reset(); ASSERT_OK(OpenDBPrimary(/*block_size=*/128)); - // Now all reads automatically use UDI -- no ReadOptions::table_index_factory. + // Now all reads automatically use UDI -- no ReadOptions::read_index needed. ReadOptions ro; std::vector keys; std::unique_ptr iter(db_->NewIterator(ro)); @@ -4238,6 +4393,12 @@ TEST_P(TrieIndexDBTest, MigrationFullPath) { // Tests the complete recommended migration path: // Step 1: No UDI → Step 2: UDI secondary → Step 3: Compact all SSTs → // Step 4: UDI primary + // In kCustomOnly mode, the standard index is an empty stub, so the + // mixed-mode migration path is not applicable. + if (GetIndexMode() == BlockBasedTableOptions::IndexMode::kCustomOnly) { + ROCKSDB_GTEST_SKIP("Not applicable in kCustomOnly mode"); + return; + } // Step 1: Start without UDI. Write some data. ASSERT_OK(OpenDBWithoutUDI(/*block_size=*/128)); @@ -4281,7 +4442,7 @@ TEST_P(TrieIndexDBTest, MigrationFullPath) { // Step 4: Enable UDI as primary. ASSERT_OK(OpenDBPrimary(/*block_size=*/128)); - // All reads go through UDI automatically -- no table_index_factory needed. + // All reads go through UDI automatically -- no read_index needed. ReadOptions ro; std::vector keys; std::unique_ptr iter(db_->NewIterator(ro)); @@ -4299,7 +4460,7 @@ TEST_P(TrieIndexDBTest, MigrationFullPath) { } TEST_P(TrieIndexDBTest, MigrationPrimaryRejectsPreUDISSTs) { - // Verifies that enabling use_udi_as_primary_index on a DB with SSTs + // Verifies that enabling index_mode=kCustomDefault on a DB with SSTs // that have no UDI block fails at open time (not silently). 
options_.disable_auto_compactions = true; @@ -4335,14 +4496,14 @@ TEST_P(TrieIndexDBTest, RollbackFromPrimaryToSecondary) { // Rollback step 1: Reopen as secondary (with UDI factory still set). // Primary UDI routing is purely config-driven, so reopening as secondary // immediately reverts all reads to the standard index path. The trie is - // still accessible via explicit ReadOptions::table_index_factory. + // still accessible via explicit ReadOptions::read_index. ASSERT_OK(OpenDBSecondary(/*block_size=*/128)); // Verify the primary-mode SST is readable through both paths. // Explicit trie read: ASSERT_OK(db_->Get(TrieIndexReadOptions(), "key_0015", &value)); ASSERT_EQ(value, "val_0015"); - // Standard index read (default ReadOptions, no table_index_factory): + // Standard index read (default ReadOptions, no read_index): ASSERT_OK(db_->Get(ReadOptions(), "key_0015", &value)); ASSERT_EQ(value, "val_0015"); @@ -4384,6 +4545,12 @@ TEST_P(TrieIndexDBTest, RollbackFromPrimaryWithoutCompactSucceeds) { // Verifies that removing UDI from primary-mode SSTs WITHOUT compacting // first still works. The standard index is always fully populated (even // in primary mode), so reads fall back to the standard index correctly. + // In kCustomOnly mode, the standard index is an empty stub, so this + // rollback path is not applicable. + if (GetIndexMode() == BlockBasedTableOptions::IndexMode::kCustomOnly) { + ROCKSDB_GTEST_SKIP("Not applicable in kCustomOnly mode"); + return; + } options_.disable_auto_compactions = true; // Write SSTs in primary mode. @@ -4410,7 +4577,7 @@ TEST_P(TrieIndexDBTest, RollbackFromPrimaryWithoutCompactSucceeds) { TEST_P(TrieIndexDBTest, PrimaryModeTableProperties) { // Verifies primary-mode-specific behavior: the udi_is_primary_index table // property is set (informational, does not affect read routing), and - // reads work without setting ReadOptions::table_index_factory. + // reads work without setting ReadOptions::read_index. 
if (!IsPrimaryMode()) { ROCKSDB_GTEST_SKIP("Only applicable in primary mode"); return; @@ -4429,7 +4596,7 @@ TEST_P(TrieIndexDBTest, PrimaryModeTableProperties) { ASSERT_EQ(p.second->udi_is_primary_index, 1u); } - // Reads work with default ReadOptions (no table_index_factory needed). + // Reads work with default ReadOptions (no read_index needed). ReadOptions ro; std::string value; ASSERT_OK(db_->Get(ro, "key1", &value)); @@ -4438,7 +4605,13 @@ TEST_P(TrieIndexDBTest, PrimaryModeTableProperties) { TEST_P(TrieIndexDBTest, EstimatedSizeNonZero) { // Verifies that TrieIndexBuilder::EstimatedSize() returns non-zero after - // adding entries, ensuring compaction file sizing works. + // adding entries, ensuring compaction file sizing works. In kCustomOnly + // mode, the standard index is a stub with zero size — the trie's size is + // tracked separately in the meta block, not in the index_size property. + if (GetIndexMode() == BlockBasedTableOptions::IndexMode::kCustomOnly) { + ROCKSDB_GTEST_SKIP("index_size property tracks standard index only"); + return; + } ASSERT_OK(OpenDB(/*block_size=*/128)); // Write enough data to produce multiple blocks. @@ -4477,8 +4650,10 @@ TEST_P(TrieIndexDBTest, NonBoundarySeparatorSeekCorrectness) { ASSERT_OK(db_->Flush(FlushOptions())); // Seek for "acc" should find "acc" through both indexes. - for (auto base_ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) { - SCOPED_TRACE(base_ro.table_index_factory ? "trie" : "standard"); + for (auto base_ro : BothIndexReadOptions()) { + SCOPED_TRACE(base_ro.read_index != ReadOptions::ReadIndex::kDefault + ? "custom" + : "builtin"); base_ro.snapshot = read_snap.snapshot(); std::unique_ptr iter(db_->NewIterator(base_ro)); iter->Seek("acc"); @@ -4488,8 +4663,10 @@ TEST_P(TrieIndexDBTest, NonBoundarySeparatorSeekCorrectness) { } // Also verify point Get works. - for (auto base_ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) { - SCOPED_TRACE(base_ro.table_index_factory ? 
"trie" : "standard"); + for (auto base_ro : BothIndexReadOptions()) { + SCOPED_TRACE(base_ro.read_index != ReadOptions::ReadIndex::kDefault + ? "custom" + : "builtin"); base_ro.snapshot = read_snap.snapshot(); std::string value; ASSERT_OK(db_->Get(base_ro, "acc", &value)); @@ -4510,7 +4687,7 @@ TEST_P(TrieIndexDBTest, MultiCFCoalescingIterator) { options_.create_if_missing = true; BlockBasedTableOptions table_options; table_options.user_defined_index_factory = trie_factory_; - table_options.use_udi_as_primary_index = IsPrimaryMode(); + table_options.index_mode = GetIndexMode(); options_.table_factory.reset(NewBlockBasedTableFactory(table_options)); last_options_ = options_; ASSERT_OK(DB::Open(options_, dbname_, &db_)); @@ -4618,8 +4795,10 @@ TEST_P(TrieIndexDBTest, GetEntityWithExplicitSnapshotComparison) { ASSERT_OK(db_->Flush(FlushOptions())); // Read at snapshot through both indexes — should see v1 data. - for (auto base_ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) { - SCOPED_TRACE(base_ro.table_index_factory ? "trie" : "standard"); + for (auto base_ro : BothIndexReadOptions()) { + SCOPED_TRACE(base_ro.read_index != ReadOptions::ReadIndex::kDefault + ? "custom" + : "builtin"); base_ro.snapshot = snap; // GetEntity on PutEntity key at snapshot. @@ -4645,8 +4824,10 @@ TEST_P(TrieIndexDBTest, GetEntityWithExplicitSnapshotComparison) { } // Read without snapshot — should see v2 data. - for (auto base_ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) { - SCOPED_TRACE(base_ro.table_index_factory ? "trie" : "standard"); + for (auto base_ro : BothIndexReadOptions()) { + SCOPED_TRACE(base_ro.read_index != ReadOptions::ReadIndex::kDefault + ? 
"custom" + : "builtin"); PinnableWideColumns result; ASSERT_OK(db_->GetEntity(base_ro, db_->DefaultColumnFamily(), "entity_key", @@ -4687,8 +4868,10 @@ TEST_P(TrieIndexDBTest, ReverseIterationAcrossSameUserKeyBlocks) { VerifyScanBothIndexes(expected); // At an older snapshot, same_key should have the older value. - for (auto base_ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) { - SCOPED_TRACE(base_ro.table_index_factory ? "trie" : "standard"); + for (auto base_ro : BothIndexReadOptions()) { + SCOPED_TRACE(base_ro.read_index != ReadOptions::ReadIndex::kDefault + ? "custom" + : "builtin"); base_ro.snapshot = snapshots[3]; std::string value; ASSERT_OK(db_->Get(base_ro, "same_key", &value)); @@ -4696,8 +4879,10 @@ TEST_P(TrieIndexDBTest, ReverseIterationAcrossSameUserKeyBlocks) { } // Reverse scan through trie should produce zzz, same_key, aaa. - for (auto base_ro : {StandardIndexReadOptions(), TrieIndexReadOptions()}) { - SCOPED_TRACE(base_ro.table_index_factory ? "trie" : "standard"); + for (auto base_ro : BothIndexReadOptions()) { + SCOPED_TRACE(base_ro.read_index != ReadOptions::ReadIndex::kDefault + ? "custom" + : "builtin"); std::unique_ptr iter(db_->NewIterator(base_ro)); iter->SeekToLast(); ASSERT_TRUE(iter->Valid()); @@ -4721,11 +4906,93 @@ TEST_P(TrieIndexDBTest, ReverseIterationAcrossSameUserKeyBlocks) { } } -// Run all parameterized tests in both UDI modes: -// - Secondary (false): UDI is secondary, reads require table_index_factory -// - Primary (true): UDI is primary, all reads use the trie by default -INSTANTIATE_TEST_CASE_P(SecondaryAndPrimaryUDI, TrieIndexDBTest, - ::testing::Bool()); +// ============================================================================ +// Prefetch boundary comparison with custom index wrapper +// ============================================================================ + +TEST_P(TrieIndexDBTest, PrefetchWithCustomIndexWrapper) { + // Exercises the Prefetch() code path with the custom index wrapper active. 
+ // In primary mode, Prefetch() uses the UDI wrapper whose key() returns an + // internal key (user key + 8-byte trailer). The Prefetch boundary comparison + // must use user_key() (not key()) when index_key_is_user_key=1 to avoid + // including the trailer in the comparison. + // + // This test writes keys with NO same-user-key boundaries (each key is + // unique), so index_key_is_user_key=1 in the SST properties. This is the + // condition that triggers the user-key comparison path in Prefetch. + if (GetIndexMode() == BlockBasedTableOptions::IndexMode::kStandardDefault) { + // In secondary mode, Prefetch uses the standard IndexBlockIter (the + // wrapper falls through for internal ReadOptions). The bug only + // manifests in primary mode where the wrapper intercepts all reads. + ROCKSDB_GTEST_SKIP("Only applicable in primary modes"); + return; + } + ASSERT_OK(OpenDB(/*block_size=*/256)); + + // Write unique keys (no same-user-key boundaries) to ensure + // index_key_is_user_key=1. + for (int i = 0; i < 500; i++) { + char key[32]; + snprintf(key, sizeof(key), "prefetch_key_%06d", i); + ASSERT_OK(db_->Put(WriteOptions(), key, std::string(200, 'v'))); + } + ASSERT_OK(db_->Flush(FlushOptions())); + + // Verify SST properties. In kCustomDefault mode, index_key_is_user_key=1 + // because the standard builder sees no same-user-key boundaries (all keys + // unique). In kCustomOnly mode, index_key_is_user_key=0 because the standard + // index is a stub and we force this property to match the UDI wrapper's + // internal key format. + TablePropertiesCollection props; + ASSERT_OK(db_->GetPropertiesOfAllTables(&props)); + ASSERT_FALSE(props.empty()); + if (GetIndexMode() == BlockBasedTableOptions::IndexMode::kCustomDefault) { + for (const auto& p : props) { + ASSERT_EQ(p.second->index_key_is_user_key, 1u) + << "kCustomDefault with unique keys: expected user-key-only " + "separators"; + } + } + + // Close and reopen to trigger Prefetch during Open. 
+ ASSERT_OK(db_->Close()); + db_.reset(); + ASSERT_OK(OpenDB(/*block_size=*/256)); + + // Exercises the Prefetch code path with the custom index wrapper in + // primary mode. The wrapper's key() includes an 8-byte internal key + // trailer that Prefetch's boundary comparison must handle correctly + // via user_key(). An incorrect comparison would cause wrong prefetch + // boundaries (performance issue, not data correctness). Verify the + // reopen succeeds and all data is readable. + ReadOptions ro; + std::unique_ptr iter(db_->NewIterator(ro)); + iter->SeekToFirst(); + int count = 0; + while (iter->Valid()) { + count++; + iter->Next(); + } + ASSERT_OK(iter->status()); + ASSERT_EQ(count, 500); + + // Also verify point lookups work (uses a different read path). + std::string value; + ASSERT_OK(db_->Get(ro, "prefetch_key_000000", &value)); + ASSERT_EQ(value, std::string(200, 'v')); + ASSERT_OK(db_->Get(ro, "prefetch_key_000499", &value)); + ASSERT_EQ(value, std::string(200, 'v')); +} + +// Run all parameterized tests in all three custom UDI modes: +// - kStandardDefault: UDI is secondary, reads require read_index +// - kCustomDefault: UDI is primary, all reads use the trie by default +// - kCustomOnly: UDI is primary, standard index is an empty stub +INSTANTIATE_TEST_CASE_P( + AllIndexModes, TrieIndexDBTest, + ::testing::Values(BlockBasedTableOptions::IndexMode::kStandardDefault, + BlockBasedTableOptions::IndexMode::kCustomDefault, + BlockBasedTableOptions::IndexMode::kCustomOnly)); } // namespace trie_index } // namespace ROCKSDB_NAMESPACE diff --git a/utilities/trie_index/trie_index_factory.cc b/utilities/trie_index/trie_index_factory.cc index 68beae99d0ba..f66de782d26e 100644 --- a/utilities/trie_index/trie_index_factory.cc +++ b/utilities/trie_index/trie_index_factory.cc @@ -478,17 +478,17 @@ Status TrieIndexIterator::NextAndGetResult(IterateResult* result) { return Status::OK(); } -UserDefinedIndexBuilder::BlockHandle TrieIndexIterator::value() { 
+IndexFactoryBuilder::BlockHandle TrieIndexIterator::value() { if (overflow_run_index_ == 0) { // Primary block — use the trie leaf's handle. auto handle = iter_.Value(); - return UserDefinedIndexBuilder::BlockHandle{handle.offset, handle.size}; + return IndexFactoryBuilder::BlockHandle{handle.offset, handle.size}; } // Overflow block — use the side-table handle. // overflow_run_index_ is 1-based, overflow array is 0-based. uint32_t overflow_idx = overflow_base_idx_ + overflow_run_index_ - 1; auto handle = trie_->GetOverflowHandle(overflow_idx); - return UserDefinedIndexBuilder::BlockHandle{handle.offset, handle.size}; + return IndexFactoryBuilder::BlockHandle{handle.offset, handle.size}; } IterBoundCheck TrieIndexIterator::CheckBounds( @@ -538,7 +538,7 @@ Status TrieIndexReader::InitFromSlice(const Slice& data) { return trie_.InitFromData(data); } -std::unique_ptr TrieIndexReader::NewIterator( +std::unique_ptr TrieIndexReader::NewIterator( const ReadOptions& /*read_options*/) { return std::make_unique(&trie_, comparator_, trie_.HasSeqnoEncoding()); @@ -558,8 +558,8 @@ size_t TrieIndexReader::ApproximateMemoryUsage() const { // ============================================================================ Status TrieIndexFactory::NewBuilder( - const UserDefinedIndexOption& option, - std::unique_ptr& builder) const { + const IndexFactoryOptions& option, + std::unique_ptr& builder) const { // The trie traverses keys byte-by-byte in lexicographic order, so it // requires a bytewise comparator. Non-bytewise comparators (e.g., // ReverseBytewiseComparator or custom comparators) would produce separator @@ -581,8 +581,8 @@ Status TrieIndexFactory::NewBuilder( } Status TrieIndexFactory::NewReader( - const UserDefinedIndexOption& option, Slice& index_block, - std::unique_ptr& reader) const { + const IndexFactoryOptions& option, Slice& index_block, + std::unique_ptr& reader) const { const Comparator* cmp = option.comparator ? 
option.comparator : BytewiseComparator(); if (cmp != BytewiseComparator()) { diff --git a/utilities/trie_index/trie_index_factory.h b/utilities/trie_index/trie_index_factory.h index cf5e6825824a..442869bcb96b 100644 --- a/utilities/trie_index/trie_index_factory.h +++ b/utilities/trie_index/trie_index_factory.h @@ -9,7 +9,7 @@ // // Trie-based User Defined Index (UDI) for RocksDB's block-based tables. // -// This provides a TrieIndexFactory that implements the UserDefinedIndexFactory +// This provides a TrieIndexFactory that implements the IndexFactory // interface, building a Fast Succinct Trie (FST) index from the separator keys // generated during SST file construction. Based on the SuRF paper results, the // trie is expected to achieve significant space reduction compared to the @@ -19,12 +19,16 @@ // auto trie_factory = std::make_shared(); // BlockBasedTableOptions table_options; // table_options.user_defined_index_factory = trie_factory; +// table_options.index_mode = +// BlockBasedTableOptions::IndexMode::kStandardDefault; // -// At read time, set ReadOptions::table_index_factory to the same factory -// to use the trie for iteration: +// In kStandardDefault mode, reads use the standard index by default. +// Set ReadOptions::read_index to kCustom to use the trie: // ReadOptions ro; -// ro.table_index_factory = trie_factory.get(); +// ro.read_index = ReadOptions::ReadIndex::kCustom; // auto iter = db->NewIterator(ro); +// +// In kCustomDefault/kCustomOnly mode, all reads use the trie automatically. #pragma once @@ -34,15 +38,15 @@ #include #include "rocksdb/comparator.h" +#include "rocksdb/index_factory.h" #include "rocksdb/types.h" -#include "rocksdb/user_defined_index.h" #include "utilities/trie_index/louds_trie.h" namespace ROCKSDB_NAMESPACE { namespace trie_index { // ============================================================================ -// TrieIndexBuilder: Implements UserDefinedIndexBuilder using LoudsTrieBuilder. 
+// TrieIndexBuilder: Implements IndexFactoryBuilder using LoudsTrieBuilder. // // During SST file construction, RocksDB calls: // 1. OnKeyAdded() for each key-value pair. @@ -52,7 +56,7 @@ namespace trie_index { // The trie builder collects the separator keys from AddIndexEntry() and // builds a LOUDS-encoded trie during Finish(). // ============================================================================ -class TrieIndexBuilder final : public UserDefinedIndexBuilder { +class TrieIndexBuilder final : public IndexFactoryBuilder { public: explicit TrieIndexBuilder(const Comparator* comparator); ~TrieIndexBuilder() override = default; @@ -121,13 +125,13 @@ class TrieIndexBuilder final : public UserDefinedIndexBuilder { }; // ============================================================================ -// TrieIndexIterator: Implements UserDefinedIndexIterator using +// TrieIndexIterator: Implements IndexFactoryIterator using // LoudsTrieIterator. // // Wraps LoudsTrieIterator and adapts it to the UDI iterator interface, // handling bounds checking against ScanOptions. // ============================================================================ -class TrieIndexIterator final : public UserDefinedIndexIterator { +class TrieIndexIterator final : public IndexFactoryIterator { public: // @param has_seqno_encoding: true if the trie was built with a seqno // side-table (enabling post-seek correction for same-user-key boundaries). @@ -164,7 +168,7 @@ class TrieIndexIterator final : public UserDefinedIndexIterator { // Return the BlockHandle of the current block. When positioned on an // overflow block, returns the overflow block's handle instead of the // trie leaf's handle. - UserDefinedIndexBuilder::BlockHandle value() override; + IndexFactoryBuilder::BlockHandle value() override; private: // Check if the current block is within the active scan bounds. 
@@ -243,12 +247,12 @@ class TrieIndexIterator final : public UserDefinedIndexIterator { }; // ============================================================================ -// TrieIndexReader: Implements UserDefinedIndexReader. +// TrieIndexReader: Implements IndexFactoryReader. // // Owns (or references) the deserialized LoudsTrie and creates iterators // for read operations. // ============================================================================ -class TrieIndexReader : public UserDefinedIndexReader { +class TrieIndexReader : public IndexFactoryReader { public: explicit TrieIndexReader(const Comparator* comparator); ~TrieIndexReader() override = default; @@ -258,7 +262,7 @@ class TrieIndexReader : public UserDefinedIndexReader { Status InitFromSlice(const Slice& data); // Create a new iterator for scanning. - std::unique_ptr NewIterator( + std::unique_ptr NewIterator( const ReadOptions& read_options) override; // Approximate memory usage of the deserialized trie. @@ -271,13 +275,13 @@ class TrieIndexReader : public UserDefinedIndexReader { }; // ============================================================================ -// TrieIndexFactory: Implements UserDefinedIndexFactory. +// TrieIndexFactory: Implements IndexFactory. // // Factory for creating TrieIndexBuilder (during SST file writes) and // TrieIndexReader (during SST file reads). Registered as a Customizable // with name "trie_index". // ============================================================================ -class TrieIndexFactory : public UserDefinedIndexFactory { +class TrieIndexFactory : public IndexFactory { public: TrieIndexFactory() = default; ~TrieIndexFactory() override = default; @@ -285,29 +289,12 @@ class TrieIndexFactory : public UserDefinedIndexFactory { static const char* kClassName() { return "trie_index"; } const char* Name() const override { return kClassName(); } - // Deprecated API (required by base class). Use the overloads that accept - // UserDefinedIndexOption instead. 
These must never be called; the new - // overloads with UserDefinedIndexOption are always used by the block-based - // table builder/reader. Abort unconditionally (in both debug and release - // builds) to surface programming errors immediately. - UserDefinedIndexBuilder* NewBuilder() const override { - abort(); - return nullptr; - } - std::unique_ptr NewReader( - Slice& /*index_block*/) const override { - abort(); - return nullptr; - } - - // New API with comparator. Status NewBuilder( - const UserDefinedIndexOption& option, - std::unique_ptr& builder) const override; + const IndexFactoryOptions& option, + std::unique_ptr& builder) const override; - Status NewReader( - const UserDefinedIndexOption& option, Slice& index_block, - std::unique_ptr& reader) const override; + Status NewReader(const IndexFactoryOptions& option, Slice& index_block, + std::unique_ptr& reader) const override; }; } // namespace trie_index diff --git a/utilities/trie_index/trie_index_test.cc b/utilities/trie_index/trie_index_test.cc index 7ca271fa3ebe..1c5a9be25347 100644 --- a/utilities/trie_index/trie_index_test.cc +++ b/utilities/trie_index/trie_index_test.cc @@ -46,12 +46,12 @@ namespace trie_index { // Helpers: pack sequence numbers with kTypeValue (1) into tags. // Tests use plain sequence numbers for readability; these convert them to // the tag format that IndexEntryContext and SeekContext expect. -UserDefinedIndexIterator::SeekContext SeekCtx(SequenceNumber seq) { +IndexFactoryIterator::SeekContext SeekCtx(SequenceNumber seq) { return {PackSequenceAndType(seq, kValueTypeForSeek)}; } -UserDefinedIndexBuilder::IndexEntryContext EntryCtx(SequenceNumber last_seq, - SequenceNumber first_seq) { +IndexFactoryBuilder::IndexEntryContext EntryCtx(SequenceNumber last_seq, + SequenceNumber first_seq) { return {last_seq ? ((static_cast(last_seq) << 8) | 1) : 0, first_seq ? 
((static_cast(first_seq) << 8) | 1) : 0}; } @@ -2374,22 +2374,22 @@ class TrieIndexFactoryTest : public testing::Test { // Owns the builder (which holds the serialized data), reader, and iterator. // All must stay alive for the iterator to be usable. struct TrieTestContext { - std::unique_ptr builder; + std::unique_ptr builder; Slice index_contents; - std::unique_ptr reader; - std::unique_ptr iter; + std::unique_ptr reader; + std::unique_ptr iter; }; // Build a trie from TestBlocks and return a context with a ready iterator. TrieTestContext BuildTrieAndGetIterator( const std::vector& blocks) { TrieTestContext ctx; - UserDefinedIndexOption option; + IndexFactoryOptions option; option.comparator = BytewiseComparator(); EXPECT_OK(factory_->NewBuilder(option, ctx.builder)); for (const auto& b : blocks) { - UserDefinedIndexBuilder::BlockHandle h{b.offset, b.size}; + IndexFactoryBuilder::BlockHandle h{b.offset, b.size}; std::string scratch; if (!b.next_key.empty()) { Slice next(b.next_key); @@ -2408,9 +2408,8 @@ class TrieIndexFactoryTest : public testing::Test { } // Seek and assert the resulting block offset. - static void AssertSeekOffset(UserDefinedIndexIterator* iter, - const Slice& target, SequenceNumber seq, - uint64_t expected_offset) { + static void AssertSeekOffset(IndexFactoryIterator* iter, const Slice& target, + SequenceNumber seq, uint64_t expected_offset) { IterateResult result; ASSERT_OK(iter->SeekAndGetResult(target, &result, SeekCtx(seq))); ASSERT_EQ(iter->value().offset, expected_offset) @@ -2423,7 +2422,7 @@ class TrieIndexFactoryTest : public testing::Test { // all blocks, asserting each offset matches expected_offsets. Also asserts // kUnknown past the end. 
static void AssertFullForwardScan( - UserDefinedIndexIterator* iter, const Slice& first_key, + IndexFactoryIterator* iter, const Slice& first_key, const std::vector& expected_offsets) { ASSERT_FALSE(expected_offsets.empty()); IterateResult result; @@ -2445,10 +2444,10 @@ class TrieIndexFactoryTest : public testing::Test { TEST_F(TrieIndexFactoryTest, BasicBuildAndRead) { // Build a trie index using the factory interface. - UserDefinedIndexOption option; + IndexFactoryOptions option; option.comparator = BytewiseComparator(); - std::unique_ptr builder; + std::unique_ptr builder; ASSERT_OK(factory_->NewBuilder(option, builder)); ASSERT_NE(builder, nullptr); @@ -2459,7 +2458,7 @@ TEST_F(TrieIndexFactoryTest, BasicBuildAndRead) { "elderberry", ""}; for (size_t i = 0; i < last_keys.size(); i++) { - UserDefinedIndexBuilder::BlockHandle handle{i * 1000, 500}; + IndexFactoryBuilder::BlockHandle handle{i * 1000, 500}; std::string scratch; Slice next_slice(first_keys[i]); @@ -2474,7 +2473,7 @@ TEST_F(TrieIndexFactoryTest, BasicBuildAndRead) { ASSERT_GT(index_contents.size(), 0u); // Read the index. - std::unique_ptr reader; + std::unique_ptr reader; ASSERT_OK(factory_->NewReader(option, index_contents, reader)); ASSERT_NE(reader, nullptr); ASSERT_GT(reader->ApproximateMemoryUsage(), 0u); @@ -2496,10 +2495,10 @@ TEST_F(TrieIndexFactoryTest, FactoryName) { } TEST_F(TrieIndexFactoryTest, EmptyIndex) { - UserDefinedIndexOption option; + IndexFactoryOptions option; option.comparator = BytewiseComparator(); - std::unique_ptr builder; + std::unique_ptr builder; ASSERT_OK(factory_->NewBuilder(option, builder)); // Finish without adding any entries. 
@@ -2509,10 +2508,10 @@ TEST_F(TrieIndexFactoryTest, EmptyIndex) { } TEST_F(TrieIndexFactoryTest, DoubleFinish) { - UserDefinedIndexOption option; + IndexFactoryOptions option; option.comparator = BytewiseComparator(); - std::unique_ptr builder; + std::unique_ptr builder; ASSERT_OK(factory_->NewBuilder(option, builder)); Slice index_contents; @@ -2526,10 +2525,10 @@ TEST_F(TrieIndexFactoryTest, DoubleFinish) { TEST_F(TrieIndexFactoryTest, IteratorBoundsChecking) { // Test the bounds checking in the UDI iterator. - UserDefinedIndexOption option; + IndexFactoryOptions option; option.comparator = BytewiseComparator(); - std::unique_ptr udi_builder; + std::unique_ptr udi_builder; ASSERT_OK(factory_->NewBuilder(option, udi_builder)); // Build index with 3 blocks. @@ -2540,8 +2539,8 @@ TEST_F(TrieIndexFactoryTest, IteratorBoundsChecking) { std::string sep = buf; separators.push_back(sep); - UserDefinedIndexBuilder::BlockHandle handle{static_cast(i) * 1000, - 500}; + IndexFactoryBuilder::BlockHandle handle{static_cast(i) * 1000, + 500}; std::string scratch; if (i < 2) { char next_buf[16]; @@ -2558,7 +2557,7 @@ TEST_F(TrieIndexFactoryTest, IteratorBoundsChecking) { Slice index_contents; ASSERT_OK(udi_builder->Finish(&index_contents)); - std::unique_ptr reader; + std::unique_ptr reader; ASSERT_OK(factory_->NewReader(option, index_contents, reader)); ReadOptions ro; @@ -2589,13 +2588,13 @@ TEST_F(TrieIndexFactoryTest, IteratorBoundsChecking) { TEST_F(TrieIndexFactoryTest, IteratorNoBounds) { // Without Prepare(), bounds checking should always return kInbound. 
- UserDefinedIndexOption option; + IndexFactoryOptions option; option.comparator = BytewiseComparator(); - std::unique_ptr udi_builder; + std::unique_ptr udi_builder; ASSERT_OK(factory_->NewBuilder(option, udi_builder)); - UserDefinedIndexBuilder::BlockHandle handle{0, 500}; + IndexFactoryBuilder::BlockHandle handle{0, 500}; std::string scratch; udi_builder->AddIndexEntry(Slice("key"), nullptr, handle, &scratch, EntryCtx(100, 100)); @@ -2603,7 +2602,7 @@ TEST_F(TrieIndexFactoryTest, IteratorNoBounds) { Slice index_contents; ASSERT_OK(udi_builder->Finish(&index_contents)); - std::unique_ptr reader; + std::unique_ptr reader; ASSERT_OK(factory_->NewReader(option, index_contents, reader)); ReadOptions ro; @@ -2623,15 +2622,15 @@ TEST_F(TrieIndexFactoryTest, UpperBoundDoesNotDropValidBlocks) { // use the previous separator (or seek target) as the reference key, not // the current separator, to avoid prematurely rejecting blocks that // contain keys within the limit. - UserDefinedIndexOption option; + IndexFactoryOptions option; option.comparator = BytewiseComparator(); - std::unique_ptr udi_builder; + std::unique_ptr udi_builder; ASSERT_OK(factory_->NewBuilder(option, udi_builder)); // Block 0: last="az", next_first="c" → separator ≈ "b" { - UserDefinedIndexBuilder::BlockHandle handle{0, 1000}; + IndexFactoryBuilder::BlockHandle handle{0, 1000}; std::string scratch; Slice next("c"); udi_builder->AddIndexEntry(Slice("az"), &next, handle, &scratch, @@ -2639,7 +2638,7 @@ TEST_F(TrieIndexFactoryTest, UpperBoundDoesNotDropValidBlocks) { } // Block 1: last="cz", next_first="e" → separator ≈ "d" { - UserDefinedIndexBuilder::BlockHandle handle{1000, 1000}; + IndexFactoryBuilder::BlockHandle handle{1000, 1000}; std::string scratch; Slice next("e"); udi_builder->AddIndexEntry(Slice("cz"), &next, handle, &scratch, @@ -2647,7 +2646,7 @@ TEST_F(TrieIndexFactoryTest, UpperBoundDoesNotDropValidBlocks) { } // Block 2: last="ez", no next → separator ≈ "f" { - 
UserDefinedIndexBuilder::BlockHandle handle{2000, 1000}; + IndexFactoryBuilder::BlockHandle handle{2000, 1000}; std::string scratch; udi_builder->AddIndexEntry(Slice("ez"), nullptr, handle, &scratch, EntryCtx(100, 100)); @@ -2656,7 +2655,7 @@ TEST_F(TrieIndexFactoryTest, UpperBoundDoesNotDropValidBlocks) { Slice index_contents; ASSERT_OK(udi_builder->Finish(&index_contents)); - std::unique_ptr reader; + std::unique_ptr reader; ASSERT_OK(factory_->NewReader(option, index_contents, reader)); ReadOptions ro; @@ -2690,10 +2689,10 @@ TEST_F(TrieIndexFactoryTest, MultiScanBoundsAdvanceCorrectly) { // Validates that current_scan_idx_ advances correctly when // the seek target is past the current scan's limit. Otherwise all // bounds checks evaluate against scan 0's limit. - UserDefinedIndexOption option; + IndexFactoryOptions option; option.comparator = BytewiseComparator(); - std::unique_ptr udi_builder; + std::unique_ptr udi_builder; ASSERT_OK(factory_->NewBuilder(option, udi_builder)); // 5 blocks with well-separated keys. @@ -2707,7 +2706,7 @@ TEST_F(TrieIndexFactoryTest, MultiScanBoundsAdvanceCorrectly) { {"gz", "i", 3000}, {"iz", nullptr, 4000}, }; for (const auto& b : blocks) { - UserDefinedIndexBuilder::BlockHandle handle{b.offset, 500}; + IndexFactoryBuilder::BlockHandle handle{b.offset, 500}; std::string scratch; if (b.next_first) { Slice next(b.next_first); @@ -2722,7 +2721,7 @@ TEST_F(TrieIndexFactoryTest, MultiScanBoundsAdvanceCorrectly) { Slice index_contents; ASSERT_OK(udi_builder->Finish(&index_contents)); - std::unique_ptr reader; + std::unique_ptr reader; ASSERT_OK(factory_->NewReader(option, index_contents, reader)); ReadOptions ro; @@ -2775,18 +2774,18 @@ TEST_F(TrieIndexFactoryTest, MultiScanBoundsAdvanceCorrectly) { TEST_F(TrieIndexFactoryTest, RejectsNonBytewiseComparator) { // The trie index requires bytewise ordering because it traverses keys // byte-by-byte. Non-bytewise comparators should be rejected. 
- UserDefinedIndexOption option; + IndexFactoryOptions option; // ReverseBytewiseComparator should be rejected. option.comparator = ReverseBytewiseComparator(); - std::unique_ptr builder; + std::unique_ptr builder; Status s = factory_->NewBuilder(option, builder); ASSERT_TRUE(s.IsNotSupported()) << s.ToString(); ASSERT_EQ(builder, nullptr); // NewReader should also reject non-bytewise comparators. Slice dummy_data; - std::unique_ptr reader; + std::unique_ptr reader; s = factory_->NewReader(option, dummy_data, reader); ASSERT_TRUE(s.IsNotSupported()) << s.ToString(); ASSERT_EQ(reader, nullptr); @@ -2800,10 +2799,10 @@ TEST_F(TrieIndexFactoryTest, RejectsNonBytewiseComparator) { TEST_F(TrieIndexFactoryTest, ApproximateMemoryUsageIncludesAuxData) { // Verify that ApproximateMemoryUsage() accounts for auxiliary heap // allocations (child position lookup tables), not just serialized data. - UserDefinedIndexOption option; + IndexFactoryOptions option; option.comparator = BytewiseComparator(); - std::unique_ptr udi_builder; + std::unique_ptr udi_builder; ASSERT_OK(factory_->NewBuilder(option, udi_builder)); // Build a non-trivial index with enough keys to produce sparse internal @@ -2814,8 +2813,8 @@ TEST_F(TrieIndexFactoryTest, ApproximateMemoryUsageIncludesAuxData) { snprintf(last_buf, sizeof(last_buf), "key_%04d", i); snprintf(next_buf, sizeof(next_buf), "key_%04d", i + 1); - UserDefinedIndexBuilder::BlockHandle handle{static_cast(i) * 1000, - 500}; + IndexFactoryBuilder::BlockHandle handle{static_cast(i) * 1000, + 500}; std::string scratch; if (i < 99) { Slice next(next_buf); @@ -2831,7 +2830,7 @@ TEST_F(TrieIndexFactoryTest, ApproximateMemoryUsageIncludesAuxData) { ASSERT_OK(udi_builder->Finish(&index_contents)); size_t serialized_size = index_contents.size(); - std::unique_ptr reader; + std::unique_ptr reader; ASSERT_OK(factory_->NewReader(option, index_contents, reader)); size_t mem_usage = reader->ApproximateMemoryUsage(); @@ -2846,16 +2845,16 @@ 
TEST_F(TrieIndexFactoryTest, ApproximateMemoryUsageIncludesAuxData) { TEST_F(TrieIndexFactoryTest, EmptyTrieIterator) { // Seek on an iterator built from an empty index. - UserDefinedIndexOption option; + IndexFactoryOptions option; option.comparator = BytewiseComparator(); - std::unique_ptr builder; + std::unique_ptr builder; ASSERT_OK(factory_->NewBuilder(option, builder)); Slice index_contents; ASSERT_OK(builder->Finish(&index_contents)); - std::unique_ptr reader; + std::unique_ptr reader; ASSERT_OK(factory_->NewReader(option, index_contents, reader)); ReadOptions ro; @@ -2948,12 +2947,12 @@ TEST_F(TrieIndexFactoryTest, ScanWithNoLimit) { TEST_F(TrieIndexFactoryTest, NewReaderWithCorruptedData) { // Attempt to create a reader from corrupted data. - UserDefinedIndexOption option; + IndexFactoryOptions option; option.comparator = BytewiseComparator(); std::string bad_data(10, '\xff'); Slice bad_slice(bad_data); - std::unique_ptr reader; + std::unique_ptr reader; Status s = factory_->NewReader(option, bad_slice, reader); ASSERT_TRUE(s.IsCorruption()) << s.ToString(); } @@ -2961,25 +2960,24 @@ TEST_F(TrieIndexFactoryTest, NewReaderWithCorruptedData) { TEST_F(TrieIndexFactoryTest, OnKeyAddedNoOp) { // Verify that OnKeyAdded() is a no-op for the trie builder regardless of // the ValueType. The trie only uses separator keys from AddIndexEntry(). - UserDefinedIndexOption option; + IndexFactoryOptions option; option.comparator = BytewiseComparator(); - std::unique_ptr builder; + std::unique_ptr builder; ASSERT_OK(factory_->NewBuilder(option, builder)); // Call OnKeyAdded with all ValueType variants — all should be no-ops. 
- builder->OnKeyAdded(Slice("key1"), UserDefinedIndexBuilder::kValue, + builder->OnKeyAdded(Slice("key1"), IndexFactoryBuilder::kValue, Slice("value1")); - builder->OnKeyAdded(Slice("key2"), UserDefinedIndexBuilder::kDelete, - Slice("")); - builder->OnKeyAdded(Slice("key3"), UserDefinedIndexBuilder::kMerge, + builder->OnKeyAdded(Slice("key2"), IndexFactoryBuilder::kDelete, Slice("")); + builder->OnKeyAdded(Slice("key3"), IndexFactoryBuilder::kMerge, Slice("merge_operand")); - builder->OnKeyAdded(Slice("key4"), UserDefinedIndexBuilder::kOther, + builder->OnKeyAdded(Slice("key4"), IndexFactoryBuilder::kOther, Slice("blob_ref")); - builder->OnKeyAdded(Slice(""), UserDefinedIndexBuilder::kValue, Slice("")); + builder->OnKeyAdded(Slice(""), IndexFactoryBuilder::kValue, Slice("")); // Building should still succeed (OnKeyAdded should not affect state). - UserDefinedIndexBuilder::BlockHandle handle{0, 500}; + IndexFactoryBuilder::BlockHandle handle{0, 500}; std::string scratch; builder->AddIndexEntry(Slice("key5"), nullptr, handle, &scratch, EntryCtx(100, 100)); @@ -2993,24 +2991,24 @@ TEST_F(TrieIndexFactoryTest, NullComparator) { // NewBuilder and NewReader with nullptr comparator should default to // BytewiseComparator. This tests that the null-comparator guard in both // NewBuilder and NewReader prevents null-pointer dereferences. - UserDefinedIndexOption option; + IndexFactoryOptions option; option.comparator = nullptr; // Build a non-trivial index with null comparator. The builder internally // defaults to BytewiseComparator. - std::unique_ptr builder; + std::unique_ptr builder; ASSERT_OK(factory_->NewBuilder(option, builder)); ASSERT_NE(builder, nullptr); // Add some entries — AddIndexEntry uses the defaulted comparator internally. 
std::string scratch; { - UserDefinedIndexBuilder::BlockHandle h{0, 100}; + IndexFactoryBuilder::BlockHandle h{0, 100}; Slice next("b"); builder->AddIndexEntry(Slice("a"), &next, h, &scratch, EntryCtx(100, 100)); } { - UserDefinedIndexBuilder::BlockHandle h{100, 100}; + IndexFactoryBuilder::BlockHandle h{100, 100}; builder->AddIndexEntry(Slice("b"), nullptr, h, &scratch, EntryCtx(100, 100)); } @@ -3021,7 +3019,7 @@ TEST_F(TrieIndexFactoryTest, NullComparator) { // NewReader with nullptr comparator must default to BytewiseComparator. // Storing a null comparator would cause a crash on Seek when CheckBounds // dereferences it. - std::unique_ptr reader; + std::unique_ptr reader; ASSERT_OK(factory_->NewReader(option, index_contents, reader)); ASSERT_NE(reader, nullptr); @@ -3339,16 +3337,16 @@ TEST_F(TrieIndexFactoryTest, LargeOverflowRun) { // "key" (11-block run, seqnos 1200..200) → "l" (block 11) → "zzz" (block // 12) // The overflow run for "key" has 10 overflow entries (blocks 1-10). - UserDefinedIndexOption option; + IndexFactoryOptions option; option.comparator = BytewiseComparator(); - std::unique_ptr builder; + std::unique_ptr builder; ASSERT_OK(factory_->NewBuilder(option, builder)); const int kNumKeyBlocks = 12; for (int i = 0; i < kNumKeyBlocks; i++) { - UserDefinedIndexBuilder::BlockHandle handle{static_cast(i) * 1000, - 1000}; + IndexFactoryBuilder::BlockHandle handle{static_cast(i) * 1000, + 1000}; std::string scratch; SequenceNumber seq = static_cast((kNumKeyBlocks - i) * 100); @@ -3369,7 +3367,7 @@ TEST_F(TrieIndexFactoryTest, LargeOverflowRun) { } // Final "zzz" block. Last block uses "zzz" as separator (no shortening). 
{ - UserDefinedIndexBuilder::BlockHandle handle{ + IndexFactoryBuilder::BlockHandle handle{ static_cast(kNumKeyBlocks) * 1000, 1000}; std::string scratch; builder->AddIndexEntry(Slice("zzz"), nullptr, handle, &scratch, @@ -3379,7 +3377,7 @@ TEST_F(TrieIndexFactoryTest, LargeOverflowRun) { Slice index_contents; ASSERT_OK(builder->Finish(&index_contents)); - std::unique_ptr reader; + std::unique_ptr reader; ASSERT_OK(factory_->NewReader(option, index_contents, reader)); ReadOptions ro; @@ -3461,22 +3459,22 @@ TEST_F(TrieIndexFactoryTest, MixedSameKeyRuns) { // // For a true multi-block overflow run for "aaa", we need 3+ consecutive // blocks ALL with "aaa" → "aaa" boundaries. - UserDefinedIndexOption option; + IndexFactoryOptions option; option.comparator = BytewiseComparator(); - std::unique_ptr builder; + std::unique_ptr builder; ASSERT_OK(factory_->NewBuilder(option, builder)); // "aaa" run: 3 blocks (all same-key boundaries). { - UserDefinedIndexBuilder::BlockHandle h{0, 1000}; + IndexFactoryBuilder::BlockHandle h{0, 1000}; std::string scratch; Slice next("aaa"); builder->AddIndexEntry(Slice("aaa"), &next, h, &scratch, EntryCtx(300, 200)); } { - UserDefinedIndexBuilder::BlockHandle h{1000, 1000}; + IndexFactoryBuilder::BlockHandle h{1000, 1000}; std::string scratch; Slice next("aaa"); builder->AddIndexEntry(Slice("aaa"), &next, h, &scratch, @@ -3484,28 +3482,28 @@ TEST_F(TrieIndexFactoryTest, MixedSameKeyRuns) { } // Last "aaa" block transitions to "mmm" → separator "b". { - UserDefinedIndexBuilder::BlockHandle h{2000, 1000}; + IndexFactoryBuilder::BlockHandle h{2000, 1000}; std::string scratch; Slice next("mmm"); builder->AddIndexEntry(Slice("aaa"), &next, h, &scratch, EntryCtx(100, 60)); } // "mmm" run: 2 blocks (one same-key boundary). 
{ - UserDefinedIndexBuilder::BlockHandle h{3000, 1000}; + IndexFactoryBuilder::BlockHandle h{3000, 1000}; std::string scratch; Slice next("mmm"); builder->AddIndexEntry(Slice("mmm"), &next, h, &scratch, EntryCtx(60, 30)); } // Last "mmm" block transitions to "zzz" → separator "n". { - UserDefinedIndexBuilder::BlockHandle h{4000, 1000}; + IndexFactoryBuilder::BlockHandle h{4000, 1000}; std::string scratch; Slice next("zzz"); builder->AddIndexEntry(Slice("mmm"), &next, h, &scratch, EntryCtx(30, 10)); } // "zzz": 1 block (last → separator "zzz", no shortening). { - UserDefinedIndexBuilder::BlockHandle h{5000, 1000}; + IndexFactoryBuilder::BlockHandle h{5000, 1000}; std::string scratch; builder->AddIndexEntry(Slice("zzz"), nullptr, h, &scratch, EntryCtx(10, 0)); } @@ -3513,7 +3511,7 @@ TEST_F(TrieIndexFactoryTest, MixedSameKeyRuns) { Slice index_contents; ASSERT_OK(builder->Finish(&index_contents)); - std::unique_ptr reader; + std::unique_ptr reader; ASSERT_OK(factory_->NewReader(option, index_contents, reader)); ReadOptions ro; @@ -3770,11 +3768,11 @@ TEST_F(TrieIndexFactoryTest, SeqnoEncodingOutOfBoundWithOverflow) { TEST_F(TrieIndexFactoryTest, SeqnoEncodingConsistentSize) { // Verify that tries built with different seqno contexts produce the // same serialized size (seqno encoding is always on). - UserDefinedIndexOption option; + IndexFactoryOptions option; option.comparator = BytewiseComparator(); // Build trie with distinct keys, seqno=0. 
- std::unique_ptr builder_no_seq; + std::unique_ptr builder_no_seq; ASSERT_OK(factory_->NewBuilder(option, builder_no_seq)); for (int i = 0; i < 50; i++) { char buf[16]; @@ -3782,8 +3780,8 @@ TEST_F(TrieIndexFactoryTest, SeqnoEncodingConsistentSize) { char next_buf[16]; snprintf(next_buf, sizeof(next_buf), "key_%04d", i + 1); - UserDefinedIndexBuilder::BlockHandle handle{static_cast(i) * 1000, - 500}; + IndexFactoryBuilder::BlockHandle handle{static_cast(i) * 1000, + 500}; std::string scratch; if (i < 49) { Slice next(next_buf); @@ -3799,7 +3797,7 @@ TEST_F(TrieIndexFactoryTest, SeqnoEncodingConsistentSize) { size_t size_no_seq = contents_no_seq.size(); // Same keys but with nonzero seqnos (still distinct → no seqno encoding). - std::unique_ptr builder_with_seq; + std::unique_ptr builder_with_seq; ASSERT_OK(factory_->NewBuilder(option, builder_with_seq)); for (int i = 0; i < 50; i++) { char buf[16]; @@ -3807,8 +3805,8 @@ TEST_F(TrieIndexFactoryTest, SeqnoEncodingConsistentSize) { char next_buf[16]; snprintf(next_buf, sizeof(next_buf), "key_%04d", i + 1); - UserDefinedIndexBuilder::BlockHandle handle{static_cast(i) * 1000, - 500}; + IndexFactoryBuilder::BlockHandle handle{static_cast(i) * 1000, + 500}; std::string scratch; if (i < 49) { Slice next(next_buf); @@ -3896,45 +3894,45 @@ TEST_F(TrieIndexFactoryTest, NextTransitionOverflowToOverflow) { // Note: the last block's separator is "bbb" (not "c"), matching the standard // index builder's behavior with kShortenSeparators (the default). This means // the last block joins the "bbb" run, making it a 3-block run. 
- UserDefinedIndexOption option; + IndexFactoryOptions option; option.comparator = BytewiseComparator(); - std::unique_ptr builder; + std::unique_ptr builder; ASSERT_OK(factory_->NewBuilder(option, builder)); { - UserDefinedIndexBuilder::BlockHandle h{0, 1000}; + IndexFactoryBuilder::BlockHandle h{0, 1000}; std::string scratch; Slice next("aaa"); builder->AddIndexEntry(Slice("aaa"), &next, h, &scratch, EntryCtx(200, 100)); } { - UserDefinedIndexBuilder::BlockHandle h{1000, 1000}; + IndexFactoryBuilder::BlockHandle h{1000, 1000}; std::string scratch; Slice next("aaa"); builder->AddIndexEntry(Slice("aaa"), &next, h, &scratch, EntryCtx(100, 50)); } { - UserDefinedIndexBuilder::BlockHandle h{2000, 1000}; + IndexFactoryBuilder::BlockHandle h{2000, 1000}; std::string scratch; Slice next("bbb"); builder->AddIndexEntry(Slice("aaa"), &next, h, &scratch, EntryCtx(50, 90)); } { - UserDefinedIndexBuilder::BlockHandle h{3000, 1000}; + IndexFactoryBuilder::BlockHandle h{3000, 1000}; std::string scratch; Slice next("bbb"); builder->AddIndexEntry(Slice("bbb"), &next, h, &scratch, EntryCtx(90, 60)); } { - UserDefinedIndexBuilder::BlockHandle h{4000, 1000}; + IndexFactoryBuilder::BlockHandle h{4000, 1000}; std::string scratch; Slice next("bbb"); builder->AddIndexEntry(Slice("bbb"), &next, h, &scratch, EntryCtx(60, 30)); } { - UserDefinedIndexBuilder::BlockHandle h{5000, 1000}; + IndexFactoryBuilder::BlockHandle h{5000, 1000}; std::string scratch; builder->AddIndexEntry(Slice("bbb"), nullptr, h, &scratch, EntryCtx(30, 0)); } @@ -3942,7 +3940,7 @@ TEST_F(TrieIndexFactoryTest, NextTransitionOverflowToOverflow) { Slice index_contents; ASSERT_OK(builder->Finish(&index_contents)); - std::unique_ptr reader; + std::unique_ptr reader; ASSERT_OK(factory_->NewReader(option, index_contents, reader)); ReadOptions ro; @@ -4024,9 +4022,9 @@ TEST_F(TrieIndexFactoryTest, TagDistinguishesSameSeqDifferentType) { // target (12800) < separator (12801) → advance to Block 1. 
// With seqno-only: 50 < 50 is false → stay (WRONG — should advance). - UserDefinedIndexOption option; + IndexFactoryOptions option; option.comparator = BytewiseComparator(); - std::unique_ptr builder; + std::unique_ptr builder; ASSERT_OK(factory_->NewBuilder(option, builder)); std::string scratch; @@ -4040,7 +4038,7 @@ TEST_F(TrieIndexFactoryTest, TagDistinguishesSameSeqDifferentType) { Slice index_contents; ASSERT_OK(builder->Finish(&index_contents)); - std::unique_ptr reader; + std::unique_ptr reader; ASSERT_OK(factory_->NewReader(option, index_contents, reader)); ReadOptions ro; auto iter = reader->NewIterator(ro); @@ -4274,6 +4272,8 @@ class TrieIndexSSTTest : public testing::Test { Options options; BlockBasedTableOptions table_options; table_options.user_defined_index_factory = trie_factory_; + table_options.index_mode = + BlockBasedTableOptions::IndexMode::kStandardDefault; options.table_factory.reset(NewBlockBasedTableFactory(table_options)); SstFileWriter writer(EnvOptions(), options); @@ -4325,18 +4325,20 @@ TEST_F(TrieIndexSSTTest, WriteAndReadWithTrieUDI) { ASSERT_EQ(count, 100); } - // Read WITH trie UDI — use table_index_factory in ReadOptions. + // Read WITH trie UDI — use read_index in ReadOptions. { Options options; BlockBasedTableOptions table_options; table_options.user_defined_index_factory = trie_factory_; + table_options.index_mode = + BlockBasedTableOptions::IndexMode::kStandardDefault; options.table_factory.reset(NewBlockBasedTableFactory(table_options)); SstFileReader reader(options); ASSERT_OK(reader.Open(sst_path_)); ReadOptions ro; - ro.table_index_factory = trie_factory_.get(); + ro.read_index = ReadOptions::ReadIndex::kCustom; std::unique_ptr iter(reader.NewIterator(ro)); // Full forward scan via Seek. 
@@ -4360,13 +4362,15 @@ TEST_F(TrieIndexSSTTest, SeekWithTrieUDI) { Options options; BlockBasedTableOptions table_options; table_options.user_defined_index_factory = trie_factory_; + table_options.index_mode = + BlockBasedTableOptions::IndexMode::kStandardDefault; options.table_factory.reset(NewBlockBasedTableFactory(table_options)); SstFileReader reader(options); ASSERT_OK(reader.Open(sst_path_)); ReadOptions ro; - ro.table_index_factory = trie_factory_.get(); + ro.read_index = ReadOptions::ReadIndex::kCustom; std::unique_ptr iter(reader.NewIterator(ro)); // Seek to the middle. @@ -4399,13 +4403,15 @@ TEST_F(TrieIndexSSTTest, SeekWithUpperBound) { Options options; BlockBasedTableOptions table_options; table_options.user_defined_index_factory = trie_factory_; + table_options.index_mode = + BlockBasedTableOptions::IndexMode::kStandardDefault; options.table_factory.reset(NewBlockBasedTableFactory(table_options)); SstFileReader reader(options); ASSERT_OK(reader.Open(sst_path_)); ReadOptions ro; - ro.table_index_factory = trie_factory_.get(); + ro.read_index = ReadOptions::ReadIndex::kCustom; Slice upper_bound("key_0075"); ro.iterate_upper_bound = &upper_bound; std::unique_ptr iter(reader.NewIterator(ro)); @@ -4430,13 +4436,15 @@ TEST_F(TrieIndexSSTTest, SmallSST) { Options options; BlockBasedTableOptions table_options; table_options.user_defined_index_factory = trie_factory_; + table_options.index_mode = + BlockBasedTableOptions::IndexMode::kStandardDefault; options.table_factory.reset(NewBlockBasedTableFactory(table_options)); SstFileReader reader(options); ASSERT_OK(reader.Open(sst_path_)); ReadOptions ro; - ro.table_index_factory = trie_factory_.get(); + ro.read_index = ReadOptions::ReadIndex::kCustom; std::unique_ptr iter(reader.NewIterator(ro)); iter->Seek("key_0000"); @@ -4469,6 +4477,8 @@ TEST_F(TrieIndexSSTTest, MixedKeyTypesWithCompressionDict) { table_options.index_type = BlockBasedTableOptions::IndexType::kBinarySearchWithFirstKey; 
table_options.user_defined_index_factory = trie_factory_; + table_options.index_mode = + BlockBasedTableOptions::IndexMode::kStandardDefault; Options options; options.compression = kCompression; @@ -4688,7 +4698,7 @@ TEST_F(TrieSeekBenchmark, TrieVsRealIndexBlockIter) { } // ---- Benchmark: Trie Seek (full production path) ---- - // Replicates UserDefinedIndexIteratorWrapper::Seek() → + // Replicates IndexFactoryIteratorWrapper::Seek() → // TrieIndexIterator::SeekAndGetResult(): // 1. ParseInternalKey to extract user_key // 2. trie_iter.Seek(user_key) @@ -4769,6 +4779,8 @@ TEST_F(TrieIndexSSTTest, MixedKeyTypesWithTrieUDI) { options.merge_operator = MergeOperators::CreateStringAppendOperator(); BlockBasedTableOptions table_options; table_options.user_defined_index_factory = trie_factory_; + table_options.index_mode = + BlockBasedTableOptions::IndexMode::kStandardDefault; options.table_factory.reset(NewBlockBasedTableFactory(table_options)); SstFileWriter writer(EnvOptions(), options); @@ -4820,6 +4832,8 @@ TEST_F(TrieIndexSSTTest, MixedKeyTypesWithTrieUDI) { read_options.merge_operator = MergeOperators::CreateStringAppendOperator(); BlockBasedTableOptions read_table_options; read_table_options.user_defined_index_factory = trie_factory_; + read_table_options.index_mode = + BlockBasedTableOptions::IndexMode::kStandardDefault; read_options.table_factory.reset( NewBlockBasedTableFactory(read_table_options)); @@ -4827,7 +4841,7 @@ TEST_F(TrieIndexSSTTest, MixedKeyTypesWithTrieUDI) { ASSERT_OK(reader.Open(sst_path_)); ReadOptions ro; - ro.table_index_factory = trie_factory_.get(); + ro.read_index = ReadOptions::ReadIndex::kCustom; std::unique_ptr iter(reader.NewIterator(ro)); // Full forward scan — expect 6 logically visible entries. 
@@ -4874,6 +4888,8 @@ TEST_F(TrieIndexSSTTest, LargeMixedKeyTypesWithTrieUDI) { options.merge_operator = MergeOperators::CreateStringAppendOperator(); BlockBasedTableOptions table_options; table_options.user_defined_index_factory = trie_factory_; + table_options.index_mode = + BlockBasedTableOptions::IndexMode::kStandardDefault; // Use small block size to force many data blocks, stressing the index. table_options.block_size = 128; options.table_factory.reset(NewBlockBasedTableFactory(table_options)); @@ -4947,6 +4963,8 @@ TEST_F(TrieIndexSSTTest, LargeMixedKeyTypesWithTrieUDI) { read_options.merge_operator = MergeOperators::CreateStringAppendOperator(); BlockBasedTableOptions read_table_options; read_table_options.user_defined_index_factory = trie_factory_; + read_table_options.index_mode = + BlockBasedTableOptions::IndexMode::kStandardDefault; read_options.table_factory.reset( NewBlockBasedTableFactory(read_table_options)); @@ -4954,7 +4972,7 @@ TEST_F(TrieIndexSSTTest, LargeMixedKeyTypesWithTrieUDI) { ASSERT_OK(reader.Open(sst_path_)); ReadOptions ro; - ro.table_index_factory = trie_factory_.get(); + ro.read_index = ReadOptions::ReadIndex::kCustom; std::unique_ptr iter(reader.NewIterator(ro)); // Full forward scan — only visible keys should appear. @@ -4997,26 +5015,26 @@ TEST_F(TrieIndexSSTTest, LargeMixedKeyTypesWithTrieUDI) { } TEST_F(TrieIndexFactoryTest, WrapperNextAndGetResultReturnsInternalKey) { - UserDefinedIndexOption option; + IndexFactoryOptions option; option.comparator = BytewiseComparator(); - std::unique_ptr builder; + std::unique_ptr builder; ASSERT_OK(factory_->NewBuilder(option, builder)); // Build a 3-block index: separators "a", "b", "c". 
std::string scratch; { - UserDefinedIndexBuilder::BlockHandle h{0, 100}; + IndexFactoryBuilder::BlockHandle h{0, 100}; Slice next("b"); builder->AddIndexEntry(Slice("a"), &next, h, &scratch, EntryCtx(100, 100)); } { - UserDefinedIndexBuilder::BlockHandle h{100, 100}; + IndexFactoryBuilder::BlockHandle h{100, 100}; Slice next("c"); builder->AddIndexEntry(Slice("b"), &next, h, &scratch, EntryCtx(100, 100)); } { - UserDefinedIndexBuilder::BlockHandle h{200, 100}; + IndexFactoryBuilder::BlockHandle h{200, 100}; builder->AddIndexEntry(Slice("c"), nullptr, h, &scratch, EntryCtx(100, 100)); } @@ -5024,13 +5042,13 @@ TEST_F(TrieIndexFactoryTest, WrapperNextAndGetResultReturnsInternalKey) { Slice index_contents; ASSERT_OK(builder->Finish(&index_contents)); - std::unique_ptr reader; + std::unique_ptr reader; ASSERT_OK(factory_->NewReader(option, index_contents, reader)); ReadOptions ro; auto udi_iter = reader->NewIterator(ro); // Wrap the UDI iterator in the adapter that converts to InternalIterator. - UserDefinedIndexIteratorWrapper wrapper(std::move(udi_iter)); + IndexFactoryIteratorWrapper wrapper(std::move(udi_iter)); // Seek to "a" — constructs an internal key from user key "a". InternalKey seek_ikey; @@ -5105,10 +5123,10 @@ TEST_F(TrieIndexFactoryTest, WrapperNextAndGetResultReturnsInternalKey) { // Without fix: Seek("b") at low seqno gets "ab"'s overflow data (wrong // offset and seqno), while Seek("ab") at low seqno gets "b"'s overflow. 
TEST_F(TrieIndexFactoryTest, OverflowBfsReordering) { - UserDefinedIndexOption option; + IndexFactoryOptions option; option.comparator = BytewiseComparator(); - std::unique_ptr builder; + std::unique_ptr builder; ASSERT_OK(factory_->NewBuilder(option, builder)); std::string scratch; @@ -5117,7 +5135,7 @@ TEST_F(TrieIndexFactoryTest, OverflowBfsReordering) { // Block 0: last="ab", next="ab" (same-key boundary) // → sep="ab", same_user_key=true, seqno=500 { - UserDefinedIndexBuilder::BlockHandle h{0, 100}; + IndexFactoryBuilder::BlockHandle h{0, 100}; Slice next("ab"); sep = builder->AddIndexEntry(Slice("ab"), &next, h, &scratch, EntryCtx(500, 0)); @@ -5126,7 +5144,7 @@ TEST_F(TrieIndexFactoryTest, OverflowBfsReordering) { // Block 1: last="ab", next="abc" (prefix — FindShortestSeparator no-op) // → sep="ab", edge-case match with prev sep, same_user_key=true, seqno=400 { - UserDefinedIndexBuilder::BlockHandle h{100, 100}; + IndexFactoryBuilder::BlockHandle h{100, 100}; Slice next("abc"); sep = builder->AddIndexEntry(Slice("ab"), &next, h, &scratch, EntryCtx(400, 0)); @@ -5137,7 +5155,7 @@ TEST_F(TrieIndexFactoryTest, OverflowBfsReordering) { // limit.size()-1=0 so fallback: increment start[1] 'b'→'c' → "ac" // → sep="ac", different key, seqno=kMax→0 { - UserDefinedIndexBuilder::BlockHandle h{200, 100}; + IndexFactoryBuilder::BlockHandle h{200, 100}; Slice next("b"); sep = builder->AddIndexEntry(Slice("abc"), &next, h, &scratch, EntryCtx(100, 100)); @@ -5146,7 +5164,7 @@ TEST_F(TrieIndexFactoryTest, OverflowBfsReordering) { // Block 3: last="b", next="b" (same-key boundary) // → sep="b", same_user_key=true, seqno=300 { - UserDefinedIndexBuilder::BlockHandle h{300, 100}; + IndexFactoryBuilder::BlockHandle h{300, 100}; Slice next("b"); sep = builder->AddIndexEntry(Slice("b"), &next, h, &scratch, EntryCtx(300, 0)); @@ -5155,7 +5173,7 @@ TEST_F(TrieIndexFactoryTest, OverflowBfsReordering) { // Block 4: last="b", next="ba" (prefix — FindShortestSeparator no-op) // → 
sep="b", edge-case match with prev sep, same_user_key=true, seqno=200 { - UserDefinedIndexBuilder::BlockHandle h{400, 100}; + IndexFactoryBuilder::BlockHandle h{400, 100}; Slice next("ba"); sep = builder->AddIndexEntry(Slice("b"), &next, h, &scratch, EntryCtx(200, 0)); @@ -5166,7 +5184,7 @@ TEST_F(TrieIndexFactoryTest, OverflowBfsReordering) { // start_byte+1='c' < 'd' → increment and truncate → "c" // → sep="c", different key, seqno=kMax→0 { - UserDefinedIndexBuilder::BlockHandle h{500, 100}; + IndexFactoryBuilder::BlockHandle h{500, 100}; Slice next("d"); sep = builder->AddIndexEntry(Slice("ba"), &next, h, &scratch, EntryCtx(100, 100)); @@ -5175,7 +5193,7 @@ TEST_F(TrieIndexFactoryTest, OverflowBfsReordering) { // Block 6: last="d", next=null (last block, no successor shortening) // → sep="d", different from prev "c", seqno=kMax→0 { - UserDefinedIndexBuilder::BlockHandle h{600, 100}; + IndexFactoryBuilder::BlockHandle h{600, 100}; sep = builder->AddIndexEntry(Slice("d"), nullptr, h, &scratch, EntryCtx(100, 100)); } @@ -5196,7 +5214,7 @@ TEST_F(TrieIndexFactoryTest, OverflowBfsReordering) { Slice index_contents; ASSERT_OK(builder->Finish(&index_contents)); - std::unique_ptr reader; + std::unique_ptr reader; ASSERT_OK(factory_->NewReader(option, index_contents, reader)); ReadOptions ro; @@ -5578,13 +5596,13 @@ TEST_F(TrieIndexFactoryTest, PrevLandsOnLeafWithOverflow) { TEST_F(TrieIndexFactoryTest, SeekToFirstOnEmptyTrie) { // SeekToFirstAndGetResult on an empty trie should return kUnknown. 
- UserDefinedIndexOption option; + IndexFactoryOptions option; option.comparator = BytewiseComparator(); - std::unique_ptr builder; + std::unique_ptr builder; ASSERT_OK(factory_->NewBuilder(option, builder)); Slice index_contents; ASSERT_OK(builder->Finish(&index_contents)); - std::unique_ptr reader; + std::unique_ptr reader; ASSERT_OK(factory_->NewReader(option, index_contents, reader)); ReadOptions ro; auto iter = reader->NewIterator(ro); @@ -5596,13 +5614,13 @@ TEST_F(TrieIndexFactoryTest, SeekToFirstOnEmptyTrie) { TEST_F(TrieIndexFactoryTest, SeekToLastOnEmptyTrie) { // SeekToLastAndGetResult on an empty trie should return kUnknown. - UserDefinedIndexOption option; + IndexFactoryOptions option; option.comparator = BytewiseComparator(); - std::unique_ptr builder; + std::unique_ptr builder; ASSERT_OK(factory_->NewBuilder(option, builder)); Slice index_contents; ASSERT_OK(builder->Finish(&index_contents)); - std::unique_ptr reader; + std::unique_ptr reader; ASSERT_OK(factory_->NewReader(option, index_contents, reader)); ReadOptions ro; auto iter = reader->NewIterator(ro);