Skip to content
This repository was archived by the owner on Oct 10, 2025. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 26 additions & 10 deletions extension/vector/src/function/create_hnsw_index.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#include "catalog/catalog_entry/function_catalog_entry.h"
#include "catalog/catalog_entry/node_table_catalog_entry.h"
#include "catalog/hnsw_index_catalog_entry.h"
#include "common/exception/binder.h"
#include "function/built_in_function_utils.h"
#include "function/hnsw_index_functions.h"
#include "function/table/bind_data.h"
Expand Down Expand Up @@ -44,17 +45,29 @@ static std::unique_ptr<TableFuncBindData> createInMemHNSWBindFunc(main::ClientCo
const auto tableName = input->getLiteralVal<std::string>(0);
const auto indexName = input->getLiteralVal<std::string>(1);
const auto columnName = input->getLiteralVal<std::string>(2);
auto tableEntry = HNSWIndexUtils::bindNodeTable(*context, tableName, indexName,
HNSWIndexUtils::IndexOperation::CREATE);
const auto tableID = tableEntry->getTableID();
HNSWIndexUtils::validateColumnType(*tableEntry, columnName);
const auto& table =
storage::StorageManager::Get(*context)->getTable(tableID)->cast<storage::NodeTable>();
auto propertyID = tableEntry->getPropertyID(columnName);
auto config = HNSWIndexConfig{input->optionalParams};
auto numNodes = table.getStats(context->getTransaction()).getTableCard();
return std::make_unique<CreateHNSWIndexBindData>(context, indexName, tableEntry, propertyID,
numNodes, std::move(config));
try {
Comment thread
carminite marked this conversation as resolved.
Outdated
auto tableEntry = HNSWIndexUtils::bindNodeTable(*context, tableName, indexName,
HNSWIndexUtils::IndexOperation::CREATE);
const auto tableID = tableEntry->getTableID();
HNSWIndexUtils::validateColumnType(*tableEntry, columnName);
const auto& table =
storage::StorageManager::Get(*context)->getTable(tableID)->cast<storage::NodeTable>();
auto propertyID = tableEntry->getPropertyID(columnName);
auto numNodes = table.getStats(context->getTransaction()).getTableCard();
return std::make_unique<CreateHNSWIndexBindData>(context, indexName, tableEntry, propertyID,
numNodes, std::move(config));
} catch (common::BinderException& e) {
Comment thread
sdht0 marked this conversation as resolved.
Outdated
if (std::string(e.what()) ==
common::stringFormat("Binder exception: Index {} already exists in table {}.",
indexName, tableName) &&
config.skipIfExists) {
// Swallow the exception if the index already exists and skip_if_exists is true.
return std::make_unique<CreateHNSWIndexBindData>(context, indexName, nullptr, 0, 0,
std::move(config), true); // Bad because magic numbers: what is a better solution?
}
throw std::move(e);
}
}

static std::unique_ptr<TableFuncSharedState> initCreateInMemHNSWSharedState(
Expand Down Expand Up @@ -326,6 +339,9 @@ static std::string rewriteCreateHNSWQuery(main::ClientContext& context,
const TableFuncBindData& bindData) {
context.setUseInternalCatalogEntry(true /* useInternalCatalogEntry */);
const auto hnswBindData = bindData.constPtrCast<CreateHNSWIndexBindData>();
if (hnswBindData->skipAfterBind) {
return std::string{""};
Comment thread
carminite marked this conversation as resolved.
Outdated
}
std::string query = "BEGIN TRANSACTION;";
auto indexName = hnswBindData->indexName;
auto tableName = hnswBindData->tableEntry->getName();
Expand Down
27 changes: 21 additions & 6 deletions extension/vector/src/function/drop_hnsw_index.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include "catalog/catalog.h"
#include "catalog/catalog_entry/node_table_catalog_entry.h"
#include "common/exception/binder.h"
#include "function/hnsw_index_functions.h"
#include "function/table/bind_data.h"
#include "index/hnsw_index_utils.h"
Expand All @@ -15,22 +16,33 @@ namespace vector_extension {
struct DropHNSWIndexBindData final : TableFuncBindData {
catalog::NodeTableCatalogEntry* tableEntry;
std::string indexName;
bool skipAfterBind;
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Rename to something along the lines of skipIndexDropping.


DropHNSWIndexBindData(catalog::NodeTableCatalogEntry* tableEntry, std::string indexName)
: TableFuncBindData{0}, tableEntry{tableEntry}, indexName{std::move(indexName)} {}
DropHNSWIndexBindData(catalog::NodeTableCatalogEntry* tableEntry, std::string indexName, bool skipAfterBind = false)
: TableFuncBindData{0}, tableEntry{tableEntry}, indexName{std::move(indexName)}, skipAfterBind{skipAfterBind} {}

std::unique_ptr<TableFuncBindData> copy() const override {
return std::make_unique<DropHNSWIndexBindData>(tableEntry, indexName);
return std::make_unique<DropHNSWIndexBindData>(tableEntry, indexName, skipAfterBind);
}
};

/**
 * Bind step for dropping an HNSW index.
 *
 * Reads the table name (argument 0) and index name (argument 1), parses the optional
 * parameters into a DropHNSWConfig, and resolves the table through
 * HNSWIndexUtils::bindNodeTable with IndexOperation::DROP.
 *
 * @return Bind data holding the resolved table entry. If binding fails with the
 *         "doesn't have an index" message and skip_if_not_exists is set, the bind data
 *         instead carries a null table entry and skipAfterBind == true.
 * @throws common::BinderException Any binder failure that is not suppressed as described
 *         above is propagated unchanged.
 */
static std::unique_ptr<TableFuncBindData> bindFunc(main::ClientContext* context,
    const TableFuncBindInput* input) {
    const auto tableName = input->getLiteralVal<std::string>(0);
    const auto indexName = input->getLiteralVal<std::string>(1);
    // Parsed before binding so that the skip flag is available inside the handler.
    const auto config = DropHNSWConfig{input->optionalParams};
    try {
        const auto tableEntry = HNSWIndexUtils::bindNodeTable(*context, tableName, indexName,
            HNSWIndexUtils::IndexOperation::DROP);
        return std::make_unique<DropHNSWIndexBindData>(tableEntry, indexName);
    } catch (const common::BinderException& e) {
        if (config.skipIfNotExists) {
            // Only the "index is missing" failure is suppressed; it is recognised by its
            // exact message text, so any other binder error still reaches the caller.
            const std::string missingIndexMessage = common::stringFormat(
                "Binder exception: Table {} doesn't have an index with name {}.", tableName,
                indexName);
            if (missingIndexMessage == e.what()) {
                return std::make_unique<DropHNSWIndexBindData>(nullptr, indexName, true);
            }
        }
        // Bare rethrow keeps the original exception object (and its dynamic type) intact;
        // `throw std::move(e)` would construct a fresh copy of the static type instead.
        throw;
    }
}

static common::offset_t internalTableFunc(const TableFuncInput& input, TableFuncOutput&) {
Expand All @@ -48,6 +60,9 @@ static std::string dropHNSWIndexTables(main::ClientContext& context,
const TableFuncBindData& bindData) {
const auto dropHNSWIndexBindData = bindData.constPtrCast<DropHNSWIndexBindData>();
context.setUseInternalCatalogEntry(true /* useInternalCatalogEntry */);
if (dropHNSWIndexBindData->skipAfterBind) {
return std::string{""};
}
std::string query = "";
const auto requireNewTransaction = !context.getTransactionContext()->hasActiveTransaction();
if (requireNewTransaction) {
Expand Down
8 changes: 5 additions & 3 deletions extension/vector/src/include/function/hnsw_index_functions.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,16 +17,18 @@ struct CreateHNSWIndexBindData final : function::TableFuncBindData {
catalog::TableCatalogEntry* tableEntry;
common::property_id_t propertyID;
HNSWIndexConfig config;
bool skipAfterBind;
Comment thread
acquamarin marked this conversation as resolved.
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would rename this to skipIndexCreation, which is more accurate.


CreateHNSWIndexBindData(main::ClientContext* context, std::string indexName,
catalog::TableCatalogEntry* tableEntry, common::property_id_t propertyID,
common::offset_t numNodes, HNSWIndexConfig config)
common::offset_t numNodes, HNSWIndexConfig config, bool skipAfterBind = false)
: TableFuncBindData{numNodes}, context{context}, indexName{std::move(indexName)},
tableEntry{tableEntry}, propertyID{propertyID}, config{std::move(config)} {}
tableEntry{tableEntry}, propertyID{propertyID}, config{std::move(config)},
skipAfterBind{skipAfterBind} {}

std::unique_ptr<TableFuncBindData> copy() const override {
return std::make_unique<CreateHNSWIndexBindData>(context, indexName, tableEntry, propertyID,
numRows, config.copy());
numRows, config.copy(), skipAfterBind);
}
};

Expand Down
24 changes: 23 additions & 1 deletion extension/vector/src/include/index/hnsw_config.h
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,18 @@ struct CacheEmbeddings {
static constexpr bool DEFAULT_VALUE = true;
};

// Descriptor for the `skip_if_exists` optional parameter.
// HNSWIndexConfig's constructor compares NAME against `lowerCaseName`, validates the
// value against TYPE, and stores the result in HNSWIndexConfig::skipIfExists.
struct SkipIfExists {
// Parameter name as written in the call, e.g. `skip_if_exists := true`.
static constexpr const char* NAME = "skip_if_exists";
// The value must be a BOOL.
static constexpr common::LogicalTypeID TYPE = common::LogicalTypeID::BOOL;
// Used when the parameter is not supplied.
static constexpr bool DEFAULT_VALUE = false;
};

// Descriptor for the `skip_if_not_exists` optional parameter.
// DropHNSWConfig's constructor compares NAME against the lower-cased parameter name,
// validates the value against TYPE, and stores the result in
// DropHNSWConfig::skipIfNotExists.
struct SkipIfNotExists {
// Parameter name as written in the call, e.g. `skip_if_not_exists := true`.
static constexpr const char* NAME = "skip_if_not_exists";
// The value must be a BOOL.
static constexpr common::LogicalTypeID TYPE = common::LogicalTypeID::BOOL;
// Used when the parameter is not supplied.
static constexpr bool DEFAULT_VALUE = false;
};

struct BlindSearchUpSelThreshold {
static constexpr const char* NAME = "blind_search_up_sel";
static constexpr common::LogicalTypeID TYPE = common::LogicalTypeID::DOUBLE;
Expand All @@ -103,6 +115,7 @@ struct HNSWIndexConfig {
double alpha = Alpha::DEFAULT_VALUE;
int64_t efc = Efc::DEFAULT_VALUE;
bool cacheEmbeddingsColumn = CacheEmbeddings::DEFAULT_VALUE;
bool skipIfExists = SkipIfExists::DEFAULT_VALUE;

HNSWIndexConfig() = default;

Expand All @@ -119,11 +132,20 @@ struct HNSWIndexConfig {
private:
HNSWIndexConfig(const HNSWIndexConfig& other)
: mu{other.mu}, ml{other.ml}, pu{other.pu}, metric{other.metric}, alpha{other.alpha},
efc{other.efc}, cacheEmbeddingsColumn(other.cacheEmbeddingsColumn) {}
efc{other.efc}, cacheEmbeddingsColumn(other.cacheEmbeddingsColumn),
skipIfExists(other.skipIfExists) {}
Comment thread
carminite marked this conversation as resolved.
Outdated

static MetricType getMetricType(const std::string& metricName);
};

// Optional-parameter configuration for dropping an HNSW index.
struct DropHNSWConfig {
// Value of the `skip_if_not_exists` option; starts at SkipIfNotExists::DEFAULT_VALUE.
bool skipIfNotExists = SkipIfNotExists::DEFAULT_VALUE;

// Leaves every option at its default.
DropHNSWConfig() = default;

// Fills the options from the caller-supplied optional parameters.
// The definition throws common::BinderException for a parameter name it does not recognise.
explicit DropHNSWConfig(const function::optional_params_t& optionalParams);
};

struct QueryHNSWConfig {
int64_t efs = Efs::DEFAULT_VALUE;
double blindSearchUpSelThreshold = BlindSearchUpSelThreshold::DEFAULT_VALUE;
Expand Down
16 changes: 16 additions & 0 deletions extension/vector/src/index/hnsw_config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,9 @@ HNSWIndexConfig::HNSWIndexConfig(const function::optional_params_t& optionalPara
} else if (CacheEmbeddings::NAME == lowerCaseName) {
value.validateType(CacheEmbeddings::TYPE);
cacheEmbeddingsColumn = value.getValue<bool>();
} else if (SkipIfExists::NAME == lowerCaseName) {
value.validateType(SkipIfExists::TYPE);
skipIfExists = value.getValue<bool>();
} else {
throw common::BinderException{
common::stringFormat("Unrecognized optional parameter {} in {}.", name,
Expand Down Expand Up @@ -188,6 +191,19 @@ MetricType HNSWIndexConfig::getMetricType(const std::string& metricName) {
KU_UNREACHABLE;
}

/**
 * Builds the drop-index configuration from the optional parameters of the call.
 *
 * Parameter names are matched case-insensitively (the supplied name is lower-cased
 * before comparison). The only recognised option is SkipIfNotExists::NAME, whose value
 * is type-checked against SkipIfNotExists::TYPE before being stored.
 *
 * @param optionalParams Name/value pairs supplied by the caller.
 * @throws common::BinderException If a parameter name is not recognised.
 */
DropHNSWConfig::DropHNSWConfig(const function::optional_params_t& optionalParams) {
    for (auto& [name, value] : optionalParams) {
        auto lowerCaseName = common::StringUtils::getLower(name);
        if (SkipIfNotExists::NAME == lowerCaseName) {
            value.validateType(SkipIfNotExists::TYPE);
            skipIfNotExists = value.getValue<bool>();
        } else {
            // Report the drop function, not the query function, as the owner of the
            // rejected option.
            // NOTE(review): assumes DropVectorIndexFunction is declared alongside
            // QueryVectorIndexFunction - confirm against hnsw_index_functions.h.
            throw common::BinderException{
                common::stringFormat("Unrecognized optional parameter {} in {}.", name,
                    DropVectorIndexFunction::name)};
        }
    }
}

QueryHNSWConfig::QueryHNSWConfig(const function::optional_params_t& optionalParams) {
for (auto& [name, value] : optionalParams) {
auto lowerCaseName = common::StringUtils::getLower(name);
Expand Down
28 changes: 28 additions & 0 deletions extension/vector/test/test_files/error_suppress.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
-DATASET CSV empty

--

-CASE CreateSkipIfExists
-LOAD_DYNAMIC_EXTENSION vector
-STATEMENT CREATE NODE TABLE embeddings (id int64, vec FLOAT[8], PRIMARY KEY (id));
---- ok
-STATEMENT CALL CREATE_VECTOR_INDEX('embeddings', 'e_hnsw_index', 'vec');
---- ok
-STATEMENT CALL CREATE_VECTOR_INDEX('embeddings', 'e_hnsw_index', 'vec');
---- error
Binder exception: Index e_hnsw_index already exists in table embeddings.
-STATEMENT CALL CREATE_VECTOR_INDEX('embeddings', 'e_hnsw_index', 'vec', skip_if_exists := true);
---- ok
-STATEMENT CALL SHOW_INDEXES() RETURN *
---- 1
embeddings|e_hnsw_index|HNSW|[vec]|True|CALL CREATE_VECTOR_INDEX('embeddings', 'e_hnsw_index', 'vec', mu := 30, ml := 60, pu := 0.050000, metric := 'cosine', alpha := 1.100000, efc := 200);

-CASE DropSkipIfNotExists
-LOAD_DYNAMIC_EXTENSION vector
-STATEMENT CREATE NODE TABLE embeddings (id int64, vec FLOAT[8], PRIMARY KEY (id));
---- ok
-STATEMENT CALL DROP_VECTOR_INDEX('embeddings', 'e_hnsw_index');
---- error
Binder exception: Table embeddings doesn't have an index with name e_hnsw_index.
-STATEMENT CALL DROP_VECTOR_INDEX('embeddings', 'e_hnsw_index', skip_if_not_exists := true);
---- ok
Loading