This repository was archived by the owner on Oct 10, 2025. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 470
Expand file tree
/
Copy pathquery_fts_bind_data.cpp
More file actions
95 lines (85 loc) · 4.27 KB
/
query_fts_bind_data.cpp
File metadata and controls
95 lines (85 loc) · 4.27 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
#include "function/query_fts/query_fts_bind_data.h"
#include "binder/binder.h"
#include "binder/expression/expression_util.h"
#include "catalog/fts_index_catalog_entry.h"
#include "common/exception/binder.h"
#include "common/string_utils.h"
#include "libstemmer.h"
#include "re2.h"
#include "storage/storage_manager.h"
#include "storage/table/node_table.h"
#include "utils/fts_utils.h"
namespace kuzu {
namespace fts_extension {
using namespace kuzu::common;
using namespace kuzu::binder;
using namespace kuzu::storage;
// Builds the optional-parameter set from the bound optional expressions.
//
// The alias of every expression is lower-cased and compared against the NAME of each
// supported parameter (K, B, Conjunctive, TopK). The member that matches is replaced by an
// OptionalParam wrapping that expression.
//
// Throws BinderException when an alias matches none of the supported parameter names.
QueryFTSOptionalParams::QueryFTSOptionalParams(const binder::expression_vector& optionalParams) {
    for (const auto& param : optionalParams) {
        const auto loweredName = StringUtils::getLower(param->getAlias());
        if (loweredName == K::NAME) {
            k = function::OptionalParam<K>(param);
            continue;
        }
        if (loweredName == B::NAME) {
            b = function::OptionalParam<B>(param);
            continue;
        }
        if (loweredName == Conjunctive::NAME) {
            conjunctive = function::OptionalParam<Conjunctive>(param);
            continue;
        }
        if (loweredName == TopK::NAME) {
            topK = function::OptionalParam<TopK>(param);
            continue;
        }
        // None of the known names matched: reject the parameter instead of ignoring it.
        throw common::BinderException{"Unknown optional parameter: " + loweredName};
    }
}
// Evaluates every optional parameter against the given client context.
// The parameters are evaluated in the fixed order k, b, conjunctive, topK.
void QueryFTSOptionalParams::evaluateParams(main::ClientContext* context) {
    // The members have different OptionalParam instantiations, hence the generic lambda.
    const auto evaluate = [context](auto& param) { param.evaluateParam(context); };
    evaluate(k);
    evaluate(b);
    evaluate(conjunctive);
    evaluate(topK);
}
// Constructs the bind data for a full-text-search query.
//
// columns, graphEntry - forwarded to the GDSBindData base.
// docs                - forwarded to the GDSBindData base as a single-element expression
//                       vector; the body reads it back through output[0] as a NodeExpression
//                       (NOTE(review): assumes the base stores that vector as `output`).
// query               - expression holding the query string.
// entry               - catalog entry of the FTS index; its FTSIndexAuxInfo config decides
//                       whether terms are matched exactly or by stem.
// optionalParams      - optional query parameters; ownership is taken.
// numDocs, avgDocLen  - stored as given.
QueryFTSBindData::QueryFTSBindData(binder::expression_vector columns,
    graph::NativeGraphEntry graphEntry, std::shared_ptr<binder::Expression> docs,
    std::shared_ptr<binder::Expression> query, const catalog::IndexCatalogEntry& entry,
    std::unique_ptr<QueryFTSOptionalParams> optionalParams, common::idx_t numDocs, double avgDocLen)
    // docs is a by-value sink parameter that is not used again, so it is moved.
    : GDSBindData{std::move(columns), std::move(graphEntry),
          binder::expression_vector{std::move(docs)}},
      query{std::move(query)}, entry{entry}, numDocs{numDocs}, avgDocLen{avgDocLen},
      patternMatchAlgo{PatternMatchFactory::getPatternMatchAlgo(
          entry.getAuxInfo().cast<FTSIndexAuxInfo>().config.exactTermMatch ?
              TermMatchType::EXACT :
              TermMatchType::STEM)} {
    auto& nodeExpr = output[0]->constCast<binder::NodeExpression>();
    // outputTableID is assigned here, after the single-entry check, rather than in the
    // initializer list, so the node expression is never indexed before the assertion runs.
    KU_ASSERT(nodeExpr.getNumEntries() == 1);
    outputTableID = nodeExpr.getEntry(0)->getTableID();
    // Assigned in the body through this-> because the parameter shadows the member name.
    this->optionalParams = std::move(optionalParams);
}
// Returns the catalog entry of the table whose name FTSUtils::getTermsTableName derives
// from this index's table ID and index name. The lookup uses the transaction obtained
// from the given client context.
catalog::TableCatalogEntry* QueryFTSBindData::getTermsEntry(main::ClientContext& context) const {
    const auto termsTableName =
        FTSUtils::getTermsTableName(entry.getTableID(), entry.getIndexName());
    auto txn = transaction::Transaction::Get(context);
    return catalog::Catalog::Get(context)->getTableCatalogEntry(txn, termsTableName);
}
// Returns the catalog entry of the table whose name FTSUtils::getOrigTermsTableName derives
// from this index's table ID and index name. The lookup uses the transaction obtained
// from the given client context.
catalog::TableCatalogEntry* QueryFTSBindData::getOrigTermsEntry(
    main::ClientContext& context) const {
    const auto origTermsTableName =
        FTSUtils::getOrigTermsTableName(entry.getTableID(), entry.getIndexName());
    auto txn = transaction::Transaction::Get(context);
    return catalog::Catalog::Get(context)->getTableCatalogEntry(txn, origTermsTableName);
}
// Produces the terms of the bound query, processed with the index's own configuration.
//
// Steps:
//   1. Evaluate the query expression to a string literal.
//   2. Normalize it with the config's ignorePatternQuery and tokenize it.
//   3. Resolve the stop-words node table named in the config.
//   4. Pass the tokens to FTSUtils::stemTerms together with the `conjunctive` optional
//      parameter, flagged as query terms (isQuery = true).
//
// Returns whatever FTSUtils::stemTerms returns for those inputs.
std::vector<std::string> QueryFTSBindData::getQueryTerms(main::ClientContext& context) const {
    auto queryInStr =
        ExpressionUtil::evaluateLiteral<std::string>(&context, query, LogicalType::STRING());

    // Fetch the index configuration once and use this same object for every step below.
    auto config = entry.getAuxInfo().cast<FTSIndexAuxInfo>().config;
    FTSUtils::normalizeQuery(queryInStr, config.ignorePatternQuery);
    auto terms = FTSUtils::tokenizeString(queryInStr, config);

    // The same transaction serves both the catalog lookup and the stemming call.
    auto txn = transaction::Transaction::Get(context);
    auto stopWordsEntry =
        catalog::Catalog::Get(context)->getTableCatalogEntry(txn, config.stopWordsTableName);
    auto stopWordsTable = StorageManager::Get(context)
                              ->getTable(stopWordsEntry->getTableID())
                              ->ptrCast<NodeTable>();

    return FTSUtils::stemTerms(terms, config, MemoryManager::Get(context), stopWordsTable, txn,
        optionalParams->constCast<QueryFTSOptionalParams>().conjunctive.getParamVal(),
        true /* isQuery */);
}
} // namespace fts_extension
} // namespace kuzu