-
Notifications
You must be signed in to change notification settings - Fork 4.6k
Update SQL Server vector search to latest VECTOR_SEARCH() syntax #13863
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -502,3 +502,5 @@ swa-cli.config.json | |
|
|
||
| # dapr extension files | ||
| **/dapr.yaml | ||
|
|
||
| *.lscache | ||
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -42,6 +42,12 @@ public class SqlServerCollection<TKey, TRecord> | |||||
| /// <summary>The database schema.</summary> | ||||||
| private readonly string? _schema; | ||||||
|
|
||||||
| /// <summary>Whether the model contains any DiskAnn vector properties, requiring Azure SQL.</summary> | ||||||
| private readonly bool _requiresAzureSql; | ||||||
|
|
||||||
| /// <summary>Cached result of the Azure SQL engine edition check (null = not yet checked).</summary> | ||||||
| private bool? _isAzureSql; | ||||||
|
|
||||||
| /// <summary> | ||||||
| /// Initializes a new instance of the <see cref="SqlServerCollection{TKey, TRecord}"/> class. | ||||||
| /// </summary> | ||||||
|
|
@@ -78,6 +84,16 @@ internal SqlServerCollection(string connectionString, string name, Func<SqlServe | |||||
|
|
||||||
| this._mapper = new SqlServerMapper<TRecord>(this._model); | ||||||
|
|
||||||
| // Check if any vector property uses DiskAnn, which requires Azure SQL. | ||||||
| foreach (var vp in this._model.VectorProperties) | ||||||
| { | ||||||
| if (vp.IndexKind is not (null or "" or IndexKind.Flat)) | ||||||
| { | ||||||
| this._requiresAzureSql = true; | ||||||
| break; | ||||||
| } | ||||||
| } | ||||||
|
|
||||||
| var connectionStringBuilder = new SqlConnectionStringBuilder(connectionString); | ||||||
|
|
||||||
| this._collectionMetadata = new() | ||||||
|
|
@@ -116,6 +132,12 @@ public override Task EnsureCollectionExistsAsync(CancellationToken cancellationT | |||||
| private async Task CreateCollectionAsync(bool ifNotExists, CancellationToken cancellationToken) | ||||||
| { | ||||||
| using SqlConnection connection = new(this._connectionString); | ||||||
|
|
||||||
| if (this._requiresAzureSql) | ||||||
| { | ||||||
| await this.EnsureAzureSqlForDiskAnnAsync(connection, cancellationToken).ConfigureAwait(false); | ||||||
| } | ||||||
|
|
||||||
| List<SqlCommand> commands = SqlServerCommandBuilder.CreateTable( | ||||||
| connection, | ||||||
| this._schema, | ||||||
|
|
@@ -604,6 +626,12 @@ _ when vectorProperty.EmbeddingGenerationDispatcher is not null | |||||
| // Connection and command are going to be disposed by the ReadVectorSearchResultsAsync, | ||||||
| // when the user is done with the results. | ||||||
| SqlConnection connection = new(this._connectionString); | ||||||
|
|
||||||
| if (vectorProperty.IndexKind is not (null or "" or IndexKind.Flat)) | ||||||
|
||||||
| if (vectorProperty.IndexKind is not (null or "" or IndexKind.Flat)) | |
| if (vectorProperty.IndexKind == IndexKind.DiskAnn) |
Copilot
AI
Apr 13, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The Azure SQL detection result is cached, but only the `true` case is short-circuited. If `_isAzureSql` is `false`, subsequent calls will still open a connection and re-run `SERVERPROPERTY('EngineEdition')` each time before throwing. Consider adding an early `if (this._isAzureSql is false) throw ...;` path to make the caching effective (this reduces repeated round trips and avoids repeatedly opening connections just to re-throw the same unsupported exception).
Copilot
AI
Apr 13, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The Azure SQL detection result is cached, but only the `true` case is short-circuited. If `_isAzureSql` is `false`, subsequent calls will still open a connection and re-run `SERVERPROPERTY('EngineEdition')` each time before throwing. Consider adding an early `if (this._isAzureSql is false) throw ...;` path to make the caching effective (this reduces repeated round trips and avoids repeatedly opening connections just to re-throw the same unsupported exception).
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -535,41 +535,67 @@ private static SqlCommand SelectVectorWithVectorSearch<TRecord>( | |||||
| string distanceMetric, | ||||||
| string sorting) | ||||||
| { | ||||||
| // VECTOR_SEARCH() currently only supports post-filtering (TOP_N candidates are returned first, | ||||||
| // then predicates are applied). Pre-filtering is not supported. | ||||||
| if (options.Filter is not null) | ||||||
| { | ||||||
| throw new NotSupportedException( | ||||||
| "Filtering is not supported with approximate vector search (VECTOR_SEARCH). " + | ||||||
| "Remove the filter or use IndexKind.Flat for exact search with VECTOR_DISTANCE."); | ||||||
| } | ||||||
|
|
||||||
| SqlCommand command = connection.CreateCommand(); | ||||||
| command.Parameters.AddWithValue("@vector", vector); | ||||||
|
|
||||||
| StringBuilder sb = new(300); | ||||||
|
|
||||||
| // When skip > 0, we need a subquery since TOP and OFFSET/FETCH can't coexist in the same SELECT. | ||||||
| bool needsSubquery = options.Skip > 0; | ||||||
|
|
||||||
| if (needsSubquery) | ||||||
| { | ||||||
| sb.Append("SELECT * FROM ("); | ||||||
| } | ||||||
|
|
||||||
| // VECTOR_SEARCH returns all columns from the table plus a 'distance' column. | ||||||
| // We select the needed columns from the table alias and alias 'distance' as 'score'. | ||||||
| sb.Append("SELECT "); | ||||||
| // The latest version vector indexes require SELECT TOP(N) WITH APPROXIMATE instead of the deprecated TOP_N parameter. | ||||||
| sb.Append("SELECT TOP(").Append(top + options.Skip).Append(") WITH APPROXIMATE "); | ||||||
| sb.AppendIdentifiers(model.Properties, prefix: "t.", includeVectors: options.IncludeVectors); | ||||||
| sb.AppendLine(","); | ||||||
| sb.AppendLine("s.[distance] AS [score]"); | ||||||
| sb.Append("FROM VECTOR_SEARCH(TABLE = "); | ||||||
| sb.AppendTableName(schema, tableName); | ||||||
| sb.Append(" AS t, COLUMN = ").AppendIdentifier(vectorProperty.StorageName); | ||||||
| sb.Append(", SIMILAR_TO = @vector, METRIC = '").Append(distanceMetric).Append('\''); | ||||||
| sb.Append(", TOP_N = ").Append(top + options.Skip).AppendLine(") AS s"); | ||||||
| sb.Append(", SIMILAR_TO = @vector, METRIC = '").Append(distanceMetric).AppendLine("') AS s"); | ||||||
|
|
||||||
| // With latest version vector indexes, WHERE predicates are applied during the vector search process | ||||||
| // (iterative filtering), not after retrieval. | ||||||
| if (options.Filter is not null) | ||||||
| { | ||||||
| int startParamIndex = command.Parameters.Count; | ||||||
|
|
||||||
| SqlServerFilterTranslator translator = new(model, options.Filter, sb, startParamIndex: startParamIndex, tableAlias: "t"); | ||||||
| translator.Translate(appendWhere: true); | ||||||
| List<object> parameters = translator.ParameterValues; | ||||||
|
|
||||||
| foreach (object parameter in parameters) | ||||||
| { | ||||||
| command.AddParameter(vectorProperty, $"@_{startParamIndex++}", parameter); | ||||||
|
||||||
| command.AddParameter(vectorProperty, $"@_{startParamIndex++}", parameter); | |
| command.AddParameter(property: null, $"@_{startParamIndex++}", parameter); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The code/comment says this is for DiskANN, but the condition flags any non-Flat index kind as requiring Azure SQL. If additional index kinds are introduced later (or if some become supported on non-Azure SQL Server), this will block them unintentionally. If the intent is specifically DiskANN (per the comment and exception message), narrow the checks to `IndexKind.DiskAnn` (and/or the exact set of Azure-only kinds) so the gating logic stays correct as the index-kind surface area evolves.