Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -502,3 +502,5 @@ swa-cli.config.json

# dapr extension files
**/dapr.yaml

*.lscache
70 changes: 70 additions & 0 deletions dotnet/src/VectorData/SqlServer/SqlServerCollection.cs
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,12 @@ public class SqlServerCollection<TKey, TRecord>
/// <summary>The database schema.</summary>
private readonly string? _schema;

/// <summary>Whether the model contains any vector property with a non-Flat index kind (e.g. DiskAnn), which requires Azure SQL.</summary>
private readonly bool _requiresAzureSql;

/// <summary>Cached result of the Azure SQL engine edition check (null = not yet checked).</summary>
private bool? _isAzureSql;

/// <summary>
/// Initializes a new instance of the <see cref="SqlServerCollection{TKey, TRecord}"/> class.
/// </summary>
Expand Down Expand Up @@ -78,6 +84,16 @@ internal SqlServerCollection(string connectionString, string name, Func<SqlServe

this._mapper = new SqlServerMapper<TRecord>(this._model);

// Check if any vector property uses a non-Flat index kind (such as DiskAnn), which requires Azure SQL.
foreach (var vp in this._model.VectorProperties)
{
if (vp.IndexKind is not (null or "" or IndexKind.Flat))
{
this._requiresAzureSql = true;
break;
}
}
Comment on lines +87 to +95
Copy link

Copilot AI Apr 13, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The code/comment says this is for DiskANN, but the condition flags any non-Flat index kind as requiring Azure SQL. If additional index kinds are introduced later (or if some become supported on non-Azure SQL Server), this will block them unintentionally. If the intent is specifically DiskANN (per the comment and exception message), narrow the checks to IndexKind.DiskAnn (and/or the exact set of Azure-only kinds) so the gating logic stays correct as the index-kind surface area evolves.

Copilot uses AI. Check for mistakes.

var connectionStringBuilder = new SqlConnectionStringBuilder(connectionString);

this._collectionMetadata = new()
Expand Down Expand Up @@ -116,6 +132,12 @@ public override Task EnsureCollectionExistsAsync(CancellationToken cancellationT
private async Task CreateCollectionAsync(bool ifNotExists, CancellationToken cancellationToken)
{
using SqlConnection connection = new(this._connectionString);

if (this._requiresAzureSql)
{
await this.EnsureAzureSqlForDiskAnnAsync(connection, cancellationToken).ConfigureAwait(false);
}

List<SqlCommand> commands = SqlServerCommandBuilder.CreateTable(
connection,
this._schema,
Expand Down Expand Up @@ -604,6 +626,12 @@ _ when vectorProperty.EmbeddingGenerationDispatcher is not null
// Connection and command are going to be disposed by the ReadVectorSearchResultsAsync,
// when the user is done with the results.
SqlConnection connection = new(this._connectionString);

if (vectorProperty.IndexKind is not (null or "" or IndexKind.Flat))
Copy link

Copilot AI Apr 13, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The code/comment says this is for DiskANN, but the condition flags any non-Flat index kind as requiring Azure SQL. If additional index kinds are introduced later (or if some become supported on non-Azure SQL Server), this will block them unintentionally. If the intent is specifically DiskANN (per the comment and exception message), narrow the checks to IndexKind.DiskAnn (and/or the exact set of Azure-only kinds) so the gating logic stays correct as the index-kind surface area evolves.

Suggested change
if (vectorProperty.IndexKind is not (null or "" or IndexKind.Flat))
if (vectorProperty.IndexKind == IndexKind.DiskAnn)

Copilot uses AI. Check for mistakes.
{
await this.EnsureAzureSqlForDiskAnnAsync(connection, cancellationToken).ConfigureAwait(false);
}

SqlCommand command = SqlServerCommandBuilder.SelectVector(
connection,
this._schema,
Expand Down Expand Up @@ -664,6 +692,12 @@ _ when vectorProperty.EmbeddingGenerationDispatcher is not null
// Connection and command are going to be disposed by the ReadVectorSearchResultsAsync,
// when the user is done with the results.
SqlConnection connection = new(this._connectionString);

if (vectorProperty.IndexKind is not (null or "" or IndexKind.Flat))
{
await this.EnsureAzureSqlForDiskAnnAsync(connection, cancellationToken).ConfigureAwait(false);
}

SqlCommand command = SqlServerCommandBuilder.SelectHybrid(
connection,
this._schema,
Expand Down Expand Up @@ -807,4 +841,40 @@ public override async IAsyncEnumerable<TRecord> GetAsync(Expression<Func<TRecord
yield return this._mapper.MapFromStorageToDataModel(reader, options.IncludeVectors);
}
}

/// <summary>
/// Validates that the connection is to Azure SQL Database or SQL database in Microsoft Fabric,
/// which is required for DiskAnn vector indexes and the VECTOR_SEARCH function.
/// </summary>
private async Task EnsureAzureSqlForDiskAnnAsync(SqlConnection connection, CancellationToken cancellationToken)
{
if (this._isAzureSql is true)
{
return;
}

if (connection.State != System.Data.ConnectionState.Open)
{
await connection.OpenAsync(cancellationToken).ConfigureAwait(false);
}
Comment on lines +851 to +859
Copy link

Copilot AI Apr 13, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The Azure SQL detection result is cached, but only the true case is short-circuited. If _isAzureSql is false, subsequent calls will still open a connection and re-run SERVERPROPERTY('EngineEdition') each time before throwing. Consider adding an early if (this._isAzureSql is false) throw ...; path to make the caching effective (reduces repeated roundtrips and avoids repeatedly opening connections just to re-throw the same unsupported exception).

Copilot uses AI. Check for mistakes.

using var command = connection.CreateCommand();
command.CommandText = "SELECT SERVERPROPERTY('EngineEdition')";
var result = await command.ExecuteScalarAsync(cancellationToken).ConfigureAwait(false);
var engineEdition = Convert.ToInt32(result);

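// 5 = Azure SQL Database, 11 = SQL database in Microsoft Fabric.
// NOTE(review): confirm these values against the SERVERPROPERTY('EngineEdition') documentation;
// SQL database in Microsoft Fabric may be reported as 12 rather than 11.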
this._isAzureSql = engineEdition is 5 or 11;

if (!this._isAzureSql.Value)
{
// Dispose the connection before throwing; in SearchAsync/HybridSearchAsync the connection
// is not in a using block (it's normally disposed by ReadVectorSearchResultsAsync).
connection.Dispose();

throw new NotSupportedException(
"DiskAnn vector indexes and the VECTOR_SEARCH function require Azure SQL Database or SQL database in Microsoft Fabric. " +
"They are not supported on SQL Server. Use a Flat index kind with VECTOR_DISTANCE instead.");
}
Comment on lines +867 to +878
Copy link

Copilot AI Apr 13, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The Azure SQL detection result is cached, but only the true case is short-circuited. If _isAzureSql is false, subsequent calls will still open a connection and re-run SERVERPROPERTY('EngineEdition') each time before throwing. Consider adding an early if (this._isAzureSql is false) throw ...; path to make the caching effective (reduces repeated roundtrips and avoids repeatedly opening connections just to re-throw the same unsupported exception).

Copilot uses AI. Check for mistakes.
}
}
89 changes: 61 additions & 28 deletions dotnet/src/VectorData/SqlServer/SqlServerCommandBuilder.cs
Original file line number Diff line number Diff line change
Expand Up @@ -535,41 +535,67 @@ private static SqlCommand SelectVectorWithVectorSearch<TRecord>(
string distanceMetric,
string sorting)
{
// VECTOR_SEARCH() currently only supports post-filtering (TOP_N candidates are returned first,
// then predicates are applied). Pre-filtering is not supported.
if (options.Filter is not null)
{
throw new NotSupportedException(
"Filtering is not supported with approximate vector search (VECTOR_SEARCH). " +
"Remove the filter or use IndexKind.Flat for exact search with VECTOR_DISTANCE.");
}

SqlCommand command = connection.CreateCommand();
command.Parameters.AddWithValue("@vector", vector);

StringBuilder sb = new(300);

// When skip > 0, we need a subquery since TOP and OFFSET/FETCH can't coexist in the same SELECT.
bool needsSubquery = options.Skip > 0;

if (needsSubquery)
{
sb.Append("SELECT * FROM (");
}

// VECTOR_SEARCH returns all columns from the table plus a 'distance' column.
// We select the needed columns from the table alias and alias 'distance' as 'score'.
sb.Append("SELECT ");
// Latest-version vector indexes require SELECT TOP(N) WITH APPROXIMATE instead of the deprecated TOP_N parameter.
sb.Append("SELECT TOP(").Append(top + options.Skip).Append(") WITH APPROXIMATE ");
sb.AppendIdentifiers(model.Properties, prefix: "t.", includeVectors: options.IncludeVectors);
sb.AppendLine(",");
sb.AppendLine("s.[distance] AS [score]");
sb.Append("FROM VECTOR_SEARCH(TABLE = ");
sb.AppendTableName(schema, tableName);
sb.Append(" AS t, COLUMN = ").AppendIdentifier(vectorProperty.StorageName);
sb.Append(", SIMILAR_TO = @vector, METRIC = '").Append(distanceMetric).Append('\'');
sb.Append(", TOP_N = ").Append(top + options.Skip).AppendLine(") AS s");
sb.Append(", SIMILAR_TO = @vector, METRIC = '").Append(distanceMetric).AppendLine("') AS s");

// With latest-version vector indexes, WHERE predicates are applied during the vector search itself
// (iterative filtering), not after retrieval.
if (options.Filter is not null)
{
int startParamIndex = command.Parameters.Count;

SqlServerFilterTranslator translator = new(model, options.Filter, sb, startParamIndex: startParamIndex, tableAlias: "t");
translator.Translate(appendWhere: true);
List<object> parameters = translator.ParameterValues;

foreach (object parameter in parameters)
{
command.AddParameter(vectorProperty, $"@_{startParamIndex++}", parameter);
Copy link

Copilot AI Apr 13, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

AddParameter is being called with vectorProperty for filter parameters. If AddParameter uses the provided property metadata to choose SQL type/size (common in these builders), this can incorrectly bind filter parameters as the vector column type (e.g., varbinary), causing query failures or implicit conversion issues when filtering on scalars like strings/ints. Use the correct storage property metadata for each translated filter parameter (preferred), or pass property: null (consistent with the hybrid path) so parameter typing is derived from the value rather than the vector property.

Suggested change
command.AddParameter(vectorProperty, $"@_{startParamIndex++}", parameter);
command.AddParameter(property: null, $"@_{startParamIndex++}", parameter);

Copilot uses AI. Check for mistakes.
}

sb.AppendLine();
}

if (options.ScoreThreshold is not null)
{
command.Parameters.AddWithValue("@scoreThreshold", options.ScoreThreshold!.Value);
sb.AppendLine("WHERE s.[distance] <= @scoreThreshold");
sb.Append(options.Filter is not null ? "AND " : "WHERE ");
sb.AppendLine("s.[distance] <= @scoreThreshold");
}

sb.AppendFormat("ORDER BY [score] {0}", sorting);
sb.AppendLine();
sb.AppendFormat("OFFSET {0} ROWS FETCH NEXT {1} ROWS ONLY;", options.Skip, top);

if (needsSubquery)
{
sb.AppendLine();
sb.Append(") AS [inner]");
sb.AppendLine();
sb.AppendFormat("ORDER BY [score] {0}", sorting);
sb.AppendLine();
sb.AppendFormat("OFFSET {0} ROWS FETCH NEXT {1} ROWS ONLY;", options.Skip, top);
}

command.CommandText = sb.ToString();
return command;
Expand All @@ -587,15 +613,6 @@ internal static SqlCommand SelectHybrid<TRecord>(
{
bool useVectorSearch = UseVectorSearch(vectorProperty);

// VECTOR_SEARCH() currently only supports post-filtering (TOP_N candidates are returned first,
// then predicates are applied). Pre-filtering is not supported.
if (useVectorSearch && options.Filter is not null)
{
throw new NotSupportedException(
"Filtering is not supported with approximate vector search (VECTOR_SEARCH). " +
"Remove the filter or use IndexKind.Flat for exact search with VECTOR_DISTANCE.");
}

string distanceFunction = vectorProperty.DistanceFunction ?? DistanceFunction.CosineDistance;
(string distanceMetric, _) = MapDistanceFunction(distanceFunction);

Expand Down Expand Up @@ -652,16 +669,32 @@ internal static SqlCommand SelectHybrid<TRecord>(
// CTE 2: Semantic/vector search
if (useVectorSearch)
{
// Use VECTOR_SEARCH() for approximate nearest neighbor search with a vector index
// Use VECTOR_SEARCH() for approximate nearest neighbor search with a vector index.
// Latest-version vector indexes require SELECT TOP(N) WITH APPROXIMATE instead of the deprecated TOP_N parameter.
sb.AppendLine("semantic_search AS (");
sb.AppendLine(" SELECT TOP(@candidateCount)");
sb.AppendLine(" SELECT TOP(@candidateCount) WITH APPROXIMATE");
sb.Append(" t.").AppendIdentifier(model.KeyProperty.StorageName).AppendLine(",");
sb.AppendLine(" RANK() OVER (ORDER BY s.[distance]) AS [rank]");
sb.AppendLine(" FROM VECTOR_SEARCH(TABLE = ");
sb.Append(" ").AppendTableName(schema, tableName);
sb.Append(" AS t, COLUMN = ").AppendIdentifier(vectorProperty.StorageName);
sb.Append(", SIMILAR_TO = @vector, METRIC = '").Append(distanceMetric).Append('\'');
sb.Append(", TOP_N = @candidateCount").AppendLine(") AS s");
sb.Append(", SIMILAR_TO = @vector, METRIC = '").Append(distanceMetric).AppendLine("') AS s");

// With latest-version vector indexes, WHERE predicates are applied during the vector search itself
// (iterative filtering), not after retrieval.
if (options.Filter is not null)
{
int filterParamStart = command.Parameters.Count;
SqlServerFilterTranslator translator = new(model, options.Filter, sb, startParamIndex: filterParamStart, tableAlias: "t");
translator.Translate(appendWhere: true);
foreach (object parameter in translator.ParameterValues)
{
command.AddParameter(property: null, $"@_{filterParamStart++}", parameter);
}
sb.AppendLine();
}

sb.AppendLine(" ORDER BY s.[distance]");
sb.AppendLine("),");
}
else
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -262,11 +262,10 @@ public void SelectVector_WithDiskAnnIndex()

Assert.Equal(
"""
SELECT t.[id],t.[name],t.[embedding],
SELECT TOP(5) WITH APPROXIMATE t.[id],t.[name],t.[embedding],
s.[distance] AS [score]
FROM VECTOR_SEARCH(TABLE = [schema].[table] AS t, COLUMN = [embedding], SIMILAR_TO = @vector, METRIC = 'COSINE', TOP_N = 5) AS s
FROM VECTOR_SEARCH(TABLE = [schema].[table] AS t, COLUMN = [embedding], SIMILAR_TO = @vector, METRIC = 'COSINE') AS s
ORDER BY [score] ASC
OFFSET 0 ROWS FETCH NEXT 5 ROWS ONLY;
""", command.CommandText, ignoreLineEndingDifferences: true);
}

Expand Down Expand Up @@ -295,16 +294,18 @@ public void SelectVector_WithDiskAnnIndex_WithSkip()

Assert.Equal(
"""
SELECT t.[id],t.[name],
SELECT * FROM (SELECT TOP(8) WITH APPROXIMATE t.[id],t.[name],
s.[distance] AS [score]
FROM VECTOR_SEARCH(TABLE = [schema].[table] AS t, COLUMN = [embedding], SIMILAR_TO = @vector, METRIC = 'COSINE', TOP_N = 8) AS s
FROM VECTOR_SEARCH(TABLE = [schema].[table] AS t, COLUMN = [embedding], SIMILAR_TO = @vector, METRIC = 'COSINE') AS s
ORDER BY [score] ASC
) AS [inner]
ORDER BY [score] ASC
OFFSET 3 ROWS FETCH NEXT 5 ROWS ONLY;
""", command.CommandText, ignoreLineEndingDifferences: true);
}

[Fact]
public void SelectVector_WithDiskAnnIndex_WithFilter_Throws()
public void SelectVector_WithDiskAnnIndex_WithFilter()
{
var model = BuildModel(
[
Expand All @@ -324,12 +325,20 @@ public void SelectVector_WithDiskAnnIndex_WithFilter_Throws()
Filter = d => (string)d["name"]! == "test"
};

Assert.Throws<NotSupportedException>(() =>
SqlServerCommandBuilder.SelectVector(
connection, "schema", "table",
model.VectorProperties[0], model,
top: 5, options,
new SqlVector<float>(new float[] { 1f, 2f, 3f })));
using SqlCommand command = SqlServerCommandBuilder.SelectVector(
connection, "schema", "table",
model.VectorProperties[0], model,
top: 5, options,
new SqlVector<float>(new float[] { 1f, 2f, 3f }));

Assert.Equal(
"""
SELECT TOP(5) WITH APPROXIMATE t.[id],t.[name],
s.[distance] AS [score]
FROM VECTOR_SEARCH(TABLE = [schema].[table] AS t, COLUMN = [embedding], SIMILAR_TO = @vector, METRIC = 'COSINE') AS s
WHERE (t.[name] = 'test')
ORDER BY [score] ASC
""", command.CommandText, ignoreLineEndingDifferences: true);
}

[Fact]
Expand Down Expand Up @@ -361,12 +370,11 @@ public void SelectVector_WithDiskAnnIndex_WithScoreThreshold()

Assert.Equal(
"""
SELECT t.[id],t.[name],t.[embedding],
SELECT TOP(5) WITH APPROXIMATE t.[id],t.[name],t.[embedding],
s.[distance] AS [score]
FROM VECTOR_SEARCH(TABLE = [schema].[table] AS t, COLUMN = [embedding], SIMILAR_TO = @vector, METRIC = 'COSINE', TOP_N = 5) AS s
FROM VECTOR_SEARCH(TABLE = [schema].[table] AS t, COLUMN = [embedding], SIMILAR_TO = @vector, METRIC = 'COSINE') AS s
WHERE s.[distance] <= @scoreThreshold
ORDER BY [score] ASC
OFFSET 0 ROWS FETCH NEXT 5 ROWS ONLY;
""", command.CommandText, ignoreLineEndingDifferences: true);
}

Expand Down
Loading
Loading