diff --git a/.gitignore b/.gitignore index fe14c1529238..1eadcae91283 100644 --- a/.gitignore +++ b/.gitignore @@ -502,3 +502,5 @@ swa-cli.config.json # dapr extension files **/dapr.yaml + +*.lscache diff --git a/dotnet/src/VectorData/SqlServer/SqlServerCollection.cs b/dotnet/src/VectorData/SqlServer/SqlServerCollection.cs index 470be57ab494..a9961bfe41d9 100644 --- a/dotnet/src/VectorData/SqlServer/SqlServerCollection.cs +++ b/dotnet/src/VectorData/SqlServer/SqlServerCollection.cs @@ -42,6 +42,12 @@ public class SqlServerCollection /// The database schema. private readonly string? _schema; + /// Whether the model contains any DiskAnn vector properties, requiring Azure SQL. + private readonly bool _requiresAzureSql; + + /// Cached result of the Azure SQL engine edition check (null = not yet checked). + private bool? _isAzureSql; + /// /// Initializes a new instance of the class. /// @@ -78,6 +84,16 @@ internal SqlServerCollection(string connectionString, string name, Func(this._model); + // Check if any vector property uses DiskAnn, which requires Azure SQL. + foreach (var vp in this._model.VectorProperties) + { + if (vp.IndexKind == IndexKind.DiskAnn) + { + this._requiresAzureSql = true; + break; + } + } + var connectionStringBuilder = new SqlConnectionStringBuilder(connectionString); this._collectionMetadata = new() @@ -116,6 +132,12 @@ public override Task EnsureCollectionExistsAsync(CancellationToken cancellationT private async Task CreateCollectionAsync(bool ifNotExists, CancellationToken cancellationToken) { using SqlConnection connection = new(this._connectionString); + + if (this._requiresAzureSql) + { + await this.EnsureAzureSqlForDiskAnnAsync(connection, cancellationToken).ConfigureAwait(false); + } + List commands = SqlServerCommandBuilder.CreateTable( connection, this._schema, @@ -604,6 +626,12 @@ _ when vectorProperty.EmbeddingGenerationDispatcher is not null // Connection and command are going to be disposed by the ReadVectorSearchResultsAsync, // when the user is done with the results. SqlConnection connection = new(this._connectionString); + + if (vectorProperty.IndexKind == IndexKind.DiskAnn) + { + await this.EnsureAzureSqlForDiskAnnAsync(connection, cancellationToken).ConfigureAwait(false); + } + SqlCommand command = SqlServerCommandBuilder.SelectVector( connection, this._schema, @@ -664,6 +692,12 @@ _ when vectorProperty.EmbeddingGenerationDispatcher is not null // Connection and command are going to be disposed by the ReadVectorSearchResultsAsync, // when the user is done with the results. SqlConnection connection = new(this._connectionString); + + if (vectorProperty.IndexKind == IndexKind.DiskAnn) + { + await this.EnsureAzureSqlForDiskAnnAsync(connection, cancellationToken).ConfigureAwait(false); + } + SqlCommand command = SqlServerCommandBuilder.SelectHybrid( connection, this._schema, @@ -807,4 +841,48 @@ public override async IAsyncEnumerable GetAsync(Expression + /// Validates that the connection is to Azure SQL Database or SQL database in Microsoft Fabric, + /// which is required for DiskAnn vector indexes and the VECTOR_SEARCH function. + /// + private async Task EnsureAzureSqlForDiskAnnAsync(SqlConnection connection, CancellationToken cancellationToken) + { + if (this._isAzureSql is true) + { + return; + } + + if (this._isAzureSql is false) + { + connection.Dispose(); + throw new NotSupportedException( + "DiskAnn vector indexes and the VECTOR_SEARCH function require Azure SQL Database or SQL database in Microsoft Fabric. " + + "They are not supported on SQL Server. Use a Flat index kind with VECTOR_DISTANCE instead."); + } + + if (connection.State != System.Data.ConnectionState.Open) + { + await connection.OpenAsync(cancellationToken).ConfigureAwait(false); + } + + using var command = connection.CreateCommand(); + command.CommandText = "SELECT SERVERPROPERTY('EngineEdition')"; + var result = await command.ExecuteScalarAsync(cancellationToken).ConfigureAwait(false); + var engineEdition = Convert.ToInt32(result); + + // 5 = Azure SQL Database, 11 = SQL database in Microsoft Fabric + this._isAzureSql = engineEdition is 5 or 11; + + if (!this._isAzureSql.Value) + { + // Dispose the connection before throwing; in SearchAsync/HybridSearchAsync the connection + // is not in a using block (it's normally disposed by ReadVectorSearchResultsAsync). + connection.Dispose(); + + throw new NotSupportedException( + "DiskAnn vector indexes and the VECTOR_SEARCH function require Azure SQL Database or SQL database in Microsoft Fabric. " + + "They are not supported on SQL Server. Use a Flat index kind with VECTOR_DISTANCE instead."); + } + } } diff --git a/dotnet/src/VectorData/SqlServer/SqlServerCommandBuilder.cs b/dotnet/src/VectorData/SqlServer/SqlServerCommandBuilder.cs index eabfc794f34e..eb5343681b34 100644 --- a/dotnet/src/VectorData/SqlServer/SqlServerCommandBuilder.cs +++ b/dotnet/src/VectorData/SqlServer/SqlServerCommandBuilder.cs @@ -535,41 +535,67 @@ private static SqlCommand SelectVectorWithVectorSearch( string distanceMetric, string sorting) { - // VECTOR_SEARCH() currently only supports post-filtering (TOP_N candidates are returned first, - // then predicates are applied). Pre-filtering is not supported. - if (options.Filter is not null) - { - throw new NotSupportedException( - "Filtering is not supported with approximate vector search (VECTOR_SEARCH). " + - "Remove the filter or use IndexKind.Flat for exact search with VECTOR_DISTANCE."); - } - SqlCommand command = connection.CreateCommand(); command.Parameters.AddWithValue("@vector", vector); StringBuilder sb = new(300); + // When skip > 0, we need a subquery since TOP and OFFSET/FETCH can't coexist in the same SELECT. + bool needsSubquery = options.Skip > 0; + + if (needsSubquery) + { + sb.Append("SELECT * FROM ("); + } + // VECTOR_SEARCH returns all columns from the table plus a 'distance' column. // We select the needed columns from the table alias and alias 'distance' as 'score'. - sb.Append("SELECT "); + // The latest version vector indexes require SELECT TOP(N) WITH APPROXIMATE instead of the deprecated TOP_N parameter. + sb.Append("SELECT TOP(").Append(top + options.Skip).Append(") WITH APPROXIMATE "); sb.AppendIdentifiers(model.Properties, prefix: "t.", includeVectors: options.IncludeVectors); sb.AppendLine(","); sb.AppendLine("s.[distance] AS [score]"); sb.Append("FROM VECTOR_SEARCH(TABLE = "); sb.AppendTableName(schema, tableName); sb.Append(" AS t, COLUMN = ").AppendIdentifier(vectorProperty.StorageName); - sb.Append(", SIMILAR_TO = @vector, METRIC = '").Append(distanceMetric).Append('\''); - sb.Append(", TOP_N = ").Append(top + options.Skip).AppendLine(") AS s"); + sb.Append(", SIMILAR_TO = @vector, METRIC = '").Append(distanceMetric).AppendLine("') AS s"); + + // With latest version vector indexes, WHERE predicates are applied during the vector search process + // (iterative filtering), not after retrieval. + if (options.Filter is not null) + { + int startParamIndex = command.Parameters.Count; + + SqlServerFilterTranslator translator = new(model, options.Filter, sb, startParamIndex: startParamIndex, tableAlias: "t"); + translator.Translate(appendWhere: true); + List parameters = translator.ParameterValues; + + foreach (object parameter in parameters) + { + command.AddParameter(property: null, $"@_{startParamIndex++}", parameter); + } + + sb.AppendLine(); + } if (options.ScoreThreshold is not null) { command.Parameters.AddWithValue("@scoreThreshold", options.ScoreThreshold!.Value); - sb.AppendLine("WHERE s.[distance] <= @scoreThreshold"); + sb.Append(options.Filter is not null ? "AND " : "WHERE "); + sb.AppendLine("s.[distance] <= @scoreThreshold"); } sb.AppendFormat("ORDER BY [score] {0}", sorting); - sb.AppendLine(); - sb.AppendFormat("OFFSET {0} ROWS FETCH NEXT {1} ROWS ONLY;", options.Skip, top); + + if (needsSubquery) + { + sb.AppendLine(); + sb.Append(") AS [inner]"); + sb.AppendLine(); + sb.AppendFormat("ORDER BY [score] {0}", sorting); + sb.AppendLine(); + sb.AppendFormat("OFFSET {0} ROWS FETCH NEXT {1} ROWS ONLY;", options.Skip, top); + } command.CommandText = sb.ToString(); return command; @@ -587,15 +613,6 @@ internal static SqlCommand SelectHybrid( { bool useVectorSearch = UseVectorSearch(vectorProperty); - // VECTOR_SEARCH() currently only supports post-filtering (TOP_N candidates are returned first, - // then predicates are applied). Pre-filtering is not supported. - if (useVectorSearch && options.Filter is not null) - { - throw new NotSupportedException( - "Filtering is not supported with approximate vector search (VECTOR_SEARCH). " + - "Remove the filter or use IndexKind.Flat for exact search with VECTOR_DISTANCE."); - } - string distanceFunction = vectorProperty.DistanceFunction ?? DistanceFunction.CosineDistance; (string distanceMetric, _) = MapDistanceFunction(distanceFunction); @@ -652,16 +669,32 @@ internal static SqlCommand SelectHybrid( // CTE 2: Semantic/vector search if (useVectorSearch) { - // Use VECTOR_SEARCH() for approximate nearest neighbor search with a vector index + // Use VECTOR_SEARCH() for approximate nearest neighbor search with a vector index. + // The latest version vector indexes require SELECT TOP(N) WITH APPROXIMATE instead of the deprecated TOP_N parameter. sb.AppendLine("semantic_search AS ("); - sb.AppendLine(" SELECT TOP(@candidateCount)"); + sb.AppendLine(" SELECT TOP(@candidateCount) WITH APPROXIMATE"); sb.Append(" t.").AppendIdentifier(model.KeyProperty.StorageName).AppendLine(","); sb.AppendLine(" RANK() OVER (ORDER BY s.[distance]) AS [rank]"); sb.AppendLine(" FROM VECTOR_SEARCH(TABLE = "); sb.Append(" ").AppendTableName(schema, tableName); sb.Append(" AS t, COLUMN = ").AppendIdentifier(vectorProperty.StorageName); - sb.Append(", SIMILAR_TO = @vector, METRIC = '").Append(distanceMetric).Append('\''); - sb.Append(", TOP_N = @candidateCount").AppendLine(") AS s"); + sb.Append(", SIMILAR_TO = @vector, METRIC = '").Append(distanceMetric).AppendLine("') AS s"); + + // With latest version vector indexes, WHERE predicates are applied during the vector search process + // (iterative filtering), not after retrieval. + if (options.Filter is not null) + { + int filterParamStart = command.Parameters.Count; + SqlServerFilterTranslator translator = new(model, options.Filter, sb, startParamIndex: filterParamStart, tableAlias: "t"); + translator.Translate(appendWhere: true); + foreach (object parameter in translator.ParameterValues) + { + command.AddParameter(property: null, $"@_{filterParamStart++}", parameter); + } + sb.AppendLine(); + } + + sb.AppendLine(" ORDER BY s.[distance]"); sb.AppendLine("),"); } else diff --git a/dotnet/test/VectorData/SqlServer.ConformanceTests/SqlServerCommandBuilderTests.cs b/dotnet/test/VectorData/SqlServer.ConformanceTests/SqlServerCommandBuilderTests.cs index b78a010d6b4c..40f5ddbc963e 100644 --- a/dotnet/test/VectorData/SqlServer.ConformanceTests/SqlServerCommandBuilderTests.cs +++ b/dotnet/test/VectorData/SqlServer.ConformanceTests/SqlServerCommandBuilderTests.cs @@ -262,11 +262,10 @@ public void SelectVector_WithDiskAnnIndex() Assert.Equal( """ - SELECT t.[id],t.[name],t.[embedding], + SELECT TOP(5) WITH APPROXIMATE t.[id],t.[name],t.[embedding], s.[distance] AS [score] - FROM VECTOR_SEARCH(TABLE = [schema].[table] AS t, COLUMN = [embedding], SIMILAR_TO = @vector, METRIC = 'COSINE', TOP_N = 5) AS s + FROM VECTOR_SEARCH(TABLE = [schema].[table] AS t, COLUMN = [embedding], SIMILAR_TO = @vector, METRIC = 'COSINE') AS s ORDER BY [score] ASC - OFFSET 0 ROWS FETCH NEXT 5 ROWS ONLY; """, command.CommandText, ignoreLineEndingDifferences: true); } @@ -295,16 +294,18 @@ public void SelectVector_WithDiskAnnIndex_WithSkip() Assert.Equal( """ - SELECT t.[id],t.[name], + SELECT * FROM (SELECT TOP(8) WITH APPROXIMATE t.[id],t.[name], s.[distance] AS [score] - FROM VECTOR_SEARCH(TABLE = [schema].[table] AS t, COLUMN = [embedding], SIMILAR_TO = @vector, METRIC = 'COSINE', TOP_N = 8) AS s + FROM VECTOR_SEARCH(TABLE = [schema].[table] AS t, COLUMN = [embedding], SIMILAR_TO = @vector, METRIC = 'COSINE') AS s + ORDER BY [score] ASC + ) AS [inner] ORDER BY [score] ASC OFFSET 3 ROWS FETCH NEXT 5 ROWS ONLY; """, command.CommandText, ignoreLineEndingDifferences: true); } [Fact] - public void SelectVector_WithDiskAnnIndex_WithFilter_Throws() + public void SelectVector_WithDiskAnnIndex_WithFilter() { var model = BuildModel( [ @@ -324,12 +325,20 @@ public void SelectVector_WithDiskAnnIndex_WithFilter_Throws() Filter = d => (string)d["name"]! == "test" }; - Assert.Throws(() => - SqlServerCommandBuilder.SelectVector( - connection, "schema", "table", - model.VectorProperties[0], model, - top: 5, options, - new SqlVector(new float[] { 1f, 2f, 3f }))); + using SqlCommand command = SqlServerCommandBuilder.SelectVector( + connection, "schema", "table", + model.VectorProperties[0], model, + top: 5, options, + new SqlVector(new float[] { 1f, 2f, 3f })); + + Assert.Equal( + """ + SELECT TOP(5) WITH APPROXIMATE t.[id],t.[name], + s.[distance] AS [score] + FROM VECTOR_SEARCH(TABLE = [schema].[table] AS t, COLUMN = [embedding], SIMILAR_TO = @vector, METRIC = 'COSINE') AS s + WHERE (t.[name] = 'test') + ORDER BY [score] ASC + """, command.CommandText, ignoreLineEndingDifferences: true); } [Fact] @@ -361,15 +370,86 @@ public void SelectVector_WithDiskAnnIndex_WithScoreThreshold() Assert.Equal( """ - SELECT t.[id],t.[name],t.[embedding], + SELECT TOP(5) WITH APPROXIMATE t.[id],t.[name],t.[embedding], s.[distance] AS [score] - FROM VECTOR_SEARCH(TABLE = [schema].[table] AS t, COLUMN = [embedding], SIMILAR_TO = @vector, METRIC = 'COSINE', TOP_N = 5) AS s + FROM VECTOR_SEARCH(TABLE = [schema].[table] AS t, COLUMN = [embedding], SIMILAR_TO = @vector, METRIC = 'COSINE') AS s WHERE s.[distance] <= @scoreThreshold ORDER BY [score] ASC - OFFSET 0 ROWS FETCH NEXT 5 ROWS ONLY; """, command.CommandText, ignoreLineEndingDifferences: true); } + [Fact] + public void SelectVector_WithDiskAnnIndex_WithFilterAndScoreThreshold() + { + var model = BuildModel( + [ + new VectorStoreKeyProperty("id", typeof(long)), + new VectorStoreDataProperty("name", typeof(string)), + new VectorStoreVectorProperty("embedding", typeof(ReadOnlyMemory), 3) + { + IndexKind = IndexKind.DiskAnn, + DistanceFunction = DistanceFunction.CosineDistance + } + ]); + + using SqlConnection connection = CreateConnection(); + + var options = new VectorSearchOptions> + { + Filter = d => (string)d["name"]! == "test", + ScoreThreshold = 0.5f + }; + + using SqlCommand command = SqlServerCommandBuilder.SelectVector( + connection, "schema", "table", + model.VectorProperties[0], model, + top: 5, options, + new SqlVector(new float[] { 1f, 2f, 3f })); + + Assert.Equal( + """ + SELECT TOP(5) WITH APPROXIMATE t.[id],t.[name], + s.[distance] AS [score] + FROM VECTOR_SEARCH(TABLE = [schema].[table] AS t, COLUMN = [embedding], SIMILAR_TO = @vector, METRIC = 'COSINE') AS s + WHERE (t.[name] = 'test') + AND s.[distance] <= @scoreThreshold + ORDER BY [score] ASC + """, command.CommandText, ignoreLineEndingDifferences: true); + } + + [Fact] + public void SelectHybrid_WithDiskAnnIndex_WithFilter() + { + var model = BuildModel( + [ + new VectorStoreKeyProperty("id", typeof(long)), + new VectorStoreDataProperty("name", typeof(string)) { IsFullTextIndexed = true }, + new VectorStoreVectorProperty("embedding", typeof(ReadOnlyMemory), 3) + { + IndexKind = IndexKind.DiskAnn, + DistanceFunction = DistanceFunction.CosineDistance + } + ]); + + using SqlConnection connection = CreateConnection(); + + var options = new HybridSearchOptions> + { + Filter = d => (string)d["name"]! == "test" + }; + + using SqlCommand command = SqlServerCommandBuilder.SelectHybrid( + connection, "schema", "table", + model.VectorProperties[0], model.DataProperties.First(p => p.IsFullTextIndexed), model, + top: 5, options, + new SqlVector(new float[] { 1f, 2f, 3f }), + "keyword"); + + Assert.Contains("SELECT TOP(@candidateCount) WITH APPROXIMATE", command.CommandText); + Assert.Contains("WHERE (t.[name] = 'test')", command.CommandText); + Assert.Contains("VECTOR_SEARCH(TABLE =", command.CommandText); + } + [Fact] public void Upsert() { diff --git a/dotnet/test/VectorData/SqlServer.ConformanceTests/SqlServerDiskAnnVectorSearchTests.cs b/dotnet/test/VectorData/SqlServer.ConformanceTests/SqlServerDiskAnnVectorSearchTests.cs deleted file mode 100644 index bf661df40b87..000000000000 --- a/dotnet/test/VectorData/SqlServer.ConformanceTests/SqlServerDiskAnnVectorSearchTests.cs +++ /dev/null @@ -1,179 +0,0 @@ -// Copyright (c) Microsoft. All rights reserved. - -// TEMPORARY: This test class exists to work around a current SQL Server 2025 limitation where tables with -// vector indexes are read-only. Once this limitation is lifted, these tests should be removed and the -// standard conformance tests in SqlServerIndexKindTests should be enabled instead (by removing the Skip on DiskAnn). -// -// The workaround is to: -// 1. Create the table without the vector index (using IndexKind.Flat). -// 2. Insert data while the table is still writable. -// 3. Create the DiskANN vector index via raw SQL. -// 4. Create a new collection instance with IndexKind.DiskAnn to route searches through VECTOR_SEARCH(). - -using Microsoft.Data.SqlClient; -using Microsoft.Extensions.VectorData; -using SqlServer.ConformanceTests.Support; -using VectorData.ConformanceTests.Support; -using VectorData.ConformanceTests.Xunit; -using Xunit; - -namespace SqlServer.ConformanceTests; - -public class SqlServerDiskAnnVectorSearchTests( - SqlServerDiskAnnVectorSearchTests.Fixture fixture) - : IClassFixture -{ - private const string CollectionName = "DiskAnnVectorSearchTests"; - - /// - /// Tests that approximate vector search via VECTOR_SEARCH() returns correct results - /// when a DiskANN index exists on the table. - /// - [ConditionalFact] - public async Task VectorSearch_WithDiskAnnIndex() - { - using var collection = this.CreateDiskAnnCollection(); - - var result = await collection.SearchAsync( - new ReadOnlyMemory([10, 30, 50]), top: 1).SingleAsync(); - - Assert.NotNull(result); - Assert.Equal(2, result.Record.Int); - } - - /// - /// Tests that VECTOR_SEARCH() correctly returns multiple results ordered by distance. - /// - [ConditionalFact] - public async Task VectorSearch_WithDiskAnnIndex_TopN() - { - using var collection = this.CreateDiskAnnCollection(); - - var results = await collection.SearchAsync( - new ReadOnlyMemory([10, 30, 50]), top: 3).ToListAsync(); - - Assert.Equal(3, results.Count); - // The closest match should be the exact vector [10, 30, 50] - Assert.Equal(2, results[0].Record.Int); - } - - /// - /// Tests that VECTOR_SEARCH() throws when a LINQ filter is specified, - /// since SQL Server's VECTOR_SEARCH only supports post-filtering. - /// - [ConditionalFact] - public async Task VectorSearch_WithDiskAnnIndex_WithFilter_Throws() - { - using var collection = this.CreateDiskAnnCollection(); - - var exception = await Assert.ThrowsAsync( - async () => await collection.SearchAsync( - new ReadOnlyMemory([10, 30, 50]), - top: 1, - new VectorSearchOptions - { - Filter = r => r.Int == 2 - }).SingleAsync()); - - Assert.Contains("VECTOR_SEARCH", exception.Message); - } - - private VectorStoreCollection CreateDiskAnnCollection() - { - VectorStoreCollectionDefinition definition = new() - { - Properties = - [ - new VectorStoreKeyProperty(nameof(SearchRecord.Key), typeof(int)), - new VectorStoreDataProperty(nameof(SearchRecord.Int), typeof(int)), - new VectorStoreVectorProperty(nameof(SearchRecord.Vector), typeof(ReadOnlyMemory), dimensions: 3) - { - IndexKind = IndexKind.DiskAnn, - DistanceFunction = DistanceFunction.CosineDistance - } - ] - }; - - return fixture.TestStore.CreateCollection(CollectionName, definition); - } - - public class SearchRecord - { - public int Key { get; set; } - public int Int { get; set; } - public ReadOnlyMemory Vector { get; set; } - } - - // TEMPORARY: This fixture works around the SQL Server 2025 read-only vector index limitation - // by creating the table without a vector index, inserting data, and then creating the index. - // See SqlServerIndexKindTests.DiskAnn for the standard (currently skipped) conformance test. - public class Fixture : VectorStoreFixture, IAsyncLifetime - { - public override TestStore TestStore => SqlServerTestStore.Instance; - - public override async Task InitializeAsync() - { - await base.InitializeAsync(); - - var connectionString = SqlServerTestStore.Instance.ConnectionString; - - // Step 1: Create a "flat" collection (no vector index) and insert data. - VectorStoreCollectionDefinition flatDefinition = new() - { - Properties = - [ - new VectorStoreKeyProperty(nameof(SearchRecord.Key), typeof(int)), - new VectorStoreDataProperty(nameof(SearchRecord.Int), typeof(int)), - new VectorStoreVectorProperty(nameof(SearchRecord.Vector), typeof(ReadOnlyMemory), dimensions: 3) - { - IndexKind = IndexKind.Flat, - DistanceFunction = DistanceFunction.CosineDistance - } - ] - }; - - using var flatCollection = this.TestStore.CreateCollection(CollectionName, flatDefinition); - await flatCollection.EnsureCollectionDeletedAsync(); - await flatCollection.EnsureCollectionExistsAsync(); - - SearchRecord[] records = - [ - new() { Key = 1, Int = 1, Vector = new([1, 2, 3]) }, - new() { Key = 2, Int = 2, Vector = new([10, 30, 50]) }, - new() { Key = 3, Int = 3, Vector = new([100, 40, 70]) } - ]; - - await flatCollection.UpsertAsync(records); - await this.TestStore.WaitForDataAsync(flatCollection, records.Length); - - // Step 2: Create the DiskANN vector index now that data is already in the table. - using var connection = new SqlConnection(connectionString); - await connection.OpenAsync(); - - using (var enablePreview = new SqlCommand( - "ALTER DATABASE SCOPED CONFIGURATION SET PREVIEW_FEATURES = ON;", connection)) - { - await enablePreview.ExecuteNonQueryAsync(); - } - - using (var createIndex = new SqlCommand( - $"CREATE VECTOR INDEX index_{CollectionName}_Vector ON [{CollectionName}]([Vector]) WITH (METRIC = 'COSINE', TYPE = 'DISKANN');", - connection)) - { - await createIndex.ExecuteNonQueryAsync(); - } - } - - public override async Task DisposeAsync() - { - // Clean up the table - var connectionString = SqlServerTestStore.Instance.ConnectionString; - using var connection = new SqlConnection(connectionString); - await connection.OpenAsync(); - using var command = new SqlCommand($"DROP TABLE IF EXISTS [{CollectionName}]", connection); - await command.ExecuteNonQueryAsync(); - - await base.DisposeAsync(); - } - } -} diff --git a/dotnet/test/VectorData/SqlServer.ConformanceTests/SqlServerIndexKindTests.cs b/dotnet/test/VectorData/SqlServer.ConformanceTests/SqlServerIndexKindTests.cs index e17de1f54393..bee3fe41f79c 100644 --- a/dotnet/test/VectorData/SqlServer.ConformanceTests/SqlServerIndexKindTests.cs +++ b/dotnet/test/VectorData/SqlServer.ConformanceTests/SqlServerIndexKindTests.cs @@ -1,5 +1,6 @@ // Copyright (c) Microsoft. All rights reserved. +using Microsoft.Data.SqlClient; using Microsoft.Extensions.VectorData; using SqlServer.ConformanceTests.Support; using VectorData.ConformanceTests; @@ -12,12 +13,95 @@ namespace SqlServer.ConformanceTests; public class SqlServerIndexKindTests(SqlServerIndexKindTests.Fixture fixture) : IndexKindTests(fixture), IClassFixture { - // SQL Server 2025 currently makes tables with vector indexes read-only, so data must be inserted before - // the index is created. See SqlServerDiskAnnVectorSearchTests for a temporary workaround test that inserts - // data first and then creates the index. Remove the Skip and delete that class once this limitation is lifted. - [ConditionalFact(Skip = "SQL Server 2025 read-only vector index limitation; see SqlServerDiskAnnVectorSearchTests")] - public virtual Task DiskAnn() - => this.Test(IndexKind.DiskAnn); + // Latest version vector indexes are only available in Azure SQL, not in on-prem SQL Server. + // They also require at least 100 rows before the vector index can be created, + // so we override the test to insert data first, then create the index. + [ConditionalFact] + [AzureSqlRequired] + public virtual async Task DiskAnn() + { + const string CollectionName = "IndexKindTests_DiskAnn"; + + // Step 1: Create the table using Flat index (no vector index) so we can insert data. + VectorStoreCollectionDefinition flatDefinition = new() + { + Properties = + [ + new VectorStoreKeyProperty(nameof(SearchRecord.Key), typeof(int)), + new VectorStoreDataProperty(nameof(SearchRecord.Int), typeof(int)), + new VectorStoreVectorProperty(nameof(SearchRecord.Vector), typeof(ReadOnlyMemory), dimensions: 3) + { + IndexKind = IndexKind.Flat, + DistanceFunction = DistanceFunction.CosineDistance + } + ] + }; + + using var flatCollection = fixture.TestStore.CreateCollection(CollectionName, flatDefinition); + await flatCollection.EnsureCollectionDeletedAsync(); + await flatCollection.EnsureCollectionExistsAsync(); + + try + { + // Step 2: Insert the 3 test rows + 97 filler rows to meet the 100-row minimum. + SearchRecord[] testRecords = + [ + new() { Key = 1, Int = 1, Vector = new([1, 2, 3]) }, + new() { Key = 2, Int = 2, Vector = new([10, 30, 50]) }, + new() { Key = 3, Int = 3, Vector = new([100, 40, 70]) } + ]; + + await flatCollection.UpsertAsync(testRecords); + + var fillerRecords = Enumerable.Range(100, 97) + .Select(i => new SearchRecord + { + Key = i, + Int = i, + Vector = new([i * 0.1f, i * 0.2f, i * 0.3f]) + }) + .ToArray(); + + await flatCollection.UpsertAsync(fillerRecords); + + // Step 3: Create the DiskANN vector index via raw SQL now that data is in the table. + using var connection = new SqlConnection(SqlServerTestStore.Instance.ConnectionString); + await connection.OpenAsync(); + + using (var createIndex = new SqlCommand( + $"CREATE VECTOR INDEX index_{CollectionName}_Vector ON [{CollectionName}]([Vector]) WITH (METRIC = 'COSINE', TYPE = 'DISKANN');", + connection)) + { + await createIndex.ExecuteNonQueryAsync(); + } + + // Step 4: Create a new collection instance with DiskAnn to route searches through VECTOR_SEARCH(). + VectorStoreCollectionDefinition diskAnnDefinition = new() + { + Properties = + [ + new VectorStoreKeyProperty(nameof(SearchRecord.Key), typeof(int)), + new VectorStoreDataProperty(nameof(SearchRecord.Int), typeof(int)), + new VectorStoreVectorProperty(nameof(SearchRecord.Vector), typeof(ReadOnlyMemory), dimensions: 3) + { + IndexKind = IndexKind.DiskAnn, + DistanceFunction = DistanceFunction.CosineDistance + } + ] + }; + + using var diskAnnCollection = fixture.TestStore.CreateCollection(CollectionName, diskAnnDefinition); + + var result = await diskAnnCollection.SearchAsync(new ReadOnlyMemory([10, 30, 50]), top: 1).SingleAsync(); + + Assert.NotNull(result); + Assert.Equal(2, result.Record.Int); + } + finally + { + await flatCollection.EnsureCollectionDeletedAsync(); + } + } public new class Fixture() : IndexKindTests.Fixture { diff --git a/dotnet/test/VectorData/SqlServer.ConformanceTests/Support/AzureSqlRequiredAttribute.cs b/dotnet/test/VectorData/SqlServer.ConformanceTests/Support/AzureSqlRequiredAttribute.cs new file mode 100644 index 000000000000..796441abd9ac --- /dev/null +++ b/dotnet/test/VectorData/SqlServer.ConformanceTests/Support/AzureSqlRequiredAttribute.cs @@ -0,0 +1,41 @@ +// Copyright (c) Microsoft. All rights reserved. + +using Microsoft.Data.SqlClient; +using VectorData.ConformanceTests.Xunit; + +namespace SqlServer.ConformanceTests.Support; + +/// +/// Skips the test(s) when the database is not Azure SQL Database or SQL database in Microsoft Fabric. +/// This is used for tests that require Azure SQL features not available in on-prem SQL Server (e.g. DiskAnn vector indexes). +/// +[AttributeUsage(AttributeTargets.Method | AttributeTargets.Class | AttributeTargets.Assembly)] +public sealed class AzureSqlRequiredAttribute : Attribute, ITestCondition +{ + private static bool? s_isAzureSql; + + public async ValueTask IsMetAsync() + { + if (s_isAzureSql is not null) + { + return s_isAzureSql.Value; + } + + var connectionString = SqlServerTestStore.Instance.ConnectionString; + + using var connection = new SqlConnection(connectionString); + await connection.OpenAsync(); + + using var command = connection.CreateCommand(); + command.CommandText = "SELECT SERVERPROPERTY('EngineEdition')"; + var result = await command.ExecuteScalarAsync(); + var engineEdition = Convert.ToInt32(result); + + // 5 = Azure SQL Database, 11 = SQL database in Microsoft Fabric + s_isAzureSql = engineEdition is 5 or 11; + return s_isAzureSql.Value; + } + + public string SkipReason + => "This test requires Azure SQL Database or SQL database in Microsoft Fabric."; +}