From 2e053f9aa051da359f2ebe11788e649f12bbea67 Mon Sep 17 00:00:00 2001 From: "jack.lewis" Date: Mon, 15 Jun 2026 12:53:00 +0100 Subject: [PATCH 01/18] Initial commit adding the ability to use TextServices --- .../Manifest/ManifestWriteServiceTests.cs | 138 ++++- .../API.Tests/Integration/GetManifestTests.cs | 70 +++ .../Integration/ModifyManifestCreateTests.cs | 22 + src/IIIFPresentation/API/API.csproj | 2 +- .../Features/Manifest/ManifestReadService.cs | 8 +- .../Features/Manifest/ManifestWriteService.cs | 63 ++- .../Storage/Helpers/PresentationContextX.cs | 10 +- .../API/Infrastructure/ControllerBaseX.cs | 2 +- src/IIIFPresentation/API/Program.cs | 2 + .../AWS/Settings/SQSSettings.cs | 5 + .../BatchCompletionMessageHandlerTests.cs | 2 +- ...ServiceJobCompletionMessageHandlerTests.cs | 328 ++++++++++++ .../TextServiceJobCompletionMessageTests.cs | 77 +++ .../Infrastructure/ServiceCollectionX.cs | 11 +- .../BackgroundHandler/Program.cs | 5 + .../TextServiceJobCompletionMessage.cs | 39 ++ .../TextServiceJobCompletionMessageHandler.cs | 171 ++++++ src/IIIFPresentation/Core/Core.csproj | 2 +- .../Core/Settings/TextServicesSettings.cs | 27 + .../Models/API/Manifest/Pipeline.cs | 20 + .../API/Manifest/PresentationManifest.cs | 4 +- .../Models/Database/Collections/Manifest.cs | 20 +- .../Models/Database/General/PipelineJob.cs | 34 ++ src/IIIFPresentation/Models/Models.csproj | 2 +- .../20260612164039_AddPipelineJob.Designer.cs | 491 ++++++++++++++++++ .../20260612164039_AddPipelineJob.cs | 53 ++ .../PresentationContextModelSnapshot.cs | 65 +++ .../Repository/PresentationContext.cs | 21 + .../Repository/Repository.csproj | 2 +- .../Services.Tests/IIIFSerialisationXTests.cs | 2 +- .../Manifests/ManifestXTests.cs | 103 ++++ .../Manifests/PipelineXTests.cs | 84 +++ .../TextServices/TextServicesClientTests.cs | 132 +++++ .../Services/ServiceCollectionX.cs | 10 +- src/IIIFPresentation/Services/Services.csproj | 2 +- .../TextServices/ITextServicesClient.cs | 20 + 
.../TextServices/TextServicesClient.cs | 93 ++++ 37 files changed, 2114 insertions(+), 28 deletions(-) create mode 100644 src/IIIFPresentation/BackgroundHandler.Tests/TextCompletion/TextServiceJobCompletionMessageHandlerTests.cs create mode 100644 src/IIIFPresentation/BackgroundHandler.Tests/TextCompletion/TextServiceJobCompletionMessageTests.cs create mode 100644 src/IIIFPresentation/BackgroundHandler/TextCompletion/TextServiceJobCompletionMessage.cs create mode 100644 src/IIIFPresentation/BackgroundHandler/TextCompletion/TextServiceJobCompletionMessageHandler.cs create mode 100644 src/IIIFPresentation/Core/Settings/TextServicesSettings.cs create mode 100644 src/IIIFPresentation/Models/API/Manifest/Pipeline.cs create mode 100644 src/IIIFPresentation/Models/Database/General/PipelineJob.cs create mode 100644 src/IIIFPresentation/Repository/Migrations/20260612164039_AddPipelineJob.Designer.cs create mode 100644 src/IIIFPresentation/Repository/Migrations/20260612164039_AddPipelineJob.cs create mode 100644 src/IIIFPresentation/Services.Tests/Manifests/ManifestXTests.cs create mode 100644 src/IIIFPresentation/Services.Tests/Manifests/PipelineXTests.cs create mode 100644 src/IIIFPresentation/Services.Tests/TextServices/TextServicesClientTests.cs create mode 100644 src/IIIFPresentation/Services/TextServices/ITextServicesClient.cs create mode 100644 src/IIIFPresentation/Services/TextServices/TextServicesClient.cs diff --git a/src/IIIFPresentation/API.Tests/Features/Manifest/ManifestWriteServiceTests.cs b/src/IIIFPresentation/API.Tests/Features/Manifest/ManifestWriteServiceTests.cs index 5526cdf7..3ea7fbfa 100644 --- a/src/IIIFPresentation/API.Tests/Features/Manifest/ManifestWriteServiceTests.cs +++ b/src/IIIFPresentation/API.Tests/Features/Manifest/ManifestWriteServiceTests.cs @@ -23,6 +23,7 @@ using Models.DLCS; using DbBatchStatus = Models.Database.General.BatchStatus; using DbDeliverableType = Models.Database.General.DeliverableType; +using Models.Database.General; 
using Newtonsoft.Json.Linq; using Repository; using Repository.Paths; @@ -30,12 +31,14 @@ using Services.Manifests.AWS; using Services.Manifests.Helpers; using Services.Manifests.Settings; +using Services.TextServices; using Sqids; using Test.Helpers; using Test.Helpers.Helpers; using Test.Helpers.Integration; using Test.Helpers.Settings; using DbCanvasPainting = Models.Database.CanvasPainting; +using DbManifest = Models.Database.Collections.Manifest; using IIIFManifest = IIIF.Presentation.V3.Manifest; namespace API.Tests.Features.Manifest; @@ -52,6 +55,7 @@ public class ManifestWriteServiceTests private readonly IDlcsApiClient dlcsClient; private readonly IManifestStorageManager manifestStorageManager; private readonly LockManager manifestLockManager; + private readonly ITextServicesClient textServicesClient; public ManifestWriteServiceTests(PresentationContextFixture dbFixture) { @@ -114,9 +118,11 @@ public ManifestWriteServiceTests(PresentationContextFixture dbFixture) manifestLockManager = new LockManager(); + textServicesClient = A.Fake(); sut = new ManifestWriteService(presentationContext, identityManager, canvasPaintingResolver, new TestPathGenerator(presentationGenerator), settingsBasedPathGenerator, dlcsManifestCoordinator, parentSlugParser, - manifestStorageManager, pathRewriteParser, manifestLockManager, new NullLogger()); + manifestStorageManager, pathRewriteParser, manifestLockManager, textServicesClient, + Options.Create(new AWSSettings()), new NullLogger()); var parentCollection = presentationContext.Collections.First(x => x.Id == RootCollection.Id); @@ -530,7 +536,7 @@ public async Task Create_SuccessfullyCreatesManifest_WhenShortCanvasIdUsedWithMa // Setup a fake batch with resource ID, this is unfinished so means it's sync complete A.CallTo(() => dlcsClient.IngestDeliverables(Customer, A>._, false, A._)) - .Returns([new Batch { Finished = null, ResourceId = "12345" }]); + .Returns([new DLCS.Models.Batch { Finished = null, ResourceId = "12345" }]); 
var (slug, resourceId, assetId) = TestIdentifiers.SlugResourceAsset(); @@ -1118,4 +1124,132 @@ public async Task Create_SuccessfullyCreatesManifest_WhenItemOnlyFollowedByMatch dbManifest.CanvasPaintings[2].CanvasOriginalId.Should().Be( $"https://base/0/canvases/{canvasId}_3"); dbManifest.CanvasPaintings[3].Id.Should().Be( $"{canvasId}_4"); } + + [Fact] + public async Task Create_ReturnsAccepted_WhenManifestHasPipeline() + { + // Arrange + var (slug, resourceId) = TestIdentifiers.SlugResource(); + var manifest = new PresentationManifest + { + Slug = slug, + Pipeline = [new PipelineItem { Name = "text", Config = new PipelineConfig { Action = "Index" } }] + }; + var request = new UpsertManifestRequest(resourceId, null, Customer, manifest, manifest.AsJson(), true); + + // Act + var result = await sut.Create(request, CancellationToken.None); + + // Assert + result.WriteResult.Should().Be(WriteResult.Accepted); + } + + [Fact] + public async Task Create_CallsTextServicesAndCreatesPipelineJob_WhenManifestHasPipeline() + { + // Arrange + var (slug, resourceId) = TestIdentifiers.SlugResource(); + var manifest = new PresentationManifest + { + Slug = slug, + Pipeline = [new PipelineItem { Name = "text", Config = new PipelineConfig { Action = "Index" } }] + }; + var request = new UpsertManifestRequest(resourceId, null, Customer, manifest, manifest.AsJson(), true); + + // Act + var result = await sut.Create(request, CancellationToken.None); + + // Assert + A.CallTo(() => textServicesClient.CreateOrUpdateJob(A._, A._, A._)) + .MustHaveHappenedOnceExactly(); + + var flatId = result.Entity.FlatId; + var pipelineJob = presentationContext.PipelineJobs.FirstOrDefault(p => p.ManifestId == flatId); + pipelineJob.Should().NotBeNull(); + pipelineJob!.Status.Should().Be(PipelineJobStatus.Queued); + pipelineJob.TextJobId.Should().Be($"{Customer}/iiif/{flatId}"); + } + + [Fact] + public async Task Create_DoesNotCallTextServices_WhenManifestHasNoPipeline() + { + // Arrange + var (slug, 
resourceId) = TestIdentifiers.SlugResource(); + var manifest = new PresentationManifest + { + Slug = slug, + Items = [new Canvas { Id = "https://base/0/canvases/canvas-1" }] + }; + var request = new UpsertManifestRequest(resourceId, null, Customer, manifest, manifest.AsJson(), true); + + // Act + await sut.Create(request, CancellationToken.None); + + // Assert + A.CallTo(() => textServicesClient.CreateOrUpdateJob(A._, A._, A._)) + .MustNotHaveHappened(); + } + + [Fact] + public async Task Create_SavesManifestToStaging_WhenPipelineIsSet() + { + // Arrange + var (slug, resourceId) = TestIdentifiers.SlugResource(); + var manifest = new PresentationManifest + { + Slug = slug, + Pipeline = [new PipelineItem { Name = "text", Config = new PipelineConfig { Action = "Index" } }] + }; + var request = new UpsertManifestRequest(resourceId, null, Customer, manifest, manifest.AsJson(), true); + + // Act + await sut.Create(request, CancellationToken.None); + + // Assert + A.CallTo(() => manifestStorageManager.SaveManifestInStorage( + A._, A._, null, true, A._)) + .MustHaveHappenedOnceExactly(); + } + + [Fact] + public async Task Create_ResetsPipelineJob_WhenJobAlreadyExistsForManifest() + { + // Arrange - simulate resubmit by seeding an existing completed PipelineJob + var (slug, resourceId) = TestIdentifiers.SlugResource(); + + // First create + var manifest = new PresentationManifest + { + Slug = slug, + Pipeline = [new PipelineItem { Name = "text", Config = new PipelineConfig { Action = "Index" } }] + }; + var request = new UpsertManifestRequest(resourceId, null, Customer, manifest, manifest.AsJson(), true); + var firstResult = await sut.Create(request, CancellationToken.None); + var flatId = firstResult.Entity.FlatId; + + // Manually mark the job as completed (simulating a prior run) + var existingJob = presentationContext.PipelineJobs.First(p => p.ManifestId == flatId); + presentationContext.Entry(existingJob).State = Microsoft.EntityFrameworkCore.EntityState.Detached; + + 
// Second create (update path) — use the existing slug/manifest with pipeline again + var updateManifest = new PresentationManifest + { + Slug = slug, + Pipeline = [new PipelineItem { Name = "text", Config = new PipelineConfig { Action = "Index" } }] + }; + var etag = presentationContext.Manifests.First(m => m.Id == flatId).Etag.ToString(); + var updateRequest = new UpsertManifestRequest(flatId, etag, Customer, updateManifest, updateManifest.AsJson(), false); + + // Act + var result = await sut.Upsert(updateRequest, CancellationToken.None); + + // Assert + result.WriteResult.Should().Be(WriteResult.Accepted); + A.CallTo(() => textServicesClient.CreateOrUpdateJob(A._, A._, A._)) + .MustHaveHappenedTwiceExactly(); + + var jobs = presentationContext.PipelineJobs.Where(p => p.ManifestId == flatId).ToList(); + jobs.Should().HaveCount(1, "resubmit should reset existing job, not create a second one"); + jobs[0].Status.Should().Be(PipelineJobStatus.Queued); + } } diff --git a/src/IIIFPresentation/API.Tests/Integration/GetManifestTests.cs b/src/IIIFPresentation/API.Tests/Integration/GetManifestTests.cs index faa03483..9426a2b0 100644 --- a/src/IIIFPresentation/API.Tests/Integration/GetManifestTests.cs +++ b/src/IIIFPresentation/API.Tests/Integration/GetManifestTests.cs @@ -442,6 +442,76 @@ await amazonS3.PutObjectAsync(new() }); } + [Fact] + public async Task Get_IiifManifest_Flat_ReturnsAccepted_WhenPipelineJobQueued() + { + var id = TestIdentifiers.IdWithSuffix(suffix: "_pipelineQueued"); + + // Arrange + var dbManifest = await dbContext.Manifests.AddTestManifest(id); + await dbContext.PipelineJobs.AddAsync(new PipelineJob + { + ManifestId = id, + CustomerId = 1, + TextJobId = $"1/iiif/{id}", + Status = PipelineJobStatus.Queued, + Created = DateTime.UtcNow + }); + await dbContext.SaveChangesAsync(); + + await amazonS3.PutObjectAsync(new() + { + BucketName = LocalStackFixture.StorageBucketName, + Key = $"staging/1/manifests/{id}", + ContentBody = TestContent.ManifestJson + 
}); + + var requestMessage = + HttpRequestMessageBuilder.GetPrivateRequest(HttpMethod.Get, $"1/manifests/{id}"); + var response = await httpClient.AsCustomer().SendAsync(requestMessage); + + // Assert + response.StatusCode.Should().Be(HttpStatusCode.Accepted); + response.Headers.Should().NotContainKey(HeaderNames.ETag, "manifest is not yet finalised"); + var manifest = await response.ReadAsPresentationJsonAsync(); + manifest.Should().NotBeNull(); + manifest!.Id.Should().Be($"http://localhost/1/manifests/{id}"); + } + + [Fact] + public async Task Get_IiifManifest_Flat_ReturnsOK_WhenPipelineJobCompleted() + { + var id = TestIdentifiers.IdWithSuffix(suffix: "_pipelineCompleted"); + + // Arrange + var dbManifest = await dbContext.Manifests.AddTestManifest(id); + await dbContext.PipelineJobs.AddAsync(new PipelineJob + { + ManifestId = id, + CustomerId = 1, + TextJobId = $"1/iiif/{id}", + Status = PipelineJobStatus.Completed, + Created = DateTime.UtcNow, + Finished = DateTime.UtcNow + }); + await dbContext.SaveChangesAsync(); + + await amazonS3.PutObjectAsync(new() + { + BucketName = LocalStackFixture.StorageBucketName, + Key = $"1/manifests/{id}", + ContentBody = TestContent.ManifestJson + }); + + var requestMessage = + HttpRequestMessageBuilder.GetPrivateRequest(HttpMethod.Get, $"1/manifests/{id}"); + var response = await httpClient.AsCustomer().SendAsync(requestMessage); + + // Assert + response.StatusCode.Should().Be(HttpStatusCode.OK); + response.Headers.Should().ContainKey(HeaderNames.ETag, "pipeline is complete so manifest is final"); + } + [Fact] public async Task Get_IiifManifest_Hierarchical_ReturnsNotFoundWhenIngesting() { diff --git a/src/IIIFPresentation/API.Tests/Integration/ModifyManifestCreateTests.cs b/src/IIIFPresentation/API.Tests/Integration/ModifyManifestCreateTests.cs index f13ff686..52cde00d 100644 --- a/src/IIIFPresentation/API.Tests/Integration/ModifyManifestCreateTests.cs +++ 
b/src/IIIFPresentation/API.Tests/Integration/ModifyManifestCreateTests.cs @@ -1885,4 +1885,26 @@ private static void SetupApiClientWithBatchReturn(params string[] assetIds) A._)) .Returns([new Batch { Finished = null, ResourceId = TestIdentifiers.BatchId().ToString() }]); } + + [Fact] + public async Task CreateManifest_ReturnsAccepted_WithNoETag_WhenManifestHasPipeline() + { + // Arrange + var manifest = new PresentationManifest + { + Parent = $"http://localhost/{Customer}/collections/{RootCollection.Id}", + Slug = TestIdentifiers.Slug(), + Pipeline = [new PipelineItem { Name = "text", Config = new PipelineConfig { Action = "Index" } }] + }; + var requestMessage = + HttpRequestMessageBuilder.GetPrivateRequest(HttpMethod.Post, $"{Customer}/manifests", manifest.AsJson()); + + // Act + var response = await httpClient.AsCustomer().SendAsync(requestMessage); + + // Assert + response.StatusCode.Should().Be(HttpStatusCode.Accepted); + response.Headers.Should().NotContainKey(Microsoft.Net.Http.Headers.HeaderNames.ETag, + "202 responses must not include an ETag as the manifest is not yet finalised"); + } } diff --git a/src/IIIFPresentation/API/API.csproj b/src/IIIFPresentation/API/API.csproj index 0c337fe9..c0aa8e93 100644 --- a/src/IIIFPresentation/API/API.csproj +++ b/src/IIIFPresentation/API/API.csproj @@ -10,7 +10,7 @@ - + diff --git a/src/IIIFPresentation/API/Features/Manifest/ManifestReadService.cs b/src/IIIFPresentation/API/Features/Manifest/ManifestReadService.cs index 0409cf76..4b435223 100644 --- a/src/IIIFPresentation/API/Features/Manifest/ManifestReadService.cs +++ b/src/IIIFPresentation/API/Features/Manifest/ManifestReadService.cs @@ -37,7 +37,7 @@ public async Task> GetManifest(int custo IImmutableSet ifNoneMatch, bool pathOnly, CancellationToken cancellationToken) { var dbManifest = await dbContext.RetrieveManifestAsync(manifestId, withBatches: true, - cancellationToken: cancellationToken); + withPipelineJobs: true, cancellationToken: cancellationToken); if 
(dbManifest == null) return FetchEntityResult.NotFound(); @@ -57,11 +57,11 @@ public async Task> GetManifest(int custo var getAssets = dlcsManifestCoordinator.GetAssets(customerId, dbManifest, cancellationToken); PresentationManifest? manifest = null; - if (dbManifest.IsIngesting()) + if (dbManifest.HasFurtherWork()) { manifest = await iiifS3.ReadIIIFFromS3(dbManifest, BucketLocationType.Staging, cancellationToken); if (manifest == null) - logger.LogError("Manifest {DbManifestId} IsIngesting but can't read from staging", dbManifest.Id); + logger.LogError("Manifest {DbManifestId} has further work pending but can't read from staging", dbManifest.Id); } // if is not ingesting read from "real" location @@ -84,7 +84,7 @@ public async Task> GetManifest(int custo m => Enumerable.Single(m.Hierarchy!, h => h.Canonical)); Guid? etag = dbManifest.Etag; - if (dbManifest.IsIngesting()) + if (dbManifest.HasFurtherWork()) { manifest.CurrentlyIngesting = true; etag = null; diff --git a/src/IIIFPresentation/API/Features/Manifest/ManifestWriteService.cs b/src/IIIFPresentation/API/Features/Manifest/ManifestWriteService.cs index 3024dbdd..6a8137ed 100644 --- a/src/IIIFPresentation/API/Features/Manifest/ManifestWriteService.cs +++ b/src/IIIFPresentation/API/Features/Manifest/ManifestWriteService.cs @@ -5,12 +5,15 @@ using API.Helpers; using API.Infrastructure.Helpers; using API.Infrastructure.IdGenerator; +using AWS.Helpers; +using AWS.Settings; using Core; using Core.Auth; using Core.Helpers; using Core.IIIF; using API.Infrastructure; using DLCS.Exceptions; +using Microsoft.Extensions.Options; using Models.API.General; using Models.API.Manifest; using Models.Database; @@ -24,6 +27,7 @@ using Services.Manifests.AWS; using Services.Manifests.Helpers; using Services.Manifests.Model; +using Services.TextServices; using CanvasPainting = Models.Database.CanvasPainting; using DbManifest = Models.Database.Collections.Manifest; using PresUpdateResult = 
API.Infrastructure.Requests.ModifyEntityResult; @@ -97,6 +101,8 @@ public class ManifestWriteService( IManifestStorageManager manifestStorageManager, IPathRewriteParser pathRewriteParser, ILockManager manifestLockManager, + ITextServicesClient textServicesClient, + IOptions awsOptions, ILogger logger) : IManifestWrite { /// @@ -187,7 +193,9 @@ await CreateDatabaseRecord(request, resolved.ParsedParentSlug!, dlcsResult.Inter dlcsResult.CanvasPaintings, cancellationToken); if (error != null) return error; - var writeResult = dlcsResult.InteractionResult!.CanBeBuiltUpfront ? WriteResult.Created : WriteResult.Accepted; + var writeResult = dlcsResult.InteractionResult!.CanBeBuiltUpfront && !request.PresentationManifest.HasTextIndexPipeline() + ? WriteResult.Created + : WriteResult.Accepted; return await SaveToS3AndGenerateResult(request, dbManifest!, dlcsResult.InteractionResult!, writeResult, cancellationToken); } @@ -218,7 +226,9 @@ private async Task UpdateInternal(UpsertManifestRequest reques dlcsResult.InteractionResult!.SpaceId, cancellationToken); if (error != null) return error; - var writeResult = dlcsResult.InteractionResult!.CanBeBuiltUpfront ? WriteResult.Updated : WriteResult.Accepted; + var writeResult = dlcsResult.InteractionResult!.CanBeBuiltUpfront && !request.PresentationManifest.HasTextIndexPipeline() + ? 
WriteResult.Updated + : WriteResult.Accepted; return await SaveToS3AndGenerateResult(request, dbManifest!, dlcsResult.InteractionResult!, writeResult, cancellationToken); } @@ -422,6 +432,7 @@ private async Task SaveToS3(DbManifest dbManifest, WriteManifestRequest request, var hasAssets = request.PresentationManifest.PaintedResources.HasAsset(); var hasAdjuncts = request.PresentationManifest.Adjuncts != null || dbManifest.Batches?.Any(b => b.DeliverableType == DeliverableType.Adjunct) == true; + var hasPipeline = request.PresentationManifest.HasTextIndexPipeline(); // When there is further work to do the JSON saved to S3 differs substantially from the original payload, // and we will want to store it. Otherwise, we'll pass null not to store the raw request. @@ -429,7 +440,10 @@ private async Task SaveToS3(DbManifest dbManifest, WriteManifestRequest request, var originalToStore = requiresExternalContent ? request.RawRequestBody : null; - if (canBeBuiltUpfront && requiresExternalContent) + // Pipeline forces staging even if we'd otherwise save directly to final + var saveToStaging = !canBeBuiltUpfront || hasPipeline; + + if (canBeBuiltUpfront && requiresExternalContent && !hasPipeline) { logger.LogDebug("Manifest {Manifest} can be built upfront, after merging", dbManifest.Id); var manifest = await manifestStorageManager.UpsertManifestInStorage(iiifManifest, dbManifest, @@ -448,22 +462,55 @@ private async Task SaveToS3(DbManifest dbManifest, WriteManifestRequest request, iiifManifest.Items = canvasPaintings.GenerateProvisionalCanvases(savedManifestPathGenerator, iiifManifest.Items, pathRewriteParser); } - + request.PresentationManifest.Items = iiifManifest.Items; await manifestStorageManager.SaveManifestInStorage(iiifManifest, dbManifest, originalToStore, - !canBeBuiltUpfront, cancellationToken); + saveToStaging, cancellationToken); - // Direct save (built upfront, no external content) with nothing to store as original: - // remove any stale original payload left by a 
previous version of this manifest. - if (originalToStore is null) + if (!saveToStaging && originalToStore is null) { await manifestStorageManager.DeleteOriginalPayload(dbManifest); } } + if (hasPipeline) + { + await SubmitTextPipelineJob(dbManifest, cancellationToken); + } + await dbContext.SaveChangesAsync(cancellationToken); } + private async Task SubmitTextPipelineJob(DbManifest dbManifest, CancellationToken cancellationToken) + { + var stagingKey = dbManifest.GetResourceBucketKey(BucketLocationType.Staging); + var s3Uri = $"s3://{awsOptions.Value.S3.StorageBucket}/{stagingKey}"; + var jobId = $"{dbManifest.CustomerId}/iiif/{dbManifest.Id}"; + + await textServicesClient.CreateOrUpdateJob(jobId, s3Uri, cancellationToken); + + var existing = dbContext.PipelineJobs + .FirstOrDefault(p => p.ManifestId == dbManifest.Id && p.CustomerId == dbManifest.CustomerId); + + if (existing != null) + { + existing.Status = PipelineJobStatus.Queued; + existing.Error = null; + existing.Finished = null; + } + else + { + await dbContext.PipelineJobs.AddAsync(new PipelineJob + { + ManifestId = dbManifest.Id, + CustomerId = dbManifest.CustomerId, + TextJobId = jobId, + Status = PipelineJobStatus.Queued, + Created = DateTime.UtcNow + }, cancellationToken); + } + } + /// /// Stamps the merged IIIF fields (Items, SeeAlso, Rendering, Annotations) from the stored manifest back onto /// so the API response reflects what ManifestMerger produced. 
diff --git a/src/IIIFPresentation/API/Features/Storage/Helpers/PresentationContextX.cs b/src/IIIFPresentation/API/Features/Storage/Helpers/PresentationContextX.cs index d02663cc..816d987f 100644 --- a/src/IIIFPresentation/API/Features/Storage/Helpers/PresentationContextX.cs +++ b/src/IIIFPresentation/API/Features/Storage/Helpers/PresentationContextX.cs @@ -70,7 +70,8 @@ public static class PresentationContextX /// A cancellation token /// The retrieved collection public static Task RetrieveManifestAsync(this PresentationContext dbContext, - string manifestId, bool tracked = false, bool withCanvasPaintings = true, bool withBatches = false, CancellationToken cancellationToken = default) + string manifestId, bool tracked = false, bool withCanvasPaintings = true, bool withBatches = false, + bool withPipelineJobs = false, CancellationToken cancellationToken = default) { IQueryable dbContextManifests = dbContext.Manifests; @@ -83,7 +84,12 @@ public static class PresentationContextX { dbContextManifests = dbContextManifests.Include(m => m.Batches); } - + + if (withPipelineJobs) + { + dbContextManifests = dbContextManifests.Include(m => m.PipelineJobs); + } + return dbContextManifests.Retrieve(manifestId, tracked, cancellationToken); } diff --git a/src/IIIFPresentation/API/Infrastructure/ControllerBaseX.cs b/src/IIIFPresentation/API/Infrastructure/ControllerBaseX.cs index 88101a7c..ea85a761 100644 --- a/src/IIIFPresentation/API/Infrastructure/ControllerBaseX.cs +++ b/src/IIIFPresentation/API/Infrastructure/ControllerBaseX.cs @@ -70,7 +70,7 @@ public static IActionResult ModifyResultToHttpResult(this ControllerBa { WriteResult.Updated => controller.PresentationContent(entityResult.Entity, etag: entityResult.ETag), WriteResult.Accepted => controller.PresentationWithLocationHeader(controller.Request.GetDisplayUrl(), - entityResult.Entity, (int)HttpStatusCode.Accepted, entityResult.ETag), + entityResult.Entity, (int)HttpStatusCode.Accepted, null), WriteResult.Created => 
controller.PresentationWithLocationHeader(controller.Request.GetDisplayUrl(), entityResult.Entity, (int)HttpStatusCode.Created, entityResult.ETag), WriteResult.NotFound => controller.PresentationNotFound(entityResult.Error), diff --git a/src/IIIFPresentation/API/Program.cs b/src/IIIFPresentation/API/Program.cs index 7468d682..11daac29 100644 --- a/src/IIIFPresentation/API/Program.cs +++ b/src/IIIFPresentation/API/Program.cs @@ -53,6 +53,7 @@ .BindConfiguration(nameof(CacheSettings)); var dlcsSettings = builder.Configuration.GetSection(DlcsSettings.SettingsName); builder.Services.Configure(dlcsSettings); +builder.Services.Configure(builder.Configuration.GetSection(TextServicesSettings.SettingsName)); var cacheSettings = builder.Configuration.GetSection(nameof(CacheSettings)).Get() ?? new CacheSettings(); var dlcs = dlcsSettings.Get()!; @@ -61,6 +62,7 @@ builder.Services .AddDlcsApiClient(dlcs) .AddDlcsOrchestratorClient(dlcs) + .AddTextServicesClient() .AddDelegatedAuthHandler(opts => { opts.Realm = "DLCS-API"; }); builder.Services.ConfigureDefaultCors(corsPolicyName); builder.Services.AddDataAccess(builder.Configuration); diff --git a/src/IIIFPresentation/AWS/Settings/SQSSettings.cs b/src/IIIFPresentation/AWS/Settings/SQSSettings.cs index 20b9ec98..452bdb9d 100644 --- a/src/IIIFPresentation/AWS/Settings/SQSSettings.cs +++ b/src/IIIFPresentation/AWS/Settings/SQSSettings.cs @@ -11,6 +11,11 @@ public class SQSSettings /// Name of queue that will receive notifications when a batch is completed /// public string? BatchCompletionQueueName { get; set; } + + /// + /// Name of queue that will receive notifications when a text-services job is completed + /// + public string? 
TextJobQueueName { get; set; } /// /// The duration (in seconds) for which the call waits for a message to arrive in the queue before returning diff --git a/src/IIIFPresentation/BackgroundHandler.Tests/BatchCompletion/BatchCompletionMessageHandlerTests.cs b/src/IIIFPresentation/BackgroundHandler.Tests/BatchCompletion/BatchCompletionMessageHandlerTests.cs index 73840cec..77adb417 100644 --- a/src/IIIFPresentation/BackgroundHandler.Tests/BatchCompletion/BatchCompletionMessageHandlerTests.cs +++ b/src/IIIFPresentation/BackgroundHandler.Tests/BatchCompletion/BatchCompletionMessageHandlerTests.cs @@ -47,7 +47,7 @@ public class BatchCompletionMessageHandlerTests public BatchCompletionMessageHandlerTests(PresentationContextFixture dbFixture) { - // The context from dbFixture doesn't track changes so setup/assert + // The context from dbFixture doesn't track changes so setup/assert dbContext = dbFixture.DbContext; dbFixture.CustomerIdProvider.SetCustomerId(CustomerId); diff --git a/src/IIIFPresentation/BackgroundHandler.Tests/TextCompletion/TextServiceJobCompletionMessageHandlerTests.cs b/src/IIIFPresentation/BackgroundHandler.Tests/TextCompletion/TextServiceJobCompletionMessageHandlerTests.cs new file mode 100644 index 00000000..a40621a8 --- /dev/null +++ b/src/IIIFPresentation/BackgroundHandler.Tests/TextCompletion/TextServiceJobCompletionMessageHandlerTests.cs @@ -0,0 +1,328 @@ +using Amazon.SQS.Model; +using AWS.Helpers; +using AWS.SQS; +using BackgroundHandler.TextCompletion; +using BackgroundHandler.Tests.infrastructure; +using FakeItEasy; +using FluentAssertions; +using IIIF.Presentation.V3; +using IIIF.Search.V2; +using Microsoft.EntityFrameworkCore; +using Microsoft.Extensions.Logging.Abstractions; +using Models.Database.Collections; +using Models.Database.General; +using Repository; +using Services.Manifests.AWS; +using Services.TextServices; +using Test.Helpers; +using Test.Helpers.Helpers; +using Test.Helpers.Integration; +using DbManifest = 
Models.Database.Collections.Manifest; +using IIIFManifest = IIIF.Presentation.V3.Manifest; + +namespace BackgroundHandler.Tests.TextCompletion; + +[Trait("Category", "Database")] +[Collection(CollectionDefinitions.DatabaseCollection.CollectionName)] +public class TextServiceJobCompletionMessageHandlerTests +{ + private readonly PresentationContext dbContext; + private readonly TextServiceJobCompletionMessageHandler sut; + private readonly IManifestStorageManager manifestStorageManager; + private readonly IIIIFS3Service iiifS3; + private readonly ITextServicesClient textServicesClient; + private const int CustomerId = 1; + + public TextServiceJobCompletionMessageHandlerTests(PresentationContextFixture dbFixture) + { + dbContext = dbFixture.DbContext; + dbFixture.CustomerIdProvider.SetCustomerId(CustomerId); + + var sutContext = dbFixture.GetNewPresentationContext(dbFixture.CustomerIdProvider); + + iiifS3 = A.Fake(); + manifestStorageManager = A.Fake(); + textServicesClient = A.Fake(); + + sut = new TextServiceJobCompletionMessageHandler( + sutContext, + dbFixture.CustomerIdProvider, + manifestStorageManager, + iiifS3, + textServicesClient, + new NullLogger()); + } + + [Theory] + [InlineData(0)] + [InlineData(1)] + public async Task HandleMessage_ReturnsFalse_WhenPipelineJobNotFound_BelowRetryThreshold(int receiveCount) + { + var message = CreateMessage("1/iiif/unknown-manifest", "Completed", receiveCount); + + (await sut.HandleMessage(message, CancellationToken.None)).Should().BeFalse(); + A.CallTo(() => iiifS3.ReadIIIFFromS3(A._, A._, A._)) + .MustNotHaveHappened(); + } + + [Theory] + [InlineData(2)] + [InlineData(5)] + public async Task HandleMessage_ReturnsTrue_WhenPipelineJobNotFound_AboveRetryThreshold(int receiveCount) + { + var message = CreateMessage("1/iiif/unknown-manifest-discard", "Completed", receiveCount); + + (await sut.HandleMessage(message, CancellationToken.None)).Should().BeTrue(); + A.CallTo(() => iiifS3.ReadIIIFFromS3(A._, A._, A._)) + 
.MustNotHaveHappened(); + } + + [Fact] + public async Task HandleMessage_ReturnsFalse_WhenStagedManifestMissing() + { + var manifestId = TestIdentifiers.IdWithSuffix(suffix: "_staging_missing"); + var jobId = $"{CustomerId}/iiif/{manifestId}"; + await SetupManifestWithPipelineJob(manifestId, jobId); + + A.CallTo(() => iiifS3.ReadIIIFFromS3(A._, BucketLocationType.Staging, A._)) + .Returns((IIIFManifest?)null); + + var message = CreateMessage(jobId, "Completed"); + + (await sut.HandleMessage(message, CancellationToken.None)).Should().BeFalse(); + A.CallTo(() => manifestStorageManager.SaveManifestInStorage(A._, A._, A._, A._, A._)) + .MustNotHaveHappened(); + } + + [Fact] + public async Task HandleMessage_UpdatesStatusToFailed_AndSavesManifest_WhenJobFailed() + { + var manifestId = TestIdentifiers.IdWithSuffix(suffix: "_failed"); + var jobId = $"{CustomerId}/iiif/{manifestId}"; + await SetupManifestWithPipelineJob(manifestId, jobId); + + A.CallTo(() => iiifS3.ReadIIIFFromS3(A._, BucketLocationType.Staging, A._)) + .Returns(new IIIFManifest { Id = manifestId }); + + var message = CreateMessage(jobId, "Failed", errors: "OCR timed out"); + + (await sut.HandleMessage(message, CancellationToken.None)).Should().BeTrue(); + + var job = dbContext.PipelineJobs.Single(p => p.TextJobId == jobId); + job.Status.Should().Be(PipelineJobStatus.Failed); + job.Error.Should().Be("OCR timed out"); + + A.CallTo(() => manifestStorageManager.SaveManifestInStorage( + A._, A._, null, false, A._)) + .MustHaveHappenedOnceExactly(); + A.CallTo(() => textServicesClient.GetTextAugmentedManifest(A._, A._)) + .MustNotHaveHappened(); + A.CallTo(() => iiifS3.DeleteIIIFFromS3(A._, true)) + .MustHaveHappenedOnceExactly(); + } + + [Fact] + public async Task HandleMessage_UpdatesStatusToCompleted_AndSavesManifest_WhenJobCompletedWithNoAugmentedServices() + { + var manifestId = TestIdentifiers.IdWithSuffix(suffix: "_completed_no_services"); + var jobId = $"{CustomerId}/iiif/{manifestId}"; + await 
SetupManifestWithPipelineJob(manifestId, jobId); + + A.CallTo(() => iiifS3.ReadIIIFFromS3(A._, BucketLocationType.Staging, A._)) + .Returns(new IIIFManifest { Id = manifestId }); + A.CallTo(() => textServicesClient.GetTextAugmentedManifest(jobId, A._)) + .Returns((IIIFManifest?)null); + + var message = CreateMessage(jobId, "Completed"); + + (await sut.HandleMessage(message, CancellationToken.None)).Should().BeTrue(); + + var job = dbContext.PipelineJobs.Single(p => p.TextJobId == jobId); + job.Status.Should().Be(PipelineJobStatus.Completed); + job.Error.Should().BeNull(); + + A.CallTo(() => manifestStorageManager.SaveManifestInStorage( + A._, A._, null, false, A._)) + .MustHaveHappenedOnceExactly(); + A.CallTo(() => iiifS3.DeleteIIIFFromS3(A._, true)) + .MustHaveHappenedOnceExactly(); + } + + [Fact] + public async Task HandleMessage_MergesSearchServicesIntoManifest_WhenAugmentedManifestHasServices() + { + var manifestId = TestIdentifiers.IdWithSuffix(suffix: "_merged_services"); + var jobId = $"{CustomerId}/iiif/{manifestId}"; + await SetupManifestWithPipelineJob(manifestId, jobId); + + var stagedManifest = new IIIFManifest { Id = manifestId }; + A.CallTo(() => iiifS3.ReadIIIFFromS3(A._, BucketLocationType.Staging, A._)) + .Returns(stagedManifest); + + var searchService = new SearchService2 { Id = "https://search.example.com/search" }; + var augmentedManifest = new IIIFManifest + { + Services = [searchService] + }; + A.CallTo(() => textServicesClient.GetTextAugmentedManifest(jobId, A._)) + .Returns(augmentedManifest); + + IIIFManifest? savedManifest = null; + A.CallTo(() => manifestStorageManager.SaveManifestInStorage( + A._, A._, null, false, A._)) + .Invokes((IIIFManifest m, DbManifest _, string? 
_, bool _, CancellationToken _) => savedManifest = m) + .Returns(Task.CompletedTask); + + var message = CreateMessage(jobId, "Completed"); + + (await sut.HandleMessage(message, CancellationToken.None)).Should().BeTrue(); + + savedManifest.Should().NotBeNull(); + savedManifest!.Services.Should().ContainSingle(s => s.Id == searchService.Id); + } + + [Fact] + public async Task HandleMessage_DoesNotDuplicateServices_WhenAugmentedManifestContainsDuplicateServiceId() + { + var manifestId = TestIdentifiers.IdWithSuffix(suffix: "_dedup_services"); + var jobId = $"{CustomerId}/iiif/{manifestId}"; + await SetupManifestWithPipelineJob(manifestId, jobId); + + const string serviceId = "https://search.example.com/search"; + var stagedManifest = new IIIFManifest + { + Id = manifestId, + Services = [new SearchService2 { Id = serviceId }] + }; + A.CallTo(() => iiifS3.ReadIIIFFromS3(A._, BucketLocationType.Staging, A._)) + .Returns(stagedManifest); + + var augmentedManifest = new IIIFManifest + { + Services = [new SearchService2 { Id = serviceId }] + }; + A.CallTo(() => textServicesClient.GetTextAugmentedManifest(jobId, A._)) + .Returns(augmentedManifest); + + IIIFManifest? savedManifest = null; + A.CallTo(() => manifestStorageManager.SaveManifestInStorage( + A._, A._, null, false, A._)) + .Invokes((IIIFManifest m, DbManifest _, string? 
_, bool _, CancellationToken _) => savedManifest = m) + .Returns(Task.CompletedTask); + + var message = CreateMessage(jobId, "Completed"); + + await sut.HandleMessage(message, CancellationToken.None); + + savedManifest!.Services.Should().HaveCount(1, "duplicate service ID should not be added twice"); + } + + [Fact] + public async Task HandleMessage_MergesContextFromAugmentedManifest_WhenAugmentedManifestHasContext() + { + var manifestId = TestIdentifiers.IdWithSuffix(suffix: "_context_merge"); + var jobId = $"{CustomerId}/iiif/{manifestId}"; + await SetupManifestWithPipelineJob(manifestId, jobId); + + const string searchContext = "http://iiif.io/api/search/2/context.json"; + var stagedManifest = new IIIFManifest { Id = manifestId }; + A.CallTo(() => iiifS3.ReadIIIFFromS3(A._, BucketLocationType.Staging, A._)) + .Returns(stagedManifest); + + var augmentedManifest = new IIIFManifest + { + Services = [new SearchService2 { Id = "https://search.example.com/search" }], + Context = searchContext + }; + A.CallTo(() => textServicesClient.GetTextAugmentedManifest(jobId, A._)) + .Returns(augmentedManifest); + + await sut.HandleMessage(CreateMessage(jobId, "Completed"), CancellationToken.None); + + stagedManifest.Context.Should().Be(searchContext); + } + + [Fact] + public async Task HandleMessage_DoesNotAddPresentation3Context_FromAugmentedManifest() + { + var manifestId = TestIdentifiers.IdWithSuffix(suffix: "_context_p3_skip"); + var jobId = $"{CustomerId}/iiif/{manifestId}"; + await SetupManifestWithPipelineJob(manifestId, jobId); + + var stagedManifest = new IIIFManifest { Id = manifestId }; + A.CallTo(() => iiifS3.ReadIIIFFromS3(A._, BucketLocationType.Staging, A._)) + .Returns(stagedManifest); + + var augmentedManifest = new IIIFManifest + { + Services = [new SearchService2 { Id = "https://search.example.com/search" }], + Context = IIIF.Presentation.Context.Presentation3Context + }; + A.CallTo(() => textServicesClient.GetTextAugmentedManifest(jobId, A._)) + 
.Returns(augmentedManifest); + + await sut.HandleMessage(CreateMessage(jobId, "Completed"), CancellationToken.None); + + stagedManifest.Context.Should().BeNull(); + } + + [Fact] + public async Task HandleMessage_SetsFinishedTimestamp_WhenJobCompletes() + { + var manifestId = TestIdentifiers.IdWithSuffix(suffix: "_finished_completed"); + var jobId = $"{CustomerId}/iiif/{manifestId}"; + await SetupManifestWithPipelineJob(manifestId, jobId); + + A.CallTo(() => iiifS3.ReadIIIFFromS3(A._, BucketLocationType.Staging, A._)) + .Returns(new IIIFManifest { Id = manifestId }); + A.CallTo(() => textServicesClient.GetTextAugmentedManifest(jobId, A._)) + .Returns((IIIFManifest?)null); + + await sut.HandleMessage(CreateMessage(jobId, "Completed"), CancellationToken.None); + + var job = dbContext.PipelineJobs.Single(p => p.TextJobId == jobId); + job.Finished.Should().Be(new DateTime(2024, 6, 12, 10, 0, 0, DateTimeKind.Utc)); + } + + [Fact] + public async Task HandleMessage_SetsFinishedTimestamp_WhenJobFails() + { + var manifestId = TestIdentifiers.IdWithSuffix(suffix: "_finished_failed"); + var jobId = $"{CustomerId}/iiif/{manifestId}"; + await SetupManifestWithPipelineJob(manifestId, jobId); + + A.CallTo(() => iiifS3.ReadIIIFFromS3(A._, BucketLocationType.Staging, A._)) + .Returns(new IIIFManifest { Id = manifestId }); + + await sut.HandleMessage(CreateMessage(jobId, "Failed", errors: "OCR error"), CancellationToken.None); + + var job = dbContext.PipelineJobs.Single(p => p.TextJobId == jobId); + job.Finished.Should().Be(new DateTime(2024, 6, 12, 10, 0, 0, DateTimeKind.Utc)); + } + + private async Task SetupManifestWithPipelineJob(string manifestId, string jobId) + { + var manifestEntry = await dbContext.Manifests.AddTestManifest(id: manifestId); + var manifest = manifestEntry.Entity; + await dbContext.PipelineJobs.AddAsync(new PipelineJob + { + ManifestId = manifest.Id, + CustomerId = manifest.CustomerId, + TextJobId = jobId, + Status = PipelineJobStatus.Queued, + Created = 
DateTime.UtcNow + }); + await dbContext.SaveChangesAsync(); + } + + private static QueueMessage CreateMessage(string jobId, string status, int approximateReceiveCount = 0, string? errors = null) + { + var errorsJson = errors == null ? "null" : $"\"{errors}\""; + var body = $$"""{"jobId":"{{jobId}}","status":"{{status}}","finished":"2024-06-12T10:00:00Z","totalPages":1,"totalWordCount":100,"errors":{{errorsJson}}}"""; + var systemAttributes = new Dictionary + { + ["ApproximateReceiveCount"] = approximateReceiveCount.ToString() + }; + return new QueueMessage(body, new Dictionary(), systemAttributes, $"msg-{jobId}"); + } +} diff --git a/src/IIIFPresentation/BackgroundHandler.Tests/TextCompletion/TextServiceJobCompletionMessageTests.cs b/src/IIIFPresentation/BackgroundHandler.Tests/TextCompletion/TextServiceJobCompletionMessageTests.cs new file mode 100644 index 00000000..db3cafb0 --- /dev/null +++ b/src/IIIFPresentation/BackgroundHandler.Tests/TextCompletion/TextServiceJobCompletionMessageTests.cs @@ -0,0 +1,77 @@ +using System.Text.Json; +using AWS.SQS; +using BackgroundHandler.TextCompletion; +using FluentAssertions; + +namespace BackgroundHandler.Tests.TextCompletion; + +public class TextServiceJobCompletionMessageTests +{ + private static QueueMessage ValidMessage() => + new( + """{"jobId":"1/iiif/manifest-1","status":"Completed","finished":"2024-06-12T10:00:00Z","totalPages":5,"totalWordCount":1200,"errors":null}""", + new Dictionary(), + "msg-1"); + + [Fact] + public void FromQueueMessage_DeserializesBodyProperties() + { + var result = TextServiceJobCompletionMessage.FromQueueMessage(ValidMessage()); + + result.JobId.Should().Be("1/iiif/manifest-1"); + result.Status.Should().Be("Completed"); + result.Finished.Should().Be(new DateTimeOffset(2024, 6, 12, 10, 0, 0, TimeSpan.Zero)); + result.TotalPages.Should().Be(5); + result.TotalWordCount.Should().Be(1200); + result.Errors.Should().BeNull(); + } + + [Fact] + public void 
FromQueueMessage_DeserializesErrors_WhenPresent() + { + var message = new QueueMessage( + """{"jobId":"1/iiif/x","status":"Failed","finished":null,"totalPages":0,"totalWordCount":0,"errors":"OCR failed on page 3"}""", + new Dictionary(), "msg-err"); + + var result = TextServiceJobCompletionMessage.FromQueueMessage(message); + + result.Errors.Should().Be("OCR failed on page 3"); + } + + [Theory] + [InlineData("Completed")] + [InlineData("completed")] + [InlineData("COMPLETED")] + public void IsCompleted_ReturnsTrue_CaseInsensitive(string status) + { + var message = new QueueMessage( + $$"""{"jobId":"1/iiif/x","status":"{{status}}","finished":null,"totalPages":0,"totalWordCount":0,"errors":null}""", + new Dictionary(), "msg"); + + TextServiceJobCompletionMessage.FromQueueMessage(message).IsCompleted.Should().BeTrue(); + } + + [Theory] + [InlineData("Failed")] + [InlineData("failed")] + [InlineData("FAILED")] + [InlineData("unknown")] + public void IsCompleted_ReturnsFalse_WhenStatusIsNotCompleted(string status) + { + var message = new QueueMessage( + $$"""{"jobId":"1/iiif/x","status":"{{status}}","finished":null,"totalPages":0,"totalWordCount":0,"errors":null}""", + new Dictionary(), "msg"); + + TextServiceJobCompletionMessage.FromQueueMessage(message).IsCompleted.Should().BeFalse(); + } + + [Fact] + public void FromQueueMessage_Throws_WhenBodyIsInvalidJson() + { + var message = new QueueMessage("not-json", new Dictionary(), "msg-bad"); + + var act = () => TextServiceJobCompletionMessage.FromQueueMessage(message); + + act.Should().Throw(); + } +} \ No newline at end of file diff --git a/src/IIIFPresentation/BackgroundHandler/Infrastructure/ServiceCollectionX.cs b/src/IIIFPresentation/BackgroundHandler/Infrastructure/ServiceCollectionX.cs index 6c9e7426..866b328c 100644 --- a/src/IIIFPresentation/BackgroundHandler/Infrastructure/ServiceCollectionX.cs +++ b/src/IIIFPresentation/BackgroundHandler/Infrastructure/ServiceCollectionX.cs @@ -6,6 +6,7 @@ using 
BackgroundHandler.BatchCompletion; using BackgroundHandler.CustomerCreation; using BackgroundHandler.Listener; +using BackgroundHandler.TextCompletion; using Repository; using Repository.Helpers; @@ -45,11 +46,19 @@ public static IServiceCollection AddBackgroundServices(this IServiceCollection s if (!string.IsNullOrEmpty(aws.SQS.BatchCompletionQueueName)) { services - .AddHostedService(sp => + .AddHostedService(sp => ActivatorUtilities.CreateInstance>(sp, aws.SQS.BatchCompletionQueueName)) .AddScoped(); } + if (!string.IsNullOrEmpty(aws.SQS.TextJobQueueName)) + { + services + .AddHostedService(sp => + ActivatorUtilities.CreateInstance>(sp, aws.SQS.TextJobQueueName)) + .AddScoped(); + } + return services; } diff --git a/src/IIIFPresentation/BackgroundHandler/Program.cs b/src/IIIFPresentation/BackgroundHandler/Program.cs index 766b602b..93104005 100644 --- a/src/IIIFPresentation/BackgroundHandler/Program.cs +++ b/src/IIIFPresentation/BackgroundHandler/Program.cs @@ -1,6 +1,7 @@ using AWS.Settings; using BackgroundHandler.Infrastructure; using BackgroundHandler.Settings; +using Core.Settings; using DLCS; using Repository.Helpers; using Repository.Paths; @@ -30,11 +31,15 @@ var dlcsSettings = builder.Configuration.GetSection(DlcsSettings.SettingsName); var dlcs = dlcsSettings.Get()!; +builder.Services.Configure( + builder.Configuration.GetSection(TextServicesSettings.SettingsName)); + builder.RegisterSharedServiceSettings(); builder.Services.AddAws(builder.Configuration, builder.Environment) .AddDataAccess(builder.Configuration) .AddDlcsOrchestratorClient(dlcs) + .AddTextServicesClient() .AddBackgroundServices(aws) .AddSingleton() .AddSingleton() diff --git a/src/IIIFPresentation/BackgroundHandler/TextCompletion/TextServiceJobCompletionMessage.cs b/src/IIIFPresentation/BackgroundHandler/TextCompletion/TextServiceJobCompletionMessage.cs new file mode 100644 index 00000000..cf68f4f5 --- /dev/null +++ 
b/src/IIIFPresentation/BackgroundHandler/TextCompletion/TextServiceJobCompletionMessage.cs @@ -0,0 +1,39 @@ +using System.Text.Json; +using System.Text.Json.Serialization; +using AWS.SQS; + +namespace BackgroundHandler.TextCompletion; + +/// +/// Represents a job-completion notification from text-services, matching JobCompletionNotification. +/// +[method: JsonConstructor] +public class TextServiceJobCompletionMessage( + string jobId, + string status, + DateTimeOffset? finished, + int totalPages, + int totalWordCount, + string? errors) +{ + private static readonly JsonSerializerOptions JsonSerializerOptions = new(JsonSerializerDefaults.Web); + + public string JobId { get; } = jobId; + + /// "Completed" or "Failed" + public string Status { get; } = status; + + public DateTimeOffset? Finished { get; } = finished; + + public int TotalPages { get; } = totalPages; + + public int TotalWordCount { get; } = totalWordCount; + + public string? Errors { get; } = errors; + + public bool IsCompleted => string.Equals(Status, "Completed", StringComparison.OrdinalIgnoreCase); + + public static TextServiceJobCompletionMessage FromQueueMessage(QueueMessage message) => + JsonSerializer.Deserialize(message.Body, JsonSerializerOptions) + ?? 
throw new JsonException("Deserialized TextServiceJobCompletionMessage was null"); +} \ No newline at end of file diff --git a/src/IIIFPresentation/BackgroundHandler/TextCompletion/TextServiceJobCompletionMessageHandler.cs b/src/IIIFPresentation/BackgroundHandler/TextCompletion/TextServiceJobCompletionMessageHandler.cs new file mode 100644 index 00000000..812ba396 --- /dev/null +++ b/src/IIIFPresentation/BackgroundHandler/TextCompletion/TextServiceJobCompletionMessageHandler.cs @@ -0,0 +1,171 @@ +using System.Diagnostics; +using AWS.Helpers; +using AWS.SQS; +using BackgroundHandler.Helpers; +using IIIF; +using IIIF.Presentation.V3; +using Microsoft.EntityFrameworkCore; +using Models.Database.General; +using Repository; +using Repository.Helpers; +using Services.Manifests.AWS; +using Services.TextServices; + +namespace BackgroundHandler.TextCompletion; + +public class TextServiceJobCompletionMessageHandler( + PresentationContext dbContext, + ICustomerIdProvider customerIdProvider, + IManifestStorageManager manifestStorageManager, + IIIIFS3Service iiifS3, + ITextServicesClient textServicesClient, + ILogger logger) + : IMessageHandler +{ + public async Task HandleMessage(QueueMessage message, CancellationToken cancellationToken) + { + using (LogContextHelpers.SetServiceName(nameof(TextServiceJobCompletionMessageHandler), message.MessageId)) + { + try + { + var completionMessage = DeserializeMessage(message, logger); + customerIdProvider.SetCustomerId(ExtractCustomerIdFromJobId(completionMessage.JobId)); + return await TryCompleteManifest(completionMessage, message.ApproximateReceiveCount, cancellationToken); + } + catch (Exception ex) + { + logger.LogError(ex, "Error handling text-service job completion message {MessageId}", message.MessageId); + } + } + + return false; + } + + private async Task TryCompleteManifest(TextServiceJobCompletionMessage completionMessage, + int approximateReceiveCount, CancellationToken cancellationToken) + { + var pipelineJob = await 
dbContext.PipelineJobs + .Include(p => p.Manifest) + .ThenInclude(m => m!.CanvasPaintings) + .SingleOrDefaultAsync(p => p.TextJobId == completionMessage.JobId, cancellationToken); + + if (pipelineJob == null) + { + var discard = approximateReceiveCount >= 2; + logger.LogTrace( + "PipelineJob for {JobId} not found. ApproximateReceiveCount:{Count}. {Action}", + completionMessage.JobId, approximateReceiveCount, discard ? "Discarding" : "Will retry"); + return discard; + } + + var sw = Stopwatch.StartNew(); + var dbManifest = pipelineJob.Manifest!; + + logger.LogInformation( + "Completing text pipeline for job:{JobId}, customer:{CustomerId}, manifest:{ManifestId}", + completionMessage.JobId, pipelineJob.CustomerId, pipelineJob.ManifestId); + + try + { + var stagedManifest = + await iiifS3.ReadIIIFFromS3(dbManifest, BucketLocationType.Staging, cancellationToken); + + if (stagedManifest == null) + { + logger.LogError("Staged manifest not found for {ManifestId}; cannot complete text pipeline", dbManifest.Id); + return false; + } + + if (!completionMessage.IsCompleted) + { + logger.LogWarning("Text-services job {JobId} failed: {Errors}", completionMessage.JobId, + completionMessage.Errors); + pipelineJob.Error = completionMessage.Errors; + pipelineJob.Status = PipelineJobStatus.Failed; + } + else + { + await ApplyTextServices(completionMessage.JobId, stagedManifest, cancellationToken); + pipelineJob.Status = PipelineJobStatus.Completed; + } + + pipelineJob.Finished = completionMessage.Finished?.UtcDateTime; + + await manifestStorageManager.SaveManifestInStorage(stagedManifest, dbManifest, null, + saveToStaging: false, cancellationToken); + await iiifS3.DeleteIIIFFromS3(dbManifest, true); + } + catch (Exception e) + { + logger.LogError(e, "Error completing text pipeline for job {JobId}", completionMessage.JobId); + return false; + } + + await dbContext.SaveChangesAsync(cancellationToken); + logger.LogInformation( + "Text pipeline completed for job:{JobId}, 
manifest:{ManifestId}. Elapsed:{Elapsed}ms", + completionMessage.JobId, pipelineJob.ManifestId, sw.ElapsedMilliseconds); + return true; + } + + private async Task ApplyTextServices(string jobId, Manifest stagedManifest, + CancellationToken cancellationToken) + { + var augmented = await textServicesClient.GetTextAugmentedManifest(jobId, cancellationToken); + + if (augmented?.Services == null || augmented.Services.Count == 0) + { + logger.LogDebug("No search services in text-augmented manifest for job {JobId}", jobId); + return; + } + + stagedManifest.Services ??= []; + var existingIds = new HashSet(stagedManifest.Services.Select(s => s.Id)); + foreach (var service in augmented.Services) + { + if (existingIds.Add(service.Id)) + stagedManifest.Services.Add(service); + } + + MergeContext(stagedManifest, augmented); + + logger.LogDebug("Added {Count} search service(s) to manifest for job {JobId}", + augmented.Services.Count, jobId); + } + + private static void MergeContext(Manifest target, Manifest source) + { + IEnumerable contexts = source.Context switch + { + null => [], + string str => [str], + IEnumerable enumerable => enumerable, + _ => [] + }; + + foreach (var context in contexts.Where(c => !IIIF.Presentation.Context.Presentation3Context.Equals(c))) + { + target.EnsureContext(context); + } + } + + private static int ExtractCustomerIdFromJobId(string jobId) + { + // jobId format: "{customerId}/iiif/{manifestId}" + var firstSlash = jobId.IndexOf('/'); + return firstSlash > 0 && int.TryParse(jobId[..firstSlash], out var customerId) ? 
customerId : 0; + } + + private static TextServiceJobCompletionMessage DeserializeMessage(QueueMessage message, ILogger logger) + { + try + { + return TextServiceJobCompletionMessage.FromQueueMessage(message); + } + catch (Exception ex) + { + logger.LogWarning(ex, "Could not deserialize text-service completion message"); + throw; + } + } +} \ No newline at end of file diff --git a/src/IIIFPresentation/Core/Core.csproj b/src/IIIFPresentation/Core/Core.csproj index f4781c80..fcfdfca3 100644 --- a/src/IIIFPresentation/Core/Core.csproj +++ b/src/IIIFPresentation/Core/Core.csproj @@ -7,7 +7,7 @@ - + diff --git a/src/IIIFPresentation/Core/Settings/TextServicesSettings.cs b/src/IIIFPresentation/Core/Settings/TextServicesSettings.cs new file mode 100644 index 00000000..4db529dd --- /dev/null +++ b/src/IIIFPresentation/Core/Settings/TextServicesSettings.cs @@ -0,0 +1,27 @@ +namespace Core.Settings; + +public class TextServicesSettings +{ + public const string SettingsName = "TextServices"; + + /// + /// Base URI for the text-services Builder API (POST/PUT /textbuilder) + /// + public Uri? BuilderApiUri { get; set; } + + /// + /// Base URI for the text-services Search API (GET /text-augmented/v3/) + /// + public Uri? SearchApiUri { get; set; } + + /// + /// Used as the X-Forwarded-Host header when calling /text-augmented/v3. + /// Falls back to default host if not set. + /// + public string? CustomerOrchestratorUri { get; set; } + + /// + /// Used as the X-Forwarded-Path header when calling /text-augmented/v3 + /// + public string? PathRules { get; set; } +} \ No newline at end of file diff --git a/src/IIIFPresentation/Models/API/Manifest/Pipeline.cs b/src/IIIFPresentation/Models/API/Manifest/Pipeline.cs new file mode 100644 index 00000000..fd1e5346 --- /dev/null +++ b/src/IIIFPresentation/Models/API/Manifest/Pipeline.cs @@ -0,0 +1,20 @@ +namespace Models.API.Manifest; + +public class PipelineItem +{ + public string? Name { get; set; } + public PipelineConfig? 
Config { get; set; } +} + +public class PipelineConfig +{ + public string? Action { get; set; } +} + +public static class PipelineX +{ + public static bool HasTextIndexPipeline(this PresentationManifest manifest) => + manifest.Pipeline?.Any(p => + string.Equals(p.Name, "text", StringComparison.OrdinalIgnoreCase) && + string.Equals(p.Config?.Action, "Index", StringComparison.OrdinalIgnoreCase)) == true; +} \ No newline at end of file diff --git a/src/IIIFPresentation/Models/API/Manifest/PresentationManifest.cs b/src/IIIFPresentation/Models/API/Manifest/PresentationManifest.cs index 46bc4a9f..4d84949c 100644 --- a/src/IIIFPresentation/Models/API/Manifest/PresentationManifest.cs +++ b/src/IIIFPresentation/Models/API/Manifest/PresentationManifest.cs @@ -14,7 +14,7 @@ public class PresentationManifest : IIIF.Presentation.V3.Manifest, IPresentation [ "slug", "publicId", "parent", "created", "modified", "createdBy", "modifiedBy", "flatId", "paintedResources", "space", "adjuncts", "ingesting", - "fullPath", "currentlyIngesting" + "fullPath", "currentlyIngesting", "pipeline" ]; [JsonProperty(Order = 6)] public string? Slug { get; set; } @@ -50,6 +50,8 @@ public class PresentationManifest : IIIF.Presentation.V3.Manifest, IPresentation /// Whether this manifest contains items that are currently being ingested /// [JsonIgnore] public bool CurrentlyIngesting { get; set; } + + [JsonProperty(Order = 16)] public List? Pipeline { get; set; } } /// diff --git a/src/IIIFPresentation/Models/Database/Collections/Manifest.cs b/src/IIIFPresentation/Models/Database/Collections/Manifest.cs index 957f61e6..fb96bf1d 100644 --- a/src/IIIFPresentation/Models/Database/Collections/Manifest.cs +++ b/src/IIIFPresentation/Models/Database/Collections/Manifest.cs @@ -47,6 +47,8 @@ public class Manifest : IHierarchyResource public List? CanvasPaintings { get; set; } public List? Batches { get; set; } + + public List? 
PipelineJobs { get; set; } /// /// A timestamp denoting when this batch was last processed into a user viewable format @@ -63,7 +65,23 @@ public static class ManifestX /// public static string GetDefaultSpaceName(string manifestId) => $"For manifest {manifestId} - {DateTime.UtcNow.ToString("s", CultureInfo.InvariantCulture)}"; - + + /// + /// Whether the manifest has an active DLCS batch still ingesting assets. + /// public static bool IsIngesting(this Manifest? manifest) => manifest?.Batches?.Any(m => m.Status == BatchStatus.Ingesting) ?? false; + + /// + /// Whether a text-services pipeline job is pending for this manifest. + /// + public static bool HasPendingPipelineJob(this Manifest? manifest) + => manifest?.PipelineJobs?.Any(p => p.Status == PipelineJobStatus.Queued) ?? false; + + /// + /// Whether the manifest has any outstanding background work that must complete before it reaches its final state. + /// Covers both DLCS batch ingestion and text-services pipeline jobs. + /// + public static bool HasFurtherWork(this Manifest? manifest) + => manifest.IsIngesting() || manifest.HasPendingPipelineJob(); } diff --git a/src/IIIFPresentation/Models/Database/General/PipelineJob.cs b/src/IIIFPresentation/Models/Database/General/PipelineJob.cs new file mode 100644 index 00000000..8ff21628 --- /dev/null +++ b/src/IIIFPresentation/Models/Database/General/PipelineJob.cs @@ -0,0 +1,34 @@ +using Models.Database.Collections; + +namespace Models.Database.General; + +public class PipelineJob : ICustomerEntity +{ + public int Id { get; set; } + + public required string ManifestId { get; set; } + + public required int CustomerId { get; set; } + + /// + /// Text-services job identifier, format: "{customerId}/iiif/{manifestId}" + /// + public required string TextJobId { get; set; } + + public PipelineJobStatus Status { get; set; } + + public string? Error { get; set; } + + public DateTime Created { get; set; } + + public DateTime? Finished { get; set; } + + public Manifest? 
Manifest { get; set; } +} + +public enum PipelineJobStatus +{ + Queued = 0, + Completed = 1, + Failed = 2 +} \ No newline at end of file diff --git a/src/IIIFPresentation/Models/Models.csproj b/src/IIIFPresentation/Models/Models.csproj index a80701f5..1344b551 100644 --- a/src/IIIFPresentation/Models/Models.csproj +++ b/src/IIIFPresentation/Models/Models.csproj @@ -9,7 +9,7 @@ - + diff --git a/src/IIIFPresentation/Repository/Migrations/20260612164039_AddPipelineJob.Designer.cs b/src/IIIFPresentation/Repository/Migrations/20260612164039_AddPipelineJob.Designer.cs new file mode 100644 index 00000000..833b7622 --- /dev/null +++ b/src/IIIFPresentation/Repository/Migrations/20260612164039_AddPipelineJob.Designer.cs @@ -0,0 +1,491 @@ +// +using System; +using Microsoft.EntityFrameworkCore; +using Microsoft.EntityFrameworkCore.Infrastructure; +using Microsoft.EntityFrameworkCore.Migrations; +using Microsoft.EntityFrameworkCore.Storage.ValueConversion; +using Npgsql.EntityFrameworkCore.PostgreSQL.Metadata; +using Repository; + +#nullable disable + +namespace Repository.Migrations +{ + [DbContext(typeof(PresentationContext))] + [Migration("20260612164039_AddPipelineJob")] + partial class AddPipelineJob + { + /// + protected override void BuildTargetModel(ModelBuilder modelBuilder) + { +#pragma warning disable 612, 618 + modelBuilder + .HasAnnotation("ProductVersion", "8.0.11") + .HasAnnotation("Relational:MaxIdentifierLength", 63); + + NpgsqlModelBuilderExtensions.HasPostgresExtension(modelBuilder, "citext"); + NpgsqlModelBuilderExtensions.UseIdentityByDefaultColumns(modelBuilder); + + modelBuilder.Entity("Models.Database.CanvasPainting", b => + { + b.Property("CanvasPaintingId") + .ValueGeneratedOnAdd() + .HasColumnType("integer") + .HasColumnName("canvas_painting_id"); + + NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property("CanvasPaintingId")); + + b.Property("AssetId") + .HasColumnType("text") + .HasColumnName("asset_id"); + + b.Property("CanvasLabel") 
+ .HasColumnType("text") + .HasColumnName("canvas_label"); + + b.Property("CanvasOrder") + .HasColumnType("integer") + .HasColumnName("canvas_order"); + + b.Property("CanvasOriginalId") + .HasColumnType("text") + .HasColumnName("canvas_original_id"); + + b.Property("ChoiceOrder") + .HasColumnType("integer") + .HasColumnName("choice_order"); + + b.Property("Created") + .ValueGeneratedOnAdd() + .HasColumnType("timestamp with time zone") + .HasColumnName("created") + .HasDefaultValueSql("now()"); + + b.Property("CustomerId") + .HasColumnType("integer") + .HasColumnName("customer_id"); + + b.Property("Duration") + .HasColumnType("double precision") + .HasColumnName("duration"); + + b.Property("Id") + .HasColumnType("text") + .HasColumnName("canvas_id"); + + b.Property("Ingesting") + .HasColumnType("boolean") + .HasColumnName("ingesting"); + + b.Property("Label") + .HasColumnType("jsonb") + .HasColumnName("label"); + + b.Property("ManifestId") + .IsRequired() + .HasColumnType("text") + .HasColumnName("manifest_id"); + + b.Property("Modified") + .ValueGeneratedOnAdd() + .HasColumnType("timestamp with time zone") + .HasColumnName("modified") + .HasDefaultValueSql("now()"); + + b.Property("StaticHeight") + .HasColumnType("integer") + .HasColumnName("static_height"); + + b.Property("StaticWidth") + .HasColumnType("integer") + .HasColumnName("static_width"); + + b.Property("Target") + .HasColumnType("text") + .HasColumnName("target"); + + b.Property("Thumbnail") + .HasColumnType("text") + .HasColumnName("thumbnail"); + + b.HasKey("CanvasPaintingId") + .HasName("pk_canvas_paintings"); + + b.HasIndex("ManifestId", "CustomerId") + .HasDatabaseName("ix_canvas_paintings_manifest_id_customer_id"); + + b.HasIndex("Id", "CustomerId", "ManifestId", "CanvasOrder", "ChoiceOrder") + .IsUnique() + .HasDatabaseName("ix_canvas_paintings_canvas_id_customer_id_manifest_id_canvas_o"); + + b.ToTable("canvas_paintings", (string)null); + }); + + 
modelBuilder.Entity("Models.Database.Collections.Collection", b => + { + b.Property("Id") + .HasColumnType("text") + .HasColumnName("id"); + + b.Property("CustomerId") + .HasColumnType("integer") + .HasColumnName("customer_id"); + + b.Property("Created") + .HasColumnType("timestamp with time zone") + .HasColumnName("created"); + + b.Property("CreatedBy") + .HasColumnType("text") + .HasColumnName("created_by"); + + b.Property("Etag") + .ValueGeneratedOnAddOrUpdate() + .HasColumnType("uuid") + .HasColumnName("etag") + .HasComputedColumnSql("deterministic_uuid_sha256(\"modified\", \"id\")", true); + + b.Property("IsPublic") + .HasColumnType("boolean") + .HasColumnName("is_public"); + + b.Property("IsStorageCollection") + .HasColumnType("boolean") + .HasColumnName("is_storage_collection"); + + b.Property("Label") + .HasColumnType("jsonb") + .HasColumnName("label"); + + b.Property("LockedBy") + .HasColumnType("text") + .HasColumnName("locked_by"); + + b.Property("Modified") + .HasColumnType("timestamp with time zone") + .HasColumnName("modified"); + + b.Property("ModifiedBy") + .HasColumnType("text") + .HasColumnName("modified_by"); + + b.Property("Tags") + .HasColumnType("text") + .HasColumnName("tags"); + + b.Property("Thumbnail") + .HasColumnType("text") + .HasColumnName("thumbnail"); + + b.Property("UsePath") + .HasColumnType("boolean") + .HasColumnName("use_path"); + + b.HasKey("Id", "CustomerId") + .HasName("pk_collections"); + + b.ToTable("collections", (string)null); + }); + + modelBuilder.Entity("Models.Database.Collections.Manifest", b => + { + b.Property("Id") + .HasColumnType("text") + .HasColumnName("id"); + + b.Property("CustomerId") + .HasColumnType("integer") + .HasColumnName("customer_id"); + + b.Property("Created") + .ValueGeneratedOnAdd() + .HasColumnType("timestamp with time zone") + .HasColumnName("created") + .HasDefaultValueSql("now()"); + + b.Property("CreatedBy") + .HasColumnType("text") + .HasColumnName("created_by"); + + b.Property("Etag") + 
.ValueGeneratedOnAddOrUpdate() + .HasColumnType("uuid") + .HasColumnName("etag") + .HasComputedColumnSql("deterministic_uuid_sha256(\"last_processed\", \"id\")", true); + + b.Property("Label") + .HasColumnType("text") + .HasColumnName("label"); + + b.Property("LastProcessed") + .HasColumnType("timestamp with time zone") + .HasColumnName("last_processed"); + + b.Property("Modified") + .ValueGeneratedOnAdd() + .HasColumnType("timestamp with time zone") + .HasColumnName("modified") + .HasDefaultValueSql("now()"); + + b.Property("ModifiedBy") + .HasColumnType("text") + .HasColumnName("modified_by"); + + b.Property("SpaceId") + .HasColumnType("integer") + .HasColumnName("space_id"); + + b.HasKey("Id", "CustomerId") + .HasName("pk_manifests"); + + b.ToTable("manifests", (string)null); + }); + + modelBuilder.Entity("Models.Database.General.Batch", b => + { + b.Property("Id") + .HasColumnType("integer") + .HasColumnName("id"); + + b.Property("DeliverableType") + .HasColumnType("text") + .HasColumnName("deliverable_type"); + + b.Property("CustomerId") + .HasColumnType("integer") + .HasColumnName("customer_id"); + + b.Property("Finished") + .HasColumnType("timestamp with time zone") + .HasColumnName("finished"); + + b.Property("ManifestId") + .IsRequired() + .HasColumnType("text") + .HasColumnName("manifest_id"); + + b.Property("Processed") + .HasColumnType("timestamp with time zone") + .HasColumnName("processed"); + + b.Property("Status") + .IsRequired() + .HasColumnType("text") + .HasColumnName("status"); + + b.Property("Submitted") + .HasColumnType("timestamp with time zone") + .HasColumnName("submitted"); + + b.HasKey("Id", "DeliverableType") + .HasName("pk_batches"); + + b.HasIndex("ManifestId", "CustomerId") + .HasDatabaseName("ix_batches_manifest_id_customer_id"); + + b.ToTable("batches", (string)null); + }); + + modelBuilder.Entity("Models.Database.General.Hierarchy", b => + { + b.Property("Id") + .ValueGeneratedOnAdd() + .HasColumnType("integer") + 
.HasColumnName("id"); + + NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property("Id")); + + b.Property("Canonical") + .HasColumnType("boolean") + .HasColumnName("canonical"); + + b.Property("CollectionId") + .HasColumnType("text") + .HasColumnName("collection_id"); + + b.Property("CustomerId") + .HasColumnType("integer") + .HasColumnName("customer_id"); + + b.Property("ItemsOrder") + .HasColumnType("integer") + .HasColumnName("items_order"); + + b.Property("ManifestId") + .HasColumnType("text") + .HasColumnName("manifest_id"); + + b.Property("Parent") + .HasColumnType("text") + .HasColumnName("parent"); + + b.Property("Slug") + .IsRequired() + .HasColumnType("citext") + .HasColumnName("slug"); + + b.Property("Type") + .HasColumnType("integer") + .HasColumnName("type"); + + b.HasKey("Id") + .HasName("pk_hierarchy"); + + b.HasIndex("Parent", "CustomerId") + .HasDatabaseName("ix_hierarchy_parent_customer_id"); + + b.HasIndex("CollectionId", "CustomerId", "Canonical") + .IsUnique() + .HasDatabaseName("ix_hierarchy_collection_id_customer_id_canonical") + .HasFilter("canonical is true"); + + b.HasIndex("CustomerId", "Slug", "Parent") + .IsUnique() + .HasDatabaseName("ix_hierarchy_customer_id_slug_parent"); + + b.HasIndex("ManifestId", "CustomerId", "Canonical") + .IsUnique() + .HasDatabaseName("ix_hierarchy_manifest_id_customer_id_canonical") + .HasFilter("canonical is true"); + + b.ToTable("hierarchy", null, t => + { + t.HasCheckConstraint("stop_collection_and_manifest_in_same_record", "num_nonnulls(manifest_id, collection_id) = 1"); + }); + }); + + modelBuilder.Entity("Models.Database.General.PipelineJob", b => + { + b.Property("Id") + .ValueGeneratedOnAdd() + .HasColumnType("integer") + .HasColumnName("id"); + + NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property("Id")); + + b.Property("Created") + .ValueGeneratedOnAdd() + .HasColumnType("timestamp with time zone") + .HasColumnName("created") + .HasDefaultValueSql("now()"); + + 
b.Property("CustomerId") + .HasColumnType("integer") + .HasColumnName("customer_id"); + + b.Property("Error") + .HasColumnType("text") + .HasColumnName("error"); + + b.Property("Finished") + .HasColumnType("timestamp with time zone") + .HasColumnName("finished"); + + b.Property("ManifestId") + .IsRequired() + .HasColumnType("text") + .HasColumnName("manifest_id"); + + b.Property("Status") + .IsRequired() + .HasColumnType("text") + .HasColumnName("status"); + + b.Property("TextJobId") + .IsRequired() + .HasColumnType("text") + .HasColumnName("text_job_id"); + + b.HasKey("Id") + .HasName("pk_pipeline_jobs"); + + b.HasIndex("ManifestId", "CustomerId") + .HasDatabaseName("ix_pipeline_jobs_manifest_id_customer_id"); + + b.ToTable("pipeline_jobs", (string)null); + }); + + modelBuilder.Entity("Models.Database.CanvasPainting", b => + { + b.HasOne("Models.Database.Collections.Manifest", "Manifest") + .WithMany("CanvasPaintings") + .HasForeignKey("ManifestId", "CustomerId") + .OnDelete(DeleteBehavior.Cascade) + .IsRequired() + .HasConstraintName("fk_canvas_paintings_manifests_manifest_id_customer_id"); + + b.Navigation("Manifest"); + }); + + modelBuilder.Entity("Models.Database.General.Batch", b => + { + b.HasOne("Models.Database.Collections.Manifest", "Manifest") + .WithMany("Batches") + .HasForeignKey("ManifestId", "CustomerId") + .OnDelete(DeleteBehavior.Cascade) + .IsRequired() + .HasConstraintName("fk_batches_manifests_manifest_id_customer_id"); + + b.Navigation("Manifest"); + }); + + modelBuilder.Entity("Models.Database.General.Hierarchy", b => + { + b.HasOne("Models.Database.Collections.Collection", "Collection") + .WithMany("Hierarchy") + .HasForeignKey("CollectionId", "CustomerId") + .OnDelete(DeleteBehavior.Cascade) + .HasConstraintName("fk_hierarchy_collections_collection_id_customer_id"); + + b.HasOne("Models.Database.Collections.Manifest", "Manifest") + .WithMany("Hierarchy") + .HasForeignKey("ManifestId", "CustomerId") + .OnDelete(DeleteBehavior.Cascade) + 
.HasConstraintName("fk_hierarchy_manifests_manifest_id_customer_id"); + + b.HasOne("Models.Database.Collections.Collection", "ParentCollection") + .WithMany("Children") + .HasForeignKey("Parent", "CustomerId") + .OnDelete(DeleteBehavior.NoAction) + .HasConstraintName("fk_hierarchy_collections_parent_customer_id"); + + b.Navigation("Collection"); + + b.Navigation("Manifest"); + + b.Navigation("ParentCollection"); + }); + + modelBuilder.Entity("Models.Database.General.PipelineJob", b => + { + b.HasOne("Models.Database.Collections.Manifest", "Manifest") + .WithMany("PipelineJobs") + .HasForeignKey("ManifestId", "CustomerId") + .OnDelete(DeleteBehavior.Cascade) + .IsRequired() + .HasConstraintName("fk_pipeline_jobs_manifests_manifest_id_customer_id"); + + b.Navigation("Manifest"); + }); + + modelBuilder.Entity("Models.Database.Collections.Collection", b => + { + b.Navigation("Children"); + + b.Navigation("Hierarchy"); + }); + + modelBuilder.Entity("Models.Database.Collections.Manifest", b => + { + b.Navigation("Batches"); + + b.Navigation("CanvasPaintings"); + + b.Navigation("Hierarchy"); + + b.Navigation("PipelineJobs"); + }); +#pragma warning restore 612, 618 + } + } +} diff --git a/src/IIIFPresentation/Repository/Migrations/20260612164039_AddPipelineJob.cs b/src/IIIFPresentation/Repository/Migrations/20260612164039_AddPipelineJob.cs new file mode 100644 index 00000000..0fa0ec15 --- /dev/null +++ b/src/IIIFPresentation/Repository/Migrations/20260612164039_AddPipelineJob.cs @@ -0,0 +1,53 @@ +using System; +using Microsoft.EntityFrameworkCore.Migrations; +using Npgsql.EntityFrameworkCore.PostgreSQL.Metadata; + +#nullable disable + +namespace Repository.Migrations +{ + /// + public partial class AddPipelineJob : Migration + { + /// + protected override void Up(MigrationBuilder migrationBuilder) + { + migrationBuilder.CreateTable( + name: "pipeline_jobs", + columns: table => new + { + id = table.Column(type: "integer", nullable: false) + 
.Annotation("Npgsql:ValueGenerationStrategy", NpgsqlValueGenerationStrategy.IdentityByDefaultColumn), + manifest_id = table.Column(type: "text", nullable: false), + customer_id = table.Column(type: "integer", nullable: false), + text_job_id = table.Column(type: "text", nullable: false), + status = table.Column(type: "text", nullable: false), + error = table.Column(type: "text", nullable: true), + created = table.Column(type: "timestamp with time zone", nullable: false, defaultValueSql: "now()"), + finished = table.Column(type: "timestamp with time zone", nullable: true) + }, + constraints: table => + { + table.PrimaryKey("pk_pipeline_jobs", x => x.id); + table.ForeignKey( + name: "fk_pipeline_jobs_manifests_manifest_id_customer_id", + columns: x => new { x.manifest_id, x.customer_id }, + principalTable: "manifests", + principalColumns: new[] { "id", "customer_id" }, + onDelete: ReferentialAction.Cascade); + }); + + migrationBuilder.CreateIndex( + name: "ix_pipeline_jobs_manifest_id_customer_id", + table: "pipeline_jobs", + columns: new[] { "manifest_id", "customer_id" }); + } + + /// + protected override void Down(MigrationBuilder migrationBuilder) + { + migrationBuilder.DropTable( + name: "pipeline_jobs"); + } + } +} diff --git a/src/IIIFPresentation/Repository/Migrations/PresentationContextModelSnapshot.cs b/src/IIIFPresentation/Repository/Migrations/PresentationContextModelSnapshot.cs index 7d17f64c..7dbc65e3 100644 --- a/src/IIIFPresentation/Repository/Migrations/PresentationContextModelSnapshot.cs +++ b/src/IIIFPresentation/Repository/Migrations/PresentationContextModelSnapshot.cs @@ -351,6 +351,57 @@ protected override void BuildModel(ModelBuilder modelBuilder) }); }); + modelBuilder.Entity("Models.Database.General.PipelineJob", b => + { + b.Property("Id") + .ValueGeneratedOnAdd() + .HasColumnType("integer") + .HasColumnName("id"); + + NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property("Id")); + + b.Property("Created") + 
.ValueGeneratedOnAdd() + .HasColumnType("timestamp with time zone") + .HasColumnName("created") + .HasDefaultValueSql("now()"); + + b.Property("CustomerId") + .HasColumnType("integer") + .HasColumnName("customer_id"); + + b.Property("Error") + .HasColumnType("text") + .HasColumnName("error"); + + b.Property("Finished") + .HasColumnType("timestamp with time zone") + .HasColumnName("finished"); + + b.Property("ManifestId") + .IsRequired() + .HasColumnType("text") + .HasColumnName("manifest_id"); + + b.Property("Status") + .IsRequired() + .HasColumnType("text") + .HasColumnName("status"); + + b.Property("TextJobId") + .IsRequired() + .HasColumnType("text") + .HasColumnName("text_job_id"); + + b.HasKey("Id") + .HasName("pk_pipeline_jobs"); + + b.HasIndex("ManifestId", "CustomerId") + .HasDatabaseName("ix_pipeline_jobs_manifest_id_customer_id"); + + b.ToTable("pipeline_jobs", (string)null); + }); + modelBuilder.Entity("Models.Database.CanvasPainting", b => { b.HasOne("Models.Database.Collections.Manifest", "Manifest") @@ -402,6 +453,18 @@ protected override void BuildModel(ModelBuilder modelBuilder) b.Navigation("ParentCollection"); }); + modelBuilder.Entity("Models.Database.General.PipelineJob", b => + { + b.HasOne("Models.Database.Collections.Manifest", "Manifest") + .WithMany("PipelineJobs") + .HasForeignKey("ManifestId", "CustomerId") + .OnDelete(DeleteBehavior.Cascade) + .IsRequired() + .HasConstraintName("fk_pipeline_jobs_manifests_manifest_id_customer_id"); + + b.Navigation("Manifest"); + }); + modelBuilder.Entity("Models.Database.Collections.Collection", b => { b.Navigation("Children"); @@ -416,6 +479,8 @@ protected override void BuildModel(ModelBuilder modelBuilder) b.Navigation("CanvasPaintings"); b.Navigation("Hierarchy"); + + b.Navigation("PipelineJobs"); }); #pragma warning restore 612, 618 } diff --git a/src/IIIFPresentation/Repository/PresentationContext.cs b/src/IIIFPresentation/Repository/PresentationContext.cs index ab78490c..8df1fb60 100644 --- 
a/src/IIIFPresentation/Repository/PresentationContext.cs +++ b/src/IIIFPresentation/Repository/PresentationContext.cs @@ -38,6 +38,8 @@ public PresentationContext(DbContextOptions options, ICusto public virtual DbSet Batches { get; set; } + public virtual DbSet PipelineJobs { get; set; } + protected override void ConfigureConventions(ModelConfigurationBuilder configurationBuilder) { configurationBuilder @@ -156,6 +158,25 @@ protected override void OnModelCreating(ModelBuilder modelBuilder) d => d.ToString(), d => d.GetEnumFromString(true)); }); + + modelBuilder.Entity(entity => + { + entity.HasKey(p => p.Id); + + entity + .HasOne(p => p.Manifest) + .WithMany(m => m.PipelineJobs) + .HasForeignKey(p => new { p.ManifestId, p.CustomerId }) + .OnDelete(DeleteBehavior.Cascade); + + entity.Property(e => e.Status) + .IsRequired() + .HasConversion( + s => s.ToString(), + s => s.GetEnumFromString(true)); + + entity.Property(p => p.Created).HasDefaultValueSql("now()"); + }); } private void ApplyGlobalFilters(ModelBuilder builder) diff --git a/src/IIIFPresentation/Repository/Repository.csproj b/src/IIIFPresentation/Repository/Repository.csproj index e5bda709..10935b11 100644 --- a/src/IIIFPresentation/Repository/Repository.csproj +++ b/src/IIIFPresentation/Repository/Repository.csproj @@ -10,7 +10,7 @@ - + diff --git a/src/IIIFPresentation/Services.Tests/IIIFSerialisationXTests.cs b/src/IIIFPresentation/Services.Tests/IIIFSerialisationXTests.cs index 8ff51e86..ea0575fd 100644 --- a/src/IIIFPresentation/Services.Tests/IIIFSerialisationXTests.cs +++ b/src/IIIFPresentation/Services.Tests/IIIFSerialisationXTests.cs @@ -192,7 +192,7 @@ public void ToCollection_StrippedPropertiesAbsentFromSerialisation() [Fact] public void ToManifest_PresentationPropertyKeys_CoversAllPresentationManifestProperties() - => PresentationManifest.PresentationPropertyKeys.Should().HaveCount(14); + => PresentationManifest.PresentationPropertyKeys.Should().HaveCount(15); [Fact] public void 
ToCollection_PresentationPropertyKeys_CoversAllPresentationCollectionProperties() diff --git a/src/IIIFPresentation/Services.Tests/Manifests/ManifestXTests.cs b/src/IIIFPresentation/Services.Tests/Manifests/ManifestXTests.cs new file mode 100644 index 00000000..ad9b71a5 --- /dev/null +++ b/src/IIIFPresentation/Services.Tests/Manifests/ManifestXTests.cs @@ -0,0 +1,103 @@ +using Models.Database.Collections; +using Models.Database.General; + +namespace Services.Tests.Manifests; + +public class ManifestXTests +{ + [Fact] + public void IsIngesting_ReturnsFalse_WhenManifestIsNull() + => ((Manifest?)null).IsIngesting().Should().BeFalse(); + + [Fact] + public void IsIngesting_ReturnsFalse_WhenBatchesIsNull() + => new Manifest { Id = "x", CustomerId = 1 }.IsIngesting().Should().BeFalse(); + + [Fact] + public void IsIngesting_ReturnsFalse_WhenNoBatches() + => new Manifest { Id = "x", CustomerId = 1, Batches = [] }.IsIngesting().Should().BeFalse(); + + [Theory] + [InlineData(BatchStatus.Completed)] + [InlineData(BatchStatus.Unknown)] + public void IsIngesting_ReturnsFalse_WhenBatchIsNotIngesting(BatchStatus status) + => new Manifest { Id = "x", CustomerId = 1, Batches = [new Batch { Id = 1, ManifestId = "x", Status = status }] } + .IsIngesting().Should().BeFalse(); + + [Fact] + public void IsIngesting_ReturnsTrue_WhenBatchIsIngesting() + => new Manifest { Id = "x", CustomerId = 1, Batches = [new Batch { Id = 1, ManifestId = "x", Status = BatchStatus.Ingesting }] } + .IsIngesting().Should().BeTrue(); + + [Fact] + public void HasPendingPipelineJob_ReturnsFalse_WhenManifestIsNull() + => ((Manifest?)null).HasPendingPipelineJob().Should().BeFalse(); + + [Fact] + public void HasPendingPipelineJob_ReturnsFalse_WhenPipelineJobsIsNull() + => new Manifest { Id = "x", CustomerId = 1 }.HasPendingPipelineJob().Should().BeFalse(); + + [Fact] + public void HasPendingPipelineJob_ReturnsFalse_WhenNoPipelineJobs() + => ManifestWithJobs().HasPendingPipelineJob().Should().BeFalse(); + + [Theory] 
+ [InlineData(PipelineJobStatus.Completed)] + [InlineData(PipelineJobStatus.Failed)] + public void HasPendingPipelineJob_ReturnsFalse_WhenJobIsNotQueued(PipelineJobStatus status) + => ManifestWithJobs(status).HasPendingPipelineJob().Should().BeFalse(); + + [Fact] + public void HasPendingPipelineJob_ReturnsTrue_WhenJobIsQueued() + => ManifestWithJobs(PipelineJobStatus.Queued).HasPendingPipelineJob().Should().BeTrue(); + + [Fact] + public void HasFurtherWork_ReturnsFalse_WhenNoIngestingBatchAndNoPendingJob() + => new Manifest { Id = "x", CustomerId = 1 }.HasFurtherWork().Should().BeFalse(); + + [Fact] + public void HasFurtherWork_ReturnsTrue_WhenBatchIsIngesting() + { + var manifest = new Manifest + { + Id = "x", CustomerId = 1, + Batches = [new Batch { Id = 1, ManifestId = "x", Status = BatchStatus.Ingesting }] + }; + + manifest.HasFurtherWork().Should().BeTrue(); + } + + [Fact] + public void HasFurtherWork_ReturnsTrue_WhenPipelineJobIsQueued() + => ManifestWithJobs(PipelineJobStatus.Queued).HasFurtherWork().Should().BeTrue(); + + [Fact] + public void HasFurtherWork_ReturnsFalse_WhenBatchCompletedAndJobCompleted() + { + var manifest = new Manifest + { + Id = "x", CustomerId = 1, + Batches = [new Batch { Id = 1, ManifestId = "x", Status = BatchStatus.Completed }], + PipelineJobs = [new PipelineJob { ManifestId = "x", CustomerId = 1, TextJobId = "j", Status = PipelineJobStatus.Completed }] + }; + + manifest.HasFurtherWork().Should().BeFalse(); + } + + private static Manifest ManifestWithJobs(PipelineJobStatus? 
status = null) + { + var manifest = new Manifest { Id = "x", CustomerId = 1 }; + if (status.HasValue) + { + manifest.PipelineJobs = + [ + new PipelineJob { ManifestId = "x", CustomerId = 1, TextJobId = "j", Status = status.Value } + ]; + } + else + { + manifest.PipelineJobs = []; + } + return manifest; + } +} \ No newline at end of file diff --git a/src/IIIFPresentation/Services.Tests/Manifests/PipelineXTests.cs b/src/IIIFPresentation/Services.Tests/Manifests/PipelineXTests.cs new file mode 100644 index 00000000..778a75c8 --- /dev/null +++ b/src/IIIFPresentation/Services.Tests/Manifests/PipelineXTests.cs @@ -0,0 +1,84 @@ +using Models.API.Manifest; + +namespace Services.Tests.Manifests; + +public class PipelineXTests +{ + [Fact] + public void HasTextIndexPipeline_ReturnsFalse_WhenPipelineIsNull() + { + var manifest = new PresentationManifest { Pipeline = null }; + + manifest.HasTextIndexPipeline().Should().BeFalse(); + } + + [Fact] + public void HasTextIndexPipeline_ReturnsFalse_WhenPipelineIsEmpty() + { + var manifest = new PresentationManifest { Pipeline = [] }; + + manifest.HasTextIndexPipeline().Should().BeFalse(); + } + + [Fact] + public void HasTextIndexPipeline_ReturnsFalse_WhenNameIsNotText() + { + var manifest = new PresentationManifest + { + Pipeline = [new PipelineItem { Name = "ocr", Config = new PipelineConfig { Action = "Index" } }] + }; + + manifest.HasTextIndexPipeline().Should().BeFalse(); + } + + [Fact] + public void HasTextIndexPipeline_ReturnsFalse_WhenActionIsNotIndex() + { + var manifest = new PresentationManifest + { + Pipeline = [new PipelineItem { Name = "text", Config = new PipelineConfig { Action = "Delete" } }] + }; + + manifest.HasTextIndexPipeline().Should().BeFalse(); + } + + [Fact] + public void HasTextIndexPipeline_ReturnsFalse_WhenConfigIsNull() + { + var manifest = new PresentationManifest + { + Pipeline = [new PipelineItem { Name = "text", Config = null }] + }; + + manifest.HasTextIndexPipeline().Should().BeFalse(); + } + + 
[Theory] + [InlineData("text", "Index")] + [InlineData("TEXT", "INDEX")] + [InlineData("Text", "index")] + public void HasTextIndexPipeline_ReturnsTrue_CaseInsensitive(string name, string action) + { + var manifest = new PresentationManifest + { + Pipeline = [new PipelineItem { Name = name, Config = new PipelineConfig { Action = action } }] + }; + + manifest.HasTextIndexPipeline().Should().BeTrue(); + } + + [Fact] + public void HasTextIndexPipeline_ReturnsTrue_WhenOneOfMultipleItemsMatches() + { + var manifest = new PresentationManifest + { + Pipeline = + [ + new PipelineItem { Name = "other", Config = new PipelineConfig { Action = "Do" } }, + new PipelineItem { Name = "text", Config = new PipelineConfig { Action = "Index" } } + ] + }; + + manifest.HasTextIndexPipeline().Should().BeTrue(); + } +} \ No newline at end of file diff --git a/src/IIIFPresentation/Services.Tests/TextServices/TextServicesClientTests.cs b/src/IIIFPresentation/Services.Tests/TextServices/TextServicesClientTests.cs new file mode 100644 index 00000000..e33e1a9b --- /dev/null +++ b/src/IIIFPresentation/Services.Tests/TextServices/TextServicesClientTests.cs @@ -0,0 +1,132 @@ +using System.Net; +using System.Text; +using Core.Settings; +using IIIF.Presentation.V3; +using IIIF.Serialisation; +using Microsoft.Extensions.Logging.Abstractions; +using Microsoft.Extensions.Options; +using Services.TextServices; + +namespace Services.Tests.TextServices; + +public class TextServicesClientTests +{ + private readonly TestMessageHandler messageHandler = new(); + + private TextServicesClient CreateSut(TextServicesSettings settings) => + new(new HttpClient(messageHandler), Options.Create(settings), new NullLogger()); + + [Fact] + public async Task CreateOrUpdateJob_ReturnsTrue_WhenPostSucceeds() + { + var sut = CreateSut(new TextServicesSettings { BuilderApiUri = new Uri("http://text-services/") }); + messageHandler.Enqueue(HttpStatusCode.OK); + + var result = await sut.CreateOrUpdateJob("1/iiif/my-manifest", 
"s3://bucket/key"); + + result.Should().BeTrue(); + messageHandler.Requests.Single().Method.Should().Be(HttpMethod.Post); + } + + [Fact] + public async Task CreateOrUpdateJob_FallsBackToPut_WhenPostReturns409() + { + var sut = CreateSut(new TextServicesSettings { BuilderApiUri = new Uri("http://text-services/") }); + messageHandler.Enqueue(HttpStatusCode.Conflict); + messageHandler.Enqueue(HttpStatusCode.OK); + + var result = await sut.CreateOrUpdateJob("1/iiif/my-manifest", "s3://bucket/key"); + + result.Should().BeTrue(); + messageHandler.Requests.Should().HaveCount(2); + messageHandler.Requests[0].Method.Should().Be(HttpMethod.Post); + messageHandler.Requests[1].Method.Should().Be(HttpMethod.Put); + } + + [Fact] + public async Task CreateOrUpdateJob_ReturnsFalse_WhenBuilderApiUriNotConfigured() + { + var sut = CreateSut(new TextServicesSettings { BuilderApiUri = null }); + + var result = await sut.CreateOrUpdateJob("1/iiif/my-manifest", "s3://bucket/key"); + + result.Should().BeFalse(); + messageHandler.Requests.Should().BeEmpty(); + } + + [Fact] + public async Task GetTextAugmentedManifest_ReturnsNull_WhenSearchApiUriNotConfigured() + { + var sut = CreateSut(new TextServicesSettings { SearchApiUri = null }); + + var result = await sut.GetTextAugmentedManifest("1/iiif/my-manifest"); + + result.Should().BeNull(); + messageHandler.Requests.Should().BeEmpty(); + } + + [Fact] + public async Task GetTextAugmentedManifest_ReturnsNull_When404() + { + var sut = CreateSut(new TextServicesSettings { SearchApiUri = new Uri("http://search-api/") }); + messageHandler.Enqueue(HttpStatusCode.NotFound); + + var result = await sut.GetTextAugmentedManifest("1/iiif/my-manifest"); + + result.Should().BeNull(); + } + + [Fact] + public async Task GetTextAugmentedManifest_ReturnsDeserializedManifest_WhenSuccessful() + { + var sut = CreateSut(new TextServicesSettings { SearchApiUri = new Uri("http://search-api/") }); + var manifest = new Manifest { Id = "https://example.com/manifest" 
}; + messageHandler.Enqueue(HttpStatusCode.OK, manifest.AsJson()); + + var result = await sut.GetTextAugmentedManifest("1/iiif/my-manifest"); + + result.Should().NotBeNull(); + result!.Id.Should().Be("https://example.com/manifest"); + } + + [Fact] + public async Task GetTextAugmentedManifest_SetsForwardedHeaders_WhenConfigured() + { + var sut = CreateSut(new TextServicesSettings + { + SearchApiUri = new Uri("http://search-api/"), + CustomerOrchestratorUri = "orchestrator.example.com", + PathRules = "/path/rules" + }); + messageHandler.Enqueue(HttpStatusCode.OK, new Manifest().AsJson()); + + await sut.GetTextAugmentedManifest("1/iiif/my-manifest"); + + var request = messageHandler.Requests.Single(); + request.Headers.GetValues("X-Forwarded-Host").Single().Should().Be("orchestrator.example.com"); + request.Headers.GetValues("X-Forwarded-Path").Single().Should().Be("/path/rules"); + } + + private class TestMessageHandler : HttpMessageHandler + { + private readonly Queue responses = new(); + public List Requests { get; } = []; + + public void Enqueue(HttpStatusCode statusCode, string? content = null) + { + var response = new HttpResponseMessage(statusCode); + if (content != null) + response.Content = new StringContent(content, Encoding.UTF8, "application/json"); + responses.Enqueue(response); + } + + protected override Task SendAsync(HttpRequestMessage request, + CancellationToken cancellationToken) + { + Requests.Add(request); + return Task.FromResult(responses.Count > 0 + ? 
responses.Dequeue() + : new HttpResponseMessage(HttpStatusCode.OK)); + } + } +} \ No newline at end of file diff --git a/src/IIIFPresentation/Services/ServiceCollectionX.cs b/src/IIIFPresentation/Services/ServiceCollectionX.cs index 8bbc2c13..f7843589 100644 --- a/src/IIIFPresentation/Services/ServiceCollectionX.cs +++ b/src/IIIFPresentation/Services/ServiceCollectionX.cs @@ -1,8 +1,10 @@ -using Core.Web; +using Core.Settings; +using Core.Web; using DLCS; using Microsoft.AspNetCore.Builder; using Microsoft.Extensions.DependencyInjection; using Services.Manifests.Settings; +using Services.TextServices; namespace Services; @@ -16,4 +18,10 @@ public static void RegisterSharedServiceSettings(this WebApplicationBuilder buil builder.Services.Configure(typedPathTemplateOptions); builder.Services.Configure(builder.Configuration.GetSection(ServicesSettings.SettingsName)); } + + public static IServiceCollection AddTextServicesClient(this IServiceCollection services) + { + services.AddHttpClient(); + return services; + } } diff --git a/src/IIIFPresentation/Services/Services.csproj b/src/IIIFPresentation/Services/Services.csproj index efcb37b6..3d153e77 100644 --- a/src/IIIFPresentation/Services/Services.csproj +++ b/src/IIIFPresentation/Services/Services.csproj @@ -8,7 +8,7 @@ - + diff --git a/src/IIIFPresentation/Services/TextServices/ITextServicesClient.cs b/src/IIIFPresentation/Services/TextServices/ITextServicesClient.cs new file mode 100644 index 00000000..17c61be3 --- /dev/null +++ b/src/IIIFPresentation/Services/TextServices/ITextServicesClient.cs @@ -0,0 +1,20 @@ +using IIIF.Presentation.V3; + +namespace Services.TextServices; + +public interface ITextServicesClient +{ + /// + /// Create a new text-builder job, or reprocess an existing one. + /// + /// Job identifier in format "{customerId}/iiif/{manifestId}" + /// S3 URI of the staged manifest, e.g. "s3://bucket/staging/..." 
+ Task CreateOrUpdateJob(string jobId, string sourceS3Uri, CancellationToken cancellationToken = default); + + /// + /// Retrieve the text-augmented manifest for a completed job. + /// Returns null if the job produced no text resources. + /// + /// Job identifier in format "{customerId}/iiif/{manifestId}" + Task GetTextAugmentedManifest(string jobId, CancellationToken cancellationToken = default); +} \ No newline at end of file diff --git a/src/IIIFPresentation/Services/TextServices/TextServicesClient.cs b/src/IIIFPresentation/Services/TextServices/TextServicesClient.cs new file mode 100644 index 00000000..398f36f1 --- /dev/null +++ b/src/IIIFPresentation/Services/TextServices/TextServicesClient.cs @@ -0,0 +1,93 @@ +using System.Net; +using System.Text; +using System.Text.Json; +using System.Text.Json.Serialization; +using Core.Settings; +using IIIF.Presentation.V3; +using IIIF.Serialisation; +using Microsoft.Extensions.Logging; +using Microsoft.Extensions.Options; + +namespace Services.TextServices; + +public class TextServicesClient( + HttpClient httpClient, + IOptions options, + ILogger logger) : ITextServicesClient +{ + private static readonly JsonSerializerOptions JsonOptions = new(JsonSerializerDefaults.Web) + { + DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull + }; + + // Search=1, Autocomplete=2, TextAugmented=16 + private const int InitialServices = 19; + + public async Task CreateOrUpdateJob(string jobId, string sourceS3Uri, + CancellationToken cancellationToken = default) + { + var settings = options.Value; + if (settings.BuilderApiUri == null) + { + logger.LogWarning("TextServices BuilderApiUri is not configured; skipping job creation for {JobId}", jobId); + return false; + } + + var request = new { id = jobId, sourceUri = sourceS3Uri, services = InitialServices }; + var content = new StringContent(JsonSerializer.Serialize(request, JsonOptions), Encoding.UTF8, "application/json"); + + var postUri = new Uri(settings.BuilderApiUri, 
"textbuilder"); + var response = await httpClient.PostAsync(postUri, content, cancellationToken); + + if (response.StatusCode == HttpStatusCode.Conflict) + { + logger.LogDebug("Text-services job {JobId} already exists, reprocessing", jobId); + var putUri = new Uri(settings.BuilderApiUri, $"textbuilder/{Uri.EscapeDataString(jobId)}"); + response = await httpClient.PutAsync(putUri, null, cancellationToken); + } + + if (response.IsSuccessStatusCode) + { + logger.LogDebug("Text-services job {JobId} enqueued successfully", jobId); + return true; + } + + logger.LogError("Failed to create/update text-services job {JobId}: {StatusCode}", jobId, response.StatusCode); + return false; + } + + public async Task GetTextAugmentedManifest(string jobId, + CancellationToken cancellationToken = default) + { + var settings = options.Value; + if (settings.SearchApiUri == null) + { + logger.LogWarning("TextServices SearchApiUri is not configured; cannot retrieve augmented manifest for {JobId}", jobId); + return null; + } + + var uri = new Uri(settings.SearchApiUri, $"text-augmented/v3/{jobId}"); + + using var request = new HttpRequestMessage(HttpMethod.Get, uri); + if (!string.IsNullOrEmpty(settings.CustomerOrchestratorUri)) + request.Headers.TryAddWithoutValidation("X-Forwarded-Host", settings.CustomerOrchestratorUri); + if (!string.IsNullOrEmpty(settings.PathRules)) + request.Headers.TryAddWithoutValidation("X-Forwarded-Path", settings.PathRules); + + var response = await httpClient.SendAsync(request, cancellationToken); + + if (response.StatusCode == HttpStatusCode.NotFound) + { + logger.LogDebug("No text-augmented manifest found for job {JobId}", jobId); + return null; + } + + if (!response.IsSuccessStatusCode) + { + logger.LogError("Failed to retrieve text-augmented manifest for job {JobId}: {StatusCode}", jobId, response.StatusCode); + return null; + } + + return (await response.Content.ReadAsStreamAsync(cancellationToken)).FromJsonStream(); + } +} \ No newline at end of file 
From 7504e59d16509d33ad38607b00acaaf7c855e58f Mon Sep 17 00:00:00 2001 From: "jack.lewis" Date: Mon, 15 Jun 2026 16:31:59 +0100 Subject: [PATCH 02/18] Make PipelineJob more generic --- .../Manifest/ManifestWriteServiceTests.cs | 14 +++---- .../API.Tests/Integration/GetManifestTests.cs | 12 ++++-- .../Features/Manifest/ManifestWriteService.cs | 42 +++++++++---------- .../Storage/Helpers/PresentationContextX.cs | 12 ++++-- ...ServiceJobCompletionMessageHandlerTests.cs | 14 ++++--- .../TextServiceJobCompletionMessageHandler.cs | 33 +++++++++++---- .../Models/Database/Collections/Manifest.cs | 1 + .../Models/Database/General/PipelineJob.cs | 30 ++++++++----- ...20260615144249_AddPipelineJob.Designer.cs} | 36 ++++++---------- ...ob.cs => 20260615144249_AddPipelineJob.cs} | 16 ++----- .../PresentationContextModelSnapshot.cs | 34 +++++---------- .../Repository/PresentationContext.cs | 18 +++++--- .../Manifests/ManifestXTests.cs | 4 +- .../Manifests/PipelineJobXTests.cs | 35 ++++++++++++++++ .../TextServices/TextServicesClientTests.cs | 35 ++++++++++++++-- .../TextServices/ITextServicesClient.cs | 10 +++-- .../TextServices/TextServicesClient.cs | 5 ++- 17 files changed, 214 insertions(+), 137 deletions(-) rename src/IIIFPresentation/Repository/Migrations/{20260612164039_AddPipelineJob.Designer.cs => 20260615144249_AddPipelineJob.Designer.cs} (95%) rename src/IIIFPresentation/Repository/Migrations/{20260612164039_AddPipelineJob.cs => 20260615144249_AddPipelineJob.cs} (67%) create mode 100644 src/IIIFPresentation/Services.Tests/Manifests/PipelineJobXTests.cs diff --git a/src/IIIFPresentation/API.Tests/Features/Manifest/ManifestWriteServiceTests.cs b/src/IIIFPresentation/API.Tests/Features/Manifest/ManifestWriteServiceTests.cs index 3ea7fbfa..f59c2244 100644 --- a/src/IIIFPresentation/API.Tests/Features/Manifest/ManifestWriteServiceTests.cs +++ b/src/IIIFPresentation/API.Tests/Features/Manifest/ManifestWriteServiceTests.cs @@ -1160,14 +1160,14 @@ public async Task 
Create_CallsTextServicesAndCreatesPipelineJob_WhenManifestHasP var result = await sut.Create(request, CancellationToken.None); // Assert - A.CallTo(() => textServicesClient.CreateOrUpdateJob(A._, A._, A._)) + A.CallTo(() => textServicesClient.CreateOrUpdateJob(A._, A._, A._, A._)) .MustHaveHappenedOnceExactly(); var flatId = result.Entity.FlatId; - var pipelineJob = presentationContext.PipelineJobs.FirstOrDefault(p => p.ManifestId == flatId); + var pipelineJob = presentationContext.PipelineJobs.FirstOrDefault(p => p.ResourceId == flatId); pipelineJob.Should().NotBeNull(); pipelineJob!.Status.Should().Be(PipelineJobStatus.Queued); - pipelineJob.TextJobId.Should().Be($"{Customer}/iiif/{flatId}"); + pipelineJob.GetJobId().Should().Be($"{Customer}/iiif/{flatId}"); } [Fact] @@ -1186,7 +1186,7 @@ public async Task Create_DoesNotCallTextServices_WhenManifestHasNoPipeline() await sut.Create(request, CancellationToken.None); // Assert - A.CallTo(() => textServicesClient.CreateOrUpdateJob(A._, A._, A._)) + A.CallTo(() => textServicesClient.CreateOrUpdateJob(A._, A._, A._, A._)) .MustNotHaveHappened(); } @@ -1228,7 +1228,7 @@ public async Task Create_ResetsPipelineJob_WhenJobAlreadyExistsForManifest() var flatId = firstResult.Entity.FlatId; // Manually mark the job as completed (simulating a prior run) - var existingJob = presentationContext.PipelineJobs.First(p => p.ManifestId == flatId); + var existingJob = presentationContext.PipelineJobs.First(p => p.ResourceId == flatId); presentationContext.Entry(existingJob).State = Microsoft.EntityFrameworkCore.EntityState.Detached; // Second create (update path) — use the existing slug/manifest with pipeline again @@ -1245,10 +1245,10 @@ public async Task Create_ResetsPipelineJob_WhenJobAlreadyExistsForManifest() // Assert result.WriteResult.Should().Be(WriteResult.Accepted); - A.CallTo(() => textServicesClient.CreateOrUpdateJob(A._, A._, A._)) + A.CallTo(() => textServicesClient.CreateOrUpdateJob(A._, A._, A._, A._)) 
.MustHaveHappenedTwiceExactly(); - var jobs = presentationContext.PipelineJobs.Where(p => p.ManifestId == flatId).ToList(); + var jobs = presentationContext.PipelineJobs.Where(p => p.ResourceId == flatId).ToList(); jobs.Should().HaveCount(1, "resubmit should reset existing job, not create a second one"); jobs[0].Status.Should().Be(PipelineJobStatus.Queued); } diff --git a/src/IIIFPresentation/API.Tests/Integration/GetManifestTests.cs b/src/IIIFPresentation/API.Tests/Integration/GetManifestTests.cs index 9426a2b0..b8fa4b55 100644 --- a/src/IIIFPresentation/API.Tests/Integration/GetManifestTests.cs +++ b/src/IIIFPresentation/API.Tests/Integration/GetManifestTests.cs @@ -451,9 +451,11 @@ public async Task Get_IiifManifest_Flat_ReturnsAccepted_WhenPipelineJobQueued() var dbManifest = await dbContext.Manifests.AddTestManifest(id); await dbContext.PipelineJobs.AddAsync(new PipelineJob { - ManifestId = id, + ResourceId = id, + ResourceType = ResourceType.IIIFManifest, + JobType = PipelineJobType.TextService, CustomerId = 1, - TextJobId = $"1/iiif/{id}", + Status = PipelineJobStatus.Queued, Created = DateTime.UtcNow }); @@ -487,9 +489,11 @@ public async Task Get_IiifManifest_Flat_ReturnsOK_WhenPipelineJobCompleted() var dbManifest = await dbContext.Manifests.AddTestManifest(id); await dbContext.PipelineJobs.AddAsync(new PipelineJob { - ManifestId = id, + ResourceId = id, + ResourceType = ResourceType.IIIFManifest, + JobType = PipelineJobType.TextService, CustomerId = 1, - TextJobId = $"1/iiif/{id}", + Status = PipelineJobStatus.Completed, Created = DateTime.UtcNow, Finished = DateTime.UtcNow diff --git a/src/IIIFPresentation/API/Features/Manifest/ManifestWriteService.cs b/src/IIIFPresentation/API/Features/Manifest/ManifestWriteService.cs index 6a8137ed..5148058b 100644 --- a/src/IIIFPresentation/API/Features/Manifest/ManifestWriteService.cs +++ b/src/IIIFPresentation/API/Features/Manifest/ManifestWriteService.cs @@ -483,32 +483,28 @@ await 
manifestStorageManager.SaveManifestInStorage(iiifManifest, dbManifest, ori private async Task SubmitTextPipelineJob(DbManifest dbManifest, CancellationToken cancellationToken) { - var stagingKey = dbManifest.GetResourceBucketKey(BucketLocationType.Staging); - var s3Uri = $"s3://{awsOptions.Value.S3.StorageBucket}/{stagingKey}"; - var jobId = $"{dbManifest.CustomerId}/iiif/{dbManifest.Id}"; - - await textServicesClient.CreateOrUpdateJob(jobId, s3Uri, cancellationToken); - var existing = dbContext.PipelineJobs - .FirstOrDefault(p => p.ManifestId == dbManifest.Id && p.CustomerId == dbManifest.CustomerId); + .FirstOrDefault(p => p.ResourceId == dbManifest.Id && p.ResourceType == ResourceType.IIIFManifest + && p.CustomerId == dbManifest.CustomerId); - if (existing != null) - { - existing.Status = PipelineJobStatus.Queued; - existing.Error = null; - existing.Finished = null; - } - else + var job = existing ?? new PipelineJob { - await dbContext.PipelineJobs.AddAsync(new PipelineJob - { - ManifestId = dbManifest.Id, - CustomerId = dbManifest.CustomerId, - TextJobId = jobId, - Status = PipelineJobStatus.Queued, - Created = DateTime.UtcNow - }, cancellationToken); - } + ResourceId = dbManifest.Id, + ResourceType = ResourceType.IIIFManifest, + JobType = PipelineJobType.TextService, + CustomerId = dbManifest.CustomerId, + Created = DateTime.UtcNow + }; + + await textServicesClient.CreateOrUpdateJob(job, awsOptions.Value.S3.StorageBucket, + dbManifest.GetResourceBucketKey(BucketLocationType.Staging), cancellationToken); + + job.Status = PipelineJobStatus.Queued; + job.Error = null; + job.Finished = null; + + if (existing == null) + await dbContext.PipelineJobs.AddAsync(job, cancellationToken); } /// diff --git a/src/IIIFPresentation/API/Features/Storage/Helpers/PresentationContextX.cs b/src/IIIFPresentation/API/Features/Storage/Helpers/PresentationContextX.cs index 816d987f..8bba3eb0 100644 --- a/src/IIIFPresentation/API/Features/Storage/Helpers/PresentationContextX.cs +++ 
b/src/IIIFPresentation/API/Features/Storage/Helpers/PresentationContextX.cs @@ -69,7 +69,7 @@ public static class PresentationContextX /// Whether the Batches records should be included /// A cancellation token /// The retrieved collection - public static Task RetrieveManifestAsync(this PresentationContext dbContext, + public static async Task RetrieveManifestAsync(this PresentationContext dbContext, string manifestId, bool tracked = false, bool withCanvasPaintings = true, bool withBatches = false, bool withPipelineJobs = false, CancellationToken cancellationToken = default) { @@ -85,12 +85,16 @@ public static class PresentationContextX dbContextManifests = dbContextManifests.Include(m => m.Batches); } - if (withPipelineJobs) + var manifest = await dbContextManifests.Retrieve(manifestId, tracked, cancellationToken); + + if (withPipelineJobs && manifest != null) { - dbContextManifests = dbContextManifests.Include(m => m.PipelineJobs); + manifest.PipelineJobs = await dbContext.PipelineJobs + .Where(p => p.ResourceId == manifest.Id && p.ResourceType == ResourceType.IIIFManifest) + .ToListAsync(cancellationToken); } - return dbContextManifests.Retrieve(manifestId, tracked, cancellationToken); + return manifest; } /// diff --git a/src/IIIFPresentation/BackgroundHandler.Tests/TextCompletion/TextServiceJobCompletionMessageHandlerTests.cs b/src/IIIFPresentation/BackgroundHandler.Tests/TextCompletion/TextServiceJobCompletionMessageHandlerTests.cs index a40621a8..e1bda26c 100644 --- a/src/IIIFPresentation/BackgroundHandler.Tests/TextCompletion/TextServiceJobCompletionMessageHandlerTests.cs +++ b/src/IIIFPresentation/BackgroundHandler.Tests/TextCompletion/TextServiceJobCompletionMessageHandlerTests.cs @@ -108,7 +108,7 @@ public async Task HandleMessage_UpdatesStatusToFailed_AndSavesManifest_WhenJobFa (await sut.HandleMessage(message, CancellationToken.None)).Should().BeTrue(); - var job = dbContext.PipelineJobs.Single(p => p.TextJobId == jobId); + var job = 
dbContext.PipelineJobs.Single(p => p.ResourceId == manifestId); job.Status.Should().Be(PipelineJobStatus.Failed); job.Error.Should().Be("OCR timed out"); @@ -137,7 +137,7 @@ public async Task HandleMessage_UpdatesStatusToCompleted_AndSavesManifest_WhenJo (await sut.HandleMessage(message, CancellationToken.None)).Should().BeTrue(); - var job = dbContext.PipelineJobs.Single(p => p.TextJobId == jobId); + var job = dbContext.PipelineJobs.Single(p => p.ResourceId == manifestId); job.Status.Should().Be(PipelineJobStatus.Completed); job.Error.Should().BeNull(); @@ -280,7 +280,7 @@ public async Task HandleMessage_SetsFinishedTimestamp_WhenJobCompletes() await sut.HandleMessage(CreateMessage(jobId, "Completed"), CancellationToken.None); - var job = dbContext.PipelineJobs.Single(p => p.TextJobId == jobId); + var job = dbContext.PipelineJobs.Single(p => p.ResourceId == manifestId); job.Finished.Should().Be(new DateTime(2024, 6, 12, 10, 0, 0, DateTimeKind.Utc)); } @@ -296,7 +296,7 @@ public async Task HandleMessage_SetsFinishedTimestamp_WhenJobFails() await sut.HandleMessage(CreateMessage(jobId, "Failed", errors: "OCR error"), CancellationToken.None); - var job = dbContext.PipelineJobs.Single(p => p.TextJobId == jobId); + var job = dbContext.PipelineJobs.Single(p => p.ResourceId == manifestId); job.Finished.Should().Be(new DateTime(2024, 6, 12, 10, 0, 0, DateTimeKind.Utc)); } @@ -306,9 +306,11 @@ private async Task SetupManifestWithPipelineJob(string manifestId, string jobId) var manifest = manifestEntry.Entity; await dbContext.PipelineJobs.AddAsync(new PipelineJob { - ManifestId = manifest.Id, + ResourceId = manifest.Id, + ResourceType = ResourceType.IIIFManifest, + JobType = PipelineJobType.TextService, CustomerId = manifest.CustomerId, - TextJobId = jobId, + Status = PipelineJobStatus.Queued, Created = DateTime.UtcNow }); diff --git a/src/IIIFPresentation/BackgroundHandler/TextCompletion/TextServiceJobCompletionMessageHandler.cs 
b/src/IIIFPresentation/BackgroundHandler/TextCompletion/TextServiceJobCompletionMessageHandler.cs index 812ba396..991511ac 100644 --- a/src/IIIFPresentation/BackgroundHandler/TextCompletion/TextServiceJobCompletionMessageHandler.cs +++ b/src/IIIFPresentation/BackgroundHandler/TextCompletion/TextServiceJobCompletionMessageHandler.cs @@ -44,10 +44,10 @@ public async Task HandleMessage(QueueMessage message, CancellationToken ca private async Task TryCompleteManifest(TextServiceJobCompletionMessage completionMessage, int approximateReceiveCount, CancellationToken cancellationToken) { + var resourceId = ExtractResourceIdFromJobId(completionMessage.JobId); var pipelineJob = await dbContext.PipelineJobs - .Include(p => p.Manifest) - .ThenInclude(m => m!.CanvasPaintings) - .SingleOrDefaultAsync(p => p.TextJobId == completionMessage.JobId, cancellationToken); + .SingleOrDefaultAsync(p => p.ResourceId == resourceId && p.JobType == PipelineJobType.TextService, + cancellationToken); if (pipelineJob == null) { @@ -59,11 +59,21 @@ private async Task TryCompleteManifest(TextServiceJobCompletionMessage com } var sw = Stopwatch.StartNew(); - var dbManifest = pipelineJob.Manifest!; + var dbManifest = await dbContext.Manifests + .Include(m => m.CanvasPaintings) + .SingleOrDefaultAsync(m => m.Id == pipelineJob.ResourceId && m.CustomerId == pipelineJob.CustomerId, + cancellationToken); + + if (dbManifest == null) + { + logger.LogError("Manifest {ResourceId} for pipeline job {JobId} not found", + pipelineJob.ResourceId, completionMessage.JobId); + return false; + } logger.LogInformation( "Completing text pipeline for job:{JobId}, customer:{CustomerId}, manifest:{ManifestId}", - completionMessage.JobId, pipelineJob.CustomerId, pipelineJob.ManifestId); + completionMessage.JobId, pipelineJob.CustomerId, pipelineJob.ResourceId); try { @@ -104,7 +114,7 @@ await manifestStorageManager.SaveManifestInStorage(stagedManifest, dbManifest, n await dbContext.SaveChangesAsync(cancellationToken); 
logger.LogInformation( "Text pipeline completed for job:{JobId}, manifest:{ManifestId}. Elapsed:{Elapsed}ms", - completionMessage.JobId, pipelineJob.ManifestId, sw.ElapsedMilliseconds); + completionMessage.JobId, pipelineJob.ResourceId, sw.ElapsedMilliseconds); return true; } @@ -151,11 +161,20 @@ private static void MergeContext(Manifest target, Manifest source) private static int ExtractCustomerIdFromJobId(string jobId) { - // jobId format: "{customerId}/iiif/{manifestId}" + // jobId format: "{customerId}/iiif/{resourceId}" var firstSlash = jobId.IndexOf('/'); return firstSlash > 0 && int.TryParse(jobId[..firstSlash], out var customerId) ? customerId : 0; } + private static string ExtractResourceIdFromJobId(string jobId) + { + // jobId format: "{customerId}/iiif/{resourceId}" + var firstSlash = jobId.IndexOf('/'); + if (firstSlash < 0) return string.Empty; + var secondSlash = jobId.IndexOf('/', firstSlash + 1); + return secondSlash > 0 && secondSlash < jobId.Length - 1 ? jobId[(secondSlash + 1)..] : string.Empty; + } + private static TextServiceJobCompletionMessage DeserializeMessage(QueueMessage message, ILogger logger) { try diff --git a/src/IIIFPresentation/Models/Database/Collections/Manifest.cs b/src/IIIFPresentation/Models/Database/Collections/Manifest.cs index fb96bf1d..4aaca2a9 100644 --- a/src/IIIFPresentation/Models/Database/Collections/Manifest.cs +++ b/src/IIIFPresentation/Models/Database/Collections/Manifest.cs @@ -48,6 +48,7 @@ public class Manifest : IHierarchyResource public List? Batches { get; set; } + [System.ComponentModel.DataAnnotations.Schema.NotMapped] public List? 
PipelineJobs { get; set; } /// diff --git a/src/IIIFPresentation/Models/Database/General/PipelineJob.cs b/src/IIIFPresentation/Models/Database/General/PipelineJob.cs index 8ff21628..5be46931 100644 --- a/src/IIIFPresentation/Models/Database/General/PipelineJob.cs +++ b/src/IIIFPresentation/Models/Database/General/PipelineJob.cs @@ -1,19 +1,16 @@ -using Models.Database.Collections; - namespace Models.Database.General; public class PipelineJob : ICustomerEntity { public int Id { get; set; } - public required string ManifestId { get; set; } + public required string ResourceId { get; set; } - public required int CustomerId { get; set; } + public ResourceType ResourceType { get; set; } + + public PipelineJobType JobType { get; set; } - /// - /// Text-services job identifier, format: "{customerId}/iiif/{manifestId}" - /// - public required string TextJobId { get; set; } + public required int CustomerId { get; set; } public PipelineJobStatus Status { get; set; } @@ -22,8 +19,6 @@ public class PipelineJob : ICustomerEntity public DateTime Created { get; set; } public DateTime? Finished { get; set; } - - public Manifest? 
Manifest { get; set; } } public enum PipelineJobStatus @@ -31,4 +26,19 @@ public enum PipelineJobStatus Queued = 0, Completed = 1, Failed = 2 +} + +public enum PipelineJobType +{ + TextService = 0 +} + +public static class PipelineJobX +{ + public static string GetJobId(this PipelineJob job) => job.JobType switch + { + PipelineJobType.TextService => $"{job.CustomerId}/iiif/{job.ResourceId}", + _ => throw new ArgumentOutOfRangeException(nameof(job.JobType), $"Unknown job type: {job.JobType}") + }; + } \ No newline at end of file diff --git a/src/IIIFPresentation/Repository/Migrations/20260612164039_AddPipelineJob.Designer.cs b/src/IIIFPresentation/Repository/Migrations/20260615144249_AddPipelineJob.Designer.cs similarity index 95% rename from src/IIIFPresentation/Repository/Migrations/20260612164039_AddPipelineJob.Designer.cs rename to src/IIIFPresentation/Repository/Migrations/20260615144249_AddPipelineJob.Designer.cs index 833b7622..0e947298 100644 --- a/src/IIIFPresentation/Repository/Migrations/20260612164039_AddPipelineJob.Designer.cs +++ b/src/IIIFPresentation/Repository/Migrations/20260615144249_AddPipelineJob.Designer.cs @@ -12,7 +12,7 @@ namespace Repository.Migrations { [DbContext(typeof(PresentationContext))] - [Migration("20260612164039_AddPipelineJob")] + [Migration("20260615144249_AddPipelineJob")] partial class AddPipelineJob { /// @@ -381,27 +381,29 @@ protected override void BuildTargetModel(ModelBuilder modelBuilder) .HasColumnType("timestamp with time zone") .HasColumnName("finished"); - b.Property("ManifestId") + b.Property("JobType") .IsRequired() .HasColumnType("text") - .HasColumnName("manifest_id"); + .HasColumnName("job_type"); - b.Property("Status") + b.Property("ResourceId") .IsRequired() .HasColumnType("text") - .HasColumnName("status"); + .HasColumnName("resource_id"); - b.Property("TextJobId") + b.Property("ResourceType") .IsRequired() .HasColumnType("text") - .HasColumnName("text_job_id"); + .HasColumnName("resource_type"); + + 
b.Property("Status") + .IsRequired() + .HasColumnType("text") + .HasColumnName("status"); b.HasKey("Id") .HasName("pk_pipeline_jobs"); - b.HasIndex("ManifestId", "CustomerId") - .HasDatabaseName("ix_pipeline_jobs_manifest_id_customer_id"); - b.ToTable("pipeline_jobs", (string)null); }); @@ -456,18 +458,6 @@ protected override void BuildTargetModel(ModelBuilder modelBuilder) b.Navigation("ParentCollection"); }); - modelBuilder.Entity("Models.Database.General.PipelineJob", b => - { - b.HasOne("Models.Database.Collections.Manifest", "Manifest") - .WithMany("PipelineJobs") - .HasForeignKey("ManifestId", "CustomerId") - .OnDelete(DeleteBehavior.Cascade) - .IsRequired() - .HasConstraintName("fk_pipeline_jobs_manifests_manifest_id_customer_id"); - - b.Navigation("Manifest"); - }); - modelBuilder.Entity("Models.Database.Collections.Collection", b => { b.Navigation("Children"); @@ -482,8 +472,6 @@ protected override void BuildTargetModel(ModelBuilder modelBuilder) b.Navigation("CanvasPaintings"); b.Navigation("Hierarchy"); - - b.Navigation("PipelineJobs"); }); #pragma warning restore 612, 618 } diff --git a/src/IIIFPresentation/Repository/Migrations/20260612164039_AddPipelineJob.cs b/src/IIIFPresentation/Repository/Migrations/20260615144249_AddPipelineJob.cs similarity index 67% rename from src/IIIFPresentation/Repository/Migrations/20260612164039_AddPipelineJob.cs rename to src/IIIFPresentation/Repository/Migrations/20260615144249_AddPipelineJob.cs index 0fa0ec15..79b07e1d 100644 --- a/src/IIIFPresentation/Repository/Migrations/20260612164039_AddPipelineJob.cs +++ b/src/IIIFPresentation/Repository/Migrations/20260615144249_AddPipelineJob.cs @@ -18,9 +18,10 @@ protected override void Up(MigrationBuilder migrationBuilder) { id = table.Column(type: "integer", nullable: false) .Annotation("Npgsql:ValueGenerationStrategy", NpgsqlValueGenerationStrategy.IdentityByDefaultColumn), - manifest_id = table.Column(type: "text", nullable: false), + resource_id = table.Column(type: 
"text", nullable: false), + resource_type = table.Column(type: "text", nullable: false), + job_type = table.Column(type: "text", nullable: false), customer_id = table.Column(type: "integer", nullable: false), - text_job_id = table.Column(type: "text", nullable: false), status = table.Column(type: "text", nullable: false), error = table.Column(type: "text", nullable: true), created = table.Column(type: "timestamp with time zone", nullable: false, defaultValueSql: "now()"), @@ -29,18 +30,7 @@ protected override void Up(MigrationBuilder migrationBuilder) constraints: table => { table.PrimaryKey("pk_pipeline_jobs", x => x.id); - table.ForeignKey( - name: "fk_pipeline_jobs_manifests_manifest_id_customer_id", - columns: x => new { x.manifest_id, x.customer_id }, - principalTable: "manifests", - principalColumns: new[] { "id", "customer_id" }, - onDelete: ReferentialAction.Cascade); }); - - migrationBuilder.CreateIndex( - name: "ix_pipeline_jobs_manifest_id_customer_id", - table: "pipeline_jobs", - columns: new[] { "manifest_id", "customer_id" }); } /// diff --git a/src/IIIFPresentation/Repository/Migrations/PresentationContextModelSnapshot.cs b/src/IIIFPresentation/Repository/Migrations/PresentationContextModelSnapshot.cs index 7dbc65e3..d5b0231e 100644 --- a/src/IIIFPresentation/Repository/Migrations/PresentationContextModelSnapshot.cs +++ b/src/IIIFPresentation/Repository/Migrations/PresentationContextModelSnapshot.cs @@ -378,27 +378,29 @@ protected override void BuildModel(ModelBuilder modelBuilder) .HasColumnType("timestamp with time zone") .HasColumnName("finished"); - b.Property("ManifestId") + b.Property("JobType") .IsRequired() .HasColumnType("text") - .HasColumnName("manifest_id"); + .HasColumnName("job_type"); - b.Property("Status") + b.Property("ResourceId") .IsRequired() .HasColumnType("text") - .HasColumnName("status"); + .HasColumnName("resource_id"); - b.Property("TextJobId") + b.Property("ResourceType") .IsRequired() .HasColumnType("text") - 
.HasColumnName("text_job_id"); + .HasColumnName("resource_type"); + + b.Property("Status") + .IsRequired() + .HasColumnType("text") + .HasColumnName("status"); b.HasKey("Id") .HasName("pk_pipeline_jobs"); - b.HasIndex("ManifestId", "CustomerId") - .HasDatabaseName("ix_pipeline_jobs_manifest_id_customer_id"); - b.ToTable("pipeline_jobs", (string)null); }); @@ -453,18 +455,6 @@ protected override void BuildModel(ModelBuilder modelBuilder) b.Navigation("ParentCollection"); }); - modelBuilder.Entity("Models.Database.General.PipelineJob", b => - { - b.HasOne("Models.Database.Collections.Manifest", "Manifest") - .WithMany("PipelineJobs") - .HasForeignKey("ManifestId", "CustomerId") - .OnDelete(DeleteBehavior.Cascade) - .IsRequired() - .HasConstraintName("fk_pipeline_jobs_manifests_manifest_id_customer_id"); - - b.Navigation("Manifest"); - }); - modelBuilder.Entity("Models.Database.Collections.Collection", b => { b.Navigation("Children"); @@ -479,8 +469,6 @@ protected override void BuildModel(ModelBuilder modelBuilder) b.Navigation("CanvasPaintings"); b.Navigation("Hierarchy"); - - b.Navigation("PipelineJobs"); }); #pragma warning restore 612, 618 } diff --git a/src/IIIFPresentation/Repository/PresentationContext.cs b/src/IIIFPresentation/Repository/PresentationContext.cs index 8df1fb60..6c0ddec4 100644 --- a/src/IIIFPresentation/Repository/PresentationContext.cs +++ b/src/IIIFPresentation/Repository/PresentationContext.cs @@ -163,18 +163,24 @@ protected override void OnModelCreating(ModelBuilder modelBuilder) { entity.HasKey(p => p.Id); - entity - .HasOne(p => p.Manifest) - .WithMany(m => m.PipelineJobs) - .HasForeignKey(p => new { p.ManifestId, p.CustomerId }) - .OnDelete(DeleteBehavior.Cascade); - entity.Property(e => e.Status) .IsRequired() .HasConversion( s => s.ToString(), s => s.GetEnumFromString(true)); + entity.Property(e => e.ResourceType) + .IsRequired() + .HasConversion( + r => r.ToString(), + r => r.GetEnumFromString(true)); + + entity.Property(e => 
e.JobType) + .IsRequired() + .HasConversion( + j => j.ToString(), + j => j.GetEnumFromString(true)); + entity.Property(p => p.Created).HasDefaultValueSql("now()"); }); } diff --git a/src/IIIFPresentation/Services.Tests/Manifests/ManifestXTests.cs b/src/IIIFPresentation/Services.Tests/Manifests/ManifestXTests.cs index ad9b71a5..c21e801e 100644 --- a/src/IIIFPresentation/Services.Tests/Manifests/ManifestXTests.cs +++ b/src/IIIFPresentation/Services.Tests/Manifests/ManifestXTests.cs @@ -78,7 +78,7 @@ public void HasFurtherWork_ReturnsFalse_WhenBatchCompletedAndJobCompleted() { Id = "x", CustomerId = 1, Batches = [new Batch { Id = 1, ManifestId = "x", Status = BatchStatus.Completed }], - PipelineJobs = [new PipelineJob { ManifestId = "x", CustomerId = 1, TextJobId = "j", Status = PipelineJobStatus.Completed }] + PipelineJobs = [new PipelineJob { ResourceId = "x", ResourceType = ResourceType.IIIFManifest, CustomerId = 1, Status = PipelineJobStatus.Completed }] }; manifest.HasFurtherWork().Should().BeFalse(); @@ -91,7 +91,7 @@ private static Manifest ManifestWithJobs(PipelineJobStatus? 
status = null) { manifest.PipelineJobs = [ - new PipelineJob { ManifestId = "x", CustomerId = 1, TextJobId = "j", Status = status.Value } + new PipelineJob { ResourceId = "x", ResourceType = ResourceType.IIIFManifest, CustomerId = 1, Status = status.Value } ]; } else diff --git a/src/IIIFPresentation/Services.Tests/Manifests/PipelineJobXTests.cs b/src/IIIFPresentation/Services.Tests/Manifests/PipelineJobXTests.cs new file mode 100644 index 00000000..c00cc321 --- /dev/null +++ b/src/IIIFPresentation/Services.Tests/Manifests/PipelineJobXTests.cs @@ -0,0 +1,35 @@ +using Models.Database.General; + +namespace Services.Tests.Manifests; + +public class PipelineJobXTests +{ + [Fact] + public void GetJobId_ReturnsExpectedFormat_ForTextService() + { + var job = new PipelineJob + { + ResourceId = "my-manifest", + ResourceType = ResourceType.IIIFManifest, + JobType = PipelineJobType.TextService, + CustomerId = 99 + }; + + job.GetJobId().Should().Be("99/iiif/my-manifest"); + } + + [Fact] + public void GetJobId_Throws_ForUnknownJobType() + { + var job = new PipelineJob + { + ResourceId = "x", + ResourceType = ResourceType.IIIFManifest, + JobType = (PipelineJobType)999, + CustomerId = 1 + }; + + job.Invoking(j => j.GetJobId()).Should().Throw(); + } + +} \ No newline at end of file diff --git a/src/IIIFPresentation/Services.Tests/TextServices/TextServicesClientTests.cs b/src/IIIFPresentation/Services.Tests/TextServices/TextServicesClientTests.cs index e33e1a9b..4dfb0472 100644 --- a/src/IIIFPresentation/Services.Tests/TextServices/TextServicesClientTests.cs +++ b/src/IIIFPresentation/Services.Tests/TextServices/TextServicesClientTests.cs @@ -5,6 +5,7 @@ using IIIF.Serialisation; using Microsoft.Extensions.Logging.Abstractions; using Microsoft.Extensions.Options; +using Models.Database.General; using Services.TextServices; namespace Services.Tests.TextServices; @@ -16,13 +17,16 @@ public class TextServicesClientTests private TextServicesClient CreateSut(TextServicesSettings 
settings) => new(new HttpClient(messageHandler), Options.Create(settings), new NullLogger()); + private static PipelineJob MakeJob(int customerId = 1, string resourceId = "my-manifest") => + new() { CustomerId = customerId, ResourceId = resourceId, ResourceType = ResourceType.IIIFManifest, JobType = PipelineJobType.TextService }; + [Fact] public async Task CreateOrUpdateJob_ReturnsTrue_WhenPostSucceeds() { var sut = CreateSut(new TextServicesSettings { BuilderApiUri = new Uri("http://text-services/") }); messageHandler.Enqueue(HttpStatusCode.OK); - var result = await sut.CreateOrUpdateJob("1/iiif/my-manifest", "s3://bucket/key"); + var result = await sut.CreateOrUpdateJob(MakeJob(), "my-bucket", "staging/1/manifests/my-manifest"); result.Should().BeTrue(); messageHandler.Requests.Single().Method.Should().Be(HttpMethod.Post); @@ -35,7 +39,7 @@ public async Task CreateOrUpdateJob_FallsBackToPut_WhenPostReturns409() messageHandler.Enqueue(HttpStatusCode.Conflict); messageHandler.Enqueue(HttpStatusCode.OK); - var result = await sut.CreateOrUpdateJob("1/iiif/my-manifest", "s3://bucket/key"); + var result = await sut.CreateOrUpdateJob(MakeJob(), "my-bucket", "staging/1/manifests/my-manifest"); result.Should().BeTrue(); messageHandler.Requests.Should().HaveCount(2); @@ -48,12 +52,37 @@ public async Task CreateOrUpdateJob_ReturnsFalse_WhenBuilderApiUriNotConfigured( { var sut = CreateSut(new TextServicesSettings { BuilderApiUri = null }); - var result = await sut.CreateOrUpdateJob("1/iiif/my-manifest", "s3://bucket/key"); + var result = await sut.CreateOrUpdateJob(MakeJob(), "my-bucket", "staging/1/manifests/my-manifest"); result.Should().BeFalse(); messageHandler.Requests.Should().BeEmpty(); } + [Fact] + public async Task CreateOrUpdateJob_ReturnsFalse_WhenPostReturnsNonSuccess() + { + var sut = CreateSut(new TextServicesSettings { BuilderApiUri = new Uri("http://text-services/") }); + messageHandler.Enqueue(HttpStatusCode.InternalServerError); + + var result = await 
sut.CreateOrUpdateJob(MakeJob(), "my-bucket", "staging/1/manifests/my-manifest"); + + result.Should().BeFalse(); + } + + [Fact] + public async Task CreateOrUpdateJob_SendsCorrectJobIdAndS3Uri_InPostBody() + { + var sut = CreateSut(new TextServicesSettings { BuilderApiUri = new Uri("http://text-services/") }); + messageHandler.Enqueue(HttpStatusCode.OK); + + await sut.CreateOrUpdateJob(MakeJob(customerId: 5, resourceId: "test-manifest"), "my-bucket", + "staging/5/manifests/test-manifest"); + + var body = await messageHandler.Requests.Single().Content!.ReadAsStringAsync(); + body.Should().Contain("\"id\":\"5/iiif/test-manifest\""); + body.Should().Contain("\"sourceUri\":\"s3://my-bucket/staging/5/manifests/test-manifest\""); + } + [Fact] public async Task GetTextAugmentedManifest_ReturnsNull_WhenSearchApiUriNotConfigured() { diff --git a/src/IIIFPresentation/Services/TextServices/ITextServicesClient.cs b/src/IIIFPresentation/Services/TextServices/ITextServicesClient.cs index 17c61be3..ae961916 100644 --- a/src/IIIFPresentation/Services/TextServices/ITextServicesClient.cs +++ b/src/IIIFPresentation/Services/TextServices/ITextServicesClient.cs @@ -1,4 +1,5 @@ using IIIF.Presentation.V3; +using Models.Database.General; namespace Services.TextServices; @@ -7,9 +8,10 @@ public interface ITextServicesClient /// /// Create a new text-builder job, or reprocess an existing one. /// - /// Job identifier in format "{customerId}/iiif/{manifestId}" - /// S3 URI of the staged manifest, e.g. "s3://bucket/staging/..." - Task CreateOrUpdateJob(string jobId, string sourceS3Uri, CancellationToken cancellationToken = default); + /// The pipeline job to submit + /// S3 bucket containing the staged manifest + /// S3 key of the staged manifest + Task CreateOrUpdateJob(PipelineJob job, string bucket, string resourceKey, CancellationToken cancellationToken = default); /// /// Retrieve the text-augmented manifest for a completed job. 
@@ -17,4 +19,4 @@ public interface ITextServicesClient /// /// Job identifier in format "{customerId}/iiif/{manifestId}" Task GetTextAugmentedManifest(string jobId, CancellationToken cancellationToken = default); -} \ No newline at end of file +} diff --git a/src/IIIFPresentation/Services/TextServices/TextServicesClient.cs b/src/IIIFPresentation/Services/TextServices/TextServicesClient.cs index 398f36f1..28e9ff67 100644 --- a/src/IIIFPresentation/Services/TextServices/TextServicesClient.cs +++ b/src/IIIFPresentation/Services/TextServices/TextServicesClient.cs @@ -7,6 +7,7 @@ using IIIF.Serialisation; using Microsoft.Extensions.Logging; using Microsoft.Extensions.Options; +using Models.Database.General; namespace Services.TextServices; @@ -23,16 +24,18 @@ public class TextServicesClient( // Search=1, Autocomplete=2, TextAugmented=16 private const int InitialServices = 19; - public async Task CreateOrUpdateJob(string jobId, string sourceS3Uri, + public async Task CreateOrUpdateJob(PipelineJob job, string bucket, string resourceKey, CancellationToken cancellationToken = default) { var settings = options.Value; + var jobId = job.GetJobId(); if (settings.BuilderApiUri == null) { logger.LogWarning("TextServices BuilderApiUri is not configured; skipping job creation for {JobId}", jobId); return false; } + var sourceS3Uri = $"s3://{bucket}/{resourceKey}"; var request = new { id = jobId, sourceUri = sourceS3Uri, services = InitialServices }; var content = new StringContent(JsonSerializer.Serialize(request, JsonOptions), Encoding.UTF8, "application/json"); From e42a11ff9b89fd86bfdfb2e475ce27c2df9b1ba9 Mon Sep 17 00:00:00 2001 From: "jack.lewis" Date: Mon, 15 Jun 2026 17:18:46 +0100 Subject: [PATCH 03/18] Changes before testing --- .../Manifest/ManifestWriteServiceTests.cs | 27 ++++++++++++++++++ .../Features/Manifest/ManifestWriteService.cs | 28 ++++++++++++------- ...ServiceJobCompletionMessageHandlerTests.cs | 13 +++++++++ .../TextServiceJobCompletionMessageHandler.cs | 16 
++++++++--- .../TextServices/TextServicesClient.cs | 2 +- 5 files changed, 71 insertions(+), 15 deletions(-) diff --git a/src/IIIFPresentation/API.Tests/Features/Manifest/ManifestWriteServiceTests.cs b/src/IIIFPresentation/API.Tests/Features/Manifest/ManifestWriteServiceTests.cs index f59c2244..78a63e65 100644 --- a/src/IIIFPresentation/API.Tests/Features/Manifest/ManifestWriteServiceTests.cs +++ b/src/IIIFPresentation/API.Tests/Features/Manifest/ManifestWriteServiceTests.cs @@ -119,6 +119,8 @@ public ManifestWriteServiceTests(PresentationContextFixture dbFixture) manifestLockManager = new LockManager(); textServicesClient = A.Fake(); + A.CallTo(() => textServicesClient.CreateOrUpdateJob(A._, A._, A._, A._)) + .Returns(true); sut = new ManifestWriteService(presentationContext, identityManager, canvasPaintingResolver, new TestPathGenerator(presentationGenerator), settingsBasedPathGenerator, dlcsManifestCoordinator, parentSlugParser, manifestStorageManager, pathRewriteParser, manifestLockManager, textServicesClient, @@ -1170,6 +1172,31 @@ public async Task Create_CallsTextServicesAndCreatesPipelineJob_WhenManifestHasP pipelineJob.GetJobId().Should().Be($"{Customer}/iiif/{flatId}"); } + [Fact] + public async Task Create_SetsPipelineJobToFailed_WhenTextServiceSubmissionFails() + { + // Arrange + var (slug, resourceId) = TestIdentifiers.SlugResource(); + var manifest = new PresentationManifest + { + Slug = slug, + Pipeline = [new PipelineItem { Name = "text", Config = new PipelineConfig { Action = "Index" } }] + }; + var request = new UpsertManifestRequest(resourceId, null, Customer, manifest, manifest.AsJson(), true); + A.CallTo(() => textServicesClient.CreateOrUpdateJob(A._, A._, A._, A._)) + .Returns(false); + + // Act + var result = await sut.Create(request, CancellationToken.None); + + // Assert + var flatId = result.Entity.FlatId; + var pipelineJob = presentationContext.PipelineJobs.FirstOrDefault(p => p.ResourceId == flatId); + pipelineJob.Should().NotBeNull(); + 
pipelineJob!.Status.Should().Be(PipelineJobStatus.Failed); + pipelineJob.Error.Should().NotBeNullOrEmpty(); + } + [Fact] public async Task Create_DoesNotCallTextServices_WhenManifestHasNoPipeline() { diff --git a/src/IIIFPresentation/API/Features/Manifest/ManifestWriteService.cs b/src/IIIFPresentation/API/Features/Manifest/ManifestWriteService.cs index 5148058b..c2a64311 100644 --- a/src/IIIFPresentation/API/Features/Manifest/ManifestWriteService.cs +++ b/src/IIIFPresentation/API/Features/Manifest/ManifestWriteService.cs @@ -13,11 +13,11 @@ using Core.IIIF; using API.Infrastructure; using DLCS.Exceptions; +using Microsoft.EntityFrameworkCore; using Microsoft.Extensions.Options; using Models.API.General; using Models.API.Manifest; using Models.Database; -using Models.Database.Collections; using Models.DLCS; using Models.Database.General; using Repository; @@ -483,9 +483,9 @@ await manifestStorageManager.SaveManifestInStorage(iiifManifest, dbManifest, ori private async Task SubmitTextPipelineJob(DbManifest dbManifest, CancellationToken cancellationToken) { - var existing = dbContext.PipelineJobs - .FirstOrDefault(p => p.ResourceId == dbManifest.Id && p.ResourceType == ResourceType.IIIFManifest - && p.CustomerId == dbManifest.CustomerId); + var existing = await dbContext.PipelineJobs + .FirstOrDefaultAsync(p => p.ResourceId == dbManifest.Id && p.ResourceType == ResourceType.IIIFManifest + && p.CustomerId == dbManifest.CustomerId, cancellationToken); var job = existing ?? 
new PipelineJob { @@ -496,15 +496,23 @@ private async Task SubmitTextPipelineJob(DbManifest dbManifest, CancellationToke Created = DateTime.UtcNow }; - await textServicesClient.CreateOrUpdateJob(job, awsOptions.Value.S3.StorageBucket, + var submitted = await textServicesClient.CreateOrUpdateJob(job, awsOptions.Value.S3.StorageBucket, dbManifest.GetResourceBucketKey(BucketLocationType.Staging), cancellationToken); - job.Status = PipelineJobStatus.Queued; - job.Error = null; - job.Finished = null; + if (submitted) + { + job.Status = PipelineJobStatus.Queued; + job.Error = null; + job.Finished = null; + } + else + { + job.Status = PipelineJobStatus.Failed; + job.Error = "Failed to submit job to text-services builder"; + job.Finished = DateTime.UtcNow; + } - if (existing == null) - await dbContext.PipelineJobs.AddAsync(job, cancellationToken); + if (existing == null) await dbContext.PipelineJobs.AddAsync(job, cancellationToken); } /// diff --git a/src/IIIFPresentation/BackgroundHandler.Tests/TextCompletion/TextServiceJobCompletionMessageHandlerTests.cs b/src/IIIFPresentation/BackgroundHandler.Tests/TextCompletion/TextServiceJobCompletionMessageHandlerTests.cs index e1bda26c..1f152e23 100644 --- a/src/IIIFPresentation/BackgroundHandler.Tests/TextCompletion/TextServiceJobCompletionMessageHandlerTests.cs +++ b/src/IIIFPresentation/BackgroundHandler.Tests/TextCompletion/TextServiceJobCompletionMessageHandlerTests.cs @@ -53,6 +53,19 @@ public TextServiceJobCompletionMessageHandlerTests(PresentationContextFixture db new NullLogger()); } + [Theory] + [InlineData("not-a-valid-id")] + [InlineData("noSlashAtAll")] + [InlineData("/iiif/resource")] + public async Task HandleMessage_ReturnsTrue_WhenJobIdCannotBeParsed(string malformedJobId) + { + var message = CreateMessage(malformedJobId, "Completed"); + + (await sut.HandleMessage(message, CancellationToken.None)).Should().BeTrue(); + A.CallTo(() => iiifS3.ReadIIIFFromS3(A._, A._, A._)) + .MustNotHaveHappened(); + } + [Theory] 
[InlineData(0)] [InlineData(1)] diff --git a/src/IIIFPresentation/BackgroundHandler/TextCompletion/TextServiceJobCompletionMessageHandler.cs b/src/IIIFPresentation/BackgroundHandler/TextCompletion/TextServiceJobCompletionMessageHandler.cs index 991511ac..0e41afc2 100644 --- a/src/IIIFPresentation/BackgroundHandler/TextCompletion/TextServiceJobCompletionMessageHandler.cs +++ b/src/IIIFPresentation/BackgroundHandler/TextCompletion/TextServiceJobCompletionMessageHandler.cs @@ -29,7 +29,15 @@ public async Task HandleMessage(QueueMessage message, CancellationToken ca try { var completionMessage = DeserializeMessage(message, logger); - customerIdProvider.SetCustomerId(ExtractCustomerIdFromJobId(completionMessage.JobId)); + var customerId = ExtractCustomerIdFromJobId(completionMessage.JobId); + if (customerId == null) + { + logger.LogWarning("Could not parse customer id from job id {JobId}; discarding message", + completionMessage.JobId); + return true; + } + + customerIdProvider.SetCustomerId(customerId.Value); return await TryCompleteManifest(completionMessage, message.ApproximateReceiveCount, cancellationToken); } catch (Exception ex) @@ -159,11 +167,11 @@ private static void MergeContext(Manifest target, Manifest source) } } - private static int ExtractCustomerIdFromJobId(string jobId) + private static int? ExtractCustomerIdFromJobId(string jobId) { // jobId format: "{customerId}/iiif/{resourceId}" var firstSlash = jobId.IndexOf('/'); - return firstSlash > 0 && int.TryParse(jobId[..firstSlash], out var customerId) ? customerId : 0; + return firstSlash > 0 && int.TryParse(jobId[..firstSlash], out var customerId) ? 
customerId : null; } private static string ExtractResourceIdFromJobId(string jobId) @@ -187,4 +195,4 @@ private static TextServiceJobCompletionMessage DeserializeMessage(QueueMessage m throw; } } -} \ No newline at end of file +} diff --git a/src/IIIFPresentation/Services/TextServices/TextServicesClient.cs b/src/IIIFPresentation/Services/TextServices/TextServicesClient.cs index 28e9ff67..f25ac47e 100644 --- a/src/IIIFPresentation/Services/TextServices/TextServicesClient.cs +++ b/src/IIIFPresentation/Services/TextServices/TextServicesClient.cs @@ -93,4 +93,4 @@ public async Task CreateOrUpdateJob(PipelineJob job, string bucket, string return (await response.Content.ReadAsStreamAsync(cancellationToken)).FromJsonStream(); } -} \ No newline at end of file +} From 9a155519ec05464fe334b68e312c7cf869050f9a Mon Sep 17 00:00:00 2001 From: "jack.lewis" Date: Fri, 19 Jun 2026 16:00:26 +0100 Subject: [PATCH 04/18] Allow pipelines to be displayed on output --- .../Converters/ManifestConverterTests.cs | 86 ++++++++++++++++++- .../Manifest/ManifestWriteServiceTests.cs | 14 +-- .../API.Tests/Integration/GetManifestTests.cs | 3 + .../API/Converters/ManifestConverter.cs | 10 +++ .../Features/Manifest/ManifestWriteService.cs | 56 +++++++----- .../API/appsettings.Example.json | 4 + .../TextServiceJobCompletionMessageTests.cs | 23 ++--- .../TextServiceJobCompletionMessage.cs | 18 ++-- .../appsettings.Example.json | 5 +- .../Models/API/Manifest/Pipeline.cs | 17 +++- .../Manifests/PipelineXTests.cs | 49 +++++++++++ 11 files changed, 236 insertions(+), 49 deletions(-) diff --git a/src/IIIFPresentation/API.Tests/Converters/ManifestConverterTests.cs b/src/IIIFPresentation/API.Tests/Converters/ManifestConverterTests.cs index 60fc5ba3..c02a89fd 100644 --- a/src/IIIFPresentation/API.Tests/Converters/ManifestConverterTests.cs +++ b/src/IIIFPresentation/API.Tests/Converters/ManifestConverterTests.cs @@ -310,7 +310,91 @@ public void SetGeneratedFields_SetsSpace() // Assert
result.Space.Should().Be("https://dlcs.test/customers/123/spaces/321"); } - + + [Fact] + public void SetGeneratedFields_SetsPipelineFromJobs_WhenPipelineJobsPresent() + { + // Arrange + var iiifManifest = new PresentationManifest(); + var dbManifest = new DBManifest + { + CustomerId = 1, Id = "id", + Hierarchy = [new Hierarchy { Slug = "slug" }], + PipelineJobs = + [ + new PipelineJob + { + ResourceId = "id", CustomerId = 1, + JobType = PipelineJobType.TextService, + Status = PipelineJobStatus.Queued, + Created = DateTime.UtcNow + } + ] + }; + + // Act + var result = iiifManifest.SetGeneratedFields(dbManifest, pathGenerator, settingsBasedPathGenerator); + + // Assert + result.Pipeline.Should().ContainSingle(p => p.Name == "TextService" && p.Status == "Queued"); + } + + [Fact] + public void SetGeneratedFields_ReturnsLatestJobPerType_WhenMultipleJobsOfSameType() + { + // Arrange + var iiifManifest = new PresentationManifest(); + var older = DateTime.UtcNow.AddHours(-1); + var newer = DateTime.UtcNow; + var dbManifest = new DBManifest + { + CustomerId = 1, Id = "id", + Hierarchy = [new Hierarchy { Slug = "slug" }], + PipelineJobs = + [ + new PipelineJob + { + ResourceId = "id", CustomerId = 1, + JobType = PipelineJobType.TextService, + Status = PipelineJobStatus.Completed, + Created = older + }, + new PipelineJob + { + ResourceId = "id", CustomerId = 1, + JobType = PipelineJobType.TextService, + Status = PipelineJobStatus.Queued, + Created = newer + } + ] + }; + + // Act + var result = iiifManifest.SetGeneratedFields(dbManifest, pathGenerator, settingsBasedPathGenerator); + + // Assert + result.Pipeline.Should().ContainSingle(p => p.Status == "Queued", "only the most recently created job per type should appear"); + } + + [Fact] + public void SetGeneratedFields_DoesNotSetPipeline_WhenNoPipelineJobs() + { + // Arrange + var iiifManifest = new PresentationManifest { Pipeline = null }; + var dbManifest = new DBManifest + { + CustomerId = 1, Id = "id", + Hierarchy = [new 
Hierarchy { Slug = "slug" }], + PipelineJobs = null + }; + + // Act + var result = iiifManifest.SetGeneratedFields(dbManifest, pathGenerator, settingsBasedPathGenerator); + + // Assert + result.Pipeline.Should().BeNull(); + } + [Fact] public void GenerateProvisionalCanvases_SetsItems_IfNotSet() { diff --git a/src/IIIFPresentation/API.Tests/Features/Manifest/ManifestWriteServiceTests.cs b/src/IIIFPresentation/API.Tests/Features/Manifest/ManifestWriteServiceTests.cs index 78a63e65..bdea09da 100644 --- a/src/IIIFPresentation/API.Tests/Features/Manifest/ManifestWriteServiceTests.cs +++ b/src/IIIFPresentation/API.Tests/Features/Manifest/ManifestWriteServiceTests.cs @@ -1170,10 +1170,11 @@ public async Task Create_CallsTextServicesAndCreatesPipelineJob_WhenManifestHasP pipelineJob.Should().NotBeNull(); pipelineJob!.Status.Should().Be(PipelineJobStatus.Queued); pipelineJob.GetJobId().Should().Be($"{Customer}/iiif/{flatId}"); + result.Entity.Pipeline.Should().ContainSingle(p => p.Name == "TextService" && p.Status == "Queued"); } [Fact] - public async Task Create_SetsPipelineJobToFailed_WhenTextServiceSubmissionFails() + public async Task Create_ReturnsError_AndDoesNotPersistManifest_WhenTextServiceSubmissionFails() { // Arrange var (slug, resourceId) = TestIdentifiers.SlugResource(); @@ -1190,11 +1191,12 @@ public async Task Create_SetsPipelineJobToFailed_WhenTextServiceSubmissionFails( var result = await sut.Create(request, CancellationToken.None); // Assert - var flatId = result.Entity.FlatId; - var pipelineJob = presentationContext.PipelineJobs.FirstOrDefault(p => p.ResourceId == flatId); - pipelineJob.Should().NotBeNull(); - pipelineJob!.Status.Should().Be(PipelineJobStatus.Failed); - pipelineJob.Error.Should().NotBeNullOrEmpty(); + result.IsSuccess.Should().BeFalse(); + result.WriteResult.Should().Be(WriteResult.Error); + + // Manifest and pipeline job should be rolled back — resubmitting the same slug must not conflict + presentationContext.Hierarchy.Any(h => h.Slug 
== slug).Should().BeFalse(); + presentationContext.PipelineJobs.Any(p => p.ResourceId == resourceId).Should().BeFalse(); } [Fact] diff --git a/src/IIIFPresentation/API.Tests/Integration/GetManifestTests.cs b/src/IIIFPresentation/API.Tests/Integration/GetManifestTests.cs index b8fa4b55..709d82c7 100644 --- a/src/IIIFPresentation/API.Tests/Integration/GetManifestTests.cs +++ b/src/IIIFPresentation/API.Tests/Integration/GetManifestTests.cs @@ -478,6 +478,7 @@ await amazonS3.PutObjectAsync(new() var manifest = await response.ReadAsPresentationJsonAsync(); manifest.Should().NotBeNull(); manifest!.Id.Should().Be($"http://localhost/1/manifests/{id}"); + manifest.Pipeline.Should().ContainSingle(p => p.Name == "TextService" && p.Status == "Queued"); } [Fact] @@ -514,6 +515,8 @@ await amazonS3.PutObjectAsync(new() // Assert response.StatusCode.Should().Be(HttpStatusCode.OK); response.Headers.Should().ContainKey(HeaderNames.ETag, "pipeline is complete so manifest is final"); + var manifest = await response.ReadAsPresentationJsonAsync(); + manifest!.Pipeline.Should().ContainSingle(p => p.Name == "TextService" && p.Status == "Completed"); } [Fact] diff --git a/src/IIIFPresentation/API/Converters/ManifestConverter.cs b/src/IIIFPresentation/API/Converters/ManifestConverter.cs index 44289570..610da137 100644 --- a/src/IIIFPresentation/API/Converters/ManifestConverter.cs +++ b/src/IIIFPresentation/API/Converters/ManifestConverter.cs @@ -67,6 +67,16 @@ public static PresentationManifest SetGeneratedFields(this PresentationManifest foreach (var adjunct in iiifManifest.Adjuncts!) adjunct.Remove(AssetProperties.Asset); } + if (!dbManifest.PipelineJobs.IsNullOrEmpty()) + { + iiifManifest.Pipeline = dbManifest.PipelineJobs! 
+ .GroupBy(j => j.JobType) + // get the last created version of each pipeline item - we don't care about history + .Select(g => g.OrderByDescending(j => j.Created).First() + .ToPipelineItem()) + .ToList(); + } + iiifManifest.EnsurePresentation3Context(); iiifManifest.EnsureContext(PresentationJsonLdContext.Context); diff --git a/src/IIIFPresentation/API/Features/Manifest/ManifestWriteService.cs b/src/IIIFPresentation/API/Features/Manifest/ManifestWriteService.cs index c2a64311..e65ac947 100644 --- a/src/IIIFPresentation/API/Features/Manifest/ManifestWriteService.cs +++ b/src/IIIFPresentation/API/Features/Manifest/ManifestWriteService.cs @@ -188,16 +188,21 @@ private async Task CreateInternal(WriteManifestRequest request cancellationToken: cancellationToken); if (dlcsResult.Error != null) return dlcsResult.Error; + await using var transaction = await dbContext.Database.BeginTransactionAsync(cancellationToken); + var (error, dbManifest) = await CreateDatabaseRecord(request, resolved.ParsedParentSlug!, dlcsResult.InteractionResult!.SpaceId, dlcsResult.CanvasPaintings, cancellationToken); if (error != null) return error; - var writeResult = dlcsResult.InteractionResult!.CanBeBuiltUpfront && !request.PresentationManifest.HasTextIndexPipeline() + var writeResult = dlcsResult.InteractionResult!.CanBeBuiltUpfront && !request.PresentationManifest.HasPipelineJob() ? 
WriteResult.Created : WriteResult.Accepted; - return await SaveToS3AndGenerateResult(request, dbManifest!, dlcsResult.InteractionResult!, writeResult, + var createResult = await SaveToS3AndGenerateResult(request, dbManifest!, dlcsResult.InteractionResult!, writeResult, cancellationToken); + + if (createResult.IsSuccess) await transaction.CommitAsync(cancellationToken); + return createResult; } } @@ -222,15 +227,20 @@ private async Task UpdateInternal(UpsertManifestRequest reques var dlcsResult = await HandleDlcsInteractions(request, existingManifest.Id, resolved.ParsedManifestResult!, existingAssetIds, existingManifest, cancellationToken); if (dlcsResult.Error != null) return dlcsResult.Error; + await using var updateTx = await dbContext.Database.BeginTransactionAsync(cancellationToken); + var (error, dbManifest) = await UpdateDatabaseRecord(request, resolved.ParsedParentSlug!, existingManifest, dlcsResult.InteractionResult!.SpaceId, cancellationToken); if (error != null) return error; - var writeResult = dlcsResult.InteractionResult!.CanBeBuiltUpfront && !request.PresentationManifest.HasTextIndexPipeline() + var writeResult = dlcsResult.InteractionResult!.CanBeBuiltUpfront && !request.PresentationManifest.HasPipelineJob() ? 
WriteResult.Updated : WriteResult.Accepted; - return await SaveToS3AndGenerateResult(request, dbManifest!, dlcsResult.InteractionResult!, writeResult, + var updateResult = await SaveToS3AndGenerateResult(request, dbManifest!, dlcsResult.InteractionResult!, writeResult, cancellationToken); + + if (updateResult.IsSuccess) await updateTx.CommitAsync(cancellationToken); + return updateResult; } } @@ -312,7 +322,8 @@ private async Task ResolveCanvasPaintingsAndParentSlug(Wri private async Task SaveToS3AndGenerateResult(WriteManifestRequest request, DbManifest dbManifest, DlcsInteractionResult dlcsInteractionResult, WriteResult writeResult, CancellationToken cancellationToken) { - await SaveToS3(dbManifest, request, dlcsInteractionResult.CanBeBuiltUpfront, cancellationToken); + var saveError = await SaveToS3(dbManifest, request, dlcsInteractionResult.CanBeBuiltUpfront, cancellationToken); + if (saveError != null) return saveError; return await GeneratePresentationSuccessResult(request.PresentationManifest, request.CustomerId, dbManifest, writeResult, cancellationToken); } @@ -425,14 +436,14 @@ await ManifestRetrieval.RetrieveFullPathForManifest(dbManifest.Id, dbManifest.Cu /// This is relevant for painted resources + resource level adjuncts /// /// A cancellation token - private async Task SaveToS3(DbManifest dbManifest, WriteManifestRequest request, bool canBeBuiltUpfront, + private async Task SaveToS3(DbManifest dbManifest, WriteManifestRequest request, bool canBeBuiltUpfront, CancellationToken cancellationToken) { var iiifManifest = request.RawRequestBody.ToManifest()!; var hasAssets = request.PresentationManifest.PaintedResources.HasAsset(); var hasAdjuncts = request.PresentationManifest.Adjuncts != null || dbManifest.Batches?.Any(b => b.DeliverableType == DeliverableType.Adjunct) == true; - var hasPipeline = request.PresentationManifest.HasTextIndexPipeline(); + var hasPipeline = request.PresentationManifest.HasPipelineJob(); // When there is further work to do the 
JSON saved to S3 differs substantially from the original payload, // and we will want to store it. Otherwise, we'll pass null not to store the raw request. @@ -473,15 +484,20 @@ await manifestStorageManager.SaveManifestInStorage(iiifManifest, dbManifest, ori } } - if (hasPipeline) + if (request.PresentationManifest.HasTextIndexPipeline()) { - await SubmitTextPipelineJob(dbManifest, cancellationToken); + if (!await SubmitTextPipelineJob(dbManifest, cancellationToken)) + { + return PresUpdateResult.Failure("Failed to submit text-services job", + ModifyCollectionType.Unknown, WriteResult.Error); + } } await dbContext.SaveChangesAsync(cancellationToken); + return null; } - private async Task SubmitTextPipelineJob(DbManifest dbManifest, CancellationToken cancellationToken) + private async Task SubmitTextPipelineJob(DbManifest dbManifest, CancellationToken cancellationToken) { var existing = await dbContext.PipelineJobs .FirstOrDefaultAsync(p => p.ResourceId == dbManifest.Id && p.ResourceType == ResourceType.IIIFManifest @@ -499,20 +515,20 @@ private async Task SubmitTextPipelineJob(DbManifest dbManifest, CancellationToke var submitted = await textServicesClient.CreateOrUpdateJob(job, awsOptions.Value.S3.StorageBucket, dbManifest.GetResourceBucketKey(BucketLocationType.Staging), cancellationToken); - if (submitted) - { - job.Status = PipelineJobStatus.Queued; - job.Error = null; - job.Finished = null; - } - else + if (!submitted) { - job.Status = PipelineJobStatus.Failed; - job.Error = "Failed to submit job to text-services builder"; - job.Finished = DateTime.UtcNow; + logger.LogError("Failed to submit text-services job for manifest {ManifestId}", dbManifest.Id); + return false; } + job.Status = PipelineJobStatus.Queued; + job.Error = null; + job.Finished = null; + if (existing == null) await dbContext.PipelineJobs.AddAsync(job, cancellationToken); + + dbManifest.PipelineJobs = [job]; + return true; } /// diff --git a/src/IIIFPresentation/API/appsettings.Example.json 
b/src/IIIFPresentation/API/appsettings.Example.json index 7bd56234..5781275b 100644 --- a/src/IIIFPresentation/API/appsettings.Example.json +++ b/src/IIIFPresentation/API/appsettings.Example.json @@ -50,5 +50,9 @@ } } } + }, + "TextServices" : { + "BuilderApiUri" : "https://text-builder.example.com", + "SearchApiUri" : "https://search.example.com" } } diff --git a/src/IIIFPresentation/BackgroundHandler.Tests/TextCompletion/TextServiceJobCompletionMessageTests.cs b/src/IIIFPresentation/BackgroundHandler.Tests/TextCompletion/TextServiceJobCompletionMessageTests.cs index db3cafb0..1e8a8414 100644 --- a/src/IIIFPresentation/BackgroundHandler.Tests/TextCompletion/TextServiceJobCompletionMessageTests.cs +++ b/src/IIIFPresentation/BackgroundHandler.Tests/TextCompletion/TextServiceJobCompletionMessageTests.cs @@ -19,7 +19,7 @@ public void FromQueueMessage_DeserializesBodyProperties() var result = TextServiceJobCompletionMessage.FromQueueMessage(ValidMessage()); result.JobId.Should().Be("1/iiif/manifest-1"); - result.Status.Should().Be("Completed"); + result.Status.Should().Be(TextServiceJobStatus.Completed); result.Finished.Should().Be(new DateTimeOffset(2024, 6, 12, 10, 0, 0, TimeSpan.Zero)); result.TotalPages.Should().Be(5); result.TotalWordCount.Should().Be(1200); @@ -38,28 +38,21 @@ public void FromQueueMessage_DeserializesErrors_WhenPresent() result.Errors.Should().Be("OCR failed on page 3"); } - [Theory] - [InlineData("Completed")] - [InlineData("completed")] - [InlineData("COMPLETED")] - public void IsCompleted_ReturnsTrue_CaseInsensitive(string status) + [Fact] + public void IsCompleted_ReturnsTrue_WhenStatusIsCompleted() { var message = new QueueMessage( - $$"""{"jobId":"1/iiif/x","status":"{{status}}","finished":null,"totalPages":0,"totalWordCount":0,"errors":null}""", + """{"jobId":"1/iiif/x","status":"Completed","finished":null,"totalPages":0,"totalWordCount":0,"errors":null}""", new Dictionary(), "msg"); 
TextServiceJobCompletionMessage.FromQueueMessage(message).IsCompleted.Should().BeTrue(); } - [Theory] - [InlineData("Failed")] - [InlineData("failed")] - [InlineData("FAILED")] - [InlineData("unknown")] - public void IsCompleted_ReturnsFalse_WhenStatusIsNotCompleted(string status) + [Fact] + public void IsCompleted_ReturnsFalse_WhenStatusIsFailed() { var message = new QueueMessage( - $$"""{"jobId":"1/iiif/x","status":"{{status}}","finished":null,"totalPages":0,"totalWordCount":0,"errors":null}""", + """{"jobId":"1/iiif/x","status":"Failed","finished":null,"totalPages":0,"totalWordCount":0,"errors":null}""", new Dictionary(), "msg"); TextServiceJobCompletionMessage.FromQueueMessage(message).IsCompleted.Should().BeFalse(); @@ -74,4 +67,4 @@ public void FromQueueMessage_Throws_WhenBodyIsInvalidJson() act.Should().Throw(); } -} \ No newline at end of file +} diff --git a/src/IIIFPresentation/BackgroundHandler/TextCompletion/TextServiceJobCompletionMessage.cs b/src/IIIFPresentation/BackgroundHandler/TextCompletion/TextServiceJobCompletionMessage.cs index cf68f4f5..8e47fac2 100644 --- a/src/IIIFPresentation/BackgroundHandler/TextCompletion/TextServiceJobCompletionMessage.cs +++ b/src/IIIFPresentation/BackgroundHandler/TextCompletion/TextServiceJobCompletionMessage.cs @@ -4,13 +4,22 @@ namespace BackgroundHandler.TextCompletion; +[JsonConverter(typeof(JsonStringEnumConverter))] +public enum TextServiceJobStatus +{ + Waiting = 0, + Running = 1, + Completed = 2, + Failed = 3 +} + /// /// Represents a job-completion notification from text-services, matching JobCompletionNotification. /// [method: JsonConstructor] public class TextServiceJobCompletionMessage( string jobId, - string status, + TextServiceJobStatus status, DateTimeOffset? 
finished, int totalPages, int totalWordCount, @@ -20,8 +29,7 @@ public class TextServiceJobCompletionMessage( public string JobId { get; } = jobId; - /// "Completed" or "Failed" - public string Status { get; } = status; + public TextServiceJobStatus Status { get; } = status; public DateTimeOffset? Finished { get; } = finished; @@ -31,9 +39,9 @@ public class TextServiceJobCompletionMessage( public string? Errors { get; } = errors; - public bool IsCompleted => string.Equals(Status, "Completed", StringComparison.OrdinalIgnoreCase); + public bool IsCompleted => Status == TextServiceJobStatus.Completed; public static TextServiceJobCompletionMessage FromQueueMessage(QueueMessage message) => JsonSerializer.Deserialize(message.Body, JsonSerializerOptions) ?? throw new JsonException("Deserialized TextServiceJobCompletionMessage was null"); -} \ No newline at end of file +} diff --git a/src/IIIFPresentation/BackgroundHandler/appsettings.Example.json b/src/IIIFPresentation/BackgroundHandler/appsettings.Example.json index 1ecbe636..3ae50fd6 100644 --- a/src/IIIFPresentation/BackgroundHandler/appsettings.Example.json +++ b/src/IIIFPresentation/BackgroundHandler/appsettings.Example.json @@ -6,7 +6,10 @@ "StorageBucket": "presentation-bucket" }, "SQS": { - "BatchCompletionQueueName": "batch-completion" + "CustomerCreatedQueueName": "customer-creation", + "BatchCompletionQueueName": "batch-completion", + "AdjunctBatchCompletionQueueName": "adjunctbatch-completion", + "TextJobQueueName": "text-services-job" } }, "ConnectionStrings": { diff --git a/src/IIIFPresentation/Models/API/Manifest/Pipeline.cs b/src/IIIFPresentation/Models/API/Manifest/Pipeline.cs index fd1e5346..e2324d59 100644 --- a/src/IIIFPresentation/Models/API/Manifest/Pipeline.cs +++ b/src/IIIFPresentation/Models/API/Manifest/Pipeline.cs @@ -1,9 +1,14 @@ +using Models.Database.General; +using Newtonsoft.Json; + namespace Models.API.Manifest; public class PipelineItem { public string? 
Name { get; set; } public PipelineConfig? Config { get; set; } + [JsonProperty(NullValueHandling = NullValueHandling.Ignore)] + public string? Status { get; set; } } public class PipelineConfig @@ -13,8 +18,18 @@ public class PipelineConfig public static class PipelineX { + public static bool HasPipelineJob(this PresentationManifest manifest) => + manifest.Pipeline?.Count > 0; + public static bool HasTextIndexPipeline(this PresentationManifest manifest) => manifest.Pipeline?.Any(p => string.Equals(p.Name, "text", StringComparison.OrdinalIgnoreCase) && string.Equals(p.Config?.Action, "Index", StringComparison.OrdinalIgnoreCase)) == true; -} \ No newline at end of file + + public static PipelineItem ToPipelineItem(this PipelineJob job) => new PipelineItem() + { + Name = job.JobType.ToString(), + Config = new PipelineConfig { Action = "Index" }, + Status = job.Status.ToString() + }; +} diff --git a/src/IIIFPresentation/Services.Tests/Manifests/PipelineXTests.cs b/src/IIIFPresentation/Services.Tests/Manifests/PipelineXTests.cs index 778a75c8..80a803e0 100644 --- a/src/IIIFPresentation/Services.Tests/Manifests/PipelineXTests.cs +++ b/src/IIIFPresentation/Services.Tests/Manifests/PipelineXTests.cs @@ -1,9 +1,38 @@ using Models.API.Manifest; +using Models.Database.General; namespace Services.Tests.Manifests; public class PipelineXTests { + [Fact] + public void HasPipelineJob_ReturnsFalse_WhenPipelineIsNull() + { + var manifest = new PresentationManifest { Pipeline = null }; + + manifest.HasPipelineJob().Should().BeFalse(); + } + + [Fact] + public void HasPipelineJob_ReturnsFalse_WhenPipelineIsEmpty() + { + var manifest = new PresentationManifest { Pipeline = [] }; + + manifest.HasPipelineJob().Should().BeFalse(); + } + + [Fact] + public void HasPipelineJob_ReturnsTrue_WhenPipelineHasAnyItem() + { + var manifest = new PresentationManifest + { + Pipeline = [new PipelineItem { Name = "any", Config = new PipelineConfig { Action = "Do" } }] + }; + + 
manifest.HasPipelineJob().Should().BeTrue(); + } + + [Fact] public void HasTextIndexPipeline_ReturnsFalse_WhenPipelineIsNull() { @@ -81,4 +110,24 @@ public void HasTextIndexPipeline_ReturnsTrue_WhenOneOfMultipleItemsMatches() manifest.HasTextIndexPipeline().Should().BeTrue(); } + + [Theory] + [InlineData(PipelineJobStatus.Queued, "Queued")] + [InlineData(PipelineJobStatus.Completed, "Completed")] + [InlineData(PipelineJobStatus.Failed, "Failed")] + public void ToPipelineItem_SetsStatusFromJob(PipelineJobStatus status, string expectedStatus) + { + var job = new PipelineJob + { + ResourceId = "id", CustomerId = 1, + JobType = PipelineJobType.TextService, + Status = status + }; + + var result = job.ToPipelineItem(); + + result.Name.Should().Be("TextService"); + result.Config!.Action.Should().Be("Index"); + result.Status.Should().Be(expectedStatus); + } } \ No newline at end of file From a07cc9f83f16c8006e95f6c1b5c26f1e5b018f90 Mon Sep 17 00:00:00 2001 From: "jack.lewis" Date: Fri, 19 Jun 2026 16:56:42 +0100 Subject: [PATCH 05/18] Add PipelineConfig to the database --- .../Manifest/ManifestWriteServiceTests.cs | 1 + .../Integration/ModifyManifestCreateTests.cs | 6 +- .../Features/Manifest/ManifestWriteService.cs | 11 +- .../Models/API/Manifest/Pipeline.cs | 4 +- .../Models/Database/General/PipelineJob.cs | 4 + .../Converters/PipelineConfigConverter.cs | 15 + ...9120000_AddConfigToPipelineJob.Designer.cs | 483 ++++++++++++++++++ .../20260619120000_AddConfigToPipelineJob.cs | 28 + .../Repository/PresentationContext.cs | 6 +- .../Manifests/PipelineXTests.cs | 19 +- 10 files changed, 570 insertions(+), 7 deletions(-) create mode 100644 src/IIIFPresentation/Repository/Converters/PipelineConfigConverter.cs create mode 100644 src/IIIFPresentation/Repository/Migrations/20260619120000_AddConfigToPipelineJob.Designer.cs create mode 100644 src/IIIFPresentation/Repository/Migrations/20260619120000_AddConfigToPipelineJob.cs diff --git 
a/src/IIIFPresentation/API.Tests/Features/Manifest/ManifestWriteServiceTests.cs b/src/IIIFPresentation/API.Tests/Features/Manifest/ManifestWriteServiceTests.cs index bdea09da..2b5d87d7 100644 --- a/src/IIIFPresentation/API.Tests/Features/Manifest/ManifestWriteServiceTests.cs +++ b/src/IIIFPresentation/API.Tests/Features/Manifest/ManifestWriteServiceTests.cs @@ -1169,6 +1169,7 @@ public async Task Create_CallsTextServicesAndCreatesPipelineJob_WhenManifestHasP var pipelineJob = presentationContext.PipelineJobs.FirstOrDefault(p => p.ResourceId == flatId); pipelineJob.Should().NotBeNull(); pipelineJob!.Status.Should().Be(PipelineJobStatus.Queued); + pipelineJob.Config!.Action.Should().Be("Index"); pipelineJob.GetJobId().Should().Be($"{Customer}/iiif/{flatId}"); result.Entity.Pipeline.Should().ContainSingle(p => p.Name == "TextService" && p.Status == "Queued"); } diff --git a/src/IIIFPresentation/API.Tests/Integration/ModifyManifestCreateTests.cs b/src/IIIFPresentation/API.Tests/Integration/ModifyManifestCreateTests.cs index 52cde00d..3cc27171 100644 --- a/src/IIIFPresentation/API.Tests/Integration/ModifyManifestCreateTests.cs +++ b/src/IIIFPresentation/API.Tests/Integration/ModifyManifestCreateTests.cs @@ -22,6 +22,7 @@ using Test.Helpers; using Test.Helpers.Helpers; using Test.Helpers.Integration; +using Services.TextServices; using Batch = DLCS.Models.Batch; using Collection = Models.Database.Collections.Collection; using Manifest = Models.Database.Collections.Manifest; @@ -36,6 +37,7 @@ public class ModifyManifestCreateTests : IClassFixture(); + private static readonly ITextServicesClient TextServicesClient = A.Fake(); private const int Customer = 1; private const int ExampleCustomer = 601; private const int InvalidSpaceCustomer = 34512; @@ -49,10 +51,12 @@ public ModifyManifestCreateTests(StorageFixture storageFixture, PresentationAppF .Returns(new Space { Id = NewlyCreatedSpace, Name = "test" }); A.CallTo(() => DLCSApiClient.CreateSpace(InvalidSpaceCustomer, A._, 
A._)) .ThrowsAsync(new DlcsException("Error creating DLCS space", HttpStatusCode.BadRequest)); + A.CallTo(() => TextServicesClient.CreateOrUpdateJob(A._, A._, A._, A._)) + .Returns(true); httpClient = factory .ConfigureBasicIntegrationTestHttpClient(storageFixture.DbFixture, appFactory => appFactory.WithLocalStack(storageFixture.LocalStackFixture), - services => services.AddSingleton(DLCSApiClient) + services => services.AddSingleton(DLCSApiClient).AddSingleton(TextServicesClient) ); storageFixture.DbFixture.CleanUp(); diff --git a/src/IIIFPresentation/API/Features/Manifest/ManifestWriteService.cs b/src/IIIFPresentation/API/Features/Manifest/ManifestWriteService.cs index e65ac947..218b6d99 100644 --- a/src/IIIFPresentation/API/Features/Manifest/ManifestWriteService.cs +++ b/src/IIIFPresentation/API/Features/Manifest/ManifestWriteService.cs @@ -486,7 +486,12 @@ await manifestStorageManager.SaveManifestInStorage(iiifManifest, dbManifest, ori if (request.PresentationManifest.HasTextIndexPipeline()) { - if (!await SubmitTextPipelineJob(dbManifest, cancellationToken)) + var pipelineConfig = request.PresentationManifest.Pipeline! + .First(p => string.Equals(p.Name, "text", StringComparison.OrdinalIgnoreCase) + && string.Equals(p.Config?.Action, "Index", StringComparison.OrdinalIgnoreCase)) + .Config; + + if (!await SubmitTextPipelineJob(dbManifest, pipelineConfig, cancellationToken)) { return PresUpdateResult.Failure("Failed to submit text-services job", ModifyCollectionType.Unknown, WriteResult.Error); @@ -497,7 +502,8 @@ await manifestStorageManager.SaveManifestInStorage(iiifManifest, dbManifest, ori return null; } - private async Task SubmitTextPipelineJob(DbManifest dbManifest, CancellationToken cancellationToken) + private async Task SubmitTextPipelineJob(DbManifest dbManifest, PipelineConfig? 
config, + CancellationToken cancellationToken) { var existing = await dbContext.PipelineJobs .FirstOrDefaultAsync(p => p.ResourceId == dbManifest.Id && p.ResourceType == ResourceType.IIIFManifest @@ -522,6 +528,7 @@ private async Task SubmitTextPipelineJob(DbManifest dbManifest, Cancellati } job.Status = PipelineJobStatus.Queued; + job.Config = config; job.Error = null; job.Finished = null; diff --git a/src/IIIFPresentation/Models/API/Manifest/Pipeline.cs b/src/IIIFPresentation/Models/API/Manifest/Pipeline.cs index e2324d59..ab9f243a 100644 --- a/src/IIIFPresentation/Models/API/Manifest/Pipeline.cs +++ b/src/IIIFPresentation/Models/API/Manifest/Pipeline.cs @@ -26,10 +26,10 @@ public static bool HasTextIndexPipeline(this PresentationManifest manifest) => string.Equals(p.Name, "text", StringComparison.OrdinalIgnoreCase) && string.Equals(p.Config?.Action, "Index", StringComparison.OrdinalIgnoreCase)) == true; - public static PipelineItem ToPipelineItem(this PipelineJob job) => new PipelineItem() + public static PipelineItem ToPipelineItem(this PipelineJob job) => new () { Name = job.JobType.ToString(), - Config = new PipelineConfig { Action = "Index" }, + Config = job.Config, Status = job.Status.ToString() }; } diff --git a/src/IIIFPresentation/Models/Database/General/PipelineJob.cs b/src/IIIFPresentation/Models/Database/General/PipelineJob.cs index 5be46931..25d0439a 100644 --- a/src/IIIFPresentation/Models/Database/General/PipelineJob.cs +++ b/src/IIIFPresentation/Models/Database/General/PipelineJob.cs @@ -1,3 +1,5 @@ +using Models.API.Manifest; + namespace Models.Database.General; public class PipelineJob : ICustomerEntity @@ -19,6 +21,8 @@ public class PipelineJob : ICustomerEntity public DateTime Created { get; set; } public DateTime? Finished { get; set; } + + public PipelineConfig? 
Config { get; set; } } public enum PipelineJobStatus diff --git a/src/IIIFPresentation/Repository/Converters/PipelineConfigConverter.cs b/src/IIIFPresentation/Repository/Converters/PipelineConfigConverter.cs new file mode 100644 index 00000000..0487f809 --- /dev/null +++ b/src/IIIFPresentation/Repository/Converters/PipelineConfigConverter.cs @@ -0,0 +1,15 @@ +using Microsoft.EntityFrameworkCore.Storage.ValueConversion; +using Models.API.Manifest; +using Newtonsoft.Json; + +namespace Repository.Converters; + +public class PipelineConfigConverter : ValueConverter +{ + public PipelineConfigConverter() + : base( + v => JsonConvert.SerializeObject(v), + v => JsonConvert.DeserializeObject(v)!) + { + } +} \ No newline at end of file diff --git a/src/IIIFPresentation/Repository/Migrations/20260619120000_AddConfigToPipelineJob.Designer.cs b/src/IIIFPresentation/Repository/Migrations/20260619120000_AddConfigToPipelineJob.Designer.cs new file mode 100644 index 00000000..c35f175b --- /dev/null +++ b/src/IIIFPresentation/Repository/Migrations/20260619120000_AddConfigToPipelineJob.Designer.cs @@ -0,0 +1,483 @@ +// +using System; +using Microsoft.EntityFrameworkCore; +using Microsoft.EntityFrameworkCore.Infrastructure; +using Microsoft.EntityFrameworkCore.Migrations; +using Microsoft.EntityFrameworkCore.Storage.ValueConversion; +using Npgsql.EntityFrameworkCore.PostgreSQL.Metadata; +using Repository; + +#nullable disable + +namespace Repository.Migrations +{ + [DbContext(typeof(PresentationContext))] + [Migration("20260619120000_AddConfigToPipelineJob")] + partial class AddConfigToPipelineJob + { + /// + protected override void BuildTargetModel(ModelBuilder modelBuilder) + { +#pragma warning disable 612, 618 + modelBuilder + .HasAnnotation("ProductVersion", "8.0.11") + .HasAnnotation("Relational:MaxIdentifierLength", 63); + + NpgsqlModelBuilderExtensions.HasPostgresExtension(modelBuilder, "citext"); + NpgsqlModelBuilderExtensions.UseIdentityByDefaultColumns(modelBuilder); + + 
modelBuilder.Entity("Models.Database.CanvasPainting", b => + { + b.Property("CanvasPaintingId") + .ValueGeneratedOnAdd() + .HasColumnType("integer") + .HasColumnName("canvas_painting_id"); + + NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property("CanvasPaintingId")); + + b.Property("AssetId") + .HasColumnType("text") + .HasColumnName("asset_id"); + + b.Property("CanvasLabel") + .HasColumnType("text") + .HasColumnName("canvas_label"); + + b.Property("CanvasOrder") + .HasColumnType("integer") + .HasColumnName("canvas_order"); + + b.Property("CanvasOriginalId") + .HasColumnType("text") + .HasColumnName("canvas_original_id"); + + b.Property("ChoiceOrder") + .HasColumnType("integer") + .HasColumnName("choice_order"); + + b.Property("Created") + .ValueGeneratedOnAdd() + .HasColumnType("timestamp with time zone") + .HasColumnName("created") + .HasDefaultValueSql("now()"); + + b.Property("CustomerId") + .HasColumnType("integer") + .HasColumnName("customer_id"); + + b.Property("Duration") + .HasColumnType("double precision") + .HasColumnName("duration"); + + b.Property("Id") + .HasColumnType("text") + .HasColumnName("canvas_id"); + + b.Property("Ingesting") + .HasColumnType("boolean") + .HasColumnName("ingesting"); + + b.Property("Label") + .HasColumnType("jsonb") + .HasColumnName("label"); + + b.Property("ManifestId") + .IsRequired() + .HasColumnType("text") + .HasColumnName("manifest_id"); + + b.Property("Modified") + .ValueGeneratedOnAdd() + .HasColumnType("timestamp with time zone") + .HasColumnName("modified") + .HasDefaultValueSql("now()"); + + b.Property("StaticHeight") + .HasColumnType("integer") + .HasColumnName("static_height"); + + b.Property("StaticWidth") + .HasColumnType("integer") + .HasColumnName("static_width"); + + b.Property("Target") + .HasColumnType("text") + .HasColumnName("target"); + + b.Property("Thumbnail") + .HasColumnType("text") + .HasColumnName("thumbnail"); + + b.HasKey("CanvasPaintingId") + .HasName("pk_canvas_paintings"); + 
+ b.HasIndex("ManifestId", "CustomerId") + .HasDatabaseName("ix_canvas_paintings_manifest_id_customer_id"); + + b.HasIndex("Id", "CustomerId", "ManifestId", "CanvasOrder", "ChoiceOrder") + .IsUnique() + .HasDatabaseName("ix_canvas_paintings_canvas_id_customer_id_manifest_id_canvas_o"); + + b.ToTable("canvas_paintings", (string)null); + }); + + modelBuilder.Entity("Models.Database.Collections.Collection", b => + { + b.Property("Id") + .HasColumnType("text") + .HasColumnName("id"); + + b.Property("CustomerId") + .HasColumnType("integer") + .HasColumnName("customer_id"); + + b.Property("Created") + .HasColumnType("timestamp with time zone") + .HasColumnName("created"); + + b.Property("CreatedBy") + .HasColumnType("text") + .HasColumnName("created_by"); + + b.Property("Etag") + .ValueGeneratedOnAddOrUpdate() + .HasColumnType("uuid") + .HasColumnName("etag") + .HasComputedColumnSql("deterministic_uuid_sha256(\"modified\", \"id\")", true); + + b.Property("IsPublic") + .HasColumnType("boolean") + .HasColumnName("is_public"); + + b.Property("IsStorageCollection") + .HasColumnType("boolean") + .HasColumnName("is_storage_collection"); + + b.Property("Label") + .HasColumnType("jsonb") + .HasColumnName("label"); + + b.Property("LockedBy") + .HasColumnType("text") + .HasColumnName("locked_by"); + + b.Property("Modified") + .HasColumnType("timestamp with time zone") + .HasColumnName("modified"); + + b.Property("ModifiedBy") + .HasColumnType("text") + .HasColumnName("modified_by"); + + b.Property("Tags") + .HasColumnType("text") + .HasColumnName("tags"); + + b.Property("Thumbnail") + .HasColumnType("text") + .HasColumnName("thumbnail"); + + b.Property("UsePath") + .HasColumnType("boolean") + .HasColumnName("use_path"); + + b.HasKey("Id", "CustomerId") + .HasName("pk_collections"); + + b.ToTable("collections", (string)null); + }); + + modelBuilder.Entity("Models.Database.Collections.Manifest", b => + { + b.Property("Id") + .HasColumnType("text") + .HasColumnName("id"); + + 
b.Property("CustomerId") + .HasColumnType("integer") + .HasColumnName("customer_id"); + + b.Property("Created") + .ValueGeneratedOnAdd() + .HasColumnType("timestamp with time zone") + .HasColumnName("created") + .HasDefaultValueSql("now()"); + + b.Property("CreatedBy") + .HasColumnType("text") + .HasColumnName("created_by"); + + b.Property("Etag") + .ValueGeneratedOnAddOrUpdate() + .HasColumnType("uuid") + .HasColumnName("etag") + .HasComputedColumnSql("deterministic_uuid_sha256(\"last_processed\", \"id\")", true); + + b.Property("Label") + .HasColumnType("text") + .HasColumnName("label"); + + b.Property("LastProcessed") + .HasColumnType("timestamp with time zone") + .HasColumnName("last_processed"); + + b.Property("Modified") + .ValueGeneratedOnAdd() + .HasColumnType("timestamp with time zone") + .HasColumnName("modified") + .HasDefaultValueSql("now()"); + + b.Property("ModifiedBy") + .HasColumnType("text") + .HasColumnName("modified_by"); + + b.Property("SpaceId") + .HasColumnType("integer") + .HasColumnName("space_id"); + + b.HasKey("Id", "CustomerId") + .HasName("pk_manifests"); + + b.ToTable("manifests", (string)null); + }); + + modelBuilder.Entity("Models.Database.General.Batch", b => + { + b.Property("Id") + .HasColumnType("integer") + .HasColumnName("id"); + + b.Property("DeliverableType") + .HasColumnType("text") + .HasColumnName("deliverable_type"); + + b.Property("CustomerId") + .HasColumnType("integer") + .HasColumnName("customer_id"); + + b.Property("Finished") + .HasColumnType("timestamp with time zone") + .HasColumnName("finished"); + + b.Property("ManifestId") + .IsRequired() + .HasColumnType("text") + .HasColumnName("manifest_id"); + + b.Property("Processed") + .HasColumnType("timestamp with time zone") + .HasColumnName("processed"); + + b.Property("Status") + .IsRequired() + .HasColumnType("text") + .HasColumnName("status"); + + b.Property("Submitted") + .HasColumnType("timestamp with time zone") + .HasColumnName("submitted"); + + b.HasKey("Id", 
"DeliverableType") + .HasName("pk_batches"); + + b.HasIndex("ManifestId", "CustomerId") + .HasDatabaseName("ix_batches_manifest_id_customer_id"); + + b.ToTable("batches", (string)null); + }); + + modelBuilder.Entity("Models.Database.General.Hierarchy", b => + { + b.Property("Id") + .ValueGeneratedOnAdd() + .HasColumnType("integer") + .HasColumnName("id"); + + NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property("Id")); + + b.Property("Canonical") + .HasColumnType("boolean") + .HasColumnName("canonical"); + + b.Property("CollectionId") + .HasColumnType("text") + .HasColumnName("collection_id"); + + b.Property("CustomerId") + .HasColumnType("integer") + .HasColumnName("customer_id"); + + b.Property("ItemsOrder") + .HasColumnType("integer") + .HasColumnName("items_order"); + + b.Property("ManifestId") + .HasColumnType("text") + .HasColumnName("manifest_id"); + + b.Property("Parent") + .HasColumnType("text") + .HasColumnName("parent"); + + b.Property("Slug") + .IsRequired() + .HasColumnType("citext") + .HasColumnName("slug"); + + b.Property("Type") + .HasColumnType("integer") + .HasColumnName("type"); + + b.HasKey("Id") + .HasName("pk_hierarchy"); + + b.HasIndex("Parent", "CustomerId") + .HasDatabaseName("ix_hierarchy_parent_customer_id"); + + b.HasIndex("CollectionId", "CustomerId", "Canonical") + .IsUnique() + .HasDatabaseName("ix_hierarchy_collection_id_customer_id_canonical") + .HasFilter("canonical is true"); + + b.HasIndex("CustomerId", "Slug", "Parent") + .IsUnique() + .HasDatabaseName("ix_hierarchy_customer_id_slug_parent"); + + b.HasIndex("ManifestId", "CustomerId", "Canonical") + .IsUnique() + .HasDatabaseName("ix_hierarchy_manifest_id_customer_id_canonical") + .HasFilter("canonical is true"); + + b.ToTable("hierarchy", null, t => + { + t.HasCheckConstraint("stop_collection_and_manifest_in_same_record", "num_nonnulls(manifest_id, collection_id) = 1"); + }); + }); + + modelBuilder.Entity("Models.Database.General.PipelineJob", b => + { + 
b.Property("Id") + .ValueGeneratedOnAdd() + .HasColumnType("integer") + .HasColumnName("id"); + + NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property("Id")); + + b.Property("Config") + .HasColumnType("jsonb") + .HasColumnName("config"); + + b.Property("Created") + .ValueGeneratedOnAdd() + .HasColumnType("timestamp with time zone") + .HasColumnName("created") + .HasDefaultValueSql("now()"); + + b.Property("CustomerId") + .HasColumnType("integer") + .HasColumnName("customer_id"); + + b.Property("Error") + .HasColumnType("text") + .HasColumnName("error"); + + b.Property("Finished") + .HasColumnType("timestamp with time zone") + .HasColumnName("finished"); + + b.Property("JobType") + .IsRequired() + .HasColumnType("text") + .HasColumnName("job_type"); + + b.Property("ResourceId") + .IsRequired() + .HasColumnType("text") + .HasColumnName("resource_id"); + + b.Property("ResourceType") + .IsRequired() + .HasColumnType("text") + .HasColumnName("resource_type"); + + b.Property("Status") + .IsRequired() + .HasColumnType("text") + .HasColumnName("status"); + + b.HasKey("Id") + .HasName("pk_pipeline_jobs"); + + b.ToTable("pipeline_jobs", (string)null); + }); + + modelBuilder.Entity("Models.Database.CanvasPainting", b => + { + b.HasOne("Models.Database.Collections.Manifest", "Manifest") + .WithMany("CanvasPaintings") + .HasForeignKey("ManifestId", "CustomerId") + .OnDelete(DeleteBehavior.Cascade) + .IsRequired() + .HasConstraintName("fk_canvas_paintings_manifests_manifest_id_customer_id"); + + b.Navigation("Manifest"); + }); + + modelBuilder.Entity("Models.Database.General.Batch", b => + { + b.HasOne("Models.Database.Collections.Manifest", "Manifest") + .WithMany("Batches") + .HasForeignKey("ManifestId", "CustomerId") + .OnDelete(DeleteBehavior.Cascade) + .IsRequired() + .HasConstraintName("fk_batches_manifests_manifest_id_customer_id"); + + b.Navigation("Manifest"); + }); + + modelBuilder.Entity("Models.Database.General.Hierarchy", b => + { + 
b.HasOne("Models.Database.Collections.Collection", "Collection") + .WithMany("Hierarchy") + .HasForeignKey("CollectionId", "CustomerId") + .OnDelete(DeleteBehavior.Cascade) + .HasConstraintName("fk_hierarchy_collections_collection_id_customer_id"); + + b.HasOne("Models.Database.Collections.Manifest", "Manifest") + .WithMany("Hierarchy") + .HasForeignKey("ManifestId", "CustomerId") + .OnDelete(DeleteBehavior.Cascade) + .HasConstraintName("fk_hierarchy_manifests_manifest_id_customer_id"); + + b.HasOne("Models.Database.Collections.Collection", "ParentCollection") + .WithMany("Children") + .HasForeignKey("Parent", "CustomerId") + .OnDelete(DeleteBehavior.NoAction) + .HasConstraintName("fk_hierarchy_collections_parent_customer_id"); + + b.Navigation("Collection"); + + b.Navigation("Manifest"); + + b.Navigation("ParentCollection"); + }); + + modelBuilder.Entity("Models.Database.Collections.Collection", b => + { + b.Navigation("Children"); + + b.Navigation("Hierarchy"); + }); + + modelBuilder.Entity("Models.Database.Collections.Manifest", b => + { + b.Navigation("Batches"); + + b.Navigation("CanvasPaintings"); + + b.Navigation("Hierarchy"); + }); +#pragma warning restore 612, 618 + } + } +} \ No newline at end of file diff --git a/src/IIIFPresentation/Repository/Migrations/20260619120000_AddConfigToPipelineJob.cs b/src/IIIFPresentation/Repository/Migrations/20260619120000_AddConfigToPipelineJob.cs new file mode 100644 index 00000000..ecede2f6 --- /dev/null +++ b/src/IIIFPresentation/Repository/Migrations/20260619120000_AddConfigToPipelineJob.cs @@ -0,0 +1,28 @@ +using Microsoft.EntityFrameworkCore.Migrations; + +#nullable disable + +namespace Repository.Migrations +{ + /// + public partial class AddConfigToPipelineJob : Migration + { + /// + protected override void Up(MigrationBuilder migrationBuilder) + { + migrationBuilder.AddColumn( + name: "config", + table: "pipeline_jobs", + type: "jsonb", + nullable: true); + } + + /// + protected override void 
Down(MigrationBuilder migrationBuilder) + { + migrationBuilder.DropColumn( + name: "config", + table: "pipeline_jobs"); + } + } +} \ No newline at end of file diff --git a/src/IIIFPresentation/Repository/PresentationContext.cs b/src/IIIFPresentation/Repository/PresentationContext.cs index 6c0ddec4..e250fff4 100644 --- a/src/IIIFPresentation/Repository/PresentationContext.cs +++ b/src/IIIFPresentation/Repository/PresentationContext.cs @@ -182,9 +182,13 @@ protected override void OnModelCreating(ModelBuilder modelBuilder) j => j.GetEnumFromString(true)); entity.Property(p => p.Created).HasDefaultValueSql("now()"); + + entity.Property(e => e.Config) + .HasConversion() + .HasColumnType("jsonb"); }); } - + private void ApplyGlobalFilters(ModelBuilder builder) { // get the method GetCustomerId from this class diff --git a/src/IIIFPresentation/Services.Tests/Manifests/PipelineXTests.cs b/src/IIIFPresentation/Services.Tests/Manifests/PipelineXTests.cs index 80a803e0..720bc470 100644 --- a/src/IIIFPresentation/Services.Tests/Manifests/PipelineXTests.cs +++ b/src/IIIFPresentation/Services.Tests/Manifests/PipelineXTests.cs @@ -121,7 +121,8 @@ public void ToPipelineItem_SetsStatusFromJob(PipelineJobStatus status, string ex { ResourceId = "id", CustomerId = 1, JobType = PipelineJobType.TextService, - Status = status + Status = status, + Config = new PipelineConfig { Action = "Index" } }; var result = job.ToPipelineItem(); @@ -130,4 +131,20 @@ public void ToPipelineItem_SetsStatusFromJob(PipelineJobStatus status, string ex result.Config!.Action.Should().Be("Index"); result.Status.Should().Be(expectedStatus); } + + [Fact] + public void ToPipelineItem_SetsNullConfig_WhenJobConfigIsNull() + { + var job = new PipelineJob + { + ResourceId = "id", CustomerId = 1, + JobType = PipelineJobType.TextService, + Status = PipelineJobStatus.Queued, + Config = null + }; + + var result = job.ToPipelineItem(); + + result.Config.Should().BeNull(); + } } \ No newline at end of file From 
fe76a93d3679d427ecf0ebfe0c69490f3b3ad3c0 Mon Sep 17 00:00:00 2001 From: "jack.lewis" Date: Fri, 19 Jun 2026 17:26:51 +0100 Subject: [PATCH 06/18] make PipelineJob append only --- .../Manifest/ManifestWriteServiceTests.cs | 14 +++++--------- .../Features/Manifest/ManifestWriteService.cs | 16 ++++------------ ...tServiceJobCompletionMessageHandlerTests.cs | 2 +- .../TextServiceJobCompletionMessageTests.cs | 3 ++- .../TextServiceJobCompletionMessage.cs | 18 ++++-------------- .../TextServiceJobCompletionMessageHandler.cs | 5 +++-- .../Models/Database/Collections/Manifest.cs | 2 +- .../Models/Database/General/PipelineJob.cs | 9 +++++---- .../Services.Tests/Manifests/PipelineXTests.cs | 4 ++-- 9 files changed, 27 insertions(+), 46 deletions(-) diff --git a/src/IIIFPresentation/API.Tests/Features/Manifest/ManifestWriteServiceTests.cs b/src/IIIFPresentation/API.Tests/Features/Manifest/ManifestWriteServiceTests.cs index 2b5d87d7..aa43c50a 100644 --- a/src/IIIFPresentation/API.Tests/Features/Manifest/ManifestWriteServiceTests.cs +++ b/src/IIIFPresentation/API.Tests/Features/Manifest/ManifestWriteServiceTests.cs @@ -1242,9 +1242,9 @@ public async Task Create_SavesManifestToStaging_WhenPipelineIsSet() } [Fact] - public async Task Create_ResetsPipelineJob_WhenJobAlreadyExistsForManifest() + public async Task Create_AddsNewPipelineJob_WhenJobAlreadyExistsForManifest() { - // Arrange - simulate resubmit by seeding an existing completed PipelineJob + // Arrange var (slug, resourceId) = TestIdentifiers.SlugResource(); // First create @@ -1257,11 +1257,7 @@ public async Task Create_ResetsPipelineJob_WhenJobAlreadyExistsForManifest() var firstResult = await sut.Create(request, CancellationToken.None); var flatId = firstResult.Entity.FlatId; - // Manually mark the job as completed (simulating a prior run) - var existingJob = presentationContext.PipelineJobs.First(p => p.ResourceId == flatId); - presentationContext.Entry(existingJob).State = 
Microsoft.EntityFrameworkCore.EntityState.Detached; - - // Second create (update path) — use the existing slug/manifest with pipeline again + // Second create (update path) — resubmit the same manifest with pipeline var updateManifest = new PresentationManifest { Slug = slug, @@ -1279,7 +1275,7 @@ public async Task Create_ResetsPipelineJob_WhenJobAlreadyExistsForManifest() .MustHaveHappenedTwiceExactly(); var jobs = presentationContext.PipelineJobs.Where(p => p.ResourceId == flatId).ToList(); - jobs.Should().HaveCount(1, "resubmit should reset existing job, not create a second one"); - jobs[0].Status.Should().Be(PipelineJobStatus.Queued); + jobs.Should().HaveCount(2, "each resubmission creates a new job record for history"); + jobs.Should().AllSatisfy(j => j.Status.Should().Be(PipelineJobStatus.Queued)); } } diff --git a/src/IIIFPresentation/API/Features/Manifest/ManifestWriteService.cs b/src/IIIFPresentation/API/Features/Manifest/ManifestWriteService.cs index 218b6d99..fc3de68f 100644 --- a/src/IIIFPresentation/API/Features/Manifest/ManifestWriteService.cs +++ b/src/IIIFPresentation/API/Features/Manifest/ManifestWriteService.cs @@ -505,16 +505,14 @@ await manifestStorageManager.SaveManifestInStorage(iiifManifest, dbManifest, ori private async Task SubmitTextPipelineJob(DbManifest dbManifest, PipelineConfig? config, CancellationToken cancellationToken) { - var existing = await dbContext.PipelineJobs - .FirstOrDefaultAsync(p => p.ResourceId == dbManifest.Id && p.ResourceType == ResourceType.IIIFManifest - && p.CustomerId == dbManifest.CustomerId, cancellationToken); - - var job = existing ?? 
new PipelineJob + var job = new PipelineJob { ResourceId = dbManifest.Id, ResourceType = ResourceType.IIIFManifest, JobType = PipelineJobType.TextService, CustomerId = dbManifest.CustomerId, + Status = PipelineJobStatus.Waiting, + Config = config, Created = DateTime.UtcNow }; @@ -527,13 +525,7 @@ private async Task SubmitTextPipelineJob(DbManifest dbManifest, PipelineCo return false; } - job.Status = PipelineJobStatus.Queued; - job.Config = config; - job.Error = null; - job.Finished = null; - - if (existing == null) await dbContext.PipelineJobs.AddAsync(job, cancellationToken); - + await dbContext.PipelineJobs.AddAsync(job, cancellationToken); dbManifest.PipelineJobs = [job]; return true; } diff --git a/src/IIIFPresentation/BackgroundHandler.Tests/TextCompletion/TextServiceJobCompletionMessageHandlerTests.cs b/src/IIIFPresentation/BackgroundHandler.Tests/TextCompletion/TextServiceJobCompletionMessageHandlerTests.cs index 1f152e23..b893d210 100644 --- a/src/IIIFPresentation/BackgroundHandler.Tests/TextCompletion/TextServiceJobCompletionMessageHandlerTests.cs +++ b/src/IIIFPresentation/BackgroundHandler.Tests/TextCompletion/TextServiceJobCompletionMessageHandlerTests.cs @@ -324,7 +324,7 @@ await dbContext.PipelineJobs.AddAsync(new PipelineJob JobType = PipelineJobType.TextService, CustomerId = manifest.CustomerId, - Status = PipelineJobStatus.Queued, + Status = PipelineJobStatus.Waiting, Created = DateTime.UtcNow }); await dbContext.SaveChangesAsync(); diff --git a/src/IIIFPresentation/BackgroundHandler.Tests/TextCompletion/TextServiceJobCompletionMessageTests.cs b/src/IIIFPresentation/BackgroundHandler.Tests/TextCompletion/TextServiceJobCompletionMessageTests.cs index 1e8a8414..91c5e7d7 100644 --- a/src/IIIFPresentation/BackgroundHandler.Tests/TextCompletion/TextServiceJobCompletionMessageTests.cs +++ b/src/IIIFPresentation/BackgroundHandler.Tests/TextCompletion/TextServiceJobCompletionMessageTests.cs @@ -2,6 +2,7 @@ using AWS.SQS; using 
BackgroundHandler.TextCompletion; using FluentAssertions; +using Models.Database.General; namespace BackgroundHandler.Tests.TextCompletion; @@ -19,7 +20,7 @@ public void FromQueueMessage_DeserializesBodyProperties() var result = TextServiceJobCompletionMessage.FromQueueMessage(ValidMessage()); result.JobId.Should().Be("1/iiif/manifest-1"); - result.Status.Should().Be(TextServiceJobStatus.Completed); + result.Status.Should().Be(PipelineJobStatus.Completed); result.Finished.Should().Be(new DateTimeOffset(2024, 6, 12, 10, 0, 0, TimeSpan.Zero)); result.TotalPages.Should().Be(5); result.TotalWordCount.Should().Be(1200); diff --git a/src/IIIFPresentation/BackgroundHandler/TextCompletion/TextServiceJobCompletionMessage.cs b/src/IIIFPresentation/BackgroundHandler/TextCompletion/TextServiceJobCompletionMessage.cs index 8e47fac2..6690315c 100644 --- a/src/IIIFPresentation/BackgroundHandler/TextCompletion/TextServiceJobCompletionMessage.cs +++ b/src/IIIFPresentation/BackgroundHandler/TextCompletion/TextServiceJobCompletionMessage.cs @@ -1,25 +1,15 @@ using System.Text.Json; -using System.Text.Json.Serialization; using AWS.SQS; +using Models.Database.General; namespace BackgroundHandler.TextCompletion; -[JsonConverter(typeof(JsonStringEnumConverter))] -public enum TextServiceJobStatus -{ - Waiting = 0, - Running = 1, - Completed = 2, - Failed = 3 -} - /// /// Represents a job-completion notification from text-services, matching JobCompletionNotification. /// -[method: JsonConstructor] public class TextServiceJobCompletionMessage( string jobId, - TextServiceJobStatus status, + PipelineJobStatus status, DateTimeOffset? finished, int totalPages, int totalWordCount, @@ -29,7 +19,7 @@ public class TextServiceJobCompletionMessage( public string JobId { get; } = jobId; - public TextServiceJobStatus Status { get; } = status; + public PipelineJobStatus Status { get; } = status; public DateTimeOffset? 
Finished { get; } = finished; @@ -39,7 +29,7 @@ public class TextServiceJobCompletionMessage( public string? Errors { get; } = errors; - public bool IsCompleted => Status == TextServiceJobStatus.Completed; + public bool IsCompleted => Status == PipelineJobStatus.Completed; public static TextServiceJobCompletionMessage FromQueueMessage(QueueMessage message) => JsonSerializer.Deserialize(message.Body, JsonSerializerOptions) diff --git a/src/IIIFPresentation/BackgroundHandler/TextCompletion/TextServiceJobCompletionMessageHandler.cs b/src/IIIFPresentation/BackgroundHandler/TextCompletion/TextServiceJobCompletionMessageHandler.cs index 0e41afc2..a9d2ba15 100644 --- a/src/IIIFPresentation/BackgroundHandler/TextCompletion/TextServiceJobCompletionMessageHandler.cs +++ b/src/IIIFPresentation/BackgroundHandler/TextCompletion/TextServiceJobCompletionMessageHandler.cs @@ -54,8 +54,9 @@ private async Task TryCompleteManifest(TextServiceJobCompletionMessage com { var resourceId = ExtractResourceIdFromJobId(completionMessage.JobId); var pipelineJob = await dbContext.PipelineJobs - .SingleOrDefaultAsync(p => p.ResourceId == resourceId && p.JobType == PipelineJobType.TextService, - cancellationToken); + .Where(p => p.ResourceId == resourceId && p.JobType == PipelineJobType.TextService) + .OrderByDescending(p => p.Created) + .FirstOrDefaultAsync(cancellationToken); if (pipelineJob == null) { diff --git a/src/IIIFPresentation/Models/Database/Collections/Manifest.cs b/src/IIIFPresentation/Models/Database/Collections/Manifest.cs index 4aaca2a9..0a6e1dd0 100644 --- a/src/IIIFPresentation/Models/Database/Collections/Manifest.cs +++ b/src/IIIFPresentation/Models/Database/Collections/Manifest.cs @@ -77,7 +77,7 @@ public static bool IsIngesting(this Manifest? manifest) /// Whether a text-services pipeline job is pending for this manifest. /// public static bool HasPendingPipelineJob(this Manifest? manifest) - => manifest?.PipelineJobs?.Any(p => p.Status == PipelineJobStatus.Queued) ?? 
false; + => manifest?.PipelineJobs?.Any(p => p.Status == PipelineJobStatus.Waiting) ?? false; /// /// Whether the manifest has any outstanding background work that must complete before it reaches its final state. diff --git a/src/IIIFPresentation/Models/Database/General/PipelineJob.cs b/src/IIIFPresentation/Models/Database/General/PipelineJob.cs index 25d0439a..35763b53 100644 --- a/src/IIIFPresentation/Models/Database/General/PipelineJob.cs +++ b/src/IIIFPresentation/Models/Database/General/PipelineJob.cs @@ -27,9 +27,10 @@ public class PipelineJob : ICustomerEntity public enum PipelineJobStatus { - Queued = 0, - Completed = 1, - Failed = 2 + Waiting = 0, + Running = 1, + Completed = 2, + Failed = 3 } public enum PipelineJobType @@ -45,4 +46,4 @@ public static class PipelineJobX _ => throw new ArgumentOutOfRangeException(nameof(job.JobType), $"Unknown job type: {job.JobType}") }; -} \ No newline at end of file +} diff --git a/src/IIIFPresentation/Services.Tests/Manifests/PipelineXTests.cs b/src/IIIFPresentation/Services.Tests/Manifests/PipelineXTests.cs index 720bc470..7e77a736 100644 --- a/src/IIIFPresentation/Services.Tests/Manifests/PipelineXTests.cs +++ b/src/IIIFPresentation/Services.Tests/Manifests/PipelineXTests.cs @@ -112,7 +112,7 @@ public void HasTextIndexPipeline_ReturnsTrue_WhenOneOfMultipleItemsMatches() } [Theory] - [InlineData(PipelineJobStatus.Queued, "Queued")] + [InlineData(PipelineJobStatus.Waiting, "Waiting")] [InlineData(PipelineJobStatus.Completed, "Completed")] [InlineData(PipelineJobStatus.Failed, "Failed")] public void ToPipelineItem_SetsStatusFromJob(PipelineJobStatus status, string expectedStatus) @@ -147,4 +147,4 @@ public void ToPipelineItem_SetsNullConfig_WhenJobConfigIsNull() result.Config.Should().BeNull(); } -} \ No newline at end of file +} From 181fbe8997b0bc968a0035d941ae3b1b032185a1 Mon Sep 17 00:00:00 2001 From: "jack.lewis" Date: Fri, 19 Jun 2026 17:59:52 +0100 Subject: [PATCH 07/18] Fix tests --- 
.../Converters/ManifestConverterTests.cs | 8 +++--- .../Manifest/ManifestWriteServiceTests.cs | 6 ++-- .../API.Tests/Integration/GetManifestTests.cs | 4 +-- ...ServiceJobCompletionMessageHandlerTests.cs | 28 +++++++++---------- .../TextServiceJobCompletionMessageTests.cs | 10 +++---- .../Manifests/ManifestXTests.cs | 6 ++-- .../Manifests/PipelineXTests.cs | 2 +- 7 files changed, 32 insertions(+), 32 deletions(-) diff --git a/src/IIIFPresentation/API.Tests/Converters/ManifestConverterTests.cs b/src/IIIFPresentation/API.Tests/Converters/ManifestConverterTests.cs index c02a89fd..b2a18da8 100644 --- a/src/IIIFPresentation/API.Tests/Converters/ManifestConverterTests.cs +++ b/src/IIIFPresentation/API.Tests/Converters/ManifestConverterTests.cs @@ -326,7 +326,7 @@ public void SetGeneratedFields_SetsPipelineFromJobs_WhenPipelineJobsPresent() { ResourceId = "id", CustomerId = 1, JobType = PipelineJobType.TextService, - Status = PipelineJobStatus.Queued, + Status = PipelineJobStatus.Waiting, Created = DateTime.UtcNow } ] @@ -336,7 +336,7 @@ public void SetGeneratedFields_SetsPipelineFromJobs_WhenPipelineJobsPresent() var result = iiifManifest.SetGeneratedFields(dbManifest, pathGenerator, settingsBasedPathGenerator); // Assert - result.Pipeline.Should().ContainSingle(p => p.Name == "TextService" && p.Status == "Queued"); + result.Pipeline.Should().ContainSingle(p => p.Name == "TextService" && p.Status == "Waiting"); } [Fact] @@ -363,7 +363,7 @@ public void SetGeneratedFields_ReturnsLatestJobPerType_WhenMultipleJobsOfSameTyp { ResourceId = "id", CustomerId = 1, JobType = PipelineJobType.TextService, - Status = PipelineJobStatus.Queued, + Status = PipelineJobStatus.Waiting, Created = newer } ] @@ -373,7 +373,7 @@ public void SetGeneratedFields_ReturnsLatestJobPerType_WhenMultipleJobsOfSameTyp var result = iiifManifest.SetGeneratedFields(dbManifest, pathGenerator, settingsBasedPathGenerator); // Assert - result.Pipeline.Should().ContainSingle(p => p.Status == "Queued", "only the 
most recently created job per type should appear"); + result.Pipeline.Should().ContainSingle(p => p.Status == "Waiting", "only the most recently created job per type should appear"); } [Fact] diff --git a/src/IIIFPresentation/API.Tests/Features/Manifest/ManifestWriteServiceTests.cs b/src/IIIFPresentation/API.Tests/Features/Manifest/ManifestWriteServiceTests.cs index aa43c50a..228251da 100644 --- a/src/IIIFPresentation/API.Tests/Features/Manifest/ManifestWriteServiceTests.cs +++ b/src/IIIFPresentation/API.Tests/Features/Manifest/ManifestWriteServiceTests.cs @@ -1168,10 +1168,10 @@ public async Task Create_CallsTextServicesAndCreatesPipelineJob_WhenManifestHasP var flatId = result.Entity.FlatId; var pipelineJob = presentationContext.PipelineJobs.FirstOrDefault(p => p.ResourceId == flatId); pipelineJob.Should().NotBeNull(); - pipelineJob!.Status.Should().Be(PipelineJobStatus.Queued); + pipelineJob!.Status.Should().Be(PipelineJobStatus.Waiting); pipelineJob.Config!.Action.Should().Be("Index"); pipelineJob.GetJobId().Should().Be($"{Customer}/iiif/{flatId}"); - result.Entity.Pipeline.Should().ContainSingle(p => p.Name == "TextService" && p.Status == "Queued"); + result.Entity.Pipeline.Should().ContainSingle(p => p.Name == "TextService" && p.Status == "Waiting"); } [Fact] @@ -1276,6 +1276,6 @@ public async Task Create_AddsNewPipelineJob_WhenJobAlreadyExistsForManifest() var jobs = presentationContext.PipelineJobs.Where(p => p.ResourceId == flatId).ToList(); jobs.Should().HaveCount(2, "each resubmission creates a new job record for history"); - jobs.Should().AllSatisfy(j => j.Status.Should().Be(PipelineJobStatus.Queued)); + jobs.Should().AllSatisfy(j => j.Status.Should().Be(PipelineJobStatus.Waiting)); } } diff --git a/src/IIIFPresentation/API.Tests/Integration/GetManifestTests.cs b/src/IIIFPresentation/API.Tests/Integration/GetManifestTests.cs index 709d82c7..faaee447 100644 --- a/src/IIIFPresentation/API.Tests/Integration/GetManifestTests.cs +++ 
b/src/IIIFPresentation/API.Tests/Integration/GetManifestTests.cs @@ -456,7 +456,7 @@ await dbContext.PipelineJobs.AddAsync(new PipelineJob JobType = PipelineJobType.TextService, CustomerId = 1, - Status = PipelineJobStatus.Queued, + Status = PipelineJobStatus.Waiting, Created = DateTime.UtcNow }); await dbContext.SaveChangesAsync(); @@ -478,7 +478,7 @@ await amazonS3.PutObjectAsync(new() var manifest = await response.ReadAsPresentationJsonAsync(); manifest.Should().NotBeNull(); manifest!.Id.Should().Be($"http://localhost/1/manifests/{id}"); - manifest.Pipeline.Should().ContainSingle(p => p.Name == "TextService" && p.Status == "Queued"); + manifest.Pipeline.Should().ContainSingle(p => p.Name == "TextService" && p.Status == "Waiting"); } [Fact] diff --git a/src/IIIFPresentation/BackgroundHandler.Tests/TextCompletion/TextServiceJobCompletionMessageHandlerTests.cs b/src/IIIFPresentation/BackgroundHandler.Tests/TextCompletion/TextServiceJobCompletionMessageHandlerTests.cs index b893d210..dbe10f31 100644 --- a/src/IIIFPresentation/BackgroundHandler.Tests/TextCompletion/TextServiceJobCompletionMessageHandlerTests.cs +++ b/src/IIIFPresentation/BackgroundHandler.Tests/TextCompletion/TextServiceJobCompletionMessageHandlerTests.cs @@ -59,7 +59,7 @@ public TextServiceJobCompletionMessageHandlerTests(PresentationContextFixture db [InlineData("/iiif/resource")] public async Task HandleMessage_ReturnsTrue_WhenJobIdCannotBeParsed(string malformedJobId) { - var message = CreateMessage(malformedJobId, "Completed"); + var message = CreateMessage(malformedJobId, PipelineJobStatus.Completed); (await sut.HandleMessage(message, CancellationToken.None)).Should().BeTrue(); A.CallTo(() => iiifS3.ReadIIIFFromS3(A._, A._, A._)) @@ -71,7 +71,7 @@ public async Task HandleMessage_ReturnsTrue_WhenJobIdCannotBeParsed(string malfo [InlineData(1)] public async Task HandleMessage_ReturnsFalse_WhenPipelineJobNotFound_BelowRetryThreshold(int receiveCount) { - var message = 
CreateMessage("1/iiif/unknown-manifest", "Completed", receiveCount); + var message = CreateMessage("1/iiif/unknown-manifest", PipelineJobStatus.Completed, receiveCount); (await sut.HandleMessage(message, CancellationToken.None)).Should().BeFalse(); A.CallTo(() => iiifS3.ReadIIIFFromS3(A._, A._, A._)) @@ -83,7 +83,7 @@ public async Task HandleMessage_ReturnsFalse_WhenPipelineJobNotFound_BelowRetryT [InlineData(5)] public async Task HandleMessage_ReturnsTrue_WhenPipelineJobNotFound_AboveRetryThreshold(int receiveCount) { - var message = CreateMessage("1/iiif/unknown-manifest-discard", "Completed", receiveCount); + var message = CreateMessage("1/iiif/unknown-manifest-discard", PipelineJobStatus.Completed, receiveCount); (await sut.HandleMessage(message, CancellationToken.None)).Should().BeTrue(); A.CallTo(() => iiifS3.ReadIIIFFromS3(A._, A._, A._)) @@ -100,7 +100,7 @@ public async Task HandleMessage_ReturnsFalse_WhenStagedManifestMissing() A.CallTo(() => iiifS3.ReadIIIFFromS3(A._, BucketLocationType.Staging, A._)) .Returns((IIIFManifest?)null); - var message = CreateMessage(jobId, "Completed"); + var message = CreateMessage(jobId, PipelineJobStatus.Completed); (await sut.HandleMessage(message, CancellationToken.None)).Should().BeFalse(); A.CallTo(() => manifestStorageManager.SaveManifestInStorage(A._, A._, A._, A._, A._)) @@ -117,7 +117,7 @@ public async Task HandleMessage_UpdatesStatusToFailed_AndSavesManifest_WhenJobFa A.CallTo(() => iiifS3.ReadIIIFFromS3(A._, BucketLocationType.Staging, A._)) .Returns(new IIIFManifest { Id = manifestId }); - var message = CreateMessage(jobId, "Failed", errors: "OCR timed out"); + var message = CreateMessage(jobId, PipelineJobStatus.Failed, errors: "OCR timed out"); (await sut.HandleMessage(message, CancellationToken.None)).Should().BeTrue(); @@ -146,7 +146,7 @@ public async Task HandleMessage_UpdatesStatusToCompleted_AndSavesManifest_WhenJo A.CallTo(() => textServicesClient.GetTextAugmentedManifest(jobId, A._)) 
.Returns((IIIFManifest?)null); - var message = CreateMessage(jobId, "Completed"); + var message = CreateMessage(jobId, PipelineJobStatus.Completed); (await sut.HandleMessage(message, CancellationToken.None)).Should().BeTrue(); @@ -186,7 +186,7 @@ public async Task HandleMessage_MergesSearchServicesIntoManifest_WhenAugmentedMa .Invokes((IIIFManifest m, DbManifest _, string? _, bool _, CancellationToken _) => savedManifest = m) .Returns(Task.CompletedTask); - var message = CreateMessage(jobId, "Completed"); + var message = CreateMessage(jobId, PipelineJobStatus.Completed); (await sut.HandleMessage(message, CancellationToken.None)).Should().BeTrue(); @@ -223,7 +223,7 @@ public async Task HandleMessage_DoesNotDuplicateServices_WhenAugmentedManifestCo .Invokes((IIIFManifest m, DbManifest _, string? _, bool _, CancellationToken _) => savedManifest = m) .Returns(Task.CompletedTask); - var message = CreateMessage(jobId, "Completed"); + var message = CreateMessage(jobId, PipelineJobStatus.Completed); await sut.HandleMessage(message, CancellationToken.None); @@ -250,7 +250,7 @@ public async Task HandleMessage_MergesContextFromAugmentedManifest_WhenAugmented A.CallTo(() => textServicesClient.GetTextAugmentedManifest(jobId, A._)) .Returns(augmentedManifest); - await sut.HandleMessage(CreateMessage(jobId, "Completed"), CancellationToken.None); + await sut.HandleMessage(CreateMessage(jobId, PipelineJobStatus.Completed), CancellationToken.None); stagedManifest.Context.Should().Be(searchContext); } @@ -274,7 +274,7 @@ public async Task HandleMessage_DoesNotAddPresentation3Context_FromAugmentedMani A.CallTo(() => textServicesClient.GetTextAugmentedManifest(jobId, A._)) .Returns(augmentedManifest); - await sut.HandleMessage(CreateMessage(jobId, "Completed"), CancellationToken.None); + await sut.HandleMessage(CreateMessage(jobId, PipelineJobStatus.Completed), CancellationToken.None); stagedManifest.Context.Should().BeNull(); } @@ -291,7 +291,7 @@ public async Task 
HandleMessage_SetsFinishedTimestamp_WhenJobCompletes() A.CallTo(() => textServicesClient.GetTextAugmentedManifest(jobId, A._)) .Returns((IIIFManifest?)null); - await sut.HandleMessage(CreateMessage(jobId, "Completed"), CancellationToken.None); + await sut.HandleMessage(CreateMessage(jobId, PipelineJobStatus.Completed), CancellationToken.None); var job = dbContext.PipelineJobs.Single(p => p.ResourceId == manifestId); job.Finished.Should().Be(new DateTime(2024, 6, 12, 10, 0, 0, DateTimeKind.Utc)); @@ -307,7 +307,7 @@ public async Task HandleMessage_SetsFinishedTimestamp_WhenJobFails() A.CallTo(() => iiifS3.ReadIIIFFromS3(A._, BucketLocationType.Staging, A._)) .Returns(new IIIFManifest { Id = manifestId }); - await sut.HandleMessage(CreateMessage(jobId, "Failed", errors: "OCR error"), CancellationToken.None); + await sut.HandleMessage(CreateMessage(jobId, PipelineJobStatus.Failed, errors: "OCR error"), CancellationToken.None); var job = dbContext.PipelineJobs.Single(p => p.ResourceId == manifestId); job.Finished.Should().Be(new DateTime(2024, 6, 12, 10, 0, 0, DateTimeKind.Utc)); @@ -330,10 +330,10 @@ await dbContext.PipelineJobs.AddAsync(new PipelineJob await dbContext.SaveChangesAsync(); } - private static QueueMessage CreateMessage(string jobId, string status, int approximateReceiveCount = 0, string? errors = null) + private static QueueMessage CreateMessage(string jobId, PipelineJobStatus status, int approximateReceiveCount = 0, string? errors = null) { var errorsJson = errors == null ? 
"null" : $"\"{errors}\""; - var body = $$"""{"jobId":"{{jobId}}","status":"{{status}}","finished":"2024-06-12T10:00:00Z","totalPages":1,"totalWordCount":100,"errors":{{errorsJson}}}"""; + var body = $$"""{"jobId":"{{jobId}}","status":{{(int)status}},"finished":"2024-06-12T10:00:00Z","totalPages":1,"totalWordCount":100,"errors":{{errorsJson}}}"""; var systemAttributes = new Dictionary { ["ApproximateReceiveCount"] = approximateReceiveCount.ToString() diff --git a/src/IIIFPresentation/BackgroundHandler.Tests/TextCompletion/TextServiceJobCompletionMessageTests.cs b/src/IIIFPresentation/BackgroundHandler.Tests/TextCompletion/TextServiceJobCompletionMessageTests.cs index 91c5e7d7..76f630b7 100644 --- a/src/IIIFPresentation/BackgroundHandler.Tests/TextCompletion/TextServiceJobCompletionMessageTests.cs +++ b/src/IIIFPresentation/BackgroundHandler.Tests/TextCompletion/TextServiceJobCompletionMessageTests.cs @@ -10,7 +10,7 @@ public class TextServiceJobCompletionMessageTests { private static QueueMessage ValidMessage() => new( - """{"jobId":"1/iiif/manifest-1","status":"Completed","finished":"2024-06-12T10:00:00Z","totalPages":5,"totalWordCount":1200,"errors":null}""", + """{"jobId":"1/iiif/manifest-1","status":2,"finished":"2024-06-12T10:00:00Z","totalPages":5,"totalWordCount":1200,"errors":null}""", new Dictionary(), "msg-1"); @@ -31,7 +31,7 @@ public void FromQueueMessage_DeserializesBodyProperties() public void FromQueueMessage_DeserializesErrors_WhenPresent() { var message = new QueueMessage( - """{"jobId":"1/iiif/x","status":"Failed","finished":null,"totalPages":0,"totalWordCount":0,"errors":"OCR failed on page 3"}""", + """{"jobId":"1/iiif/x","status":3,"finished":null,"totalPages":0,"totalWordCount":0,"errors":"OCR failed on page 3"}""", new Dictionary(), "msg-err"); var result = TextServiceJobCompletionMessage.FromQueueMessage(message); @@ -43,7 +43,7 @@ public void FromQueueMessage_DeserializesErrors_WhenPresent() public void 
IsCompleted_ReturnsTrue_WhenStatusIsCompleted() { var message = new QueueMessage( - """{"jobId":"1/iiif/x","status":"Completed","finished":null,"totalPages":0,"totalWordCount":0,"errors":null}""", + """{"jobId":"1/iiif/x","status":2,"finished":null,"totalPages":0,"totalWordCount":0,"errors":null}""", new Dictionary(), "msg"); TextServiceJobCompletionMessage.FromQueueMessage(message).IsCompleted.Should().BeTrue(); @@ -53,7 +53,7 @@ public void IsCompleted_ReturnsTrue_WhenStatusIsCompleted() public void IsCompleted_ReturnsFalse_WhenStatusIsFailed() { var message = new QueueMessage( - """{"jobId":"1/iiif/x","status":"Failed","finished":null,"totalPages":0,"totalWordCount":0,"errors":null}""", + """{"jobId":"1/iiif/x","status":3,"finished":null,"totalPages":0,"totalWordCount":0,"errors":null}""", new Dictionary(), "msg"); TextServiceJobCompletionMessage.FromQueueMessage(message).IsCompleted.Should().BeFalse(); @@ -68,4 +68,4 @@ public void FromQueueMessage_Throws_WhenBodyIsInvalidJson() act.Should().Throw(); } -} +} \ No newline at end of file diff --git a/src/IIIFPresentation/Services.Tests/Manifests/ManifestXTests.cs b/src/IIIFPresentation/Services.Tests/Manifests/ManifestXTests.cs index c21e801e..30aa510b 100644 --- a/src/IIIFPresentation/Services.Tests/Manifests/ManifestXTests.cs +++ b/src/IIIFPresentation/Services.Tests/Manifests/ManifestXTests.cs @@ -49,7 +49,7 @@ public void HasPendingPipelineJob_ReturnsFalse_WhenJobIsNotQueued(PipelineJobSta [Fact] public void HasPendingPipelineJob_ReturnsTrue_WhenJobIsQueued() - => ManifestWithJobs(PipelineJobStatus.Queued).HasPendingPipelineJob().Should().BeTrue(); + => ManifestWithJobs(PipelineJobStatus.Waiting).HasPendingPipelineJob().Should().BeTrue(); [Fact] public void HasFurtherWork_ReturnsFalse_WhenNoIngestingBatchAndNoPendingJob() @@ -69,7 +69,7 @@ public void HasFurtherWork_ReturnsTrue_WhenBatchIsIngesting() [Fact] public void HasFurtherWork_ReturnsTrue_WhenPipelineJobIsQueued() - => 
ManifestWithJobs(PipelineJobStatus.Queued).HasFurtherWork().Should().BeTrue(); + => ManifestWithJobs(PipelineJobStatus.Waiting).HasFurtherWork().Should().BeTrue(); [Fact] public void HasFurtherWork_ReturnsFalse_WhenBatchCompletedAndJobCompleted() @@ -100,4 +100,4 @@ private static Manifest ManifestWithJobs(PipelineJobStatus? status = null) } return manifest; } -} \ No newline at end of file +} diff --git a/src/IIIFPresentation/Services.Tests/Manifests/PipelineXTests.cs b/src/IIIFPresentation/Services.Tests/Manifests/PipelineXTests.cs index 7e77a736..f074038c 100644 --- a/src/IIIFPresentation/Services.Tests/Manifests/PipelineXTests.cs +++ b/src/IIIFPresentation/Services.Tests/Manifests/PipelineXTests.cs @@ -139,7 +139,7 @@ public void ToPipelineItem_SetsNullConfig_WhenJobConfigIsNull() { ResourceId = "id", CustomerId = 1, JobType = PipelineJobType.TextService, - Status = PipelineJobStatus.Queued, + Status = PipelineJobStatus.Waiting, Config = null }; From c812a183cdb1b6a749aa353a6eb83a000afa7d90 Mon Sep 17 00:00:00 2001 From: "jack.lewis" Date: Mon, 22 Jun 2026 12:53:27 +0100 Subject: [PATCH 08/18] Make submit text pipeline more generic --- .../Features/Manifest/ManifestWriteService.cs | 26 +++++++++++++------ .../Models/API/Manifest/Pipeline.cs | 4 ++- 2 files changed, 21 insertions(+), 9 deletions(-) diff --git a/src/IIIFPresentation/API/Features/Manifest/ManifestWriteService.cs b/src/IIIFPresentation/API/Features/Manifest/ManifestWriteService.cs index fc3de68f..2536a42f 100644 --- a/src/IIIFPresentation/API/Features/Manifest/ManifestWriteService.cs +++ b/src/IIIFPresentation/API/Features/Manifest/ManifestWriteService.cs @@ -484,16 +484,11 @@ await manifestStorageManager.SaveManifestInStorage(iiifManifest, dbManifest, ori } } - if (request.PresentationManifest.HasTextIndexPipeline()) + if (request.PresentationManifest.HasPipelineJob()) { - var pipelineConfig = request.PresentationManifest.Pipeline! 
- .First(p => string.Equals(p.Name, "text", StringComparison.OrdinalIgnoreCase) - && string.Equals(p.Config?.Action, "Index", StringComparison.OrdinalIgnoreCase)) - .Config; - - if (!await SubmitTextPipelineJob(dbManifest, pipelineConfig, cancellationToken)) + if (!await SubmitPipelineJobs(dbManifest, request.PresentationManifest.Pipeline!, cancellationToken)) { - return PresUpdateResult.Failure("Failed to submit text-services job", + return PresUpdateResult.Failure("Failed to submit pipeline job", ModifyCollectionType.Unknown, WriteResult.Error); } } @@ -502,6 +497,21 @@ await manifestStorageManager.SaveManifestInStorage(iiifManifest, dbManifest, ori return null; } + private async Task SubmitPipelineJobs(DbManifest dbManifest, List pipeline, + CancellationToken cancellationToken) + { + foreach (var pipelineItem in pipeline) + { + if (string.Equals(pipelineItem.Name, PipelineX.TextPipelineName, StringComparison.OrdinalIgnoreCase)) + { + if (!await SubmitTextPipelineJob(dbManifest, pipelineItem.Config, cancellationToken)) + return false; + } + } + + return false; + } + private async Task SubmitTextPipelineJob(DbManifest dbManifest, PipelineConfig? 
config, CancellationToken cancellationToken) { diff --git a/src/IIIFPresentation/Models/API/Manifest/Pipeline.cs b/src/IIIFPresentation/Models/API/Manifest/Pipeline.cs index ab9f243a..0abb8a41 100644 --- a/src/IIIFPresentation/Models/API/Manifest/Pipeline.cs +++ b/src/IIIFPresentation/Models/API/Manifest/Pipeline.cs @@ -18,12 +18,14 @@ public class PipelineConfig public static class PipelineX { + public const string TextPipelineName = "text"; + public static bool HasPipelineJob(this PresentationManifest manifest) => manifest.Pipeline?.Count > 0; public static bool HasTextIndexPipeline(this PresentationManifest manifest) => manifest.Pipeline?.Any(p => - string.Equals(p.Name, "text", StringComparison.OrdinalIgnoreCase) && + string.Equals(p.Name, TextPipelineName, StringComparison.OrdinalIgnoreCase) && string.Equals(p.Config?.Action, "Index", StringComparison.OrdinalIgnoreCase)) == true; public static PipelineItem ToPipelineItem(this PipelineJob job) => new () From 209f0270fffae7ded2b75f142bceb5a3945d115c Mon Sep 17 00:00:00 2001 From: "jack.lewis" Date: Mon, 22 Jun 2026 15:12:09 +0100 Subject: [PATCH 09/18] Remove uneeded pipelineX method --- .../Features/Manifest/ManifestWriteService.cs | 3 +- .../Models/API/Manifest/Pipeline.cs | 5 -- .../Manifests/PipelineXTests.cs | 78 ------------------- 3 files changed, 1 insertion(+), 85 deletions(-) diff --git a/src/IIIFPresentation/API/Features/Manifest/ManifestWriteService.cs b/src/IIIFPresentation/API/Features/Manifest/ManifestWriteService.cs index 2536a42f..31f110b3 100644 --- a/src/IIIFPresentation/API/Features/Manifest/ManifestWriteService.cs +++ b/src/IIIFPresentation/API/Features/Manifest/ManifestWriteService.cs @@ -504,8 +504,7 @@ private async Task SubmitPipelineJobs(DbManifest dbManifest, List manifest.Pipeline?.Count > 0; - public static bool HasTextIndexPipeline(this PresentationManifest manifest) => - manifest.Pipeline?.Any(p => - string.Equals(p.Name, TextPipelineName, StringComparison.OrdinalIgnoreCase) && - 
string.Equals(p.Config?.Action, "Index", StringComparison.OrdinalIgnoreCase)) == true; - public static PipelineItem ToPipelineItem(this PipelineJob job) => new () { Name = job.JobType.ToString(), diff --git a/src/IIIFPresentation/Services.Tests/Manifests/PipelineXTests.cs b/src/IIIFPresentation/Services.Tests/Manifests/PipelineXTests.cs index f074038c..dc12c839 100644 --- a/src/IIIFPresentation/Services.Tests/Manifests/PipelineXTests.cs +++ b/src/IIIFPresentation/Services.Tests/Manifests/PipelineXTests.cs @@ -33,84 +33,6 @@ public void HasPipelineJob_ReturnsTrue_WhenPipelineHasAnyItem() } - [Fact] - public void HasTextIndexPipeline_ReturnsFalse_WhenPipelineIsNull() - { - var manifest = new PresentationManifest { Pipeline = null }; - - manifest.HasTextIndexPipeline().Should().BeFalse(); - } - - [Fact] - public void HasTextIndexPipeline_ReturnsFalse_WhenPipelineIsEmpty() - { - var manifest = new PresentationManifest { Pipeline = [] }; - - manifest.HasTextIndexPipeline().Should().BeFalse(); - } - - [Fact] - public void HasTextIndexPipeline_ReturnsFalse_WhenNameIsNotText() - { - var manifest = new PresentationManifest - { - Pipeline = [new PipelineItem { Name = "ocr", Config = new PipelineConfig { Action = "Index" } }] - }; - - manifest.HasTextIndexPipeline().Should().BeFalse(); - } - - [Fact] - public void HasTextIndexPipeline_ReturnsFalse_WhenActionIsNotIndex() - { - var manifest = new PresentationManifest - { - Pipeline = [new PipelineItem { Name = "text", Config = new PipelineConfig { Action = "Delete" } }] - }; - - manifest.HasTextIndexPipeline().Should().BeFalse(); - } - - [Fact] - public void HasTextIndexPipeline_ReturnsFalse_WhenConfigIsNull() - { - var manifest = new PresentationManifest - { - Pipeline = [new PipelineItem { Name = "text", Config = null }] - }; - - manifest.HasTextIndexPipeline().Should().BeFalse(); - } - - [Theory] - [InlineData("text", "Index")] - [InlineData("TEXT", "INDEX")] - [InlineData("Text", "index")] - public void 
HasTextIndexPipeline_ReturnsTrue_CaseInsensitive(string name, string action) - { - var manifest = new PresentationManifest - { - Pipeline = [new PipelineItem { Name = name, Config = new PipelineConfig { Action = action } }] - }; - - manifest.HasTextIndexPipeline().Should().BeTrue(); - } - - [Fact] - public void HasTextIndexPipeline_ReturnsTrue_WhenOneOfMultipleItemsMatches() - { - var manifest = new PresentationManifest - { - Pipeline = - [ - new PipelineItem { Name = "other", Config = new PipelineConfig { Action = "Do" } }, - new PipelineItem { Name = "text", Config = new PipelineConfig { Action = "Index" } } - ] - }; - - manifest.HasTextIndexPipeline().Should().BeTrue(); - } - [Theory] [InlineData(PipelineJobStatus.Waiting, "Waiting")] [InlineData(PipelineJobStatus.Completed, "Completed")] From ce78ed44a32d4572e577bb2698b0c818052a6faf Mon Sep 17 00:00:00 2001 From: "jack.lewis" Date: Tue, 23 Jun 2026 13:51:32 +0100 Subject: [PATCH 10/18] Modify to use JobServices --- .../Manifest/ManifestWriteServiceTests.cs | 18 +++++++++------ .../Features/Manifest/ManifestWriteService.cs | 5 ++-- .../TextServiceJobCompletionMessageTests.cs | 8 +++---- .../TextServiceJobCompletionMessage.cs | 6 ++++- .../TextServices/TextServicesClientTests.cs | 15 +++++++++++- .../Manifests/AWS/ManifestS3Manager.cs | 23 ++++++++++++------- .../Services/TextServices/JobServices.cs | 19 +++++++++++++++ .../TextServices/TextServicesClient.cs | 5 +--- 8 files changed, 72 insertions(+), 27 deletions(-) create mode 100644 src/IIIFPresentation/Services/TextServices/JobServices.cs diff --git a/src/IIIFPresentation/API.Tests/Features/Manifest/ManifestWriteServiceTests.cs b/src/IIIFPresentation/API.Tests/Features/Manifest/ManifestWriteServiceTests.cs index 228251da..40341c95 100644 --- a/src/IIIFPresentation/API.Tests/Features/Manifest/ManifestWriteServiceTests.cs +++ b/src/IIIFPresentation/API.Tests/Features/Manifest/ManifestWriteServiceTests.cs @@ -667,7 +667,7 @@ public async Task 
Upsert_MergesStubAdjuncts_WithUserSetValues_WhenCanBeBuiltUpfr // UpsertManifestInStorage returns a manifest carrying both user-set and stub values for all // three adjunct types, as ManifestMerger would produce after applying ApplyManifestLevelAdjuncts A.CallTo(() => manifestStorageManager.UpsertManifestInStorage( - A._, A._, A._, A._)) + A._, A._, A._, A._, A._)) .ReturnsLazily(() => new IIIFManifest { SeeAlso = @@ -765,7 +765,7 @@ public async Task Upsert_PreservesEmptyAdjuncts_WhenStubCanvasHasNoAdjuncts() // ManifestMerger preserves the base manifest's empty lists when the stub canvas has no adjuncts, // so UpsertManifestInStorage returns empty (not null) for each adjunct type A.CallTo(() => manifestStorageManager.UpsertManifestInStorage( - A._, A._, A._, A._)) + A._, A._, A._, A._, A._)) .ReturnsLazily(() => new IIIFManifest { SeeAlso = [], Rendering = [], Annotations = [] }); A.CallTo(() => dlcsClient.GetCustomerImages(Customer, A._, A._)) @@ -814,7 +814,7 @@ public async Task Upsert_CallsUpsertManifestInStorage_WhenAdjunctsNull_AndExisti await presentationContext.SaveChangesAsync(); A.CallTo(() => manifestStorageManager.UpsertManifestInStorage( - A._, A._, A._, A._)) + A._, A._, A._, A._, A._)) .ReturnsLazily(() => new IIIFManifest()); const string existingAdjunctId = "existing-adjunct.xml"; @@ -846,7 +846,7 @@ public async Task Upsert_CallsUpsertManifestInStorage_WhenAdjunctsNull_AndExisti .Which.Value("id").Should().Be(existingAdjunctId, "existing adjuncts from the DLCS stub asset must be returned when Adjuncts was null on the request"); A.CallTo(() => manifestStorageManager.UpsertManifestInStorage( - A._, A._, A._, A._)) + A._, A._, A._, A._, A._)) .MustHaveHappenedOnceExactly(); } @@ -875,7 +875,7 @@ public async Task Upsert_CallsSaveManifestInStorage_WhenAdjunctsNull_AndNoPriorD result.Error.Should().BeNull(); result.Entity.Adjuncts.Should().BeNull("no DLCS content exists so the stub asset has no adjuncts to return"); A.CallTo(() => 
manifestStorageManager.UpsertManifestInStorage( - A._, A._, A._, A._)) + A._, A._, A._, A._, A._)) .MustNotHaveHappened(); } @@ -900,7 +900,7 @@ public async Task Upsert_CallsUpsertManifestInStorage_WhenAdjunctsEmpty_AndNoAss ])); A.CallTo(() => manifestStorageManager.UpsertManifestInStorage( - A._, A._, A._, A._)) + A._, A._, A._, A._, A._)) .ReturnsLazily(() => new IIIFManifest()); var manifest = new PresentationManifest { Slug = slug, Adjuncts = [] }; @@ -915,7 +915,7 @@ public async Task Upsert_CallsUpsertManifestInStorage_WhenAdjunctsEmpty_AndNoAss result.Error.Should().BeNull(); result.Entity.Adjuncts.Should().BeEmpty("Adjuncts=[] (explicit clear) is preserved — stub lookup finds no adjuncts so the value is unchanged"); A.CallTo(() => manifestStorageManager.UpsertManifestInStorage( - A._, A._, A._, A._)) + A._, A._, A._, A._, A._)) .MustHaveHappenedOnceExactly(); } @@ -1198,6 +1198,10 @@ public async Task Create_ReturnsError_AndDoesNotPersistManifest_WhenTextServiceS // Manifest and pipeline job should be rolled back — resubmitting the same slug must not conflict presentationContext.Hierarchy.Any(h => h.Slug == slug).Should().BeFalse(); presentationContext.PipelineJobs.Any(p => p.ResourceId == resourceId).Should().BeFalse(); + + // Staged S3 objects must be cleaned up + A.CallTo(() => manifestStorageManager.DeleteStagedManifest(A._)) + .MustHaveHappenedOnceExactly(); } [Fact] diff --git a/src/IIIFPresentation/API/Features/Manifest/ManifestWriteService.cs b/src/IIIFPresentation/API/Features/Manifest/ManifestWriteService.cs index 31f110b3..7054c9fc 100644 --- a/src/IIIFPresentation/API/Features/Manifest/ManifestWriteService.cs +++ b/src/IIIFPresentation/API/Features/Manifest/ManifestWriteService.cs @@ -454,11 +454,11 @@ await ManifestRetrieval.RetrieveFullPathForManifest(dbManifest.Id, dbManifest.Cu // Pipeline forces staging even if we'd otherwise save directly to final var saveToStaging = !canBeBuiltUpfront || hasPipeline; - if (canBeBuiltUpfront && 
requiresExternalContent && !hasPipeline) + if (canBeBuiltUpfront && requiresExternalContent) { logger.LogDebug("Manifest {Manifest} can be built upfront, after merging", dbManifest.Id); var manifest = await manifestStorageManager.UpsertManifestInStorage(iiifManifest, dbManifest, - originalToStore, cancellationToken); + originalToStore, saveToStaging, cancellationToken); MergeManifestFields(manifest, request.PresentationManifest); } else @@ -488,6 +488,7 @@ await manifestStorageManager.SaveManifestInStorage(iiifManifest, dbManifest, ori { if (!await SubmitPipelineJobs(dbManifest, request.PresentationManifest.Pipeline!, cancellationToken)) { + await manifestStorageManager.DeleteStagedManifest(dbManifest); return PresUpdateResult.Failure("Failed to submit pipeline job", ModifyCollectionType.Unknown, WriteResult.Error); } diff --git a/src/IIIFPresentation/BackgroundHandler.Tests/TextCompletion/TextServiceJobCompletionMessageTests.cs b/src/IIIFPresentation/BackgroundHandler.Tests/TextCompletion/TextServiceJobCompletionMessageTests.cs index 76f630b7..3a382bb1 100644 --- a/src/IIIFPresentation/BackgroundHandler.Tests/TextCompletion/TextServiceJobCompletionMessageTests.cs +++ b/src/IIIFPresentation/BackgroundHandler.Tests/TextCompletion/TextServiceJobCompletionMessageTests.cs @@ -10,7 +10,7 @@ public class TextServiceJobCompletionMessageTests { private static QueueMessage ValidMessage() => new( - """{"jobId":"1/iiif/manifest-1","status":2,"finished":"2024-06-12T10:00:00Z","totalPages":5,"totalWordCount":1200,"errors":null}""", + """{"jobId":"1/iiif/manifest-1","status":"Completed","finished":"2024-06-12T10:00:00Z","totalPages":5,"totalWordCount":1200}""", new Dictionary(), "msg-1"); @@ -31,7 +31,7 @@ public void FromQueueMessage_DeserializesBodyProperties() public void FromQueueMessage_DeserializesErrors_WhenPresent() { var message = new QueueMessage( - """{"jobId":"1/iiif/x","status":3,"finished":null,"totalPages":0,"totalWordCount":0,"errors":"OCR failed on page 3"}""", 
+ """{"jobId":"1/iiif/x","status":"Failed","totalPages":0,"totalWordCount":0,"errors":"OCR failed on page 3"}""", new Dictionary(), "msg-err"); var result = TextServiceJobCompletionMessage.FromQueueMessage(message); @@ -43,7 +43,7 @@ public void FromQueueMessage_DeserializesErrors_WhenPresent() public void IsCompleted_ReturnsTrue_WhenStatusIsCompleted() { var message = new QueueMessage( - """{"jobId":"1/iiif/x","status":2,"finished":null,"totalPages":0,"totalWordCount":0,"errors":null}""", + """{"jobId":"1/iiif/x","status":"Completed","totalPages":0,"totalWordCount":0}""", new Dictionary(), "msg"); TextServiceJobCompletionMessage.FromQueueMessage(message).IsCompleted.Should().BeTrue(); @@ -53,7 +53,7 @@ public void IsCompleted_ReturnsTrue_WhenStatusIsCompleted() public void IsCompleted_ReturnsFalse_WhenStatusIsFailed() { var message = new QueueMessage( - """{"jobId":"1/iiif/x","status":3,"finished":null,"totalPages":0,"totalWordCount":0,"errors":null}""", + """{"jobId":"1/iiif/x","status":"Failed","totalPages":0,"totalWordCount":0}""", new Dictionary(), "msg"); TextServiceJobCompletionMessage.FromQueueMessage(message).IsCompleted.Should().BeFalse(); diff --git a/src/IIIFPresentation/BackgroundHandler/TextCompletion/TextServiceJobCompletionMessage.cs b/src/IIIFPresentation/BackgroundHandler/TextCompletion/TextServiceJobCompletionMessage.cs index 6690315c..f7dafa80 100644 --- a/src/IIIFPresentation/BackgroundHandler/TextCompletion/TextServiceJobCompletionMessage.cs +++ b/src/IIIFPresentation/BackgroundHandler/TextCompletion/TextServiceJobCompletionMessage.cs @@ -1,4 +1,5 @@ using System.Text.Json; +using System.Text.Json.Serialization; using AWS.SQS; using Models.Database.General; @@ -15,7 +16,10 @@ public class TextServiceJobCompletionMessage( int totalWordCount, string? 
errors) { - private static readonly JsonSerializerOptions JsonSerializerOptions = new(JsonSerializerDefaults.Web); + private static readonly JsonSerializerOptions JsonSerializerOptions = new(JsonSerializerDefaults.Web) + { + Converters = { new JsonStringEnumConverter() } + }; public string JobId { get; } = jobId; diff --git a/src/IIIFPresentation/Services.Tests/TextServices/TextServicesClientTests.cs b/src/IIIFPresentation/Services.Tests/TextServices/TextServicesClientTests.cs index 4dfb0472..04f506cf 100644 --- a/src/IIIFPresentation/Services.Tests/TextServices/TextServicesClientTests.cs +++ b/src/IIIFPresentation/Services.Tests/TextServices/TextServicesClientTests.cs @@ -83,6 +83,19 @@ await sut.CreateOrUpdateJob(MakeJob(customerId: 5, resourceId: "test-manifest"), body.Should().Contain("\"sourceUri\":\"s3://my-bucket/staging/5/manifests/test-manifest\""); } + [Fact] + public async Task CreateOrUpdateJob_SendsSearchAutocompleteTextAugmented_AsServicesField() + { + var sut = CreateSut(new TextServicesSettings { BuilderApiUri = new Uri("http://text-services/") }); + messageHandler.Enqueue(HttpStatusCode.OK); + + await sut.CreateOrUpdateJob(MakeJob(), "my-bucket", "staging/1/manifests/my-manifest"); + + var body = await messageHandler.Requests.Single().Content!.ReadAsStringAsync(); + var expected = (int)(JobServices.All); + body.Should().Contain($"\"services\":{expected}"); + } + [Fact] public async Task GetTextAugmentedManifest_ReturnsNull_WhenSearchApiUriNotConfigured() { @@ -158,4 +171,4 @@ protected override Task SendAsync(HttpRequestMessage reques : new HttpResponseMessage(HttpStatusCode.OK)); } } -} \ No newline at end of file +} diff --git a/src/IIIFPresentation/Services/Manifests/AWS/ManifestS3Manager.cs b/src/IIIFPresentation/Services/Manifests/AWS/ManifestS3Manager.cs index 5fd13b5f..b075ad51 100644 --- a/src/IIIFPresentation/Services/Manifests/AWS/ManifestS3Manager.cs +++ b/src/IIIFPresentation/Services/Manifests/AWS/ManifestS3Manager.cs @@ -22,13 +22,12 
@@ public class ManifestS3Manager( ILogger logger) : IManifestStorageManager { public async Task UpsertManifestInStorage(Manifest manifest, - Models.Database.Collections.Manifest dbManifest, string? originalPayload, CancellationToken cancellationToken) + Models.Database.Collections.Manifest dbManifest, string? originalPayload, bool saveToStaging, + CancellationToken cancellationToken) { logger.LogInformation("Creating manifest {Manifest} in S3", dbManifest.Id); - var mergedManifest = await UpsertManifest(manifest, dbManifest, originalPayload, cancellationToken); - - return mergedManifest; + return await UpsertManifest(manifest, dbManifest, originalPayload, saveToStaging, cancellationToken); } public async Task UpsertManifestFromStagingInStorage(Models.Database.Collections.Manifest dbManifest, @@ -51,7 +50,7 @@ public async Task UpsertManifestFromStagingInStorage(Models.Database.Collections } } - await UpsertManifest(manifest!, dbManifest, stagedOriginal, cancellationToken); + await UpsertManifest(manifest!, dbManifest, stagedOriginal, false, cancellationToken); await iiifS3.DeleteIIIFFromS3(dbManifest, true); } @@ -77,6 +76,9 @@ public async Task SaveManifestInStorage(Manifest manifest, Models.Database.Colle } } + public Task DeleteStagedManifest(Models.Database.Collections.Manifest dbManifest) => + iiifS3.DeleteIIIFFromS3(dbManifest, true); + public async Task DeleteOriginalPayload(Models.Database.Collections.Manifest dbManifest) { if (!behaviour.CurrentValue.ShouldHaveStoredOriginal(dbManifest.Created)) return; @@ -86,7 +88,7 @@ public async Task DeleteOriginalPayload(Models.Database.Collections.Manifest dbM } private async Task UpsertManifest(Manifest manifest, Models.Database.Collections.Manifest dbManifest, - string? originalPayload, CancellationToken cancellationToken) + string? 
originalPayload, bool saveToStaging, CancellationToken cancellationToken) { var namedQueryManifest = await dlcsOrchestratorClient.RetrieveAssetsForManifest(dbManifest.CustomerId, dbManifest.Id, @@ -99,7 +101,7 @@ await dlcsOrchestratorClient.RetrieveAssetsForManifest(dbManifest.CustomerId, db dbManifest.CustomerId, dbManifest.Id); - await SaveManifestInStorage(mergedManifest, dbManifest, originalPayload, false, cancellationToken); + await SaveManifestInStorage(mergedManifest, dbManifest, originalPayload, saveToStaging, cancellationToken); return mergedManifest; } @@ -117,7 +119,7 @@ public Task UpsertManifestFromStagingInStorage(Models.Database.Collections.Manif /// Upserts a manifest that requires external content, merged with Manifest provided. /// public Task UpsertManifestInStorage(Manifest manifest, Models.Database.Collections.Manifest dbManifest, - string? originalPayload, CancellationToken cancellationToken); + string? originalPayload, bool saveToStaging, CancellationToken cancellationToken); /// /// Upserts provided manifest directly to storage @@ -125,6 +127,11 @@ public Task UpsertManifestInStorage(Manifest manifest, Models.Database public Task SaveManifestInStorage(Manifest manifest, Models.Database.Collections.Manifest dbManifest, string? originalPayload, bool saveToStaging, CancellationToken cancellationToken); + /// + /// Removes the staged manifest and staged original payload from S3 (e.g. on pipeline submission failure). + /// + public Task DeleteStagedManifest(Models.Database.Collections.Manifest dbManifest); + /// /// Removes any stored original payload for a manifest (e.g. a stale one left by a prior version). 
/// diff --git a/src/IIIFPresentation/Services/TextServices/JobServices.cs b/src/IIIFPresentation/Services/TextServices/JobServices.cs new file mode 100644 index 00000000..fe4bc3de --- /dev/null +++ b/src/IIIFPresentation/Services/TextServices/JobServices.cs @@ -0,0 +1,19 @@ +namespace Services.TextServices; + +/// +/// Bitmask of services/derivatives a text-services job should produce and expose. +/// Note: Mirrors TextServices.Storage.JobServices — keep values in sync if that enum changes. +/// +[Flags] +public enum JobServices +{ + None = 0, + Search = 1 << 0, + Autocomplete = 1 << 1, + FullText = 1 << 2, + Annotations = 1 << 3, + Pdf = 1 << 4, + TextAugmented = 1 << 5, + Figures = 1 << 6, + All = ~0, +} diff --git a/src/IIIFPresentation/Services/TextServices/TextServicesClient.cs b/src/IIIFPresentation/Services/TextServices/TextServicesClient.cs index f25ac47e..2b9c1aad 100644 --- a/src/IIIFPresentation/Services/TextServices/TextServicesClient.cs +++ b/src/IIIFPresentation/Services/TextServices/TextServicesClient.cs @@ -21,9 +21,6 @@ public class TextServicesClient( DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull }; - // Search=1, Autocomplete=2, TextAugmented=16 - private const int InitialServices = 19; - public async Task CreateOrUpdateJob(PipelineJob job, string bucket, string resourceKey, CancellationToken cancellationToken = default) { @@ -36,7 +33,7 @@ public async Task CreateOrUpdateJob(PipelineJob job, string bucket, string } var sourceS3Uri = $"s3://{bucket}/{resourceKey}"; - var request = new { id = jobId, sourceUri = sourceS3Uri, services = InitialServices }; + var request = new { id = jobId, sourceUri = sourceS3Uri, services = (int)JobServices.All }; var content = new StringContent(JsonSerializer.Serialize(request, JsonOptions), Encoding.UTF8, "application/json"); var postUri = new Uri(settings.BuilderApiUri, "textbuilder"); From 8078da8a96132414ee9db81772fa1bb10e4a849d Mon Sep 17 00:00:00 2001 From: "jack.lewis" Date: Tue, 23 Jun 2026 
14:35:59 +0100 Subject: [PATCH 11/18] Make it so DB save happens before pipeline jobs, but the transaction rolls back if there's a failure --- .../Converters/ManifestConverterTests.cs | 2 +- .../Manifest/ManifestWriteServiceTests.cs | 3 +- .../Features/Manifest/ManifestWriteService.cs | 73 ++++++++++--------- .../BatchCompletionMessageHandlerTests.cs | 2 +- .../Models/API/Manifest/Pipeline.cs | 2 +- .../Manifests/PipelineXTests.cs | 2 +- 6 files changed, 44 insertions(+), 40 deletions(-) diff --git a/src/IIIFPresentation/API.Tests/Converters/ManifestConverterTests.cs b/src/IIIFPresentation/API.Tests/Converters/ManifestConverterTests.cs index b2a18da8..d9663532 100644 --- a/src/IIIFPresentation/API.Tests/Converters/ManifestConverterTests.cs +++ b/src/IIIFPresentation/API.Tests/Converters/ManifestConverterTests.cs @@ -336,7 +336,7 @@ public void SetGeneratedFields_SetsPipelineFromJobs_WhenPipelineJobsPresent() var result = iiifManifest.SetGeneratedFields(dbManifest, pathGenerator, settingsBasedPathGenerator); // Assert - result.Pipeline.Should().ContainSingle(p => p.Name == "TextService" && p.Status == "Waiting"); + result.Pipeline.Should().ContainSingle(p => p.Name == PipelineX.TextPipelineName && p.Status == "Waiting"); } [Fact] diff --git a/src/IIIFPresentation/API.Tests/Features/Manifest/ManifestWriteServiceTests.cs b/src/IIIFPresentation/API.Tests/Features/Manifest/ManifestWriteServiceTests.cs index 40341c95..8b62bc72 100644 --- a/src/IIIFPresentation/API.Tests/Features/Manifest/ManifestWriteServiceTests.cs +++ b/src/IIIFPresentation/API.Tests/Features/Manifest/ManifestWriteServiceTests.cs @@ -1171,7 +1171,7 @@ public async Task Create_CallsTextServicesAndCreatesPipelineJob_WhenManifestHasP pipelineJob!.Status.Should().Be(PipelineJobStatus.Waiting); pipelineJob.Config!.Action.Should().Be("Index"); pipelineJob.GetJobId().Should().Be($"{Customer}/iiif/{flatId}"); - result.Entity.Pipeline.Should().ContainSingle(p => p.Name == "TextService" && p.Status == 
"Waiting"); + result.Entity.Pipeline.Should().ContainSingle(p => p.Name == PipelineX.TextPipelineName && p.Status == "Waiting"); } [Fact] @@ -1194,6 +1194,7 @@ public async Task Create_ReturnsError_AndDoesNotPersistManifest_WhenTextServiceS // Assert result.IsSuccess.Should().BeFalse(); result.WriteResult.Should().Be(WriteResult.Error); + result.Error.Should().Contain("pipeline job"); // Manifest and pipeline job should be rolled back — resubmitting the same slug must not conflict presentationContext.Hierarchy.Any(h => h.Slug == slug).Should().BeFalse(); diff --git a/src/IIIFPresentation/API/Features/Manifest/ManifestWriteService.cs b/src/IIIFPresentation/API/Features/Manifest/ManifestWriteService.cs index 7054c9fc..a92afd48 100644 --- a/src/IIIFPresentation/API/Features/Manifest/ManifestWriteService.cs +++ b/src/IIIFPresentation/API/Features/Manifest/ManifestWriteService.cs @@ -484,60 +484,63 @@ await manifestStorageManager.SaveManifestInStorage(iiifManifest, dbManifest, ori } } + // Add any pipeline job entities to the context before SaveChangesAsync so they are + // persisted within the transaction. The external HTTP submission happens after, so that + // if it fails the uncommitted transaction is rolled back cleanly. + PipelineJob? 
pendingPipelineJob = null; if (request.PresentationManifest.HasPipelineJob()) { - if (!await SubmitPipelineJobs(dbManifest, request.PresentationManifest.Pipeline!, cancellationToken)) + pendingPipelineJob = BuildPipelineJob(dbManifest, request.PresentationManifest.Pipeline!); + if (pendingPipelineJob == null) { await manifestStorageManager.DeleteStagedManifest(dbManifest); - return PresUpdateResult.Failure("Failed to submit pipeline job", - ModifyCollectionType.Unknown, WriteResult.Error); + return PresUpdateResult.Failure("No recognised pipeline type in request", + ModifyCollectionType.Unknown, WriteResult.BadRequest); } + await dbContext.PipelineJobs.AddAsync(pendingPipelineJob, cancellationToken); + dbManifest.PipelineJobs = [pendingPipelineJob]; } await dbContext.SaveChangesAsync(cancellationToken); - return null; - } - private async Task SubmitPipelineJobs(DbManifest dbManifest, List pipeline, - CancellationToken cancellationToken) - { - foreach (var pipelineItem in pipeline) + if (pendingPipelineJob != null) { - if (string.Equals(pipelineItem.Name, PipelineX.TextPipelineName, StringComparison.OrdinalIgnoreCase)) + var submitted = await textServicesClient.CreateOrUpdateJob(pendingPipelineJob, + awsOptions.Value.S3.StorageBucket, + dbManifest.GetResourceBucketKey(BucketLocationType.Staging), cancellationToken); + + if (!submitted) { - return await SubmitTextPipelineJob(dbManifest, pipelineItem.Config, cancellationToken); + logger.LogError("Failed to submit text-services job for manifest {ManifestId}", dbManifest.Id); + await manifestStorageManager.DeleteStagedManifest(dbManifest); + return PresUpdateResult.Failure( + "Failed to submit text-services pipeline job", + ModifyCollectionType.Unknown, WriteResult.Error); } } - return false; + return null; } - private async Task SubmitTextPipelineJob(DbManifest dbManifest, PipelineConfig? config, - CancellationToken cancellationToken) + private PipelineJob? 
BuildPipelineJob(DbManifest dbManifest, List pipeline) { - var job = new PipelineJob - { - ResourceId = dbManifest.Id, - ResourceType = ResourceType.IIIFManifest, - JobType = PipelineJobType.TextService, - CustomerId = dbManifest.CustomerId, - Status = PipelineJobStatus.Waiting, - Config = config, - Created = DateTime.UtcNow - }; - - var submitted = await textServicesClient.CreateOrUpdateJob(job, awsOptions.Value.S3.StorageBucket, - dbManifest.GetResourceBucketKey(BucketLocationType.Staging), cancellationToken); - - if (!submitted) + foreach (var pipelineItem in pipeline) { - logger.LogError("Failed to submit text-services job for manifest {ManifestId}", dbManifest.Id); - return false; + if (string.Equals(pipelineItem.Name, PipelineX.TextPipelineName, StringComparison.OrdinalIgnoreCase)) + { + return new PipelineJob + { + ResourceId = dbManifest.Id, + ResourceType = ResourceType.IIIFManifest, + JobType = PipelineJobType.TextService, + CustomerId = dbManifest.CustomerId, + Status = PipelineJobStatus.Waiting, + Config = pipelineItem.Config, + Created = DateTime.UtcNow + }; + } } - - await dbContext.PipelineJobs.AddAsync(job, cancellationToken); - dbManifest.PipelineJobs = [job]; - return true; + return null; } /// diff --git a/src/IIIFPresentation/BackgroundHandler.Tests/BatchCompletion/BatchCompletionMessageHandlerTests.cs b/src/IIIFPresentation/BackgroundHandler.Tests/BatchCompletion/BatchCompletionMessageHandlerTests.cs index 77adb417..73840cec 100644 --- a/src/IIIFPresentation/BackgroundHandler.Tests/BatchCompletion/BatchCompletionMessageHandlerTests.cs +++ b/src/IIIFPresentation/BackgroundHandler.Tests/BatchCompletion/BatchCompletionMessageHandlerTests.cs @@ -47,7 +47,7 @@ public class BatchCompletionMessageHandlerTests public BatchCompletionMessageHandlerTests(PresentationContextFixture dbFixture) { - // The context from dbFixture doesn't track changes so setup/assert + // The context from dbFixture doesn't track changes so setup/assert dbContext = 
dbFixture.DbContext; dbFixture.CustomerIdProvider.SetCustomerId(CustomerId); diff --git a/src/IIIFPresentation/Models/API/Manifest/Pipeline.cs b/src/IIIFPresentation/Models/API/Manifest/Pipeline.cs index 74cedded..514862cd 100644 --- a/src/IIIFPresentation/Models/API/Manifest/Pipeline.cs +++ b/src/IIIFPresentation/Models/API/Manifest/Pipeline.cs @@ -25,7 +25,7 @@ public static bool HasPipelineJob(this PresentationManifest manifest) => public static PipelineItem ToPipelineItem(this PipelineJob job) => new () { - Name = job.JobType.ToString(), + Name = TextPipelineName, Config = job.Config, Status = job.Status.ToString() }; diff --git a/src/IIIFPresentation/Services.Tests/Manifests/PipelineXTests.cs b/src/IIIFPresentation/Services.Tests/Manifests/PipelineXTests.cs index dc12c839..baf5072c 100644 --- a/src/IIIFPresentation/Services.Tests/Manifests/PipelineXTests.cs +++ b/src/IIIFPresentation/Services.Tests/Manifests/PipelineXTests.cs @@ -49,7 +49,7 @@ public void ToPipelineItem_SetsStatusFromJob(PipelineJobStatus status, string ex var result = job.ToPipelineItem(); - result.Name.Should().Be("TextService"); + result.Name.Should().Be(PipelineX.TextPipelineName); result.Config!.Action.Should().Be("Index"); result.Status.Should().Be(expectedStatus); } From 638f46c984e90f48e4bac0b145826dde5b347bae Mon Sep 17 00:00:00 2001 From: "jack.lewis" Date: Wed, 24 Jun 2026 08:33:22 +0100 Subject: [PATCH 12/18] Changes before opening for review --- .../Converters/ManifestConverterTests.cs | 2 +- .../Manifest/ManifestWriteServiceTests.cs | 2 +- .../API.Tests/Integration/GetManifestTests.cs | 4 +- .../Features/Manifest/ManifestWriteService.cs | 55 ++++++++++--------- .../TextServiceJobCompletionMessageHandler.cs | 28 ++++++---- .../Core/Settings/TextServicesSettings.cs | 2 +- .../Models/API/Manifest/Pipeline.cs | 2 +- .../Models/Database/Collections/Manifest.cs | 5 +- ...pelineXTests.cs => PipelineHelperTests.cs} | 4 +- .../TextServices/TextServicesClient.cs | 2 +- 10 files changed, 59 
insertions(+), 47 deletions(-) rename src/IIIFPresentation/Services.Tests/Manifests/{PipelineXTests.cs => PipelineHelperTests.cs} (95%) diff --git a/src/IIIFPresentation/API.Tests/Converters/ManifestConverterTests.cs b/src/IIIFPresentation/API.Tests/Converters/ManifestConverterTests.cs index d9663532..67518fac 100644 --- a/src/IIIFPresentation/API.Tests/Converters/ManifestConverterTests.cs +++ b/src/IIIFPresentation/API.Tests/Converters/ManifestConverterTests.cs @@ -336,7 +336,7 @@ public void SetGeneratedFields_SetsPipelineFromJobs_WhenPipelineJobsPresent() var result = iiifManifest.SetGeneratedFields(dbManifest, pathGenerator, settingsBasedPathGenerator); // Assert - result.Pipeline.Should().ContainSingle(p => p.Name == PipelineX.TextPipelineName && p.Status == "Waiting"); + result.Pipeline.Should().ContainSingle(p => p.Name == PipelineHelper.TextPipelineName && p.Status == "Waiting"); } [Fact] diff --git a/src/IIIFPresentation/API.Tests/Features/Manifest/ManifestWriteServiceTests.cs b/src/IIIFPresentation/API.Tests/Features/Manifest/ManifestWriteServiceTests.cs index 8b62bc72..5b3f31fe 100644 --- a/src/IIIFPresentation/API.Tests/Features/Manifest/ManifestWriteServiceTests.cs +++ b/src/IIIFPresentation/API.Tests/Features/Manifest/ManifestWriteServiceTests.cs @@ -1171,7 +1171,7 @@ public async Task Create_CallsTextServicesAndCreatesPipelineJob_WhenManifestHasP pipelineJob!.Status.Should().Be(PipelineJobStatus.Waiting); pipelineJob.Config!.Action.Should().Be("Index"); pipelineJob.GetJobId().Should().Be($"{Customer}/iiif/{flatId}"); - result.Entity.Pipeline.Should().ContainSingle(p => p.Name == PipelineX.TextPipelineName && p.Status == "Waiting"); + result.Entity.Pipeline.Should().ContainSingle(p => p.Name == PipelineHelper.TextPipelineName && p.Status == "Waiting"); } [Fact] diff --git a/src/IIIFPresentation/API.Tests/Integration/GetManifestTests.cs b/src/IIIFPresentation/API.Tests/Integration/GetManifestTests.cs index faaee447..f7ddf89a 100644 --- 
a/src/IIIFPresentation/API.Tests/Integration/GetManifestTests.cs +++ b/src/IIIFPresentation/API.Tests/Integration/GetManifestTests.cs @@ -478,7 +478,7 @@ await amazonS3.PutObjectAsync(new() var manifest = await response.ReadAsPresentationJsonAsync(); manifest.Should().NotBeNull(); manifest!.Id.Should().Be($"http://localhost/1/manifests/{id}"); - manifest.Pipeline.Should().ContainSingle(p => p.Name == "TextService" && p.Status == "Waiting"); + manifest.Pipeline.Should().ContainSingle(p => p.Name == PipelineHelper.TextPipelineName && p.Status == "Waiting"); } [Fact] @@ -516,7 +516,7 @@ await amazonS3.PutObjectAsync(new() response.StatusCode.Should().Be(HttpStatusCode.OK); response.Headers.Should().ContainKey(HeaderNames.ETag, "pipeline is complete so manifest is final"); var manifest = await response.ReadAsPresentationJsonAsync(); - manifest!.Pipeline.Should().ContainSingle(p => p.Name == "TextService" && p.Status == "Completed"); + manifest!.Pipeline.Should().ContainSingle(p => p.Name == PipelineHelper.TextPipelineName && p.Status == "Completed"); } [Fact] diff --git a/src/IIIFPresentation/API/Features/Manifest/ManifestWriteService.cs b/src/IIIFPresentation/API/Features/Manifest/ManifestWriteService.cs index a92afd48..afddcc22 100644 --- a/src/IIIFPresentation/API/Features/Manifest/ManifestWriteService.cs +++ b/src/IIIFPresentation/API/Features/Manifest/ManifestWriteService.cs @@ -484,39 +484,42 @@ await manifestStorageManager.SaveManifestInStorage(iiifManifest, dbManifest, ori } } - // Add any pipeline job entities to the context before SaveChangesAsync so they are - // persisted within the transaction. The external HTTP submission happens after, so that - // if it fails the uncommitted transaction is rolled back cleanly. - PipelineJob? 
pendingPipelineJob = null; if (request.PresentationManifest.HasPipelineJob()) { - pendingPipelineJob = BuildPipelineJob(dbManifest, request.PresentationManifest.Pipeline!); - if (pendingPipelineJob == null) - { - await manifestStorageManager.DeleteStagedManifest(dbManifest); - return PresUpdateResult.Failure("No recognised pipeline type in request", - ModifyCollectionType.Unknown, WriteResult.BadRequest); - } - await dbContext.PipelineJobs.AddAsync(pendingPipelineJob, cancellationToken); - dbManifest.PipelineJobs = [pendingPipelineJob]; + return await RegisterAndSubmitPipelineJobs(dbManifest, request.PresentationManifest.Pipeline!, + cancellationToken); } + // save changes called if there are no pipeline jobs await dbContext.SaveChangesAsync(cancellationToken); + return null; + } - if (pendingPipelineJob != null) + // Persists pipeline job entities within the open transaction, then submits to external services. + // Submitting after SaveChangesAsync ensures DB state is consistent if the HTTP call fails and + // the transaction is rolled back by the caller. 
+ private async Task RegisterAndSubmitPipelineJobs(DbManifest dbManifest, + List pipeline, CancellationToken cancellationToken) + { + var job = BuildPipelineJob(dbManifest, pipeline); + if (job == null) { - var submitted = await textServicesClient.CreateOrUpdateJob(pendingPipelineJob, - awsOptions.Value.S3.StorageBucket, - dbManifest.GetResourceBucketKey(BucketLocationType.Staging), cancellationToken); + await manifestStorageManager.DeleteStagedManifest(dbManifest); + return PresUpdateResult.Failure("No recognised pipeline type in request", + ModifyCollectionType.Unknown, WriteResult.BadRequest); + } - if (!submitted) - { - logger.LogError("Failed to submit text-services job for manifest {ManifestId}", dbManifest.Id); - await manifestStorageManager.DeleteStagedManifest(dbManifest); - return PresUpdateResult.Failure( - "Failed to submit text-services pipeline job", - ModifyCollectionType.Unknown, WriteResult.Error); - } + await dbContext.PipelineJobs.AddAsync(job, cancellationToken); + dbManifest.PipelineJobs = [job]; + await dbContext.SaveChangesAsync(cancellationToken); + + if (!await textServicesClient.CreateOrUpdateJob(job, awsOptions.Value.S3.StorageBucket, + dbManifest.GetResourceBucketKey(BucketLocationType.Staging), cancellationToken)) + { + logger.LogError("Failed to submit {JobType} pipeline job for manifest {ManifestId}", job.JobType, dbManifest.Id); + await manifestStorageManager.DeleteStagedManifest(dbManifest); + return PresUpdateResult.Failure("Failed to submit pipeline job; manifest has not been saved", + ModifyCollectionType.Unknown, WriteResult.Error); } return null; @@ -526,7 +529,7 @@ await manifestStorageManager.SaveManifestInStorage(iiifManifest, dbManifest, ori { foreach (var pipelineItem in pipeline) { - if (string.Equals(pipelineItem.Name, PipelineX.TextPipelineName, StringComparison.OrdinalIgnoreCase)) + if (string.Equals(pipelineItem.Name, PipelineHelper.TextPipelineName, StringComparison.OrdinalIgnoreCase)) { return new PipelineJob { diff 
--git a/src/IIIFPresentation/BackgroundHandler/TextCompletion/TextServiceJobCompletionMessageHandler.cs b/src/IIIFPresentation/BackgroundHandler/TextCompletion/TextServiceJobCompletionMessageHandler.cs index a9d2ba15..8ae145ae 100644 --- a/src/IIIFPresentation/BackgroundHandler/TextCompletion/TextServiceJobCompletionMessageHandler.cs +++ b/src/IIIFPresentation/BackgroundHandler/TextCompletion/TextServiceJobCompletionMessageHandler.cs @@ -53,6 +53,13 @@ private async Task TryCompleteManifest(TextServiceJobCompletionMessage com int approximateReceiveCount, CancellationToken cancellationToken) { var resourceId = ExtractResourceIdFromJobId(completionMessage.JobId); + if (resourceId == null) + { + logger.LogWarning("Could not parse resource id from job id {JobId}; discarding message", + completionMessage.JobId); + return true; + } + var pipelineJob = await dbContext.PipelineJobs .Where(p => p.ResourceId == resourceId && p.JobType == PipelineJobType.TextService) .OrderByDescending(p => p.Created) @@ -112,7 +119,11 @@ private async Task TryCompleteManifest(TextServiceJobCompletionMessage com await manifestStorageManager.SaveManifestInStorage(stagedManifest, dbManifest, null, saveToStaging: false, cancellationToken); + await dbContext.SaveChangesAsync(cancellationToken); await iiifS3.DeleteIIIFFromS3(dbManifest, true); + logger.LogInformation( + "Text pipeline completed for job:{JobId}, manifest:{ManifestId}. Elapsed:{Elapsed}ms", + completionMessage.JobId, pipelineJob.ResourceId, sw.ElapsedMilliseconds); } catch (Exception e) { @@ -120,10 +131,6 @@ await manifestStorageManager.SaveManifestInStorage(stagedManifest, dbManifest, n return false; } - await dbContext.SaveChangesAsync(cancellationToken); - logger.LogInformation( - "Text pipeline completed for job:{JobId}, manifest:{ManifestId}. 
Elapsed:{Elapsed}ms", - completionMessage.JobId, pipelineJob.ResourceId, sw.ElapsedMilliseconds); return true; } @@ -139,10 +146,11 @@ private async Task ApplyTextServices(string jobId, Manifest stagedManifest, } stagedManifest.Services ??= []; - var existingIds = new HashSet(stagedManifest.Services.Select(s => s.Id)); - foreach (var service in augmented.Services) + var existingIds = new HashSet( + stagedManifest.Services.Select(s => s.Id).Where(id => id != null)!); + foreach (var service in augmented.Services.Where(s => s.Id != null)) { - if (existingIds.Add(service.Id)) + if (existingIds.Add(service.Id!)) stagedManifest.Services.Add(service); } @@ -175,13 +183,13 @@ private static void MergeContext(Manifest target, Manifest source) return firstSlash > 0 && int.TryParse(jobId[..firstSlash], out var customerId) ? customerId : null; } - private static string ExtractResourceIdFromJobId(string jobId) + private static string? ExtractResourceIdFromJobId(string jobId) { // jobId format: "{customerId}/iiif/{resourceId}" var firstSlash = jobId.IndexOf('/'); - if (firstSlash < 0) return string.Empty; + if (firstSlash < 0) return null; var secondSlash = jobId.IndexOf('/', firstSlash + 1); - return secondSlash > 0 && secondSlash < jobId.Length - 1 ? jobId[(secondSlash + 1)..] : string.Empty; + return secondSlash > 0 && secondSlash < jobId.Length - 1 ? jobId[(secondSlash + 1)..] : null; } private static TextServiceJobCompletionMessage DeserializeMessage(QueueMessage message, ILogger logger) diff --git a/src/IIIFPresentation/Core/Settings/TextServicesSettings.cs b/src/IIIFPresentation/Core/Settings/TextServicesSettings.cs index 4db529dd..174368a4 100644 --- a/src/IIIFPresentation/Core/Settings/TextServicesSettings.cs +++ b/src/IIIFPresentation/Core/Settings/TextServicesSettings.cs @@ -24,4 +24,4 @@ public class TextServicesSettings /// Used as the X-Forwarded-Path header when calling /text-augmented/v3 /// public string? 
PathRules { get; set; } -} \ No newline at end of file +} diff --git a/src/IIIFPresentation/Models/API/Manifest/Pipeline.cs b/src/IIIFPresentation/Models/API/Manifest/Pipeline.cs index 514862cd..1fa3039d 100644 --- a/src/IIIFPresentation/Models/API/Manifest/Pipeline.cs +++ b/src/IIIFPresentation/Models/API/Manifest/Pipeline.cs @@ -16,7 +16,7 @@ public class PipelineConfig public string? Action { get; set; } } -public static class PipelineX +public static class PipelineHelper { public const string TextPipelineName = "text"; diff --git a/src/IIIFPresentation/Models/Database/Collections/Manifest.cs b/src/IIIFPresentation/Models/Database/Collections/Manifest.cs index 0a6e1dd0..8cfb3ec0 100644 --- a/src/IIIFPresentation/Models/Database/Collections/Manifest.cs +++ b/src/IIIFPresentation/Models/Database/Collections/Manifest.cs @@ -1,4 +1,5 @@ -using System.Globalization; +using System.ComponentModel.DataAnnotations.Schema; +using System.Globalization; using IIIF.Presentation.V3.Strings; using Models.Database.General; @@ -48,7 +49,7 @@ public class Manifest : IHierarchyResource public List? Batches { get; set; } - [System.ComponentModel.DataAnnotations.Schema.NotMapped] + [NotMapped] public List? 
PipelineJobs { get; set; } /// diff --git a/src/IIIFPresentation/Services.Tests/Manifests/PipelineXTests.cs b/src/IIIFPresentation/Services.Tests/Manifests/PipelineHelperTests.cs similarity index 95% rename from src/IIIFPresentation/Services.Tests/Manifests/PipelineXTests.cs rename to src/IIIFPresentation/Services.Tests/Manifests/PipelineHelperTests.cs index baf5072c..ed97bff8 100644 --- a/src/IIIFPresentation/Services.Tests/Manifests/PipelineXTests.cs +++ b/src/IIIFPresentation/Services.Tests/Manifests/PipelineHelperTests.cs @@ -3,7 +3,7 @@ namespace Services.Tests.Manifests; -public class PipelineXTests +public class PipelineHelperTests { [Fact] public void HasPipelineJob_ReturnsFalse_WhenPipelineIsNull() @@ -49,7 +49,7 @@ public void ToPipelineItem_SetsStatusFromJob(PipelineJobStatus status, string ex var result = job.ToPipelineItem(); - result.Name.Should().Be(PipelineX.TextPipelineName); + result.Name.Should().Be(PipelineHelper.TextPipelineName); result.Config!.Action.Should().Be("Index"); result.Status.Should().Be(expectedStatus); } diff --git a/src/IIIFPresentation/Services/TextServices/TextServicesClient.cs b/src/IIIFPresentation/Services/TextServices/TextServicesClient.cs index 2b9c1aad..1bc3ac6c 100644 --- a/src/IIIFPresentation/Services/TextServices/TextServicesClient.cs +++ b/src/IIIFPresentation/Services/TextServices/TextServicesClient.cs @@ -42,7 +42,7 @@ public async Task CreateOrUpdateJob(PipelineJob job, string bucket, string if (response.StatusCode == HttpStatusCode.Conflict) { logger.LogDebug("Text-services job {JobId} already exists, reprocessing", jobId); - var putUri = new Uri(settings.BuilderApiUri, $"textbuilder/{Uri.EscapeDataString(jobId)}"); + var putUri = new Uri(settings.BuilderApiUri, $"textbuilder/{jobId}"); response = await httpClient.PutAsync(putUri, null, cancellationToken); } From 1337c21e5d3a0d2726eed16c750c37d14846cd22 Mon Sep 17 00:00:00 2001 From: "jack.lewis" Date: Wed, 24 Jun 2026 14:41:18 +0100 Subject: [PATCH 13/18] Fixing 
various todo comments --- .../Features/Manifest/ManifestWriteService.cs | 10 +-- ...ServiceJobCompletionMessageHandlerTests.cs | 14 ++-- .../TextServiceJobCompletionMessageHandler.cs | 69 ++++++++++--------- src/IIIFPresentation/Core/IIIF/IServiceX.cs | 12 ++++ .../Models/Database/Collections/Manifest.cs | 2 +- .../Helpers/ManifestContextHelper.cs | 22 ++++++ .../Services/Manifests/ManifestMerger.cs | 19 +---- 7 files changed, 84 insertions(+), 64 deletions(-) create mode 100644 src/IIIFPresentation/Core/IIIF/IServiceX.cs create mode 100644 src/IIIFPresentation/Services/Manifests/Helpers/ManifestContextHelper.cs diff --git a/src/IIIFPresentation/API/Features/Manifest/ManifestWriteService.cs b/src/IIIFPresentation/API/Features/Manifest/ManifestWriteService.cs index afddcc22..f6fe1701 100644 --- a/src/IIIFPresentation/API/Features/Manifest/ManifestWriteService.cs +++ b/src/IIIFPresentation/API/Features/Manifest/ManifestWriteService.cs @@ -504,13 +504,13 @@ await manifestStorageManager.SaveManifestInStorage(iiifManifest, dbManifest, ori var job = BuildPipelineJob(dbManifest, pipeline); if (job == null) { - await manifestStorageManager.DeleteStagedManifest(dbManifest); - return PresUpdateResult.Failure("No recognised pipeline type in request", - ModifyCollectionType.Unknown, WriteResult.BadRequest); + logger.LogWarning("No recognised pipeline type for manifest {ManifestId}; ignoring pipeline", dbManifest.Id); + await dbContext.SaveChangesAsync(cancellationToken); + return null; } - await dbContext.PipelineJobs.AddAsync(job, cancellationToken); - dbManifest.PipelineJobs = [job]; + await dbContext.PipelineJobs.AddAsync(job, cancellationToken); // explicit Add required: PipelineJobs is [NotMapped] on Manifest + dbManifest.PipelineJobs = [job]; // in-memory only, for API response generation await dbContext.SaveChangesAsync(cancellationToken); if (!await textServicesClient.CreateOrUpdateJob(job, awsOptions.Value.S3.StorageBucket, diff --git 
a/src/IIIFPresentation/BackgroundHandler.Tests/TextCompletion/TextServiceJobCompletionMessageHandlerTests.cs b/src/IIIFPresentation/BackgroundHandler.Tests/TextCompletion/TextServiceJobCompletionMessageHandlerTests.cs index dbe10f31..f6a01235 100644 --- a/src/IIIFPresentation/BackgroundHandler.Tests/TextCompletion/TextServiceJobCompletionMessageHandlerTests.cs +++ b/src/IIIFPresentation/BackgroundHandler.Tests/TextCompletion/TextServiceJobCompletionMessageHandlerTests.cs @@ -108,15 +108,12 @@ public async Task HandleMessage_ReturnsFalse_WhenStagedManifestMissing() } [Fact] - public async Task HandleMessage_UpdatesStatusToFailed_AndSavesManifest_WhenJobFailed() + public async Task HandleMessage_UpdatesStatusToFailed_WhenJobFailed() { var manifestId = TestIdentifiers.IdWithSuffix(suffix: "_failed"); var jobId = $"{CustomerId}/iiif/{manifestId}"; await SetupManifestWithPipelineJob(manifestId, jobId); - A.CallTo(() => iiifS3.ReadIIIFFromS3(A._, BucketLocationType.Staging, A._)) - .Returns(new IIIFManifest { Id = manifestId }); - var message = CreateMessage(jobId, PipelineJobStatus.Failed, errors: "OCR timed out"); (await sut.HandleMessage(message, CancellationToken.None)).Should().BeTrue(); @@ -125,9 +122,11 @@ public async Task HandleMessage_UpdatesStatusToFailed_AndSavesManifest_WhenJobFa job.Status.Should().Be(PipelineJobStatus.Failed); job.Error.Should().Be("OCR timed out"); + A.CallTo(() => iiifS3.ReadIIIFFromS3(A._, A._, A._)) + .MustNotHaveHappened(); A.CallTo(() => manifestStorageManager.SaveManifestInStorage( - A._, A._, null, false, A._)) - .MustHaveHappenedOnceExactly(); + A._, A._, A._, A._, A._)) + .MustNotHaveHappened(); A.CallTo(() => textServicesClient.GetTextAugmentedManifest(A._, A._)) .MustNotHaveHappened(); A.CallTo(() => iiifS3.DeleteIIIFFromS3(A._, true)) @@ -304,9 +303,6 @@ public async Task HandleMessage_SetsFinishedTimestamp_WhenJobFails() var jobId = $"{CustomerId}/iiif/{manifestId}"; await SetupManifestWithPipelineJob(manifestId, jobId); - 
A.CallTo(() => iiifS3.ReadIIIFFromS3(A._, BucketLocationType.Staging, A._)) - .Returns(new IIIFManifest { Id = manifestId }); - await sut.HandleMessage(CreateMessage(jobId, PipelineJobStatus.Failed, errors: "OCR error"), CancellationToken.None); var job = dbContext.PipelineJobs.Single(p => p.ResourceId == manifestId); diff --git a/src/IIIFPresentation/BackgroundHandler/TextCompletion/TextServiceJobCompletionMessageHandler.cs b/src/IIIFPresentation/BackgroundHandler/TextCompletion/TextServiceJobCompletionMessageHandler.cs index 8ae145ae..4918257e 100644 --- a/src/IIIFPresentation/BackgroundHandler/TextCompletion/TextServiceJobCompletionMessageHandler.cs +++ b/src/IIIFPresentation/BackgroundHandler/TextCompletion/TextServiceJobCompletionMessageHandler.cs @@ -2,13 +2,16 @@ using AWS.Helpers; using AWS.SQS; using BackgroundHandler.Helpers; +using Core.IIIF; using IIIF; using IIIF.Presentation.V3; +using IIIF.Search.V2; using Microsoft.EntityFrameworkCore; using Models.Database.General; using Repository; using Repository.Helpers; using Services.Manifests.AWS; +using Services.Manifests.Helpers; using Services.TextServices; namespace BackgroundHandler.TextCompletion; @@ -22,6 +25,8 @@ public class TextServiceJobCompletionMessageHandler( ILogger logger) : IMessageHandler { + private const string Search2Context = "http://iiif.io/api/search/2/context.json"; + public async Task HandleMessage(QueueMessage message, CancellationToken cancellationToken) { using (LogContextHelpers.SetServiceName(nameof(TextServiceJobCompletionMessageHandler), message.MessageId)) @@ -74,7 +79,10 @@ private async Task TryCompleteManifest(TextServiceJobCompletionMessage com return discard; } - var sw = Stopwatch.StartNew(); + if (pipelineJob.Finished != null) + logger.LogWarning("PipelineJob for {JobId} already finished at {Finished}; re-processing", + completionMessage.JobId, pipelineJob.Finished); + var dbManifest = await dbContext.Manifests .Include(m => m.CanvasPaintings) .SingleOrDefaultAsync(m 
=> m.Id == pipelineJob.ResourceId && m.CustomerId == pipelineJob.CustomerId, @@ -91,6 +99,20 @@ private async Task TryCompleteManifest(TextServiceJobCompletionMessage com "Completing text pipeline for job:{JobId}, customer:{CustomerId}, manifest:{ManifestId}", completionMessage.JobId, pipelineJob.CustomerId, pipelineJob.ResourceId); + if (!completionMessage.IsCompleted) + { + logger.LogWarning("Text-services job {JobId} failed: {Errors}", completionMessage.JobId, + completionMessage.Errors); + pipelineJob.Error = completionMessage.Errors; + pipelineJob.Status = PipelineJobStatus.Failed; + pipelineJob.Finished = completionMessage.Finished?.UtcDateTime; + await dbContext.SaveChangesAsync(cancellationToken); + await iiifS3.DeleteIIIFFromS3(dbManifest, true); + return true; + } + + var sw = Stopwatch.StartNew(); + try { var stagedManifest = @@ -102,19 +124,8 @@ private async Task TryCompleteManifest(TextServiceJobCompletionMessage com return false; } - if (!completionMessage.IsCompleted) - { - logger.LogWarning("Text-services job {JobId} failed: {Errors}", completionMessage.JobId, - completionMessage.Errors); - pipelineJob.Error = completionMessage.Errors; - pipelineJob.Status = PipelineJobStatus.Failed; - } - else - { - await ApplyTextServices(completionMessage.JobId, stagedManifest, cancellationToken); - pipelineJob.Status = PipelineJobStatus.Completed; - } - + await ApplyTextServices(completionMessage.JobId, stagedManifest, cancellationToken); + pipelineJob.Status = PipelineJobStatus.Completed; pipelineJob.Finished = completionMessage.Finished?.UtcDateTime; await manifestStorageManager.SaveManifestInStorage(stagedManifest, dbManifest, null, @@ -139,38 +150,32 @@ private async Task ApplyTextServices(string jobId, Manifest stagedManifest, { var augmented = await textServicesClient.GetTextAugmentedManifest(jobId, cancellationToken); - if (augmented?.Services == null || augmented.Services.Count == 0) + if (augmented?.Service == null || !augmented.Service.OfType().Any()) 
{ logger.LogDebug("No search services in text-augmented manifest for job {JobId}", jobId); return; } - stagedManifest.Services ??= []; - var existingIds = new HashSet( - stagedManifest.Services.Select(s => s.Id).Where(id => id != null)!); - foreach (var service in augmented.Services.Where(s => s.Id != null)) + stagedManifest.Service ??= []; + var existingIds = stagedManifest.Service.GetDistinctIds(); + + // new HashSet( + // stagedManifest.Service.Select(s => s.Id).Where(id => id != null)!); + + foreach (var service in augmented.Service.OfType()) { - if (existingIds.Add(service.Id!)) - stagedManifest.Services.Add(service); + if (existingIds.Add(service.Id!)) stagedManifest.Service.Add(service); } - + MergeContext(stagedManifest, augmented); logger.LogDebug("Added {Count} search service(s) to manifest for job {JobId}", - augmented.Services.Count, jobId); + augmented.Service.Count, jobId); } private static void MergeContext(Manifest target, Manifest source) { - IEnumerable contexts = source.Context switch - { - null => [], - string str => [str], - IEnumerable enumerable => enumerable, - _ => [] - }; - - foreach (var context in contexts.Where(c => !IIIF.Presentation.Context.Presentation3Context.Equals(c))) + foreach (var context in source.GetContextStrings().Where(c => c == Search2Context)) { target.EnsureContext(context); } diff --git a/src/IIIFPresentation/Core/IIIF/IServiceX.cs b/src/IIIFPresentation/Core/IIIF/IServiceX.cs new file mode 100644 index 00000000..c8167480 --- /dev/null +++ b/src/IIIFPresentation/Core/IIIF/IServiceX.cs @@ -0,0 +1,12 @@ +using IIIF; + +namespace Core.IIIF; + +public static class IServiceX +{ + /// + /// Get a list of unique ids. 
+ /// + public static HashSet GetDistinctIds(this IList target) where T : IService + => [..target.Select(s => s.Id).Where(id => id != null)!]; +} diff --git a/src/IIIFPresentation/Models/Database/Collections/Manifest.cs b/src/IIIFPresentation/Models/Database/Collections/Manifest.cs index 8cfb3ec0..c0620587 100644 --- a/src/IIIFPresentation/Models/Database/Collections/Manifest.cs +++ b/src/IIIFPresentation/Models/Database/Collections/Manifest.cs @@ -49,7 +49,7 @@ public class Manifest : IHierarchyResource public List? Batches { get; set; } - [NotMapped] + [NotMapped] // PipelineJob uses ResourceId+ResourceType rather than a direct FK, to support future resource types (e.g. collections) public List? PipelineJobs { get; set; } /// diff --git a/src/IIIFPresentation/Services/Manifests/Helpers/ManifestContextHelper.cs b/src/IIIFPresentation/Services/Manifests/Helpers/ManifestContextHelper.cs new file mode 100644 index 00000000..2f5bdc4b --- /dev/null +++ b/src/IIIFPresentation/Services/Manifests/Helpers/ManifestContextHelper.cs @@ -0,0 +1,22 @@ +using IIIF.Presentation.V3; +using Newtonsoft.Json.Linq; + +namespace Services.Manifests.Helpers; + +public static class ManifestContextHelper +{ + /// + /// Extracts all context strings from a manifest's Context property, + /// normalising the various representations (string, array, JArray, JValue). 
+ /// + public static IEnumerable GetContextStrings(this Manifest manifest) => + manifest.Context switch + { + null => [], + string str => [str], + IEnumerable enumerable => enumerable, + JArray jArray => jArray.Values().Where(s => s != null).Select(s => s!), + JValue { Type: JTokenType.String } jValue when jValue.ToString() is { } plain => [plain], + _ => [] + }; +} diff --git a/src/IIIFPresentation/Services/Manifests/ManifestMerger.cs b/src/IIIFPresentation/Services/Manifests/ManifestMerger.cs index bbcce8b3..20b4bcb8 100644 --- a/src/IIIFPresentation/Services/Manifests/ManifestMerger.cs +++ b/src/IIIFPresentation/Services/Manifests/ManifestMerger.cs @@ -8,7 +8,6 @@ using IIIF.Presentation.V3.Content; using Microsoft.Extensions.Logging; using Models.DLCS; -using Newtonsoft.Json.Linq; using Repository.Paths; using Services.Manifests.Helpers; using CanvasPainting = Models.Database.CanvasPainting; @@ -425,22 +424,8 @@ private void AlignCanvasPaintingAndBody(CanvasPainting canvasPainting, Canvas na private void SetManifestContext(Manifest baseManifest, Manifest namedQueryManifest) { - // Grab any contexts from NQ manifest - IEnumerable contexts = namedQueryManifest.Context switch - { - null => [], - string str => [str], - IEnumerable enumerable => enumerable, - JArray jArray => jArray.Values(), - JValue { Type: JTokenType.String } jValue when jValue.ToString() is { } plain => [plain], - _ => [] - }; - - // skip the default one - contexts = contexts.Where(c => !Context.Presentation3Context.Equals(c)); - - // ensure if any - foreach (var context in contexts) + foreach (var context in namedQueryManifest.GetContextStrings() + .Where(c => !Context.Presentation3Context.Equals(c))) { logger.LogTrace("Adding context {Context} to {ManifestId}", context, baseManifest.Id); baseManifest.EnsureContext(context); From 2223ff3a7021b85b4dc2c6fced7d3732671bac9a Mon Sep 17 00:00:00 2001 From: "jack.lewis" Date: Wed, 24 Jun 2026 15:09:43 +0100 Subject: [PATCH 14/18] Fixing tests --- 
.../Manifest/ManifestWriteServiceTests.cs | 2 +- .../Features/Manifest/ManifestWriteService.cs | 4 +- ...ServiceJobCompletionMessageHandlerTests.cs | 47 ++++++++++++++++--- .../TextServiceJobCompletionMessageHandler.cs | 8 +--- .../API/General/ModifyCollectionType.cs | 1 + 5 files changed, 46 insertions(+), 16 deletions(-) diff --git a/src/IIIFPresentation/API.Tests/Features/Manifest/ManifestWriteServiceTests.cs b/src/IIIFPresentation/API.Tests/Features/Manifest/ManifestWriteServiceTests.cs index 5b3f31fe..192e4167 100644 --- a/src/IIIFPresentation/API.Tests/Features/Manifest/ManifestWriteServiceTests.cs +++ b/src/IIIFPresentation/API.Tests/Features/Manifest/ManifestWriteServiceTests.cs @@ -1194,7 +1194,7 @@ public async Task Create_ReturnsError_AndDoesNotPersistManifest_WhenTextServiceS // Assert result.IsSuccess.Should().BeFalse(); result.WriteResult.Should().Be(WriteResult.Error); - result.Error.Should().Contain("pipeline job"); + result.Error.Should().Contain("text service"); // Manifest and pipeline job should be rolled back — resubmitting the same slug must not conflict presentationContext.Hierarchy.Any(h => h.Slug == slug).Should().BeFalse(); diff --git a/src/IIIFPresentation/API/Features/Manifest/ManifestWriteService.cs b/src/IIIFPresentation/API/Features/Manifest/ManifestWriteService.cs index f6fe1701..05711d13 100644 --- a/src/IIIFPresentation/API/Features/Manifest/ManifestWriteService.cs +++ b/src/IIIFPresentation/API/Features/Manifest/ManifestWriteService.cs @@ -518,8 +518,8 @@ await manifestStorageManager.SaveManifestInStorage(iiifManifest, dbManifest, ori { logger.LogError("Failed to submit {JobType} pipeline job for manifest {ManifestId}", job.JobType, dbManifest.Id); await manifestStorageManager.DeleteStagedManifest(dbManifest); - return PresUpdateResult.Failure("Failed to submit pipeline job; manifest has not been saved", - ModifyCollectionType.Unknown, WriteResult.Error); + return PresUpdateResult.Failure("Error connecting to the text service", + 
ModifyCollectionType.CannotConnectToTextService, WriteResult.Error); } return null; diff --git a/src/IIIFPresentation/BackgroundHandler.Tests/TextCompletion/TextServiceJobCompletionMessageHandlerTests.cs b/src/IIIFPresentation/BackgroundHandler.Tests/TextCompletion/TextServiceJobCompletionMessageHandlerTests.cs index f6a01235..376c2c07 100644 --- a/src/IIIFPresentation/BackgroundHandler.Tests/TextCompletion/TextServiceJobCompletionMessageHandlerTests.cs +++ b/src/IIIFPresentation/BackgroundHandler.Tests/TextCompletion/TextServiceJobCompletionMessageHandlerTests.cs @@ -6,6 +6,7 @@ using FakeItEasy; using FluentAssertions; using IIIF.Presentation.V3; +using IIIF.Search.V1; using IIIF.Search.V2; using Microsoft.EntityFrameworkCore; using Microsoft.Extensions.Logging.Abstractions; @@ -174,7 +175,7 @@ public async Task HandleMessage_MergesSearchServicesIntoManifest_WhenAugmentedMa var searchService = new SearchService2 { Id = "https://search.example.com/search" }; var augmentedManifest = new IIIFManifest { - Services = [searchService] + Service = [searchService] }; A.CallTo(() => textServicesClient.GetTextAugmentedManifest(jobId, A._)) .Returns(augmentedManifest); @@ -190,7 +191,7 @@ public async Task HandleMessage_MergesSearchServicesIntoManifest_WhenAugmentedMa (await sut.HandleMessage(message, CancellationToken.None)).Should().BeTrue(); savedManifest.Should().NotBeNull(); - savedManifest!.Services.Should().ContainSingle(s => s.Id == searchService.Id); + savedManifest!.Service.Should().ContainSingle(s => s.Id == searchService.Id); } [Fact] @@ -204,14 +205,14 @@ public async Task HandleMessage_DoesNotDuplicateServices_WhenAugmentedManifestCo var stagedManifest = new IIIFManifest { Id = manifestId, - Services = [new SearchService2 { Id = serviceId }] + Service = [new SearchService2 { Id = serviceId }] }; A.CallTo(() => iiifS3.ReadIIIFFromS3(A._, BucketLocationType.Staging, A._)) .Returns(stagedManifest); var augmentedManifest = new IIIFManifest { - Services = [new 
SearchService2 { Id = serviceId }] + Service = [new SearchService2 { Id = serviceId }] }; A.CallTo(() => textServicesClient.GetTextAugmentedManifest(jobId, A._)) .Returns(augmentedManifest); @@ -226,7 +227,7 @@ public async Task HandleMessage_DoesNotDuplicateServices_WhenAugmentedManifestCo await sut.HandleMessage(message, CancellationToken.None); - savedManifest!.Services.Should().HaveCount(1, "duplicate service ID should not be added twice"); + savedManifest!.Service.Should().HaveCount(1, "duplicate service ID should not be added twice"); } [Fact] @@ -243,7 +244,7 @@ public async Task HandleMessage_MergesContextFromAugmentedManifest_WhenAugmented var augmentedManifest = new IIIFManifest { - Services = [new SearchService2 { Id = "https://search.example.com/search" }], + Service = [new SearchService2 { Id = "https://search.example.com/search" }], Context = searchContext }; A.CallTo(() => textServicesClient.GetTextAugmentedManifest(jobId, A._)) @@ -267,7 +268,7 @@ public async Task HandleMessage_DoesNotAddPresentation3Context_FromAugmentedMani var augmentedManifest = new IIIFManifest { - Services = [new SearchService2 { Id = "https://search.example.com/search" }], + Service = [new SearchService2 { Id = "https://search.example.com/search" }], Context = IIIF.Presentation.Context.Presentation3Context }; A.CallTo(() => textServicesClient.GetTextAugmentedManifest(jobId, A._)) @@ -309,6 +310,38 @@ public async Task HandleMessage_SetsFinishedTimestamp_WhenJobFails() job.Finished.Should().Be(new DateTime(2024, 6, 12, 10, 0, 0, DateTimeKind.Utc)); } + [Fact] + public async Task HandleMessage_OnlyAddsSearchService2_WhenAugmentedManifestHasOtherServiceTypes() + { + var manifestId = TestIdentifiers.IdWithSuffix(suffix: "_filter_services"); + var jobId = $"{CustomerId}/iiif/{manifestId}"; + await SetupManifestWithPipelineJob(manifestId, jobId); + + var stagedManifest = new IIIFManifest { Id = manifestId }; + A.CallTo(() => iiifS3.ReadIIIFFromS3(A._, BucketLocationType.Staging, 
A._)) + .Returns(stagedManifest); + + var searchService = new SearchService2 { Id = "https://search.example.com/search" }; + var otherService = new SearchService { Id = "https://image.example.com/image" }; + var augmentedManifest = new IIIFManifest + { + Service = [searchService, otherService] + }; + A.CallTo(() => textServicesClient.GetTextAugmentedManifest(jobId, A._)) + .Returns(augmentedManifest); + + IIIFManifest? savedManifest = null; + A.CallTo(() => manifestStorageManager.SaveManifestInStorage( + A._, A._, null, false, A._)) + .Invokes((IIIFManifest m, DbManifest _, string? _, bool _, CancellationToken _) => savedManifest = m) + .Returns(Task.CompletedTask); + + await sut.HandleMessage(CreateMessage(jobId, PipelineJobStatus.Completed), CancellationToken.None); + + savedManifest!.Service.Should().ContainSingle() + .Which.Should().BeOfType(); + } + private async Task SetupManifestWithPipelineJob(string manifestId, string jobId) { var manifestEntry = await dbContext.Manifests.AddTestManifest(id: manifestId); diff --git a/src/IIIFPresentation/BackgroundHandler/TextCompletion/TextServiceJobCompletionMessageHandler.cs b/src/IIIFPresentation/BackgroundHandler/TextCompletion/TextServiceJobCompletionMessageHandler.cs index 4918257e..4bad27ab 100644 --- a/src/IIIFPresentation/BackgroundHandler/TextCompletion/TextServiceJobCompletionMessageHandler.cs +++ b/src/IIIFPresentation/BackgroundHandler/TextCompletion/TextServiceJobCompletionMessageHandler.cs @@ -158,10 +158,7 @@ private async Task ApplyTextServices(string jobId, Manifest stagedManifest, stagedManifest.Service ??= []; var existingIds = stagedManifest.Service.GetDistinctIds(); - - // new HashSet( - // stagedManifest.Service.Select(s => s.Id).Where(id => id != null)!); - +fixing tests foreach (var service in augmented.Service.OfType()) { if (existingIds.Add(service.Id!)) stagedManifest.Service.Add(service); @@ -169,8 +166,7 @@ private async Task ApplyTextServices(string jobId, Manifest stagedManifest, 
MergeContext(stagedManifest, augmented); - logger.LogDebug("Added {Count} search service(s) to manifest for job {JobId}", - augmented.Service.Count, jobId); + logger.LogDebug("Added search service to manifest for job {JobId}", jobId); } private static void MergeContext(Manifest target, Manifest source) diff --git a/src/IIIFPresentation/Models/API/General/ModifyCollectionType.cs b/src/IIIFPresentation/Models/API/General/ModifyCollectionType.cs index 62337718..ba0b1eaa 100644 --- a/src/IIIFPresentation/Models/API/General/ModifyCollectionType.cs +++ b/src/IIIFPresentation/Models/API/General/ModifyCollectionType.cs @@ -30,5 +30,6 @@ public enum ModifyCollectionType CustomerIdDoesNotMatchCaller = 26, AssetsAdjunctsDoNotMatch = 27, ManifestCurrentlyIngesting = 28, + CannotConnectToTextService = 29, Unknown = 1000 } From d5e55eb28d2c49b126cf4ccb335bc55f7fc13dfb Mon Sep 17 00:00:00 2001 From: "jack.lewis" Date: Wed, 24 Jun 2026 15:10:20 +0100 Subject: [PATCH 15/18] Remove random text --- .../TextCompletion/TextServiceJobCompletionMessageHandler.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/IIIFPresentation/BackgroundHandler/TextCompletion/TextServiceJobCompletionMessageHandler.cs b/src/IIIFPresentation/BackgroundHandler/TextCompletion/TextServiceJobCompletionMessageHandler.cs index 4bad27ab..62ea8c0c 100644 --- a/src/IIIFPresentation/BackgroundHandler/TextCompletion/TextServiceJobCompletionMessageHandler.cs +++ b/src/IIIFPresentation/BackgroundHandler/TextCompletion/TextServiceJobCompletionMessageHandler.cs @@ -158,7 +158,7 @@ private async Task ApplyTextServices(string jobId, Manifest stagedManifest, stagedManifest.Service ??= []; var existingIds = stagedManifest.Service.GetDistinctIds(); -fixing tests + foreach (var service in augmented.Service.OfType()) { if (existingIds.Add(service.Id!)) stagedManifest.Service.Add(service); From 706467de691118de0ebdc111b9247ed5c4c85ef8 Mon Sep 17 00:00:00 2001 From: "jack.lewis" Date: Wed, 24 Jun 2026 
15:52:01 +0100 Subject: [PATCH 16/18] Make the PipelineJob table work better --- .../Converters/ManifestConverterTests.cs | 6 +- .../Manifest/ManifestWriteServiceTests.cs | 35 +- .../API.Tests/Integration/GetManifestTests.cs | 6 +- .../Features/Manifest/ManifestWriteService.cs | 6 +- .../Storage/Helpers/PresentationContextX.cs | 2 +- ...ServiceJobCompletionMessageHandlerTests.cs | 11 +- .../TextServiceJobCompletionMessageHandler.cs | 2 +- .../Models/Database/Collections/Collection.cs | 2 + .../Models/Database/Collections/Manifest.cs | 4 +- .../Models/Database/General/PipelineJob.cs | 15 +- ...pelineJobManifestCollectionFKs.Designer.cs | 513 ++++++++++++++++++ ...142143_PipelineJobManifestCollectionFKs.cs | 111 ++++ .../PresentationContextModelSnapshot.cs | 52 +- .../Repository/PresentationContext.cs | 23 +- .../Manifests/ManifestXTests.cs | 4 +- .../Manifests/PipelineHelperTests.cs | 4 +- .../Manifests/PipelineJobXTests.cs | 9 +- .../TextServices/TextServicesClientTests.cs | 2 +- 18 files changed, 740 insertions(+), 67 deletions(-) create mode 100644 src/IIIFPresentation/Repository/Migrations/20260624142143_PipelineJobManifestCollectionFKs.Designer.cs create mode 100644 src/IIIFPresentation/Repository/Migrations/20260624142143_PipelineJobManifestCollectionFKs.cs diff --git a/src/IIIFPresentation/API.Tests/Converters/ManifestConverterTests.cs b/src/IIIFPresentation/API.Tests/Converters/ManifestConverterTests.cs index 67518fac..330cfa28 100644 --- a/src/IIIFPresentation/API.Tests/Converters/ManifestConverterTests.cs +++ b/src/IIIFPresentation/API.Tests/Converters/ManifestConverterTests.cs @@ -324,7 +324,7 @@ public void SetGeneratedFields_SetsPipelineFromJobs_WhenPipelineJobsPresent() [ new PipelineJob { - ResourceId = "id", CustomerId = 1, + ManifestId = "id", CustomerId = 1, JobType = PipelineJobType.TextService, Status = PipelineJobStatus.Waiting, Created = DateTime.UtcNow @@ -354,14 +354,14 @@ public void 
SetGeneratedFields_ReturnsLatestJobPerType_WhenMultipleJobsOfSameTyp [ new PipelineJob { - ResourceId = "id", CustomerId = 1, + ManifestId = "id", CustomerId = 1, JobType = PipelineJobType.TextService, Status = PipelineJobStatus.Completed, Created = older }, new PipelineJob { - ResourceId = "id", CustomerId = 1, + ManifestId = "id", CustomerId = 1, JobType = PipelineJobType.TextService, Status = PipelineJobStatus.Waiting, Created = newer diff --git a/src/IIIFPresentation/API.Tests/Features/Manifest/ManifestWriteServiceTests.cs b/src/IIIFPresentation/API.Tests/Features/Manifest/ManifestWriteServiceTests.cs index 192e4167..7be5a781 100644 --- a/src/IIIFPresentation/API.Tests/Features/Manifest/ManifestWriteServiceTests.cs +++ b/src/IIIFPresentation/API.Tests/Features/Manifest/ManifestWriteServiceTests.cs @@ -61,19 +61,22 @@ public ManifestWriteServiceTests(PresentationContextFixture dbFixture) { presentationContext = dbFixture.DbContext; dbFixture.CustomerIdProvider.SetCustomerId(Customer); - + + // Use a tracked context for the SUT to mirror production behaviour (EF navigation-property cascades require tracking) + var sutContext = dbFixture.GetNewPresentationContext(dbFixture.CustomerIdProvider); + dlcsSettings = DefaultSettings.DlcsSettings(); var typedPathTemplateOptions = Options.Create(PathRewriteOptions.Default); - + var sqidsEncoder = new SqidsEncoder(); var idGenerator = new SqidsGenerator(sqidsEncoder, new NullLogger()); - - var identityManager = new IdentityManager(idGenerator, presentationContext, new NullLogger()); - + + var identityManager = new IdentityManager(idGenerator, sutContext, new NullLogger()); + var presentationGenerator = new TestPresentationConfigGenerator("https://localhost:5000", PathRewriteOptions.Default); - + var pathRewriteParser = new PathRewriteParser(typedPathTemplateOptions, new NullLogger()); var pathSettings = new PathSettings { PresentationApiUrl = new Uri("https://base") }; @@ -86,24 +89,24 @@ public 
ManifestWriteServiceTests(PresentationContextFixture dbFixture) new NullLogger()); var manifestPaintedResourceParser = new ManifestPaintedResourceParser(pathRewriteParser, presentationGenerator, - Options.Create(pathSettings), presentationContext, canvasHelper, new NullLogger()); + Options.Create(pathSettings), sutContext, canvasHelper, new NullLogger()); var canvasPaintingMerger = new CanvasPaintingMerger(pathRewriteParser); var canvasPaintingResolver = new CanvasPaintingResolver(identityManager, manifestItemsParser, manifestPaintedResourceParser, canvasPaintingMerger, new NullLogger()); - + dlcsClient = A.Fake(); - + var apiOptions = Options.Create(new ApiSettings() { AWS = new AWSSettings(), DLCS = dlcsSettings }); - - var managedResultFinder = new ManagedAssetResultFinder(dlcsClient, presentationContext, apiOptions, + + var managedResultFinder = new ManagedAssetResultFinder(dlcsClient, sutContext, apiOptions, new NullLogger()); - var dlcsManifestCoordinator = new DlcsManifestCoordinator(dlcsClient, presentationContext, managedResultFinder, + var dlcsManifestCoordinator = new DlcsManifestCoordinator(dlcsClient, sutContext, managedResultFinder, new NullLogger()); var parentSlugParser = A.Fake(); @@ -121,7 +124,7 @@ public ManifestWriteServiceTests(PresentationContextFixture dbFixture) textServicesClient = A.Fake(); A.CallTo(() => textServicesClient.CreateOrUpdateJob(A._, A._, A._, A._)) .Returns(true); - sut = new ManifestWriteService(presentationContext, identityManager, canvasPaintingResolver, + sut = new ManifestWriteService(sutContext, identityManager, canvasPaintingResolver, new TestPathGenerator(presentationGenerator), settingsBasedPathGenerator, dlcsManifestCoordinator, parentSlugParser, manifestStorageManager, pathRewriteParser, manifestLockManager, textServicesClient, Options.Create(new AWSSettings()), new NullLogger()); @@ -1166,7 +1169,7 @@ public async Task Create_CallsTextServicesAndCreatesPipelineJob_WhenManifestHasP .MustHaveHappenedOnceExactly(); 
var flatId = result.Entity.FlatId; - var pipelineJob = presentationContext.PipelineJobs.FirstOrDefault(p => p.ResourceId == flatId); + var pipelineJob = presentationContext.PipelineJobs.FirstOrDefault(p => p.ManifestId == flatId); pipelineJob.Should().NotBeNull(); pipelineJob!.Status.Should().Be(PipelineJobStatus.Waiting); pipelineJob.Config!.Action.Should().Be("Index"); @@ -1198,7 +1201,7 @@ public async Task Create_ReturnsError_AndDoesNotPersistManifest_WhenTextServiceS // Manifest and pipeline job should be rolled back — resubmitting the same slug must not conflict presentationContext.Hierarchy.Any(h => h.Slug == slug).Should().BeFalse(); - presentationContext.PipelineJobs.Any(p => p.ResourceId == resourceId).Should().BeFalse(); + presentationContext.PipelineJobs.Any(p => p.ManifestId == resourceId).Should().BeFalse(); // Staged S3 objects must be cleaned up A.CallTo(() => manifestStorageManager.DeleteStagedManifest(A._)) @@ -1279,7 +1282,7 @@ public async Task Create_AddsNewPipelineJob_WhenJobAlreadyExistsForManifest() A.CallTo(() => textServicesClient.CreateOrUpdateJob(A._, A._, A._, A._)) .MustHaveHappenedTwiceExactly(); - var jobs = presentationContext.PipelineJobs.Where(p => p.ResourceId == flatId).ToList(); + var jobs = presentationContext.PipelineJobs.Where(p => p.ManifestId == flatId).ToList(); jobs.Should().HaveCount(2, "each resubmission creates a new job record for history"); jobs.Should().AllSatisfy(j => j.Status.Should().Be(PipelineJobStatus.Waiting)); } diff --git a/src/IIIFPresentation/API.Tests/Integration/GetManifestTests.cs b/src/IIIFPresentation/API.Tests/Integration/GetManifestTests.cs index f7ddf89a..9f967473 100644 --- a/src/IIIFPresentation/API.Tests/Integration/GetManifestTests.cs +++ b/src/IIIFPresentation/API.Tests/Integration/GetManifestTests.cs @@ -451,8 +451,7 @@ public async Task Get_IiifManifest_Flat_ReturnsAccepted_WhenPipelineJobQueued() var dbManifest = await dbContext.Manifests.AddTestManifest(id); await 
dbContext.PipelineJobs.AddAsync(new PipelineJob { - ResourceId = id, - ResourceType = ResourceType.IIIFManifest, + ManifestId = id, JobType = PipelineJobType.TextService, CustomerId = 1, @@ -490,8 +489,7 @@ public async Task Get_IiifManifest_Flat_ReturnsOK_WhenPipelineJobCompleted() var dbManifest = await dbContext.Manifests.AddTestManifest(id); await dbContext.PipelineJobs.AddAsync(new PipelineJob { - ResourceId = id, - ResourceType = ResourceType.IIIFManifest, + ManifestId = id, JobType = PipelineJobType.TextService, CustomerId = 1, diff --git a/src/IIIFPresentation/API/Features/Manifest/ManifestWriteService.cs b/src/IIIFPresentation/API/Features/Manifest/ManifestWriteService.cs index 05711d13..4fee57b5 100644 --- a/src/IIIFPresentation/API/Features/Manifest/ManifestWriteService.cs +++ b/src/IIIFPresentation/API/Features/Manifest/ManifestWriteService.cs @@ -509,8 +509,7 @@ await manifestStorageManager.SaveManifestInStorage(iiifManifest, dbManifest, ori return null; } - await dbContext.PipelineJobs.AddAsync(job, cancellationToken); // explicit Add required: PipelineJobs is [NotMapped] on Manifest - dbManifest.PipelineJobs = [job]; // in-memory only, for API response generation + (dbManifest.PipelineJobs ??= []).Add(job); await dbContext.SaveChangesAsync(cancellationToken); if (!await textServicesClient.CreateOrUpdateJob(job, awsOptions.Value.S3.StorageBucket, @@ -533,8 +532,7 @@ await manifestStorageManager.SaveManifestInStorage(iiifManifest, dbManifest, ori { return new PipelineJob { - ResourceId = dbManifest.Id, - ResourceType = ResourceType.IIIFManifest, + ManifestId = dbManifest.Id, JobType = PipelineJobType.TextService, CustomerId = dbManifest.CustomerId, Status = PipelineJobStatus.Waiting, diff --git a/src/IIIFPresentation/API/Features/Storage/Helpers/PresentationContextX.cs b/src/IIIFPresentation/API/Features/Storage/Helpers/PresentationContextX.cs index 8bba3eb0..436a9424 100644 --- 
a/src/IIIFPresentation/API/Features/Storage/Helpers/PresentationContextX.cs +++ b/src/IIIFPresentation/API/Features/Storage/Helpers/PresentationContextX.cs @@ -90,7 +90,7 @@ public static class PresentationContextX if (withPipelineJobs && manifest != null) { manifest.PipelineJobs = await dbContext.PipelineJobs - .Where(p => p.ResourceId == manifest.Id && p.ResourceType == ResourceType.IIIFManifest) + .Where(p => p.ManifestId == manifest.Id) .ToListAsync(cancellationToken); } diff --git a/src/IIIFPresentation/BackgroundHandler.Tests/TextCompletion/TextServiceJobCompletionMessageHandlerTests.cs b/src/IIIFPresentation/BackgroundHandler.Tests/TextCompletion/TextServiceJobCompletionMessageHandlerTests.cs index 376c2c07..827beb92 100644 --- a/src/IIIFPresentation/BackgroundHandler.Tests/TextCompletion/TextServiceJobCompletionMessageHandlerTests.cs +++ b/src/IIIFPresentation/BackgroundHandler.Tests/TextCompletion/TextServiceJobCompletionMessageHandlerTests.cs @@ -119,7 +119,7 @@ public async Task HandleMessage_UpdatesStatusToFailed_WhenJobFailed() (await sut.HandleMessage(message, CancellationToken.None)).Should().BeTrue(); - var job = dbContext.PipelineJobs.Single(p => p.ResourceId == manifestId); + var job = dbContext.PipelineJobs.Single(p => p.ManifestId == manifestId); job.Status.Should().Be(PipelineJobStatus.Failed); job.Error.Should().Be("OCR timed out"); @@ -150,7 +150,7 @@ public async Task HandleMessage_UpdatesStatusToCompleted_AndSavesManifest_WhenJo (await sut.HandleMessage(message, CancellationToken.None)).Should().BeTrue(); - var job = dbContext.PipelineJobs.Single(p => p.ResourceId == manifestId); + var job = dbContext.PipelineJobs.Single(p => p.ManifestId == manifestId); job.Status.Should().Be(PipelineJobStatus.Completed); job.Error.Should().BeNull(); @@ -293,7 +293,7 @@ public async Task HandleMessage_SetsFinishedTimestamp_WhenJobCompletes() await sut.HandleMessage(CreateMessage(jobId, PipelineJobStatus.Completed), CancellationToken.None); - var job = 
dbContext.PipelineJobs.Single(p => p.ResourceId == manifestId); + var job = dbContext.PipelineJobs.Single(p => p.ManifestId == manifestId); job.Finished.Should().Be(new DateTime(2024, 6, 12, 10, 0, 0, DateTimeKind.Utc)); } @@ -306,7 +306,7 @@ public async Task HandleMessage_SetsFinishedTimestamp_WhenJobFails() await sut.HandleMessage(CreateMessage(jobId, PipelineJobStatus.Failed, errors: "OCR error"), CancellationToken.None); - var job = dbContext.PipelineJobs.Single(p => p.ResourceId == manifestId); + var job = dbContext.PipelineJobs.Single(p => p.ManifestId == manifestId); job.Finished.Should().Be(new DateTime(2024, 6, 12, 10, 0, 0, DateTimeKind.Utc)); } @@ -348,8 +348,7 @@ private async Task SetupManifestWithPipelineJob(string manifestId, string jobId) var manifest = manifestEntry.Entity; await dbContext.PipelineJobs.AddAsync(new PipelineJob { - ResourceId = manifest.Id, - ResourceType = ResourceType.IIIFManifest, + ManifestId = manifest.Id, JobType = PipelineJobType.TextService, CustomerId = manifest.CustomerId, diff --git a/src/IIIFPresentation/BackgroundHandler/TextCompletion/TextServiceJobCompletionMessageHandler.cs b/src/IIIFPresentation/BackgroundHandler/TextCompletion/TextServiceJobCompletionMessageHandler.cs index 62ea8c0c..3e8471ae 100644 --- a/src/IIIFPresentation/BackgroundHandler/TextCompletion/TextServiceJobCompletionMessageHandler.cs +++ b/src/IIIFPresentation/BackgroundHandler/TextCompletion/TextServiceJobCompletionMessageHandler.cs @@ -66,7 +66,7 @@ private async Task TryCompleteManifest(TextServiceJobCompletionMessage com } var pipelineJob = await dbContext.PipelineJobs - .Where(p => p.ResourceId == resourceId && p.JobType == PipelineJobType.TextService) + .Where(p => p.ManifestId == resourceId && p.JobType == PipelineJobType.TextService) .OrderByDescending(p => p.Created) .FirstOrDefaultAsync(cancellationToken); diff --git a/src/IIIFPresentation/Models/Database/Collections/Collection.cs 
b/src/IIIFPresentation/Models/Database/Collections/Collection.cs index 95790a9d..dee78387 100644 --- a/src/IIIFPresentation/Models/Database/Collections/Collection.cs +++ b/src/IIIFPresentation/Models/Database/Collections/Collection.cs @@ -74,6 +74,8 @@ public class Collection : IHierarchyResource /// public IEnumerable<Hierarchy>? Children { get; set; } + public List<PipelineJob>? PipelineJobs { get; set; } + public Guid Etag { get; set; } } diff --git a/src/IIIFPresentation/Models/Database/Collections/Manifest.cs b/src/IIIFPresentation/Models/Database/Collections/Manifest.cs index c0620587..6253d02d 100644 --- a/src/IIIFPresentation/Models/Database/Collections/Manifest.cs +++ b/src/IIIFPresentation/Models/Database/Collections/Manifest.cs @@ -1,5 +1,4 @@ -using System.ComponentModel.DataAnnotations.Schema; -using System.Globalization; +using System.Globalization; using IIIF.Presentation.V3.Strings; using Models.Database.General; @@ -49,7 +48,6 @@ public class Manifest : IHierarchyResource public List<Batch>? Batches { get; set; } - [NotMapped] // PipelineJob uses ResourceId+ResourceType rather than a direct FK, to support future resource types (e.g. collections) public List<PipelineJob>? PipelineJobs { get; set; } /// diff --git a/src/IIIFPresentation/Models/Database/General/PipelineJob.cs b/src/IIIFPresentation/Models/Database/General/PipelineJob.cs index 35763b53..2ea6670d 100644 --- a/src/IIIFPresentation/Models/Database/General/PipelineJob.cs +++ b/src/IIIFPresentation/Models/Database/General/PipelineJob.cs @@ -1,4 +1,5 @@ using Models.API.Manifest; +using Models.Database.Collections; namespace Models.Database.General; @@ -6,9 +7,18 @@ public class PipelineJob : ICustomerEntity { public int Id { get; set; } - public required string ResourceId { get; set; } + public string? ManifestId { get; set; } - public ResourceType ResourceType { get; set; } + public virtual Manifest? Manifest { get; set; } + + public string? CollectionId { get; set; } + + public virtual Collection?
Collection { get; set; } + + /// + /// Id of related Manifest or Collection + /// + public string? ResourceId => ManifestId ?? CollectionId; public PipelineJobType JobType { get; set; } @@ -45,5 +55,4 @@ public static class PipelineJobX PipelineJobType.TextService => $"{job.CustomerId}/iiif/{job.ResourceId}", _ => throw new ArgumentOutOfRangeException(nameof(job.JobType), $"Unknown job type: {job.JobType}") }; - } diff --git a/src/IIIFPresentation/Repository/Migrations/20260624142143_PipelineJobManifestCollectionFKs.Designer.cs b/src/IIIFPresentation/Repository/Migrations/20260624142143_PipelineJobManifestCollectionFKs.Designer.cs new file mode 100644 index 00000000..7cc0de58 --- /dev/null +++ b/src/IIIFPresentation/Repository/Migrations/20260624142143_PipelineJobManifestCollectionFKs.Designer.cs @@ -0,0 +1,513 @@ +// +using System; +using Microsoft.EntityFrameworkCore; +using Microsoft.EntityFrameworkCore.Infrastructure; +using Microsoft.EntityFrameworkCore.Migrations; +using Microsoft.EntityFrameworkCore.Storage.ValueConversion; +using Npgsql.EntityFrameworkCore.PostgreSQL.Metadata; +using Repository; + +#nullable disable + +namespace Repository.Migrations +{ + [DbContext(typeof(PresentationContext))] + [Migration("20260624142143_PipelineJobManifestCollectionFKs")] + partial class PipelineJobManifestCollectionFKs + { + /// + protected override void BuildTargetModel(ModelBuilder modelBuilder) + { +#pragma warning disable 612, 618 + modelBuilder + .HasAnnotation("ProductVersion", "8.0.11") + .HasAnnotation("Relational:MaxIdentifierLength", 63); + + NpgsqlModelBuilderExtensions.HasPostgresExtension(modelBuilder, "citext"); + NpgsqlModelBuilderExtensions.UseIdentityByDefaultColumns(modelBuilder); + + modelBuilder.Entity("Models.Database.CanvasPainting", b => + { + b.Property("CanvasPaintingId") + .ValueGeneratedOnAdd() + .HasColumnType("integer") + .HasColumnName("canvas_painting_id"); + + 
NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property("CanvasPaintingId")); + + b.Property("AssetId") + .HasColumnType("text") + .HasColumnName("asset_id"); + + b.Property("CanvasLabel") + .HasColumnType("text") + .HasColumnName("canvas_label"); + + b.Property("CanvasOrder") + .HasColumnType("integer") + .HasColumnName("canvas_order"); + + b.Property("CanvasOriginalId") + .HasColumnType("text") + .HasColumnName("canvas_original_id"); + + b.Property("ChoiceOrder") + .HasColumnType("integer") + .HasColumnName("choice_order"); + + b.Property("Created") + .ValueGeneratedOnAdd() + .HasColumnType("timestamp with time zone") + .HasColumnName("created") + .HasDefaultValueSql("now()"); + + b.Property("CustomerId") + .HasColumnType("integer") + .HasColumnName("customer_id"); + + b.Property("Duration") + .HasColumnType("double precision") + .HasColumnName("duration"); + + b.Property("Id") + .HasColumnType("text") + .HasColumnName("canvas_id"); + + b.Property("Ingesting") + .HasColumnType("boolean") + .HasColumnName("ingesting"); + + b.Property("Label") + .HasColumnType("jsonb") + .HasColumnName("label"); + + b.Property("ManifestId") + .IsRequired() + .HasColumnType("text") + .HasColumnName("manifest_id"); + + b.Property("Modified") + .ValueGeneratedOnAdd() + .HasColumnType("timestamp with time zone") + .HasColumnName("modified") + .HasDefaultValueSql("now()"); + + b.Property("StaticHeight") + .HasColumnType("integer") + .HasColumnName("static_height"); + + b.Property("StaticWidth") + .HasColumnType("integer") + .HasColumnName("static_width"); + + b.Property("Target") + .HasColumnType("text") + .HasColumnName("target"); + + b.Property("Thumbnail") + .HasColumnType("text") + .HasColumnName("thumbnail"); + + b.HasKey("CanvasPaintingId") + .HasName("pk_canvas_paintings"); + + b.HasIndex("ManifestId", "CustomerId") + .HasDatabaseName("ix_canvas_paintings_manifest_id_customer_id"); + + b.HasIndex("Id", "CustomerId", "ManifestId", "CanvasOrder", "ChoiceOrder") + 
.IsUnique() + .HasDatabaseName("ix_canvas_paintings_canvas_id_customer_id_manifest_id_canvas_o"); + + b.ToTable("canvas_paintings", (string)null); + }); + + modelBuilder.Entity("Models.Database.Collections.Collection", b => + { + b.Property("Id") + .HasColumnType("text") + .HasColumnName("id"); + + b.Property("CustomerId") + .HasColumnType("integer") + .HasColumnName("customer_id"); + + b.Property("Created") + .HasColumnType("timestamp with time zone") + .HasColumnName("created"); + + b.Property("CreatedBy") + .HasColumnType("text") + .HasColumnName("created_by"); + + b.Property("Etag") + .ValueGeneratedOnAddOrUpdate() + .HasColumnType("uuid") + .HasColumnName("etag") + .HasComputedColumnSql("deterministic_uuid_sha256(\"modified\", \"id\")", true); + + b.Property("IsPublic") + .HasColumnType("boolean") + .HasColumnName("is_public"); + + b.Property("IsStorageCollection") + .HasColumnType("boolean") + .HasColumnName("is_storage_collection"); + + b.Property("Label") + .HasColumnType("jsonb") + .HasColumnName("label"); + + b.Property("LockedBy") + .HasColumnType("text") + .HasColumnName("locked_by"); + + b.Property("Modified") + .HasColumnType("timestamp with time zone") + .HasColumnName("modified"); + + b.Property("ModifiedBy") + .HasColumnType("text") + .HasColumnName("modified_by"); + + b.Property("Tags") + .HasColumnType("text") + .HasColumnName("tags"); + + b.Property("Thumbnail") + .HasColumnType("text") + .HasColumnName("thumbnail"); + + b.Property("UsePath") + .HasColumnType("boolean") + .HasColumnName("use_path"); + + b.HasKey("Id", "CustomerId") + .HasName("pk_collections"); + + b.ToTable("collections", (string)null); + }); + + modelBuilder.Entity("Models.Database.Collections.Manifest", b => + { + b.Property("Id") + .HasColumnType("text") + .HasColumnName("id"); + + b.Property("CustomerId") + .HasColumnType("integer") + .HasColumnName("customer_id"); + + b.Property("Created") + .ValueGeneratedOnAdd() + .HasColumnType("timestamp with time zone") + 
.HasColumnName("created") + .HasDefaultValueSql("now()"); + + b.Property("CreatedBy") + .HasColumnType("text") + .HasColumnName("created_by"); + + b.Property("Etag") + .ValueGeneratedOnAddOrUpdate() + .HasColumnType("uuid") + .HasColumnName("etag") + .HasComputedColumnSql("deterministic_uuid_sha256(\"last_processed\", \"id\")", true); + + b.Property("Label") + .HasColumnType("text") + .HasColumnName("label"); + + b.Property("LastProcessed") + .HasColumnType("timestamp with time zone") + .HasColumnName("last_processed"); + + b.Property("Modified") + .ValueGeneratedOnAdd() + .HasColumnType("timestamp with time zone") + .HasColumnName("modified") + .HasDefaultValueSql("now()"); + + b.Property("ModifiedBy") + .HasColumnType("text") + .HasColumnName("modified_by"); + + b.Property("SpaceId") + .HasColumnType("integer") + .HasColumnName("space_id"); + + b.HasKey("Id", "CustomerId") + .HasName("pk_manifests"); + + b.ToTable("manifests", (string)null); + }); + + modelBuilder.Entity("Models.Database.General.Batch", b => + { + b.Property("Id") + .HasColumnType("integer") + .HasColumnName("id"); + + b.Property("DeliverableType") + .HasColumnType("text") + .HasColumnName("deliverable_type"); + + b.Property("CustomerId") + .HasColumnType("integer") + .HasColumnName("customer_id"); + + b.Property("Finished") + .HasColumnType("timestamp with time zone") + .HasColumnName("finished"); + + b.Property("ManifestId") + .IsRequired() + .HasColumnType("text") + .HasColumnName("manifest_id"); + + b.Property("Processed") + .HasColumnType("timestamp with time zone") + .HasColumnName("processed"); + + b.Property("Status") + .IsRequired() + .HasColumnType("text") + .HasColumnName("status"); + + b.Property("Submitted") + .HasColumnType("timestamp with time zone") + .HasColumnName("submitted"); + + b.HasKey("Id", "DeliverableType") + .HasName("pk_batches"); + + b.HasIndex("ManifestId", "CustomerId") + .HasDatabaseName("ix_batches_manifest_id_customer_id"); + + b.ToTable("batches", (string)null); 
+ }); + + modelBuilder.Entity("Models.Database.General.Hierarchy", b => + { + b.Property("Id") + .ValueGeneratedOnAdd() + .HasColumnType("integer") + .HasColumnName("id"); + + NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property("Id")); + + b.Property("Canonical") + .HasColumnType("boolean") + .HasColumnName("canonical"); + + b.Property("CollectionId") + .HasColumnType("text") + .HasColumnName("collection_id"); + + b.Property("CustomerId") + .HasColumnType("integer") + .HasColumnName("customer_id"); + + b.Property("ItemsOrder") + .HasColumnType("integer") + .HasColumnName("items_order"); + + b.Property("ManifestId") + .HasColumnType("text") + .HasColumnName("manifest_id"); + + b.Property("Parent") + .HasColumnType("text") + .HasColumnName("parent"); + + b.Property("Slug") + .IsRequired() + .HasColumnType("citext") + .HasColumnName("slug"); + + b.Property("Type") + .HasColumnType("integer") + .HasColumnName("type"); + + b.HasKey("Id") + .HasName("pk_hierarchy"); + + b.HasIndex("Parent", "CustomerId") + .HasDatabaseName("ix_hierarchy_parent_customer_id"); + + b.HasIndex("CollectionId", "CustomerId", "Canonical") + .IsUnique() + .HasDatabaseName("ix_hierarchy_collection_id_customer_id_canonical") + .HasFilter("canonical is true"); + + b.HasIndex("CustomerId", "Slug", "Parent") + .IsUnique() + .HasDatabaseName("ix_hierarchy_customer_id_slug_parent"); + + b.HasIndex("ManifestId", "CustomerId", "Canonical") + .IsUnique() + .HasDatabaseName("ix_hierarchy_manifest_id_customer_id_canonical") + .HasFilter("canonical is true"); + + b.ToTable("hierarchy", null, t => + { + t.HasCheckConstraint("stop_collection_and_manifest_in_same_record", "num_nonnulls(manifest_id, collection_id) = 1"); + }); + }); + + modelBuilder.Entity("Models.Database.General.PipelineJob", b => + { + b.Property("Id") + .ValueGeneratedOnAdd() + .HasColumnType("integer") + .HasColumnName("id"); + + NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property("Id")); + + 
b.Property("CollectionId") + .HasColumnType("text") + .HasColumnName("collection_id"); + + b.Property("Config") + .HasColumnType("jsonb") + .HasColumnName("config"); + + b.Property("Created") + .ValueGeneratedOnAdd() + .HasColumnType("timestamp with time zone") + .HasColumnName("created") + .HasDefaultValueSql("now()"); + + b.Property("CustomerId") + .HasColumnType("integer") + .HasColumnName("customer_id"); + + b.Property("Error") + .HasColumnType("text") + .HasColumnName("error"); + + b.Property("Finished") + .HasColumnType("timestamp with time zone") + .HasColumnName("finished"); + + b.Property("JobType") + .IsRequired() + .HasColumnType("text") + .HasColumnName("job_type"); + + b.Property("ManifestId") + .HasColumnType("text") + .HasColumnName("manifest_id"); + + b.Property("Status") + .IsRequired() + .HasColumnType("text") + .HasColumnName("status"); + + b.HasKey("Id") + .HasName("pk_pipeline_jobs"); + + b.HasIndex("CollectionId", "CustomerId") + .HasDatabaseName("ix_pipeline_jobs_collection_id_customer_id"); + + b.HasIndex("ManifestId", "CustomerId") + .HasDatabaseName("ix_pipeline_jobs_manifest_id_customer_id"); + + b.ToTable("pipeline_jobs", null, t => + { + t.HasCheckConstraint("stop_collection_and_manifest_in_same_record", "num_nonnulls(manifest_id, collection_id) = 1"); + }); + }); + + modelBuilder.Entity("Models.Database.CanvasPainting", b => + { + b.HasOne("Models.Database.Collections.Manifest", "Manifest") + .WithMany("CanvasPaintings") + .HasForeignKey("ManifestId", "CustomerId") + .OnDelete(DeleteBehavior.Cascade) + .IsRequired() + .HasConstraintName("fk_canvas_paintings_manifests_manifest_id_customer_id"); + + b.Navigation("Manifest"); + }); + + modelBuilder.Entity("Models.Database.General.Batch", b => + { + b.HasOne("Models.Database.Collections.Manifest", "Manifest") + .WithMany("Batches") + .HasForeignKey("ManifestId", "CustomerId") + .OnDelete(DeleteBehavior.Cascade) + .IsRequired() + 
.HasConstraintName("fk_batches_manifests_manifest_id_customer_id"); + + b.Navigation("Manifest"); + }); + + modelBuilder.Entity("Models.Database.General.Hierarchy", b => + { + b.HasOne("Models.Database.Collections.Collection", "Collection") + .WithMany("Hierarchy") + .HasForeignKey("CollectionId", "CustomerId") + .OnDelete(DeleteBehavior.Cascade) + .HasConstraintName("fk_hierarchy_collections_collection_id_customer_id"); + + b.HasOne("Models.Database.Collections.Manifest", "Manifest") + .WithMany("Hierarchy") + .HasForeignKey("ManifestId", "CustomerId") + .OnDelete(DeleteBehavior.Cascade) + .HasConstraintName("fk_hierarchy_manifests_manifest_id_customer_id"); + + b.HasOne("Models.Database.Collections.Collection", "ParentCollection") + .WithMany("Children") + .HasForeignKey("Parent", "CustomerId") + .OnDelete(DeleteBehavior.NoAction) + .HasConstraintName("fk_hierarchy_collections_parent_customer_id"); + + b.Navigation("Collection"); + + b.Navigation("Manifest"); + + b.Navigation("ParentCollection"); + }); + + modelBuilder.Entity("Models.Database.General.PipelineJob", b => + { + b.HasOne("Models.Database.Collections.Collection", "Collection") + .WithMany("PipelineJobs") + .HasForeignKey("CollectionId", "CustomerId") + .OnDelete(DeleteBehavior.Cascade) + .HasConstraintName("fk_pipeline_jobs_collections_collection_id_customer_id"); + + b.HasOne("Models.Database.Collections.Manifest", "Manifest") + .WithMany("PipelineJobs") + .HasForeignKey("ManifestId", "CustomerId") + .OnDelete(DeleteBehavior.Cascade) + .HasConstraintName("fk_pipeline_jobs_manifests_manifest_id_customer_id"); + + b.Navigation("Collection"); + + b.Navigation("Manifest"); + }); + + modelBuilder.Entity("Models.Database.Collections.Collection", b => + { + b.Navigation("Children"); + + b.Navigation("Hierarchy"); + + b.Navigation("PipelineJobs"); + }); + + modelBuilder.Entity("Models.Database.Collections.Manifest", b => + { + b.Navigation("Batches"); + + b.Navigation("CanvasPaintings"); + + 
b.Navigation("Hierarchy"); + + b.Navigation("PipelineJobs"); + }); +#pragma warning restore 612, 618 + } + } +} diff --git a/src/IIIFPresentation/Repository/Migrations/20260624142143_PipelineJobManifestCollectionFKs.cs b/src/IIIFPresentation/Repository/Migrations/20260624142143_PipelineJobManifestCollectionFKs.cs new file mode 100644 index 00000000..909e60c7 --- /dev/null +++ b/src/IIIFPresentation/Repository/Migrations/20260624142143_PipelineJobManifestCollectionFKs.cs @@ -0,0 +1,111 @@ +using Microsoft.EntityFrameworkCore.Migrations; + +#nullable disable + +namespace Repository.Migrations +{ + /// + public partial class PipelineJobManifestCollectionFKs : Migration + { + /// + protected override void Up(MigrationBuilder migrationBuilder) + { + migrationBuilder.DropColumn( + name: "resource_id", + table: "pipeline_jobs"); + + migrationBuilder.DropColumn( + name: "resource_type", + table: "pipeline_jobs"); + + migrationBuilder.AddColumn( + name: "collection_id", + table: "pipeline_jobs", + type: "text", + nullable: true); + + migrationBuilder.AddColumn( + name: "manifest_id", + table: "pipeline_jobs", + type: "text", + nullable: true); + + migrationBuilder.CreateIndex( + name: "ix_pipeline_jobs_collection_id_customer_id", + table: "pipeline_jobs", + columns: new[] { "collection_id", "customer_id" }); + + migrationBuilder.CreateIndex( + name: "ix_pipeline_jobs_manifest_id_customer_id", + table: "pipeline_jobs", + columns: new[] { "manifest_id", "customer_id" }); + + migrationBuilder.AddCheckConstraint( + name: "stop_collection_and_manifest_in_same_record", + table: "pipeline_jobs", + sql: "num_nonnulls(manifest_id, collection_id) = 1"); + + migrationBuilder.AddForeignKey( + name: "fk_pipeline_jobs_collections_collection_id_customer_id", + table: "pipeline_jobs", + columns: new[] { "collection_id", "customer_id" }, + principalTable: "collections", + principalColumns: new[] { "id", "customer_id" }, + onDelete: ReferentialAction.Cascade); + + 
migrationBuilder.AddForeignKey( + name: "fk_pipeline_jobs_manifests_manifest_id_customer_id", + table: "pipeline_jobs", + columns: new[] { "manifest_id", "customer_id" }, + principalTable: "manifests", + principalColumns: new[] { "id", "customer_id" }, + onDelete: ReferentialAction.Cascade); + } + + /// + protected override void Down(MigrationBuilder migrationBuilder) + { + migrationBuilder.DropForeignKey( + name: "fk_pipeline_jobs_collections_collection_id_customer_id", + table: "pipeline_jobs"); + + migrationBuilder.DropForeignKey( + name: "fk_pipeline_jobs_manifests_manifest_id_customer_id", + table: "pipeline_jobs"); + + migrationBuilder.DropIndex( + name: "ix_pipeline_jobs_collection_id_customer_id", + table: "pipeline_jobs"); + + migrationBuilder.DropIndex( + name: "ix_pipeline_jobs_manifest_id_customer_id", + table: "pipeline_jobs"); + + migrationBuilder.DropCheckConstraint( + name: "stop_collection_and_manifest_in_same_record", + table: "pipeline_jobs"); + + migrationBuilder.DropColumn( + name: "collection_id", + table: "pipeline_jobs"); + + migrationBuilder.DropColumn( + name: "manifest_id", + table: "pipeline_jobs"); + + migrationBuilder.AddColumn( + name: "resource_id", + table: "pipeline_jobs", + type: "text", + nullable: false, + defaultValue: ""); + + migrationBuilder.AddColumn( + name: "resource_type", + table: "pipeline_jobs", + type: "text", + nullable: false, + defaultValue: ""); + } + } +} diff --git a/src/IIIFPresentation/Repository/Migrations/PresentationContextModelSnapshot.cs b/src/IIIFPresentation/Repository/Migrations/PresentationContextModelSnapshot.cs index d5b0231e..33331a6e 100644 --- a/src/IIIFPresentation/Repository/Migrations/PresentationContextModelSnapshot.cs +++ b/src/IIIFPresentation/Repository/Migrations/PresentationContextModelSnapshot.cs @@ -360,6 +360,14 @@ protected override void BuildModel(ModelBuilder modelBuilder) NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property("Id")); + b.Property("CollectionId") + 
.HasColumnType("text") + .HasColumnName("collection_id"); + + b.Property("Config") + .HasColumnType("jsonb") + .HasColumnName("config"); + b.Property("Created") .ValueGeneratedOnAdd() .HasColumnType("timestamp with time zone") @@ -383,15 +391,9 @@ protected override void BuildModel(ModelBuilder modelBuilder) .HasColumnType("text") .HasColumnName("job_type"); - b.Property("ResourceId") - .IsRequired() - .HasColumnType("text") - .HasColumnName("resource_id"); - - b.Property("ResourceType") - .IsRequired() + b.Property("ManifestId") .HasColumnType("text") - .HasColumnName("resource_type"); + .HasColumnName("manifest_id"); b.Property("Status") .IsRequired() @@ -401,7 +403,16 @@ protected override void BuildModel(ModelBuilder modelBuilder) b.HasKey("Id") .HasName("pk_pipeline_jobs"); - b.ToTable("pipeline_jobs", (string)null); + b.HasIndex("CollectionId", "CustomerId") + .HasDatabaseName("ix_pipeline_jobs_collection_id_customer_id"); + + b.HasIndex("ManifestId", "CustomerId") + .HasDatabaseName("ix_pipeline_jobs_manifest_id_customer_id"); + + b.ToTable("pipeline_jobs", null, t => + { + t.HasCheckConstraint("stop_collection_and_manifest_in_same_record", "num_nonnulls(manifest_id, collection_id) = 1"); + }); }); modelBuilder.Entity("Models.Database.CanvasPainting", b => @@ -455,11 +466,32 @@ protected override void BuildModel(ModelBuilder modelBuilder) b.Navigation("ParentCollection"); }); + modelBuilder.Entity("Models.Database.General.PipelineJob", b => + { + b.HasOne("Models.Database.Collections.Collection", "Collection") + .WithMany("PipelineJobs") + .HasForeignKey("CollectionId", "CustomerId") + .OnDelete(DeleteBehavior.Cascade) + .HasConstraintName("fk_pipeline_jobs_collections_collection_id_customer_id"); + + b.HasOne("Models.Database.Collections.Manifest", "Manifest") + .WithMany("PipelineJobs") + .HasForeignKey("ManifestId", "CustomerId") + .OnDelete(DeleteBehavior.Cascade) + .HasConstraintName("fk_pipeline_jobs_manifests_manifest_id_customer_id"); + + 
b.Navigation("Collection"); + + b.Navigation("Manifest"); + }); + modelBuilder.Entity("Models.Database.Collections.Collection", b => { b.Navigation("Children"); b.Navigation("Hierarchy"); + + b.Navigation("PipelineJobs"); }); modelBuilder.Entity("Models.Database.Collections.Manifest", b => @@ -469,6 +501,8 @@ protected override void BuildModel(ModelBuilder modelBuilder) b.Navigation("CanvasPaintings"); b.Navigation("Hierarchy"); + + b.Navigation("PipelineJobs"); }); #pragma warning restore 612, 618 } diff --git a/src/IIIFPresentation/Repository/PresentationContext.cs b/src/IIIFPresentation/Repository/PresentationContext.cs index e250fff4..85ad865e 100644 --- a/src/IIIFPresentation/Repository/PresentationContext.cs +++ b/src/IIIFPresentation/Repository/PresentationContext.cs @@ -71,6 +71,12 @@ protected override void OnModelCreating(ModelBuilder modelBuilder) .HasPrincipalKey(e => new { e.Id, e.CustomerId }) .OnDelete(DeleteBehavior.NoAction); + entity.HasMany(e => e.PipelineJobs) + .WithOne(e => e.Collection) + .HasForeignKey(e => new { e.CollectionId, e.CustomerId }) + .HasPrincipalKey(e => new { e.Id, e.CustomerId }) + .OnDelete(DeleteBehavior.Cascade); + entity.Property(e => e.Etag) .HasComputedColumnSql("""deterministic_uuid_sha256("modified", "id")""", stored: true); }); @@ -85,6 +91,12 @@ protected override void OnModelCreating(ModelBuilder modelBuilder) .HasPrincipalKey(e => new { e.Id, e.CustomerId }) .OnDelete(DeleteBehavior.Cascade); + entity.HasMany(e => e.PipelineJobs) + .WithOne(e => e.Manifest) + .HasForeignKey(e => new { e.ManifestId, e.CustomerId }) + .HasPrincipalKey(e => new { e.Id, e.CustomerId }) + .OnDelete(DeleteBehavior.Cascade); + entity.Property(p => p.Created).HasDefaultValueSql("now()"); entity.Property(p => p.Modified).HasDefaultValueSql("now()"); @@ -169,12 +181,6 @@ protected override void OnModelCreating(ModelBuilder modelBuilder) s => s.ToString(), s => s.GetEnumFromString(true)); - entity.Property(e => e.ResourceType) - 
.IsRequired() - .HasConversion( - r => r.ToString(), - r => r.GetEnumFromString(true)); - entity.Property(e => e.JobType) .IsRequired() .HasConversion( @@ -186,6 +192,11 @@ protected override void OnModelCreating(ModelBuilder modelBuilder) entity.Property(e => e.Config) .HasConversion() .HasColumnType("jsonb"); + + entity.Ignore(p => p.ResourceId); + + entity.ToTable(p => p.HasCheckConstraint("stop_collection_and_manifest_in_same_record", + "num_nonnulls(manifest_id, collection_id) = 1")); }); } diff --git a/src/IIIFPresentation/Services.Tests/Manifests/ManifestXTests.cs b/src/IIIFPresentation/Services.Tests/Manifests/ManifestXTests.cs index 30aa510b..acd4a6ad 100644 --- a/src/IIIFPresentation/Services.Tests/Manifests/ManifestXTests.cs +++ b/src/IIIFPresentation/Services.Tests/Manifests/ManifestXTests.cs @@ -78,7 +78,7 @@ public void HasFurtherWork_ReturnsFalse_WhenBatchCompletedAndJobCompleted() { Id = "x", CustomerId = 1, Batches = [new Batch { Id = 1, ManifestId = "x", Status = BatchStatus.Completed }], - PipelineJobs = [new PipelineJob { ResourceId = "x", ResourceType = ResourceType.IIIFManifest, CustomerId = 1, Status = PipelineJobStatus.Completed }] + PipelineJobs = [new PipelineJob { ManifestId = "x", CustomerId = 1, Status = PipelineJobStatus.Completed }] }; manifest.HasFurtherWork().Should().BeFalse(); @@ -91,7 +91,7 @@ private static Manifest ManifestWithJobs(PipelineJobStatus? 
status = null) { manifest.PipelineJobs = [ - new PipelineJob { ResourceId = "x", ResourceType = ResourceType.IIIFManifest, CustomerId = 1, Status = status.Value } + new PipelineJob { ManifestId = "x", CustomerId = 1, Status = status.Value } ]; } else diff --git a/src/IIIFPresentation/Services.Tests/Manifests/PipelineHelperTests.cs b/src/IIIFPresentation/Services.Tests/Manifests/PipelineHelperTests.cs index ed97bff8..fac5be9b 100644 --- a/src/IIIFPresentation/Services.Tests/Manifests/PipelineHelperTests.cs +++ b/src/IIIFPresentation/Services.Tests/Manifests/PipelineHelperTests.cs @@ -41,7 +41,7 @@ public void ToPipelineItem_SetsStatusFromJob(PipelineJobStatus status, string ex { var job = new PipelineJob { - ResourceId = "id", CustomerId = 1, + ManifestId = "id", CustomerId = 1, JobType = PipelineJobType.TextService, Status = status, Config = new PipelineConfig { Action = "Index" } @@ -59,7 +59,7 @@ public void ToPipelineItem_SetsNullConfig_WhenJobConfigIsNull() { var job = new PipelineJob { - ResourceId = "id", CustomerId = 1, + ManifestId = "id", CustomerId = 1, JobType = PipelineJobType.TextService, Status = PipelineJobStatus.Waiting, Config = null diff --git a/src/IIIFPresentation/Services.Tests/Manifests/PipelineJobXTests.cs b/src/IIIFPresentation/Services.Tests/Manifests/PipelineJobXTests.cs index c00cc321..89d449fc 100644 --- a/src/IIIFPresentation/Services.Tests/Manifests/PipelineJobXTests.cs +++ b/src/IIIFPresentation/Services.Tests/Manifests/PipelineJobXTests.cs @@ -9,8 +9,7 @@ public void GetJobId_ReturnsExpectedFormat_ForTextService() { var job = new PipelineJob { - ResourceId = "my-manifest", - ResourceType = ResourceType.IIIFManifest, + ManifestId = "my-manifest", JobType = PipelineJobType.TextService, CustomerId = 99 }; @@ -23,13 +22,11 @@ public void GetJobId_Throws_ForUnknownJobType() { var job = new PipelineJob { - ResourceId = "x", - ResourceType = ResourceType.IIIFManifest, + ManifestId = "x", JobType = (PipelineJobType)999, CustomerId = 1 }; 
job.Invoking(j => j.GetJobId()).Should().Throw<ArgumentOutOfRangeException>(); } - -} \ No newline at end of file +} diff --git a/src/IIIFPresentation/Services.Tests/TextServices/TextServicesClientTests.cs b/src/IIIFPresentation/Services.Tests/TextServices/TextServicesClientTests.cs index 04f506cf..5df2bb62 100644 --- a/src/IIIFPresentation/Services.Tests/TextServices/TextServicesClientTests.cs +++ b/src/IIIFPresentation/Services.Tests/TextServices/TextServicesClientTests.cs @@ -18,7 +18,7 @@ private TextServicesClient CreateSut(TextServicesSettings settings) => new(new HttpClient(messageHandler), Options.Create(settings), new NullLogger<TextServicesClient>()); private static PipelineJob MakeJob(int customerId = 1, string resourceId = "my-manifest") => - new() { CustomerId = customerId, ResourceId = resourceId, ResourceType = ResourceType.IIIFManifest, JobType = PipelineJobType.TextService }; + new() { CustomerId = customerId, ManifestId = resourceId, JobType = PipelineJobType.TextService }; [Fact] public async Task CreateOrUpdateJob_ReturnsTrue_WhenPostSucceeds() From 769599ff43f221b936eb8f7e3f2302c999c96d5f Mon Sep 17 00:00:00 2001 From: "jack.lewis" Date: Wed, 24 Jun 2026 16:20:33 +0100 Subject: [PATCH 17/18] Code review changes --- .../Features/Manifest/ManifestWriteService.cs | 5 +++ .../TextServiceJobCompletionMessageHandler.cs | 34 ++++++------------- src/IIIFPresentation/Core/IIIF/IServiceX.cs | 2 +- .../Models/Database/General/PipelineJob.cs | 13 +++++++ .../TextServices/TextServicesClient.cs | 9 +++-- 5 files changed, 36 insertions(+), 27 deletions(-) diff --git a/src/IIIFPresentation/API/Features/Manifest/ManifestWriteService.cs b/src/IIIFPresentation/API/Features/Manifest/ManifestWriteService.cs index 4fee57b5..2fbeb199 100644 --- a/src/IIIFPresentation/API/Features/Manifest/ManifestWriteService.cs +++ b/src/IIIFPresentation/API/Features/Manifest/ManifestWriteService.cs @@ -498,6 +498,10 @@ await manifestStorageManager.SaveManifestInStorage(iiifManifest, dbManifest, ori // Persists pipeline job
entities within the open transaction, then submits to external services. // Submitting after SaveChangesAsync ensures DB state is consistent if the HTTP call fails and // the transaction is rolled back by the caller. + // Trade-off: the HTTP call to text-services runs while the DB transaction is still open. + // This keeps rollback simple (no compensating transaction needed) at the cost of holding + // the transaction for the duration of the HTTP round-trip. The HttpClient should be + // configured with a short timeout to bound this window. private async Task RegisterAndSubmitPipelineJobs(DbManifest dbManifest, List pipeline, CancellationToken cancellationToken) { @@ -526,6 +530,7 @@ await manifestStorageManager.SaveManifestInStorage(iiifManifest, dbManifest, ori private PipelineJob? BuildPipelineJob(DbManifest dbManifest, List pipeline) { + // Returns a job for the first recognised pipeline step; additional steps of the same type are ignored. foreach (var pipelineItem in pipeline) { if (string.Equals(pipelineItem.Name, PipelineHelper.TextPipelineName, StringComparison.OrdinalIgnoreCase)) diff --git a/src/IIIFPresentation/BackgroundHandler/TextCompletion/TextServiceJobCompletionMessageHandler.cs b/src/IIIFPresentation/BackgroundHandler/TextCompletion/TextServiceJobCompletionMessageHandler.cs index 3e8471ae..a5b33147 100644 --- a/src/IIIFPresentation/BackgroundHandler/TextCompletion/TextServiceJobCompletionMessageHandler.cs +++ b/src/IIIFPresentation/BackgroundHandler/TextCompletion/TextServiceJobCompletionMessageHandler.cs @@ -34,7 +34,7 @@ public async Task HandleMessage(QueueMessage message, CancellationToken ca try { var completionMessage = DeserializeMessage(message, logger); - var customerId = ExtractCustomerIdFromJobId(completionMessage.JobId); + var (customerId, _) = PipelineJobX.ParseJobId(completionMessage.JobId); if (customerId == null) { logger.LogWarning("Could not parse customer id from job id {JobId}; discarding message", @@ -57,7 +57,7 @@ public 
async Task HandleMessage(QueueMessage message, CancellationToken ca private async Task TryCompleteManifest(TextServiceJobCompletionMessage completionMessage, int approximateReceiveCount, CancellationToken cancellationToken) { - var resourceId = ExtractResourceIdFromJobId(completionMessage.JobId); + var (customerId, resourceId) = PipelineJobX.ParseJobId(completionMessage.JobId); if (resourceId == null) { logger.LogWarning("Could not parse resource id from job id {JobId}; discarding message", @@ -79,13 +79,16 @@ private async Task TryCompleteManifest(TextServiceJobCompletionMessage com return discard; } - if (pipelineJob.Finished != null) - logger.LogWarning("PipelineJob for {JobId} already finished at {Finished}; re-processing", + if (pipelineJob.Status == PipelineJobStatus.Completed && pipelineJob.Finished != null) + { + logger.LogWarning("PipelineJob for {JobId} already completed at {Finished}; acknowledging", completionMessage.JobId, pipelineJob.Finished); + return true; + } var dbManifest = await dbContext.Manifests .Include(m => m.CanvasPaintings) - .SingleOrDefaultAsync(m => m.Id == pipelineJob.ResourceId && m.CustomerId == pipelineJob.CustomerId, + .SingleOrDefaultAsync(m => m.Id == pipelineJob.ManifestId && m.CustomerId == pipelineJob.CustomerId, cancellationToken); if (dbManifest == null) @@ -150,7 +153,8 @@ private async Task ApplyTextServices(string jobId, Manifest stagedManifest, { var augmented = await textServicesClient.GetTextAugmentedManifest(jobId, cancellationToken); - if (augmented?.Service == null || !augmented.Service.OfType().Any()) + var searchServices = augmented?.Service?.OfType().ToList(); + if (searchServices is not { Count: > 0 }) { logger.LogDebug("No search services in text-augmented manifest for job {JobId}", jobId); return; @@ -159,7 +163,7 @@ private async Task ApplyTextServices(string jobId, Manifest stagedManifest, stagedManifest.Service ??= []; var existingIds = stagedManifest.Service.GetDistinctIds(); - foreach (var service in 
augmented.Service.OfType()) + foreach (var service in searchServices) { if (existingIds.Add(service.Id!)) stagedManifest.Service.Add(service); } @@ -177,22 +181,6 @@ private static void MergeContext(Manifest target, Manifest source) } } - private static int? ExtractCustomerIdFromJobId(string jobId) - { - // jobId format: "{customerId}/iiif/{resourceId}" - var firstSlash = jobId.IndexOf('/'); - return firstSlash > 0 && int.TryParse(jobId[..firstSlash], out var customerId) ? customerId : null; - } - - private static string? ExtractResourceIdFromJobId(string jobId) - { - // jobId format: "{customerId}/iiif/{resourceId}" - var firstSlash = jobId.IndexOf('/'); - if (firstSlash < 0) return null; - var secondSlash = jobId.IndexOf('/', firstSlash + 1); - return secondSlash > 0 && secondSlash < jobId.Length - 1 ? jobId[(secondSlash + 1)..] : null; - } - private static TextServiceJobCompletionMessage DeserializeMessage(QueueMessage message, ILogger logger) { try diff --git a/src/IIIFPresentation/Core/IIIF/IServiceX.cs b/src/IIIFPresentation/Core/IIIF/IServiceX.cs index c8167480..92c72ef5 100644 --- a/src/IIIFPresentation/Core/IIIF/IServiceX.cs +++ b/src/IIIFPresentation/Core/IIIF/IServiceX.cs @@ -8,5 +8,5 @@ public static class IServiceX /// Get a list of unique ids. 
/// public static HashSet GetDistinctIds(this IList target) where T : IService - => [..target.Select(s => s.Id).Where(id => id != null)!]; + => [..target.Select(s => s.Id).Where(id => id != null).Select(id => id!)]; } diff --git a/src/IIIFPresentation/Models/Database/General/PipelineJob.cs b/src/IIIFPresentation/Models/Database/General/PipelineJob.cs index 2ea6670d..93da7a5d 100644 --- a/src/IIIFPresentation/Models/Database/General/PipelineJob.cs +++ b/src/IIIFPresentation/Models/Database/General/PipelineJob.cs @@ -55,4 +55,17 @@ public static class PipelineJobX PipelineJobType.TextService => $"{job.CustomerId}/iiif/{job.ResourceId}", _ => throw new ArgumentOutOfRangeException(nameof(job.JobType), $"Unknown job type: {job.JobType}") }; + + /// + /// Parses a job id of the form "{customerId}/iiif/{resourceId}" into its components. + /// Returns null for either component if the format is not recognised. + /// + public static (int? CustomerId, string? ResourceId) ParseJobId(string jobId) + { + var firstSlash = jobId.IndexOf('/'); + if (firstSlash <= 0 || !int.TryParse(jobId[..firstSlash], out var customerId)) return (null, null); + var secondSlash = jobId.IndexOf('/', firstSlash + 1); + var resourceId = secondSlash > 0 && secondSlash < jobId.Length - 1 ? jobId[(secondSlash + 1)..] 
: null; + return (customerId, resourceId); + } } diff --git a/src/IIIFPresentation/Services/TextServices/TextServicesClient.cs b/src/IIIFPresentation/Services/TextServices/TextServicesClient.cs index 1bc3ac6c..70c97b3e 100644 --- a/src/IIIFPresentation/Services/TextServices/TextServicesClient.cs +++ b/src/IIIFPresentation/Services/TextServices/TextServicesClient.cs @@ -33,8 +33,9 @@ public async Task CreateOrUpdateJob(PipelineJob job, string bucket, string } var sourceS3Uri = $"s3://{bucket}/{resourceKey}"; - var request = new { id = jobId, sourceUri = sourceS3Uri, services = (int)JobServices.All }; - var content = new StringContent(JsonSerializer.Serialize(request, JsonOptions), Encoding.UTF8, "application/json"); + var body = new { id = jobId, sourceUri = sourceS3Uri, services = (int)JobServices.All }; + var serialisedBody = JsonSerializer.Serialize(body, JsonOptions); + var content = new StringContent(serialisedBody, Encoding.UTF8, "application/json"); var postUri = new Uri(settings.BuilderApiUri, "textbuilder"); var response = await httpClient.PostAsync(postUri, content, cancellationToken); @@ -43,7 +44,9 @@ public async Task CreateOrUpdateJob(PipelineJob job, string bucket, string { logger.LogDebug("Text-services job {JobId} already exists, reprocessing", jobId); var putUri = new Uri(settings.BuilderApiUri, $"textbuilder/{jobId}"); - response = await httpClient.PutAsync(putUri, null, cancellationToken); + // StringContent from POST is disposed; create a new one with the same body + var putContent = new StringContent(serialisedBody, Encoding.UTF8, "application/json"); + response = await httpClient.PutAsync(putUri, putContent, cancellationToken); } if (response.IsSuccessStatusCode) From 5f149467a56cd29e648c238240fdaeae3e34c68c Mon Sep 17 00:00:00 2001 From: Donald Gray Date: Thu, 25 Jun 2026 17:37:34 +0100 Subject: [PATCH 18/18] Add SearchService2 and set label Use AddDistinctById() helper as used elsewhere, updated it to return the number of items added, change 
base type to make more accommodating and add optional hook to alter item on add. Set label on AutoComplete and SearchService if not set. Rather than iterate and read contexts, add search2 context manually as it's a published constant --- .../Manifest/DlcsManifestCoordinator.cs | 2 +- ...ServiceJobCompletionMessageHandlerTests.cs | 108 +++++++++++------- .../TextServiceJobCompletionMessageHandler.cs | 34 +++--- .../IIIF/ResourceBaseCollectionXTests.cs | 26 ++++- src/IIIFPresentation/Core/IIIF/IServiceX.cs | 12 -- .../Core/IIIF/ResourceBaseCollectionX.cs | 16 ++- .../TextServices/ITextServicesClient.cs | 4 +- 7 files changed, 121 insertions(+), 81 deletions(-) delete mode 100644 src/IIIFPresentation/Core/IIIF/IServiceX.cs diff --git a/src/IIIFPresentation/API/Features/Manifest/DlcsManifestCoordinator.cs b/src/IIIFPresentation/API/Features/Manifest/DlcsManifestCoordinator.cs index f74648c8..4fa082f6 100644 --- a/src/IIIFPresentation/API/Features/Manifest/DlcsManifestCoordinator.cs +++ b/src/IIIFPresentation/API/Features/Manifest/DlcsManifestCoordinator.cs @@ -307,7 +307,7 @@ private async Task DeleteUnusedAdjuncts(int customerId, List foreach (var adjunctInteraction in adjuncts) { - if (adjunctInteraction.ExistingAdjunctIds is not { Count: > 0 }) continue; + if (adjunctInteraction.ExistingAdjunctIds.IsNullOrEmpty()) continue; var currentIds = adjunctInteraction.Adjuncts .Select(a => a[AdjunctProperties.Id]?.Value()) diff --git a/src/IIIFPresentation/BackgroundHandler.Tests/TextCompletion/TextServiceJobCompletionMessageHandlerTests.cs b/src/IIIFPresentation/BackgroundHandler.Tests/TextCompletion/TextServiceJobCompletionMessageHandlerTests.cs index 827beb92..08fe53ea 100644 --- a/src/IIIFPresentation/BackgroundHandler.Tests/TextCompletion/TextServiceJobCompletionMessageHandlerTests.cs +++ b/src/IIIFPresentation/BackgroundHandler.Tests/TextCompletion/TextServiceJobCompletionMessageHandlerTests.cs @@ -5,6 +5,8 @@ using BackgroundHandler.Tests.infrastructure; using 
FakeItEasy; using FluentAssertions; +using IIIF; +using IIIF.ImageApi.V3; using IIIF.Presentation.V3; using IIIF.Search.V1; using IIIF.Search.V2; @@ -96,7 +98,7 @@ public async Task HandleMessage_ReturnsFalse_WhenStagedManifestMissing() { var manifestId = TestIdentifiers.IdWithSuffix(suffix: "_staging_missing"); var jobId = $"{CustomerId}/iiif/{manifestId}"; - await SetupManifestWithPipelineJob(manifestId, jobId); + await SetupManifestWithPipelineJob(manifestId); A.CallTo(() => iiifS3.ReadIIIFFromS3(A._, BucketLocationType.Staging, A._)) .Returns((IIIFManifest?)null); @@ -113,7 +115,7 @@ public async Task HandleMessage_UpdatesStatusToFailed_WhenJobFailed() { var manifestId = TestIdentifiers.IdWithSuffix(suffix: "_failed"); var jobId = $"{CustomerId}/iiif/{manifestId}"; - await SetupManifestWithPipelineJob(manifestId, jobId); + await SetupManifestWithPipelineJob(manifestId); var message = CreateMessage(jobId, PipelineJobStatus.Failed, errors: "OCR timed out"); @@ -139,7 +141,7 @@ public async Task HandleMessage_UpdatesStatusToCompleted_AndSavesManifest_WhenJo { var manifestId = TestIdentifiers.IdWithSuffix(suffix: "_completed_no_services"); var jobId = $"{CustomerId}/iiif/{manifestId}"; - await SetupManifestWithPipelineJob(manifestId, jobId); + await SetupManifestWithPipelineJob(manifestId); A.CallTo(() => iiifS3.ReadIIIFFromS3(A._, BucketLocationType.Staging, A._)) .Returns(new IIIFManifest { Id = manifestId }); @@ -166,9 +168,13 @@ public async Task HandleMessage_MergesSearchServicesIntoManifest_WhenAugmentedMa { var manifestId = TestIdentifiers.IdWithSuffix(suffix: "_merged_services"); var jobId = $"{CustomerId}/iiif/{manifestId}"; - await SetupManifestWithPipelineJob(manifestId, jobId); + await SetupManifestWithPipelineJob(manifestId); - var stagedManifest = new IIIFManifest { Id = manifestId }; + var stagedManifest = new IIIFManifest + { + Id = manifestId, + Service = [new ImageService3 { Id = "https://image.example.com" }] + }; A.CallTo(() => 
iiifS3.ReadIIIFFromS3(A._, BucketLocationType.Staging, A._)) .Returns(stagedManifest); @@ -191,6 +197,7 @@ public async Task HandleMessage_MergesSearchServicesIntoManifest_WhenAugmentedMa (await sut.HandleMessage(message, CancellationToken.None)).Should().BeTrue(); savedManifest.Should().NotBeNull(); + savedManifest!.Service.Should().HaveCount(2, "Original image service not overwritten"); savedManifest!.Service.Should().ContainSingle(s => s.Id == searchService.Id); } @@ -199,20 +206,20 @@ public async Task HandleMessage_DoesNotDuplicateServices_WhenAugmentedManifestCo { var manifestId = TestIdentifiers.IdWithSuffix(suffix: "_dedup_services"); var jobId = $"{CustomerId}/iiif/{manifestId}"; - await SetupManifestWithPipelineJob(manifestId, jobId); + await SetupManifestWithPipelineJob(manifestId); const string serviceId = "https://search.example.com/search"; var stagedManifest = new IIIFManifest { Id = manifestId, - Service = [new SearchService2 { Id = serviceId }] + Service = [new SearchService2 { Id = serviceId, Profile = "original" }] }; A.CallTo(() => iiifS3.ReadIIIFFromS3(A._, BucketLocationType.Staging, A._)) .Returns(stagedManifest); var augmentedManifest = new IIIFManifest { - Service = [new SearchService2 { Id = serviceId }] + Service = [new SearchService2 { Id = serviceId, Profile = "incoming" }] }; A.CallTo(() => textServicesClient.GetTextAugmentedManifest(jobId, A._)) .Returns(augmentedManifest); @@ -227,40 +234,18 @@ public async Task HandleMessage_DoesNotDuplicateServices_WhenAugmentedManifestCo await sut.HandleMessage(message, CancellationToken.None); - savedManifest!.Service.Should().HaveCount(1, "duplicate service ID should not be added twice"); - } - - [Fact] - public async Task HandleMessage_MergesContextFromAugmentedManifest_WhenAugmentedManifestHasContext() - { - var manifestId = TestIdentifiers.IdWithSuffix(suffix: "_context_merge"); - var jobId = $"{CustomerId}/iiif/{manifestId}"; - await SetupManifestWithPipelineJob(manifestId, jobId); - - const 
string searchContext = "http://iiif.io/api/search/2/context.json"; - var stagedManifest = new IIIFManifest { Id = manifestId }; - A.CallTo(() => iiifS3.ReadIIIFFromS3(A._, BucketLocationType.Staging, A._)) - .Returns(stagedManifest); - - var augmentedManifest = new IIIFManifest - { - Service = [new SearchService2 { Id = "https://search.example.com/search" }], - Context = searchContext - }; - A.CallTo(() => textServicesClient.GetTextAugmentedManifest(jobId, A._)) - .Returns(augmentedManifest); - - await sut.HandleMessage(CreateMessage(jobId, PipelineJobStatus.Completed), CancellationToken.None); - - stagedManifest.Context.Should().Be(searchContext); + savedManifest!.Service.Should().HaveCount(1, "Duplicate service ID should not be added twice"); + savedManifest.Service!.Single().As().Profile.Should() + .Be("original", "Original is not ovewritten"); + stagedManifest.Context.Should().BeNull("Context not added as no service added"); } [Fact] - public async Task HandleMessage_DoesNotAddPresentation3Context_FromAugmentedManifest() + public async Task HandleMessage_AddsSearch2Context_IfSearchServiceAdded() { var manifestId = TestIdentifiers.IdWithSuffix(suffix: "_context_p3_skip"); var jobId = $"{CustomerId}/iiif/{manifestId}"; - await SetupManifestWithPipelineJob(manifestId, jobId); + await SetupManifestWithPipelineJob(manifestId); var stagedManifest = new IIIFManifest { Id = manifestId }; A.CallTo(() => iiifS3.ReadIIIFFromS3(A._, BucketLocationType.Staging, A._)) @@ -276,7 +261,7 @@ public async Task HandleMessage_DoesNotAddPresentation3Context_FromAugmentedMani await sut.HandleMessage(CreateMessage(jobId, PipelineJobStatus.Completed), CancellationToken.None); - stagedManifest.Context.Should().BeNull(); + stagedManifest.Context.Should().Be("http://iiif.io/api/search/2/context.json"); } [Fact] @@ -284,7 +269,7 @@ public async Task HandleMessage_SetsFinishedTimestamp_WhenJobCompletes() { var manifestId = TestIdentifiers.IdWithSuffix(suffix: "_finished_completed"); var 
jobId = $"{CustomerId}/iiif/{manifestId}"; - await SetupManifestWithPipelineJob(manifestId, jobId); + await SetupManifestWithPipelineJob(manifestId); A.CallTo(() => iiifS3.ReadIIIFFromS3(A._, BucketLocationType.Staging, A._)) .Returns(new IIIFManifest { Id = manifestId }); @@ -302,7 +287,7 @@ public async Task HandleMessage_SetsFinishedTimestamp_WhenJobFails() { var manifestId = TestIdentifiers.IdWithSuffix(suffix: "_finished_failed"); var jobId = $"{CustomerId}/iiif/{manifestId}"; - await SetupManifestWithPipelineJob(manifestId, jobId); + await SetupManifestWithPipelineJob(manifestId); await sut.HandleMessage(CreateMessage(jobId, PipelineJobStatus.Failed, errors: "OCR error"), CancellationToken.None); @@ -315,7 +300,7 @@ public async Task HandleMessage_OnlyAddsSearchService2_WhenAugmentedManifestHasO { var manifestId = TestIdentifiers.IdWithSuffix(suffix: "_filter_services"); var jobId = $"{CustomerId}/iiif/{manifestId}"; - await SetupManifestWithPipelineJob(manifestId, jobId); + await SetupManifestWithPipelineJob(manifestId); var stagedManifest = new IIIFManifest { Id = manifestId }; A.CallTo(() => iiifS3.ReadIIIFFromS3(A._, BucketLocationType.Staging, A._)) @@ -338,11 +323,48 @@ public async Task HandleMessage_OnlyAddsSearchService2_WhenAugmentedManifestHasO await sut.HandleMessage(CreateMessage(jobId, PipelineJobStatus.Completed), CancellationToken.None); - savedManifest!.Service.Should().ContainSingle() - .Which.Should().BeOfType(); + savedManifest!.Service.Should().ContainSingle().Which.Should().Be(searchService); + } + + [Fact] + public async Task HandleMessage_SetsLabelOnSearchService2_AndAutoCompleteService() + { + var manifestId = TestIdentifiers.IdWithSuffix(); + var jobId = $"{CustomerId}/iiif/{manifestId}"; + await SetupManifestWithPipelineJob(manifestId); + + var stagedManifest = new IIIFManifest { Id = manifestId }; + A.CallTo(() => iiifS3.ReadIIIFFromS3(A._, BucketLocationType.Staging, A._)) + .Returns(stagedManifest); + + var searchService = new 
SearchService2 + { + Id = "https://search.example.com/search", + Service = [new AutoCompleteService2 { Id = "https://search.example.com/autocomplete" }] + }; + var otherService = new SearchService { Id = "https://image.example.com/image" }; + var augmentedManifest = new IIIFManifest + { + Service = [searchService, otherService] + }; + A.CallTo(() => textServicesClient.GetTextAugmentedManifest(jobId, A._)) + .Returns(augmentedManifest); + + IIIFManifest? savedManifest = null; + A.CallTo(() => manifestStorageManager.SaveManifestInStorage( + A._, A._, null, false, A._)) + .Invokes((IIIFManifest m, DbManifest _, string? _, bool _, CancellationToken _) => savedManifest = m) + .Returns(Task.CompletedTask); + + await sut.HandleMessage(CreateMessage(jobId, PipelineJobStatus.Completed), CancellationToken.None); + + var savedSearchSvc = savedManifest!.Service!.Single().As(); + savedSearchSvc.Label!.Values.Should().ContainSingle("Search within this manifest"); + var savedAutoCompleteSvc = savedSearchSvc!.Service!.Single().As(); + savedAutoCompleteSvc.Label!.Values.Should().ContainSingle("Autocomplete words in this manifest"); } - private async Task SetupManifestWithPipelineJob(string manifestId, string jobId) + private async Task SetupManifestWithPipelineJob(string manifestId) { var manifestEntry = await dbContext.Manifests.AddTestManifest(id: manifestId); var manifest = manifestEntry.Entity; diff --git a/src/IIIFPresentation/BackgroundHandler/TextCompletion/TextServiceJobCompletionMessageHandler.cs b/src/IIIFPresentation/BackgroundHandler/TextCompletion/TextServiceJobCompletionMessageHandler.cs index a5b33147..e5076844 100644 --- a/src/IIIFPresentation/BackgroundHandler/TextCompletion/TextServiceJobCompletionMessageHandler.cs +++ b/src/IIIFPresentation/BackgroundHandler/TextCompletion/TextServiceJobCompletionMessageHandler.cs @@ -2,9 +2,11 @@ using AWS.Helpers; using AWS.SQS; using BackgroundHandler.Helpers; +using Core.Helpers; using Core.IIIF; using IIIF; using 
IIIF.Presentation.V3; +using IIIF.Presentation.V3.Strings; using IIIF.Search.V2; using Microsoft.EntityFrameworkCore; using Models.Database.General; @@ -25,8 +27,6 @@ public class TextServiceJobCompletionMessageHandler( ILogger logger) : IMessageHandler { - private const string Search2Context = "http://iiif.io/api/search/2/context.json"; - public async Task HandleMessage(QueueMessage message, CancellationToken cancellationToken) { using (LogContextHelpers.SetServiceName(nameof(TextServiceJobCompletionMessageHandler), message.MessageId)) @@ -154,30 +154,32 @@ private async Task ApplyTextServices(string jobId, Manifest stagedManifest, var augmented = await textServicesClient.GetTextAugmentedManifest(jobId, cancellationToken); var searchServices = augmented?.Service?.OfType().ToList(); - if (searchServices is not { Count: > 0 }) + if (searchServices.IsNullOrEmpty()) { logger.LogDebug("No search services in text-augmented manifest for job {JobId}", jobId); return; } - - stagedManifest.Service ??= []; - var existingIds = stagedManifest.Service.GetDistinctIds(); - - foreach (var service in searchServices) - { - if (existingIds.Add(service.Id!)) stagedManifest.Service.Add(service); - } - MergeContext(stagedManifest, augmented); - + // Add search service to manifest, if added then ensure Manifest has the search context + stagedManifest.Service ??= []; + var added = stagedManifest.Service.AddDistinctById(searchServices, AddService); + if (added > 0) stagedManifest.EnsureContext(SearchService2.Search2Context); logger.LogDebug("Added search service to manifest for job {JobId}", jobId); } - private static void MergeContext(Manifest target, Manifest source) + private static void AddService(IService service) { - foreach (var context in source.GetContextStrings().Where(c => c == Search2Context)) + // Expectation is we'll get a SearchService2 containing an AutoCompleteService2. 
Set labels on these if null + if (service is SearchService2 searchService) { - target.EnsureContext(context); + searchService.Label ??= new LanguageMap("en", "Search within this manifest"); + // We're only expecting 1 here but use FirstOrDefault, rather than SingleOrDefault to avoid throwing if + // text-service adds unexpected service. + var autoComplete = searchService.Service?.OfType().FirstOrDefault(); + if (autoComplete != null) + { + autoComplete.Label ??= new LanguageMap("en", "Autocomplete words in this manifest"); + } } } diff --git a/src/IIIFPresentation/Core.Tests/IIIF/ResourceBaseCollectionXTests.cs b/src/IIIFPresentation/Core.Tests/IIIF/ResourceBaseCollectionXTests.cs index 9616c8c4..49acda45 100644 --- a/src/IIIFPresentation/Core.Tests/IIIF/ResourceBaseCollectionXTests.cs +++ b/src/IIIFPresentation/Core.Tests/IIIF/ResourceBaseCollectionXTests.cs @@ -18,10 +18,11 @@ public void AddDistinctById_AddsAllItemsWhenTargetEmpty() var target = new List(); var source = new[] { new TestItem("id1"), new TestItem("id2") }; - target.AddDistinctById(source); + var added = target.AddDistinctById(source); target.Should().HaveCount(2); target.Select(x => x.Id).Should().ContainInOrder("id1", "id2"); + added.Should().Be(2); } [Fact] @@ -30,10 +31,11 @@ public void AddDistinctById_ExcludesDuplicateIds() var target = new List { new("id1") }; var source = new[] { new TestItem("id1"), new TestItem("id2") }; - target.AddDistinctById(source); + var added = target.AddDistinctById(source); target.Should().HaveCount(2); target.Select(x => x.Id).Should().ContainInOrder("id1", "id2"); + added.Should().Be(1); } [Fact] @@ -41,10 +43,11 @@ public void AddDistinctById_IgnoresNullSource() { var target = new List { new("id1") }; - target.AddDistinctById(null); + var added = target.AddDistinctById(null); target.Should().HaveCount(1); target[0].Id.Should().Be("id1"); + added.Should().Be(0); } [Fact] @@ -53,9 +56,24 @@ public void AddDistinctById_ExcludesAllDuplicates() var target = new 
List { new("id1"), new("id2") }; var source = new[] { new TestItem("id1"), new TestItem("id2"), new TestItem("id3") }; - target.AddDistinctById(source); + var added = target.AddDistinctById(source); target.Should().HaveCount(3); target.Select(x => x.Id).Should().ContainInOrder("id1", "id2", "id3"); + added.Should().Be(1); + } + + [Fact] + public void AddDistinctById_CanAlterBeforeAdd() + { + var target = new List { new("id1"), new("id2") }; + var source = new[] { new TestItem("id1"), new TestItem("id2"), new TestItem("id3") }; + + var added = target.AddDistinctById(source, ti => ti.Profile = "Changed"); + + target.Should().HaveCount(3); + target.Select(x => x.Id).Should().ContainInOrder("id1", "id2", "id3"); + added.Should().Be(1); + target.Where(t => t.Profile == "Changed").Should().HaveCount(1, "Only one item was altered"); } } diff --git a/src/IIIFPresentation/Core/IIIF/IServiceX.cs b/src/IIIFPresentation/Core/IIIF/IServiceX.cs deleted file mode 100644 index 92c72ef5..00000000 --- a/src/IIIFPresentation/Core/IIIF/IServiceX.cs +++ /dev/null @@ -1,12 +0,0 @@ -using IIIF; - -namespace Core.IIIF; - -public static class IServiceX -{ - /// - /// Get a list of unique ids. - /// - public static HashSet GetDistinctIds(this IList target) where T : IService - => [..target.Select(s => s.Id).Where(id => id != null).Select(id => id!)]; -} diff --git a/src/IIIFPresentation/Core/IIIF/ResourceBaseCollectionX.cs b/src/IIIFPresentation/Core/IIIF/ResourceBaseCollectionX.cs index d4f0c5ad..979b33e5 100644 --- a/src/IIIFPresentation/Core/IIIF/ResourceBaseCollectionX.cs +++ b/src/IIIFPresentation/Core/IIIF/ResourceBaseCollectionX.cs @@ -1,4 +1,4 @@ -using IIIF.Presentation.V3; +using IIIF; namespace Core.IIIF; @@ -7,18 +7,28 @@ public static class ResourceBaseCollectionX /// /// Add items to a list that are not already present, based on their id property. /// - public static void AddDistinctById(this IList target, IEnumerable? 
source) where T : ResourceBase + /// List to add items to + /// Items to add + /// Optional action to run before adding item + /// Number of items added + public static int AddDistinctById(this IList target, IEnumerable? source, Action? preAdd = null) + where T : IResource { - if (source == null) return; + if (source == null) return 0; var existingIds = new HashSet(target.Select(s => s.Id)); + int count = 0; foreach (var item in source) { if (!existingIds.Contains(item.Id)) { + count++; + preAdd?.Invoke(item); target.Add(item); existingIds.Add(item.Id); } } + + return count; } } diff --git a/src/IIIFPresentation/Services/TextServices/ITextServicesClient.cs b/src/IIIFPresentation/Services/TextServices/ITextServicesClient.cs index ae961916..70bc35bb 100644 --- a/src/IIIFPresentation/Services/TextServices/ITextServicesClient.cs +++ b/src/IIIFPresentation/Services/TextServices/ITextServicesClient.cs @@ -11,12 +11,12 @@ public interface ITextServicesClient /// The pipeline job to submit /// S3 bucket containing the staged manifest /// S3 key of the staged manifest - Task CreateOrUpdateJob(PipelineJob job, string bucket, string resourceKey, CancellationToken cancellationToken = default); + Task CreateOrUpdateJob(PipelineJob job, string bucket, string resourceKey, CancellationToken cancellationToken); /// /// Retrieve the text-augmented manifest for a completed job. /// Returns null if the job produced no text resources. /// /// Job identifier in format "{customerId}/iiif/{manifestId}" - Task GetTextAugmentedManifest(string jobId, CancellationToken cancellationToken = default); + Task GetTextAugmentedManifest(string jobId, CancellationToken cancellationToken); }