diff --git a/apps/core/src/modules/ai/ai-translation/ai-translation.service.ts b/apps/core/src/modules/ai/ai-translation/ai-translation.service.ts index b1e03213abf..973cfc7b451 100644 --- a/apps/core/src/modules/ai/ai-translation/ai-translation.service.ts +++ b/apps/core/src/modules/ai/ai-translation/ai-translation.service.ts @@ -46,6 +46,7 @@ import type { } from './ai-translation.types' import { AITranslationModel } from './ai-translation.types-model' import { BaseTranslationService } from './base-translation.service' +import { LexicalPartialTranslationBuilder } from './lexical-partial-translation.builder' import { TranslationConsistencyService } from './translation-consistency.service' import type { TranslationSourceSnapshot } from './translation-consistency.types' import type { ITranslationStrategy } from './translation-strategy.interface' @@ -100,6 +101,7 @@ export class AiTranslationService private readonly aiTranslationRepository: AiTranslationRepository, private readonly databaseService: DatabaseService, private readonly translationConsistencyService: TranslationConsistencyService, + private readonly lexicalPartialTranslationBuilder: LexicalPartialTranslationBuilder, private readonly configService: ConfigsService, private readonly aiService: AiService, private readonly aiInFlightService: AiInFlightService, @@ -121,6 +123,19 @@ export class AiTranslationService : this.markdownStrategy } + private scheduleStaleTranslationRegenerationBestEffort( + articleId: string, + targetLang: string, + ) { + this.scheduleRegenerationForStaleTranslations([articleId], targetLang).catch( + (err) => + this.logger.error( + 'Failed to schedule stale translation regeneration', + err, + ), + ) + } + onModuleInit() { this.registerTaskHandlers() } @@ -1074,7 +1089,22 @@ export class AiTranslationService translation, ) - return status === 'valid' ? 
translation : null + if (status === 'valid') { + return translation + } + + if (status !== 'stale') { + return null + } + + this.scheduleStaleTranslationRegenerationBestEffort(articleId, targetLang) + + const partial = this.lexicalPartialTranslationBuilder.build( + this.toArticleContent(document), + translation, + ) + + return partial?.translation ?? null } async getValidTranslationsForArticles( @@ -1225,6 +1255,7 @@ export class AiTranslationService const snapshot = this.buildSnapshotFromDocument(articleId, document) const validLangs: string[] = [] const staleLangs: string[] = [] + let matchedTranslation: AITranslationModel | null = null for (const t of translations) { const status = @@ -1234,29 +1265,23 @@ export class AiTranslationService ) if (status === 'valid') { validLangs.push(t.lang) + if (targetLang === t.lang) { + matchedTranslation = t + } } else if (status === 'stale') { staleLangs.push(t.lang) + if (targetLang === t.lang) { + matchedTranslation = + this.lexicalPartialTranslationBuilder.build( + this.toArticleContent(document), + t, + )?.translation ?? null + } } } - const matchedTranslation = - targetLang && validLangs.includes(targetLang) - ? 
await this.aiTranslationRepository.findByRefAndLang( - articleId, - targetLang, - ) - : null - if (staleLangs.length && targetLang) { - this.scheduleRegenerationForStaleTranslations( - [articleId], - targetLang, - ).catch((err) => - this.logger.error( - 'Failed to schedule stale translation regeneration', - err, - ), - ) + this.scheduleStaleTranslationRegenerationBestEffort(articleId, targetLang) } return { diff --git a/apps/core/src/modules/ai/ai-translation/lexical-block-reuse.ts b/apps/core/src/modules/ai/ai-translation/lexical-block-reuse.ts new file mode 100644 index 00000000000..aeeacbdd015 --- /dev/null +++ b/apps/core/src/modules/ai/ai-translation/lexical-block-reuse.ts @@ -0,0 +1,128 @@ +import type { + LexicalTranslationResult, + PropertySegment, + TranslationSegment, +} from './lexical-translation-parser' +import { validateMermaidTranslation } from './mermaid-translation-guard' + +export interface BlockTranslationSegments { + segments: TranslationSegment[] + propertySegments: PropertySegment[] +} + +export interface BackfillReusableBlockResult { + reusedBlockIds: string[] + skippedBlockIds: string[] +} + +export function groupSegmentsByBlock( + result: LexicalTranslationResult, +): Map { + const byBlock = new Map() + + const getBucket = (blockId: string) => { + let bucket = byBlock.get(blockId) + if (!bucket) { + bucket = { segments: [], propertySegments: [] } + byBlock.set(blockId, bucket) + } + return bucket + } + + for (const segment of result.segments) { + if (!segment.blockId || !segment.translatable) continue + getBucket(segment.blockId).segments.push(segment) + } + + for (const propertySegment of result.propertySegments) { + if (!propertySegment.blockId) continue + getBucket(propertySegment.blockId).propertySegments.push(propertySegment) + } + + return byBlock +} + +export function canReuseBlockTranslations( + currentBlock: BlockTranslationSegments, + translatedBlock: BlockTranslationSegments, +): boolean { + if (currentBlock.segments.length !== 
translatedBlock.segments.length) { + return false + } + + if ( + currentBlock.propertySegments.length !== + translatedBlock.propertySegments.length + ) { + return false + } + + return currentBlock.propertySegments.every((segment, index) => { + const translatedSegment = translatedBlock.propertySegments[index] + return ( + translatedSegment.property === segment.property && + translatedSegment.key === segment.key + ) + }) +} + +export function backfillReusableBlockTranslations( + currentResult: LexicalTranslationResult, + translatedResult: LexicalTranslationResult, + unchangedBlockIds: Set, + output: Map, +): BackfillReusableBlockResult { + const currentBlocks = groupSegmentsByBlock(currentResult) + const translatedBlocks = groupSegmentsByBlock(translatedResult) + const reusedBlockIds: string[] = [] + const skippedBlockIds: string[] = [] + + for (const blockId of unchangedBlockIds) { + const currentBlock = currentBlocks.get(blockId) + const translatedBlock = translatedBlocks.get(blockId) + + if ( + !currentBlock || + !translatedBlock || + !canReuseBlockTranslations(currentBlock, translatedBlock) + ) { + skippedBlockIds.push(blockId) + continue + } + + currentBlock.segments.forEach((segment, index) => { + output.set(segment.id, translatedBlock.segments[index].text) + }) + + currentBlock.propertySegments.forEach((propertySegment, index) => { + output.set( + propertySegment.id, + translatedBlock.propertySegments[index].text, + ) + }) + + reusedBlockIds.push(blockId) + } + + return { reusedBlockIds, skippedBlockIds } +} + +export function guardMermaidTranslations( + parseResult: LexicalTranslationResult, + translations: Map, + onReject?: (message: string) => void, +): void { + for (const prop of parseResult.propertySegments) { + if (prop.property !== 'diagram' || prop.node?.type !== 'mermaid') continue + const translated = translations.get(prop.id) + if (translated === undefined) continue + if (translated === prop.text) continue + + const validation = 
validateMermaidTranslation(prop.text, translated) + if (!validation.ok) { + const message = `Mermaid translation rejected: reason=${validation.reason} sourceLen=${prop.text.length} translatedLen=${translated.length}` + onReject?.(message) + translations.delete(prop.id) + } + } +} diff --git a/apps/core/src/modules/ai/ai-translation/lexical-partial-translation.builder.ts b/apps/core/src/modules/ai/ai-translation/lexical-partial-translation.builder.ts new file mode 100644 index 00000000000..ca0b6959dfa --- /dev/null +++ b/apps/core/src/modules/ai/ai-translation/lexical-partial-translation.builder.ts @@ -0,0 +1,175 @@ +import { Injectable, Logger } from '@nestjs/common' + +import { LexicalService } from '~/processors/helper/helper.lexical.service' +import { ContentFormat } from '~/shared/types/content-format.type' +import { md5 } from '~/utils/tool.util' + +import type { AiTranslationRow, ArticleContent } from './ai-translation.types' +import { + backfillReusableBlockTranslations, + guardMermaidTranslations, +} from './lexical-block-reuse' +import { + parseLexicalForTranslation, + restoreLexicalTranslation, +} from './lexical-translation-parser' + +export interface PartialLexicalTranslationStats { + totalBlockCount: number + changedBlockCount: number + reusedBlockCount: number + skippedReusableBlockCount: number +} + +export interface PartialLexicalTranslationResult { + translation: AiTranslationRow + stats: PartialLexicalTranslationStats +} + +interface LexicalSourceBlockSnapshot { + id: string + fingerprint: string +} + +interface LexicalSourceMetaHashes { + title?: unknown + subtitle?: unknown + summary?: unknown + tags?: unknown +} + +function isLexicalSourceBlockSnapshotArray( + value: unknown, +): value is LexicalSourceBlockSnapshot[] { + return ( + Array.isArray(value) && + value.every( + (snapshot) => + snapshot && + typeof snapshot === 'object' && + typeof (snapshot as LexicalSourceBlockSnapshot).id === 'string' && + typeof (snapshot as 
LexicalSourceBlockSnapshot).fingerprint === + 'string', + ) + ) +} + +function getMetaHashes(value: unknown): LexicalSourceMetaHashes | null { + if (!value || typeof value !== 'object' || Array.isArray(value)) return null + return value as LexicalSourceMetaHashes +} + +@Injectable() +export class LexicalPartialTranslationBuilder { + private readonly logger = new Logger(LexicalPartialTranslationBuilder.name) + + constructor(private readonly lexicalService: LexicalService) {} + + build( + content: ArticleContent, + existing: AiTranslationRow, + ): PartialLexicalTranslationResult | null { + if (content.contentFormat !== ContentFormat.Lexical || !content.content) { + return null + } + + if ( + existing.contentFormat !== ContentFormat.Lexical || + !existing.content || + !isLexicalSourceBlockSnapshotArray(existing.sourceBlockSnapshots) + ) { + return null + } + + const currentBlocks = this.lexicalService.extractRootBlocks(content.content) + if (currentBlocks.length === 0) { + return null + } + + const oldFingerprintByBlockId = new Map( + existing.sourceBlockSnapshots.map((snapshot) => [ + snapshot.id, + snapshot.fingerprint, + ]), + ) + const unchangedBlockIds = new Set() + + for (const block of currentBlocks) { + if ( + block.id && + oldFingerprintByBlockId.has(block.id) && + oldFingerprintByBlockId.get(block.id) === block.fingerprint + ) { + unchangedBlockIds.add(block.id) + } + } + + try { + const currentParseResult = parseLexicalForTranslation(content.content) + const translatedParseResult = parseLexicalForTranslation(existing.content) + const translations = new Map() + const backfillResult = backfillReusableBlockTranslations( + currentParseResult, + translatedParseResult, + unchangedBlockIds, + translations, + ) + + guardMermaidTranslations(currentParseResult, translations, (message) => + this.logger.warn(message), + ) + + const translatedContent = restoreLexicalTranslation( + currentParseResult, + translations, + ) + const text = 
this.lexicalService.lexicalToMarkdown(translatedContent) + const metaHashes = getMetaHashes(existing.sourceMetaHashes) + const stats: PartialLexicalTranslationStats = { + totalBlockCount: currentBlocks.length, + changedBlockCount: + currentBlocks.length - backfillResult.reusedBlockIds.length, + reusedBlockCount: backfillResult.reusedBlockIds.length, + skippedReusableBlockCount: backfillResult.skippedBlockIds.length, + } + + this.logger.log( + `Partial Lexical translation: total=${stats.totalBlockCount} changed=${stats.changedBlockCount} reused=${stats.reusedBlockCount} skipped=${stats.skippedReusableBlockCount}`, + ) + + return { + translation: { + ...existing, + title: + metaHashes?.title === md5(content.title) + ? existing.title + : content.title, + subtitle: + content.subtitle && metaHashes?.subtitle === md5(content.subtitle) + ? existing.subtitle + : (content.subtitle ?? null), + summary: + content.summary && metaHashes?.summary === md5(content.summary) + ? existing.summary + : (content.summary ?? null), + tags: + content.tags?.length && + metaHashes?.tags === md5(content.tags.join('|||')) + ? existing.tags + : (content.tags ?? []), + text, + contentFormat: ContentFormat.Lexical, + content: translatedContent, + }, + stats, + } + } catch (error) { + this.logger.warn( + `Partial Lexical translation failed: ${ + error instanceof Error ? 
error.message : String(error) + }`, + ) + return null + } + } +} diff --git a/apps/core/src/modules/ai/ai-translation/strategies/lexical-translation.strategy.ts b/apps/core/src/modules/ai/ai-translation/strategies/lexical-translation.strategy.ts index 6b5e39d901c..19d07b6b9bb 100644 --- a/apps/core/src/modules/ai/ai-translation/strategies/lexical-translation.strategy.ts +++ b/apps/core/src/modules/ai/ai-translation/strategies/lexical-translation.strategy.ts @@ -10,13 +10,14 @@ import type { IModelRuntime } from '../../runtime' import type { ArticleContent } from '../ai-translation.types' import type { AITranslationModel } from '../ai-translation.types-model' import { - type LexicalTranslationResult, + backfillReusableBlockTranslations, + guardMermaidTranslations, +} from '../lexical-block-reuse' +import { parseLexicalForTranslation, - type PropertySegment, restoreLexicalTranslation, type TranslationSegment, } from '../lexical-translation-parser' -import { validateMermaidTranslation } from '../mermaid-translation-guard' import type { ITranslationStrategy, TranslationResult, @@ -36,11 +37,6 @@ interface TranslationUnit { memberIds?: string[] } -interface BlockTranslationSegments { - segments: TranslationSegment[] - propertySegments: PropertySegment[] -} - interface LexicalTranslationInput { title?: string | null subtitle?: string | null @@ -178,7 +174,9 @@ export class LexicalTranslationStrategy ) } - this.guardMermaidTranslations(parseResult, allTranslations) + guardMermaidTranslations(parseResult, allTranslations, (message) => + this.logger.warn(message), + ) const translatedContent = restoreLexicalTranslation( parseResult, @@ -251,12 +249,15 @@ export class LexicalTranslationStrategy const translatedParseResult = parseLexicalForTranslation( existing.content!, ) - this.backfillReusableBlockTranslations( + const backfillResult = backfillReusableBlockTranslations( parseResult, translatedParseResult, unchangedBlockIds, allTranslations, ) + this.logger.log( + 
`Incremental reuse: reused=${backfillResult.reusedBlockIds.length} skipped=${backfillResult.skippedBlockIds.length}`, + ) } catch { throw new Error('Failed to parse existing translated content') } @@ -411,7 +412,9 @@ export class LexicalTranslationStrategy if (sl) sourceLang = sl } - this.guardMermaidTranslations(parseResult, allTranslations) + guardMermaidTranslations(parseResult, allTranslations, (message) => + this.logger.warn(message), + ) const translatedContent = restoreLexicalTranslation( parseResult, @@ -525,26 +528,6 @@ export class LexicalTranslationStrategy return units } - private guardMermaidTranslations( - parseResult: LexicalTranslationResult, - translations: Map, - ): void { - for (const prop of parseResult.propertySegments) { - if (prop.property !== 'diagram' || prop.node?.type !== 'mermaid') continue - const translated = translations.get(prop.id) - if (translated === undefined) continue - if (translated === prop.text) continue - - const validation = validateMermaidTranslation(prop.text, translated) - if (!validation.ok) { - this.logger.warn( - `Mermaid translation rejected: reason=${validation.reason} sourceLen=${prop.text.length} translatedLen=${translated.length}`, - ) - translations.delete(prop.id) - } - } - } - private resolvePropertyUnitMeta(prop: { property: string node: any @@ -590,88 +573,6 @@ export class LexicalTranslationStrategy return units } - private groupSegmentsByBlock( - result: LexicalTranslationResult, - ): Map { - const byBlock = new Map() - - const getBucket = (blockId: string) => { - let bucket = byBlock.get(blockId) - if (!bucket) { - bucket = { segments: [], propertySegments: [] } - byBlock.set(blockId, bucket) - } - return bucket - } - - for (const segment of result.segments) { - if (!segment.blockId || !segment.translatable) continue - getBucket(segment.blockId).segments.push(segment) - } - - for (const propertySegment of result.propertySegments) { - if (!propertySegment.blockId) continue - 
getBucket(propertySegment.blockId).propertySegments.push(propertySegment) - } - - return byBlock - } - - private canReuseBlockTranslations( - currentBlock: BlockTranslationSegments, - translatedBlock: BlockTranslationSegments, - ): boolean { - if (currentBlock.segments.length !== translatedBlock.segments.length) { - return false - } - - if ( - currentBlock.propertySegments.length !== - translatedBlock.propertySegments.length - ) { - return false - } - - return currentBlock.propertySegments.every((segment, index) => { - const translatedSegment = translatedBlock.propertySegments[index] - return ( - translatedSegment.property === segment.property && - translatedSegment.key === segment.key - ) - }) - } - - private backfillReusableBlockTranslations( - currentResult: LexicalTranslationResult, - translatedResult: LexicalTranslationResult, - unchangedBlockIds: Set, - output: Map, - ): void { - const currentBlocks = this.groupSegmentsByBlock(currentResult) - const translatedBlocks = this.groupSegmentsByBlock(translatedResult) - - for (const blockId of unchangedBlockIds) { - const currentBlock = currentBlocks.get(blockId) - const translatedBlock = translatedBlocks.get(blockId) - - if (!currentBlock || !translatedBlock) continue - if (!this.canReuseBlockTranslations(currentBlock, translatedBlock)) { - continue - } - - currentBlock.segments.forEach((segment, index) => { - output.set(segment.id, translatedBlock.segments[index].text) - }) - - currentBlock.propertySegments.forEach((propertySegment, index) => { - output.set( - propertySegment.id, - translatedBlock.propertySegments[index].text, - ) - }) - } - } - private unitsToEntries(units: TranslationUnit[]): Record { return Object.fromEntries(units.map((unit) => [unit.id, unit.payload])) } diff --git a/apps/core/src/modules/ai/ai.module.ts b/apps/core/src/modules/ai/ai.module.ts index 03ed6372fea..76a414f266a 100644 --- a/apps/core/src/modules/ai/ai.module.ts +++ b/apps/core/src/modules/ai/ai.module.ts @@ -24,6 +24,7 @@ import 
{ } from './ai-translation/ai-translation.repository' import { AiTranslationService } from './ai-translation/ai-translation.service' import { AiTranslationEventHandlerService } from './ai-translation/ai-translation-event-handler.service' +import { LexicalPartialTranslationBuilder } from './ai-translation/lexical-partial-translation.builder' import { LexicalTranslationStrategy } from './ai-translation/strategies/lexical-translation.strategy' import { MarkdownTranslationStrategy } from './ai-translation/strategies/markdown-translation.strategy' import { TranslationConsistencyService } from './ai-translation/translation-consistency.service' @@ -58,6 +59,7 @@ import { AiWriterService } from './ai-writer/ai-writer.service' useClass: MarkdownTranslationStrategy, }, TranslationConsistencyService, + LexicalPartialTranslationBuilder, AiTranslationService, AiTranslationRepository, TranslationEntryRepository, diff --git a/apps/core/src/processors/helper/helper.lexical.service.ts b/apps/core/src/processors/helper/helper.lexical.service.ts index bcfd99864b5..7ecb5f39f49 100644 --- a/apps/core/src/processors/helper/helper.lexical.service.ts +++ b/apps/core/src/processors/helper/helper.lexical.service.ts @@ -140,6 +140,33 @@ export class LexicalService { ({ text }) => text, ) + if ( + node.type === 'mermaid' && + typeof node.diagram === 'string' && + node.diagram.trim() + ) { + segments.push(node.diagram) + } + + if (node.type === 'poll') { + if (typeof node.question === 'string' && node.question.trim()) { + segments.push(node.question) + } + + if (Array.isArray(node.options)) { + for (const option of node.options) { + if ( + option && + typeof option === 'object' && + typeof option.label === 'string' && + option.label.trim() + ) { + segments.push(option.label) + } + } + } + } + if (Array.isArray(node.children)) { const childText = node.children .map((child: any) => this.extractBlockText(child)) diff --git a/apps/core/test/src/modules/ai/ai-translation.service.spec.ts 
b/apps/core/test/src/modules/ai/ai-translation.service.spec.ts index caf63608208..62f87a54ec5 100644 --- a/apps/core/test/src/modules/ai/ai-translation.service.spec.ts +++ b/apps/core/test/src/modules/ai/ai-translation.service.spec.ts @@ -2,6 +2,7 @@ import { describe, expect, it, vi } from 'vitest' import { createPgRepositoryMock, now } from '@/helper/pg-repository-mock' import { AppException } from '~/common/errors/exception.types' +import { CollectionRefTypes } from '~/constants/db.constant' import type { AiTranslationRepository, AiTranslationRow, @@ -32,23 +33,50 @@ const row = (overrides: Partial = {}): AiTranslationRow => ({ ...overrides, }) +const articleDocument = (overrides: Record = {}) => ({ + id: 'post-1', + title: 'Source Title', + text: 'Source Text', + subtitle: null, + summary: null, + tags: [], + contentFormat: ContentFormat.Lexical, + content: '{"root":{"children":[]}}', + isPublished: true, + meta: { lang: 'zh' }, + modifiedAt: now, + createdAt: now, + ...overrides, +}) + const createService = () => { const repository = createPgRepositoryMock() const databaseService = { findGlobalById: vi.fn(), findGlobalByIds: vi.fn() } - const translationConsistencyService = {} - const configService = {} + const translationConsistencyService = { + evaluateTranslationFreshness: vi.fn(() => 'valid'), + filterTrulyStaleTranslations: vi.fn(), + partitionValidAndStaleTranslations: vi.fn(), + } + const partialBuilder = { build: vi.fn() } + const configService = { + get: vi.fn(() => ({ + enableAutoGenerateTranslation: true, + enableTranslation: true, + })), + } const aiService = {} const aiInFlightService = {} const eventManager = { emit: vi.fn() } const taskProcessor = { registerHandler: vi.fn() } const lexicalService = { lexicalToMarkdown: vi.fn(() => 'markdown') } - const aiTaskService = {} + const aiTaskService = { createTranslationTask: vi.fn() } const lexicalStrategy = {} const markdownStrategy = {} const service = new AiTranslationService( repository as any, 
databaseService as any, translationConsistencyService as any, + partialBuilder as any, configService as any, aiService as any, aiInFlightService as any, @@ -59,7 +87,16 @@ const createService = () => { lexicalStrategy as any, markdownStrategy as any, ) - return { databaseService, lexicalService, repository, service } + return { + aiTaskService, + configService, + databaseService, + lexicalService, + partialBuilder, + repository, + service, + translationConsistencyService, + } } describe('AiTranslationService', () => { @@ -98,4 +135,260 @@ describe('AiTranslationService', () => { AppException, ) }) + + it('returns a valid article translation without scheduling or building partial output', async () => { + const { + databaseService, + partialBuilder, + repository, + service, + translationConsistencyService, + } = createService() + const validTranslation = row() + const scheduleSpy = vi.spyOn( + service, + 'scheduleRegenerationForStaleTranslations', + ) + + databaseService.findGlobalById.mockResolvedValue({ + document: articleDocument(), + type: CollectionRefTypes.Post, + }) + repository.findByRefAndLang.mockResolvedValue(validTranslation) + translationConsistencyService.evaluateTranslationFreshness.mockReturnValue( + 'valid', + ) + + await expect( + service.getTranslationForArticle('post-1', 'en'), + ).resolves.toBe(validTranslation) + + expect(scheduleSpy).not.toHaveBeenCalled() + expect(partialBuilder.build).not.toHaveBeenCalled() + }) + + it('returns null for a missing article translation without scheduling or building partial output', async () => { + const { + databaseService, + partialBuilder, + repository, + service, + translationConsistencyService, + } = createService() + const scheduleSpy = vi.spyOn( + service, + 'scheduleRegenerationForStaleTranslations', + ) + + databaseService.findGlobalById.mockResolvedValue({ + document: articleDocument(), + type: CollectionRefTypes.Post, + }) + repository.findByRefAndLang.mockResolvedValue(null) + + await expect( + 
service.getTranslationForArticle('post-1', 'en'), + ).resolves.toBeNull() + + expect( + translationConsistencyService.evaluateTranslationFreshness, + ).not.toHaveBeenCalled() + expect(scheduleSpy).not.toHaveBeenCalled() + expect(partialBuilder.build).not.toHaveBeenCalled() + }) + + it('returns null for unknown article translation freshness without scheduling or building partial output', async () => { + const { + databaseService, + partialBuilder, + repository, + service, + translationConsistencyService, + } = createService() + const unknownTranslation = row({ + contentFormat: ContentFormat.Lexical, + content: '{"root":{"children":[]}}', + }) + const scheduleSpy = vi.spyOn( + service, + 'scheduleRegenerationForStaleTranslations', + ) + + databaseService.findGlobalById.mockResolvedValue({ + document: articleDocument(), + type: CollectionRefTypes.Post, + }) + repository.findByRefAndLang.mockResolvedValue(unknownTranslation) + translationConsistencyService.evaluateTranslationFreshness.mockReturnValue( + 'unknown', + ) + + await expect( + service.getTranslationForArticle('post-1', 'en'), + ).resolves.toBeNull() + + expect(scheduleSpy).not.toHaveBeenCalled() + expect(partialBuilder.build).not.toHaveBeenCalled() + }) + + it('returns a partial lexical translation for a stale article translation without persisting it', async () => { + const { + databaseService, + partialBuilder, + repository, + service, + translationConsistencyService, + } = createService() + const staleTranslation = row({ + contentFormat: ContentFormat.Lexical, + content: '{"root":{"children":[]}}', + }) + const partialTranslation = row({ + id: 'translation-1' as any, + title: 'Partial Title', + text: 'Partial Text', + contentFormat: ContentFormat.Lexical, + content: '{"root":{"children":[]}}', + }) + const scheduleSpy = vi + .spyOn(service, 'scheduleRegenerationForStaleTranslations') + .mockResolvedValue(undefined) + + databaseService.findGlobalById.mockResolvedValue({ + document: articleDocument(), + 
type: CollectionRefTypes.Post, + }) + repository.findByRefAndLang.mockResolvedValue(staleTranslation) + translationConsistencyService.evaluateTranslationFreshness.mockReturnValue( + 'stale', + ) + partialBuilder.build.mockReturnValue({ + stats: { + changedBlockCount: 0, + reusedBlockCount: 1, + skippedReusableBlockCount: 0, + totalBlockCount: 1, + }, + translation: partialTranslation, + }) + + await expect( + service.getTranslationForArticle('post-1', 'en'), + ).resolves.toBe(partialTranslation) + + expect(partialBuilder.build).toHaveBeenCalledWith( + expect.objectContaining({ + content: '{"root":{"children":[]}}', + contentFormat: ContentFormat.Lexical, + text: 'Source Text', + title: 'Source Title', + }), + staleTranslation, + ) + expect(scheduleSpy).toHaveBeenCalledWith(['post-1'], 'en') + expect(repository.updateById).not.toHaveBeenCalled() + expect(repository.upsert).not.toHaveBeenCalled() + }) + + it('returns null for a stale lexical article translation when partial build is unavailable', async () => { + const { + databaseService, + partialBuilder, + repository, + service, + translationConsistencyService, + } = createService() + const staleTranslation = row({ + contentFormat: ContentFormat.Lexical, + content: '{"root":{"children":[]}}', + }) + const scheduleSpy = vi + .spyOn(service, 'scheduleRegenerationForStaleTranslations') + .mockResolvedValue(undefined) + + databaseService.findGlobalById.mockResolvedValue({ + document: articleDocument(), + type: CollectionRefTypes.Post, + }) + repository.findByRefAndLang.mockResolvedValue(staleTranslation) + translationConsistencyService.evaluateTranslationFreshness.mockReturnValue( + 'stale', + ) + partialBuilder.build.mockReturnValue(null) + + await expect( + service.getTranslationForArticle('post-1', 'en'), + ).resolves.toBeNull() + + expect(partialBuilder.build).toHaveBeenCalledWith( + expect.objectContaining({ + contentFormat: ContentFormat.Lexical, + }), + staleTranslation, + ) + 
expect(scheduleSpy).toHaveBeenCalledWith(['post-1'], 'en') + }) + + it('returns a partial requested stale translation without listing it as available', async () => { + const { + databaseService, + partialBuilder, + repository, + service, + translationConsistencyService, + } = createService() + const validTranslation = row({ lang: 'en' }) + const staleTranslation = row({ + id: 'translation-2' as any, + lang: 'ja', + contentFormat: ContentFormat.Lexical, + content: '{"root":{"children":[]}}', + }) + const partialTranslation = row({ + id: 'translation-2' as any, + lang: 'ja', + text: 'Partial Japanese Text', + contentFormat: ContentFormat.Lexical, + content: '{"root":{"children":[]}}', + }) + const scheduleSpy = vi + .spyOn(service, 'scheduleRegenerationForStaleTranslations') + .mockResolvedValue(undefined) + + databaseService.findGlobalById.mockResolvedValue({ + document: articleDocument(), + type: CollectionRefTypes.Post, + }) + repository.listByRefId.mockResolvedValue([validTranslation, staleTranslation]) + translationConsistencyService.evaluateTranslationFreshness.mockImplementation( + (_snapshot: unknown, translation: AiTranslationRow) => + translation.lang === 'ja' ? 
'stale' : 'valid', + ) + partialBuilder.build.mockReturnValue({ + stats: { + changedBlockCount: 0, + reusedBlockCount: 1, + skippedReusableBlockCount: 0, + totalBlockCount: 1, + }, + translation: partialTranslation, + }) + + await expect( + service.getTranslationAndAvailableLanguages('post-1', 'ja'), + ).resolves.toEqual({ + availableTranslations: ['en'], + sourceLang: 'zh', + translation: partialTranslation, + }) + + expect(repository.findByRefAndLang).not.toHaveBeenCalled() + expect(partialBuilder.build).toHaveBeenCalledWith( + expect.objectContaining({ + contentFormat: ContentFormat.Lexical, + }), + staleTranslation, + ) + expect(scheduleSpy).toHaveBeenCalledWith(['post-1'], 'ja') + }) }) diff --git a/apps/core/test/src/modules/ai/lexical-block-reuse.spec.ts b/apps/core/test/src/modules/ai/lexical-block-reuse.spec.ts new file mode 100644 index 00000000000..1a78133e5f7 --- /dev/null +++ b/apps/core/test/src/modules/ai/lexical-block-reuse.spec.ts @@ -0,0 +1,175 @@ +import { describe, expect, it, vi } from 'vitest' + +import { + backfillReusableBlockTranslations, + type BlockTranslationSegments, + canReuseBlockTranslations, + groupSegmentsByBlock, + guardMermaidTranslations, +} from '~/modules/ai/ai-translation/lexical-block-reuse' +import type { + LexicalTranslationResult, + PropertySegment, + TranslationSegment, +} from '~/modules/ai/ai-translation/lexical-translation-parser' + +const textSegment = ( + id: string, + text: string, + blockId: string | null, + translatable = true, +): TranslationSegment => ({ + id, + text, + node: {}, + translatable, + blockId, + rootIndex: 0, + flowId: null, +}) + +const propertySegment = ( + id: string, + text: string, + blockId: string | null, + property = 'summary', + key?: string, + node: any = {}, +): PropertySegment => ({ + id, + text, + node, + property, + key, + blockId, + rootIndex: 0, +}) + +const translationResult = ( + segments: TranslationSegment[], + propertySegments: PropertySegment[], +): LexicalTranslationResult => 
({ + segments, + propertySegments, + editorState: { root: { children: [] } }, +}) + +describe('lexical-block-reuse', () => { + it('groups text and property segments by block id', () => { + const result = translationResult( + [ + textSegment('t_0', 'Alpha', 'block-a'), + textSegment('t_1', 'Code', 'block-a', false), + textSegment('t_2', 'No block', null), + textSegment('t_3', 'Beta', 'block-b'), + ], + [ + propertySegment('p_0', 'Caption', 'block-a', 'caption'), + propertySegment('p_1', 'Detached', null, 'caption'), + propertySegment('p_2', 'Title', 'block-b', 'title'), + ], + ) + + const grouped = groupSegmentsByBlock(result) + + expect([...grouped.keys()]).toEqual(['block-a', 'block-b']) + expect( + grouped.get('block-a')?.segments.map((segment) => segment.id), + ).toEqual(['t_0']) + expect( + grouped.get('block-a')?.propertySegments.map((segment) => segment.id), + ).toEqual(['p_0']) + expect( + grouped.get('block-b')?.segments.map((segment) => segment.id), + ).toEqual(['t_3']) + expect( + grouped.get('block-b')?.propertySegments.map((segment) => segment.id), + ).toEqual(['p_2']) + }) + + it('rejects block reuse when property shape differs', () => { + const currentBlock: BlockTranslationSegments = { + segments: [textSegment('t_0', 'Alpha', 'block-a')], + propertySegments: [ + propertySegment('p_0', 'Caption', 'block-a', 'caption', 'primary'), + ], + } + const translatedBlock: BlockTranslationSegments = { + segments: [textSegment('t_9', 'Translated Alpha', 'block-a')], + propertySegments: [ + propertySegment('p_9', 'Translated Caption', 'block-a', 'caption'), + ], + } + + expect(canReuseBlockTranslations(currentBlock, translatedBlock)).toBe(false) + }) + + it('backfills only unchanged reusable blocks and returns reuse stats', () => { + const currentResult = translationResult( + [ + textSegment('t_0', 'Alpha', 'block-a'), + textSegment('t_1', 'Beta', 'block-b'), + ], + [propertySegment('p_0', 'Caption', 'block-a', 'caption', 'primary')], + ) + const translatedResult 
= translationResult( + [ + textSegment('t_8', 'Alpha translated', 'block-a'), + textSegment('t_9', 'Beta translated', 'block-b'), + ], + [ + propertySegment( + 'p_8', + 'Caption translated', + 'block-a', + 'caption', + 'primary', + ), + ], + ) + const output = new Map() + + const result = backfillReusableBlockTranslations( + currentResult, + translatedResult, + new Set(['block-a']), + output, + ) + + expect(result).toEqual({ + reusedBlockIds: ['block-a'], + skippedBlockIds: [], + }) + expect(Object.fromEntries(output)).toEqual({ + t_0: 'Alpha translated', + p_0: 'Caption translated', + }) + expect(output.has('t_1')).toBe(false) + }) + + it('removes invalid Mermaid translations and calls the rejection callback', () => { + const parseResult = translationResult( + [], + [ + propertySegment( + 'p_0', + 'graph TD\n A-->B', + 'block-a', + 'diagram', + undefined, + { type: 'mermaid' }, + ), + ], + ) + const translations = new Map([['p_0', 'sequenceDiagram\n A-->B']]) + const onReject = vi.fn() + + guardMermaidTranslations(parseResult, translations, onReject) + + expect(translations.has('p_0')).toBe(false) + expect(onReject).toHaveBeenCalledOnce() + expect(onReject).toHaveBeenCalledWith( + expect.stringContaining('Mermaid translation rejected:'), + ) + }) +}) diff --git a/apps/core/test/src/modules/ai/lexical-partial-translation.builder.spec.ts b/apps/core/test/src/modules/ai/lexical-partial-translation.builder.spec.ts new file mode 100644 index 00000000000..e77cf55a08a --- /dev/null +++ b/apps/core/test/src/modules/ai/lexical-partial-translation.builder.spec.ts @@ -0,0 +1,467 @@ +import { describe, expect, it, vi } from 'vitest' + +import type { + AiTranslationRow, + ArticleContent, +} from '~/modules/ai/ai-translation/ai-translation.types' +import { LexicalPartialTranslationBuilder } from '~/modules/ai/ai-translation/lexical-partial-translation.builder' +import { LexicalService } from '~/processors/helper/helper.lexical.service' +import { ContentFormat } from 
'~/shared/types/content-format.type' +import { md5 } from '~/utils/tool.util' + +const now = new Date('2026-05-24T00:00:00.000Z') + +const textNode = (text: string) => ({ + detail: 0, + format: 0, + mode: 'normal', + style: '', + text, + type: 'text', + version: 1, +}) + +const paragraph = (text: string, blockId?: string) => ({ + children: [textNode(text)], + direction: null, + format: '', + indent: 0, + type: 'paragraph', + version: 1, + ...(blockId ? { $: { blockId } } : {}), +}) + +const emptyParagraph = (blockId: string) => ({ + children: [], + direction: null, + format: '', + indent: 0, + type: 'paragraph', + version: 1, + $: { blockId }, +}) + +const mermaidNode = (diagram: string, blockId: string) => ({ + type: 'mermaid', + version: 1, + diagram, + $: { blockId }, +}) + +const editorState = ( + children: Array< + | ReturnType + | ReturnType + | ReturnType + >, +): string => + JSON.stringify({ + root: { + children, + direction: null, + format: '', + indent: 0, + type: 'root', + version: 1, + }, + }) + +const rootTexts = (content: string): string[] => + JSON.parse(content).root.children.map( + (child: any) => child.children?.[0]?.text, + ) + +const rootDiagrams = (content: string): string[] => + JSON.parse(content).root.children + .filter((child: any) => child.type === 'mermaid') + .map((child: any) => child.diagram) + +const row = (overrides: Partial = {}): AiTranslationRow => ({ + id: 'translation-1' as any, + hash: 'hash', + refId: 'post-1' as any, + refType: 'post', + lang: 'en', + sourceLang: 'zh', + title: 'Translated Title', + text: 'Translated markdown', + subtitle: 'Translated Subtitle', + summary: 'Translated Summary', + tags: ['translated-tag'], + sourceModifiedAt: null, + aiModel: 'model', + aiProvider: 'provider', + contentFormat: ContentFormat.Lexical, + content: editorState([paragraph('Translated unchanged', 'block-a')]), + sourceBlockSnapshots: [{ id: 'block-a', fingerprint: 'fp-a' }], + sourceMetaHashes: { title: md5('Title') }, + createdAt: 
now, + ...overrides, +}) + +const content = (overrides: Partial = {}): ArticleContent => ({ + title: 'Title', + text: 'Source markdown', + subtitle: null, + summary: null, + tags: [], + contentFormat: ContentFormat.Lexical, + content: editorState([paragraph('Source unchanged', 'block-a')]), + ...overrides, +}) + +const createBuilder = () => { + const lexicalService = { + extractRootBlocks: vi.fn(), + lexicalToMarkdown: vi.fn((translatedContent: string) => + rootTexts(translatedContent).join('\n'), + ), + } + const builder = new LexicalPartialTranslationBuilder(lexicalService as any) + + return { builder, lexicalService } +} + +describe('LexicalPartialTranslationBuilder', () => { + it('reuses every eligible unchanged block even when the document hash differs', () => { + const { builder, lexicalService } = createBuilder() + const sourceContent = editorState([ + paragraph('Source first', 'block-a'), + paragraph('Source second', 'block-b'), + ]) + const existingContent = editorState([ + paragraph('Translated first', 'block-a'), + paragraph('Translated second', 'block-b'), + ]) + lexicalService.extractRootBlocks.mockReturnValue([ + { + id: 'block-a', + type: 'paragraph', + text: 'Source first', + fingerprint: 'fp-a', + index: 0, + }, + { + id: 'block-b', + type: 'paragraph', + text: 'Source second', + fingerprint: 'fp-b', + index: 1, + }, + ]) + + const result = builder.build( + content({ content: sourceContent }), + row({ + hash: 'different-whole-document-hash', + content: existingContent, + sourceBlockSnapshots: [ + { id: 'block-a', fingerprint: 'fp-a' }, + { id: 'block-b', fingerprint: 'fp-b' }, + ], + }), + ) + + expect(result?.stats).toEqual({ + totalBlockCount: 2, + changedBlockCount: 0, + reusedBlockCount: 2, + skippedReusableBlockCount: 0, + }) + expect(rootTexts(result!.translation.content!)).toEqual([ + 'Translated first', + 'Translated second', + ]) + }) + + it('reuses unchanged blocks, keeps changed blocks as source, and omits deleted old blocks', () => { + 
const { builder, lexicalService } = createBuilder() + const sourceContent = editorState([ + paragraph('Source unchanged', 'block-a'), + paragraph('Source changed', 'block-b'), + ]) + const existingContent = editorState([ + paragraph('Translated unchanged', 'block-a'), + paragraph('Translated stale', 'block-b'), + paragraph('Translated deleted', 'block-c'), + ]) + lexicalService.extractRootBlocks.mockReturnValue([ + { + id: 'block-a', + type: 'paragraph', + text: 'Source unchanged', + fingerprint: 'fp-a', + index: 0, + }, + { + id: 'block-b', + type: 'paragraph', + text: 'Source changed', + fingerprint: 'fp-b-new', + index: 1, + }, + ]) + + const result = builder.build( + content({ content: sourceContent }), + row({ + content: existingContent, + sourceBlockSnapshots: [ + { id: 'block-a', fingerprint: 'fp-a' }, + { id: 'block-b', fingerprint: 'fp-b-old' }, + { id: 'block-c', fingerprint: 'fp-c' }, + ], + }), + ) + + expect(result?.stats).toEqual({ + totalBlockCount: 2, + changedBlockCount: 1, + reusedBlockCount: 1, + skippedReusableBlockCount: 0, + }) + expect(rootTexts(result!.translation.content!)).toEqual([ + 'Translated unchanged', + 'Source changed', + ]) + expect(result!.translation.text).toBe('Translated unchanged\nSource changed') + }) + + it('falls changed meta fields back to source values using stored md5 hashes', () => { + const { builder, lexicalService } = createBuilder() + lexicalService.extractRootBlocks.mockReturnValue([ + { + id: 'block-a', + type: 'paragraph', + text: 'Source unchanged', + fingerprint: 'fp-a', + index: 0, + }, + ]) + + const result = builder.build( + content({ + title: 'Title', + subtitle: 'New Subtitle', + summary: 'New Summary', + tags: ['new-tag'], + }), + row({ + title: 'Translated Title', + subtitle: 'Old Translated Subtitle', + summary: 'Old Translated Summary', + tags: ['old-translated-tag'], + sourceMetaHashes: { + title: md5('Title'), + subtitle: md5('Old Subtitle'), + summary: md5('Old Summary'), + tags: md5('old-tag'), + 
}, + }), + ) + + expect(result?.translation).toEqual( + expect.objectContaining({ + title: 'Translated Title', + subtitle: 'New Subtitle', + summary: 'New Summary', + tags: ['new-tag'], + }), + ) + }) + + it('returns null when existing translated content cannot be parsed', () => { + const { builder, lexicalService } = createBuilder() + lexicalService.extractRootBlocks.mockReturnValue([ + { + id: 'block-a', + type: 'paragraph', + text: 'Source unchanged', + fingerprint: 'fp-a', + index: 0, + }, + ]) + + expect(builder.build(content(), row({ content: 'not json' }))).toBeNull() + }) + + it('counts unchanged-fingerprint blocks without reusable segments as changed', () => { + const { builder, lexicalService } = createBuilder() + const sourceContent = editorState([emptyParagraph('block-empty')]) + const existingContent = editorState([emptyParagraph('block-empty')]) + lexicalService.extractRootBlocks.mockReturnValue([ + { + id: 'block-empty', + type: 'paragraph', + text: '', + fingerprint: 'fp-empty', + index: 0, + }, + ]) + + const result = builder.build( + content({ content: sourceContent }), + row({ + content: existingContent, + sourceBlockSnapshots: [{ id: 'block-empty', fingerprint: 'fp-empty' }], + }), + ) + + expect(result?.stats).toEqual({ + totalBlockCount: 1, + changedBlockCount: 1, + reusedBlockCount: 0, + skippedReusableBlockCount: 1, + }) + }) + + it('returns null when markdown rendering fails', () => { + const { builder, lexicalService } = createBuilder() + lexicalService.extractRootBlocks.mockReturnValue([ + { + id: 'block-a', + type: 'paragraph', + text: 'Source unchanged', + fingerprint: 'fp-a', + index: 0, + }, + ]) + lexicalService.lexicalToMarkdown.mockImplementation(() => { + throw new Error('markdown failed') + }) + + expect(builder.build(content(), row())).toBeNull() + }) + + it('returns null when current Lexical content cannot be parsed', () => { + const { builder, lexicalService } = createBuilder() + 
lexicalService.extractRootBlocks.mockReturnValue([ + { + id: 'block-a', + type: 'paragraph', + text: 'Source unchanged', + fingerprint: 'fp-a', + index: 0, + }, + ]) + + expect(builder.build(content({ content: 'not json' }), row())).toBeNull() + }) + + it('treats a current block without block id as changed and returns source text', () => { + const { builder, lexicalService } = createBuilder() + const sourceContent = editorState([paragraph('Anonymous source')]) + const existingContent = editorState([paragraph('Anonymous translated')]) + lexicalService.extractRootBlocks.mockReturnValue([ + { + id: null, + type: 'paragraph', + text: 'Anonymous source', + fingerprint: 'fp-anonymous', + index: 0, + }, + ]) + + const result = builder.build( + content({ content: sourceContent }), + row({ + content: existingContent, + sourceBlockSnapshots: [{ id: '', fingerprint: 'fp-anonymous' }], + }), + ) + + expect(result?.stats).toEqual({ + totalBlockCount: 1, + changedBlockCount: 1, + reusedBlockCount: 0, + skippedReusableBlockCount: 0, + }) + expect(rootTexts(result!.translation.content!)).toEqual([ + 'Anonymous source', + ]) + expect(result!.translation.text).toBe('Anonymous source') + }) + + it('falls back to the current Mermaid source when an unchanged block has an invalid reused diagram translation', () => { + const { builder, lexicalService } = createBuilder() + const sourceDiagram = 'graph TD\n A[Input] --> B[Output]' + const invalidTranslatedDiagram = 'sequenceDiagram\n A-->B' + const sourceContent = editorState([ + mermaidNode(sourceDiagram, 'block-mermaid'), + ]) + const existingContent = editorState([ + mermaidNode(invalidTranslatedDiagram, 'block-mermaid'), + ]) + lexicalService.extractRootBlocks.mockReturnValue([ + { + id: 'block-mermaid', + type: 'mermaid', + text: sourceDiagram, + fingerprint: 'fp-mermaid', + index: 0, + }, + ]) + + const result = builder.build( + content({ content: sourceContent }), + row({ + content: existingContent, + sourceBlockSnapshots: [ + { id: 
'block-mermaid', fingerprint: 'fp-mermaid' }, + ], + }), + ) + + expect(result?.stats).toEqual({ + totalBlockCount: 1, + changedBlockCount: 0, + reusedBlockCount: 1, + skippedReusableBlockCount: 0, + }) + expect(rootDiagrams(result!.translation.content!)).toEqual([sourceDiagram]) + }) + + it('keeps the current Mermaid source when its diagram changed under the same block id', () => { + const lexicalService = new LexicalService() + const builder = new LexicalPartialTranslationBuilder(lexicalService) + const oldSourceDiagram = 'graph TD\n A[Input] --> B[Old]' + const currentSourceDiagram = 'graph TD\n A[Input] --> C[Current]' + const oldTranslatedDiagram = 'graph TD\n A[Translated] --> B[Stale]' + const oldSourceContent = editorState([ + mermaidNode(oldSourceDiagram, 'block-mermaid'), + ]) + const currentSourceContent = editorState([ + mermaidNode(currentSourceDiagram, 'block-mermaid'), + ]) + const existingTranslatedContent = editorState([ + mermaidNode(oldTranslatedDiagram, 'block-mermaid'), + ]) + const [oldSourceBlock] = lexicalService.extractRootBlocks(oldSourceContent) + + const result = builder.build( + content({ content: currentSourceContent }), + row({ + content: existingTranslatedContent, + sourceBlockSnapshots: [ + { + id: 'block-mermaid', + fingerprint: oldSourceBlock.fingerprint, + }, + ], + }), + ) + + expect(result?.stats).toEqual({ + totalBlockCount: 1, + changedBlockCount: 1, + reusedBlockCount: 0, + skippedReusableBlockCount: 0, + }) + expect(rootDiagrams(result!.translation.content!)).toEqual([ + currentSourceDiagram, + ]) + expect(rootDiagrams(result!.translation.content!)).not.toContain( + oldTranslatedDiagram, + ) + }) +}) diff --git a/apps/core/test/src/processors/helper/helper.lexical.service.spec.ts b/apps/core/test/src/processors/helper/helper.lexical.service.spec.ts index bef7ab4ed0d..378a61e742a 100644 --- a/apps/core/test/src/processors/helper/helper.lexical.service.spec.ts +++ 
b/apps/core/test/src/processors/helper/helper.lexical.service.spec.ts @@ -952,6 +952,72 @@ describe('LexicalService', () => { updatedBlocks[2].fingerprint, ) }) + + it('changes the fingerprint when a Mermaid diagram changes without changing block id', () => { + const originalState = makeEditorState([ + { + type: 'mermaid', + version: 1, + diagram: 'graph TD\n A[Input] --> B[Output]', + $: { blockId: 'mermaid1' }, + }, + ]) + const updatedState = makeEditorState([ + { + type: 'mermaid', + version: 1, + diagram: 'graph TD\n A[Input] --> C[Changed]', + $: { blockId: 'mermaid1' }, + }, + ]) + + const [originalBlock] = service.extractRootBlocks(originalState) + const [updatedBlock] = service.extractRootBlocks(updatedState) + + expect(originalBlock.id).toBe('mermaid1') + expect(updatedBlock.id).toBe('mermaid1') + expect(originalBlock.text).toContain('B[Output]') + expect(updatedBlock.text).toContain('C[Changed]') + expect(originalBlock.fingerprint).not.toBe(updatedBlock.fingerprint) + }) + + it('changes the fingerprint when a poll question or option label changes without changing block id', () => { + const originalState = makeEditorState([ + { + type: 'poll', + version: 1, + question: 'Preferred release window?', + options: [ + { id: 'a', label: 'Morning', url: '/ignored-a' }, + { id: 'b', label: 'Evening', url: '/ignored-b' }, + ], + $: { blockId: 'poll0001' }, + }, + ]) + const updatedState = makeEditorState([ + { + type: 'poll', + version: 1, + question: 'Preferred deployment window?', + options: [ + { id: 'a', label: 'Morning', url: '/ignored-a' }, + { id: 'b', label: 'Afternoon', url: '/ignored-b' }, + ], + $: { blockId: 'poll0001' }, + }, + ]) + + const [originalBlock] = service.extractRootBlocks(originalState) + const [updatedBlock] = service.extractRootBlocks(updatedState) + + expect(originalBlock.id).toBe('poll0001') + expect(updatedBlock.id).toBe('poll0001') + expect(originalBlock.text).toContain('Preferred release window?') + 
expect(originalBlock.text).toContain('Evening') + expect(updatedBlock.text).toContain('Preferred deployment window?') + expect(updatedBlock.text).toContain('Afternoon') + expect(originalBlock.fingerprint).not.toBe(updatedBlock.fingerprint) + }) }) // ── Error handling ── diff --git a/docs/superpowers/plans/2026-05-24-lexical-block-partial-translation.md b/docs/superpowers/plans/2026-05-24-lexical-block-partial-translation.md new file mode 100644 index 00000000000..5250aeaccbc --- /dev/null +++ b/docs/superpowers/plans/2026-05-24-lexical-block-partial-translation.md @@ -0,0 +1,1129 @@ +# Lexical Block Partial Translation Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Return backend-composed partial Lexical translations that preserve unchanged translated blocks, render changed blocks as current source text, and asynchronously schedule regeneration without persisting the partial view. + +**Architecture:** Extract the existing incremental block-reuse logic from `LexicalTranslationStrategy` into a shared helper module, then use that helper from both the write path and a new read-path partial builder. `AiTranslationService` will try the partial builder only after whole-document freshness fails, return the composed transient row, and reuse the existing stale-regeneration scheduler. + +**Tech Stack:** NestJS services, TypeScript, Vitest, Lexical serialized JSON, `@haklex/rich-headless`, Drizzle-backed translation repository. + +--- + +## Scope Check + +This plan implements one subsystem: backend-only partial reuse for stale Lexical translations. It does not add schema, frontend merge logic, per-block persistence, or new search-index behavior. 
+ +## File Structure + +| File | Responsibility | +| --- | --- | +| `apps/core/src/modules/ai/ai-translation/lexical-block-reuse.ts` | Shared block grouping, shape compatibility, reusable translation backfill, and Mermaid guard. | +| `apps/core/src/modules/ai/ai-translation/strategies/lexical-translation.strategy.ts` | Continue full and incremental generation, now using shared block-reuse helpers. | +| `apps/core/src/modules/ai/ai-translation/lexical-partial-translation.builder.ts` | Build transient partial translation rows for read paths. | +| `apps/core/src/modules/ai/ai.module.ts` | Register the partial builder as a Nest provider. | +| `apps/core/src/modules/ai/ai-translation/ai-translation.service.ts` | Attempt partial read fallback and schedule regeneration through existing scheduler. | +| `apps/core/test/src/modules/ai/lexical-block-reuse.spec.ts` | Unit tests for shared reuse helpers and Mermaid guard behavior. | +| `apps/core/test/src/modules/ai/lexical-partial-translation.builder.spec.ts` | Unit tests for partial composition edge cases. | +| `apps/core/test/src/modules/ai/ai-translation.service.spec.ts` | Service-level tests for read-path scheduling and no-persistence behavior. 
| + +## Task 1: Extract Shared Block-Reuse Helpers + +**Files:** +- Create: `apps/core/src/modules/ai/ai-translation/lexical-block-reuse.ts` +- Modify: `apps/core/src/modules/ai/ai-translation/strategies/lexical-translation.strategy.ts` +- Test: `apps/core/test/src/modules/ai/lexical-block-reuse.spec.ts` + +- [ ] **Step 1: Add failing helper tests** + +Create `apps/core/test/src/modules/ai/lexical-block-reuse.spec.ts`: + +```ts +import { describe, expect, it, vi } from 'vitest' + +import { + backfillReusableBlockTranslations, + canReuseBlockTranslations, + guardMermaidTranslations, + groupSegmentsByBlock, +} from '~/modules/ai/ai-translation/lexical-block-reuse' +import type { LexicalTranslationResult } from '~/modules/ai/ai-translation/lexical-translation-parser' + +const textSegment = (id: string, blockId: string, text: string) => ({ + id, + blockId, + text, + node: { text }, + translatable: true, + rootIndex: 0, + flowId: null, +}) + +const propSegment = ( + id: string, + blockId: string, + property: string, + text: string, + key?: string, +) => ({ + id, + blockId, + property, + text, + key, + node: key ? 
{ [property]: { [key]: text } } : { [property]: text }, + rootIndex: 0, +}) + +describe('lexical block reuse helpers', () => { + it('groups text and property segments by block id', () => { + const result = { + segments: [ + textSegment('t_0', 'block-a', 'A'), + textSegment('t_1', 'block-b', 'B'), + ], + propertySegments: [propSegment('p_0', 'block-a', 'caption', 'Caption')], + editorState: { root: { children: [] } }, + } as unknown as LexicalTranslationResult + + const grouped = groupSegmentsByBlock(result) + + expect(grouped.get('block-a')?.segments.map((s) => s.id)).toEqual(['t_0']) + expect(grouped.get('block-a')?.propertySegments.map((s) => s.id)).toEqual([ + 'p_0', + ]) + expect(grouped.get('block-b')?.segments.map((s) => s.id)).toEqual(['t_1']) + }) + + it('rejects reusable blocks when property shape differs', () => { + expect( + canReuseBlockTranslations( + { + segments: [textSegment('t_0', 'block-a', 'A') as any], + propertySegments: [propSegment('p_0', 'block-a', 'caption', 'A') as any], + }, + { + segments: [textSegment('t_9', 'block-a', 'Translated A') as any], + propertySegments: [propSegment('p_9', 'block-a', 'alt', 'Translated A') as any], + }, + ), + ).toBe(false) + }) + + it('backfills only reusable unchanged blocks', () => { + const current = { + segments: [ + textSegment('t_0', 'block-a', '原文 A'), + textSegment('t_1', 'block-b', '原文 B'), + ], + propertySegments: [], + editorState: { root: { children: [] } }, + } as unknown as LexicalTranslationResult + const translated = { + segments: [ + textSegment('t_0', 'block-a', 'Translated A'), + textSegment('t_1', 'block-b', 'Translated B'), + ], + propertySegments: [], + editorState: { root: { children: [] } }, + } as unknown as LexicalTranslationResult + const output = new Map() + + const stats = backfillReusableBlockTranslations( + current, + translated, + new Set(['block-a']), + output, + ) + + expect(stats).toEqual({ reusedBlockIds: ['block-a'], skippedBlockIds: [] }) + 
expect(output.get('t_0')).toBe('Translated A') + expect(output.has('t_1')).toBe(false) + }) + + it('removes invalid Mermaid translations before restore', () => { + const warn = vi.fn() + const sourceNode = { type: 'mermaid', diagram: 'flowchart TD\nA[源] --> B[终]' } + const result = { + segments: [], + propertySegments: [ + { + id: 'p_0', + blockId: 'block-a', + property: 'diagram', + text: sourceNode.diagram, + node: sourceNode, + rootIndex: 0, + }, + ], + editorState: { root: { children: [] } }, + } as unknown as LexicalTranslationResult + const translations = new Map([['p_0', 'not a mermaid diagram']]) + + guardMermaidTranslations(result, translations, warn) + + expect(translations.has('p_0')).toBe(false) + expect(warn).toHaveBeenCalledWith( + expect.stringContaining('Mermaid translation rejected'), + ) + }) +}) +``` + +- [ ] **Step 2: Run the failing helper tests** + +Run: + +```bash +pnpm -C apps/core exec vitest run test/src/modules/ai/lexical-block-reuse.spec.ts +``` + +Expected result: TypeScript module resolution fails because `~/modules/ai/ai-translation/lexical-block-reuse` does not exist. 
+ +- [ ] **Step 3: Create the shared helper module** + +Create `apps/core/src/modules/ai/ai-translation/lexical-block-reuse.ts`: + +```ts +import type { + LexicalTranslationResult, + PropertySegment, + TranslationSegment, +} from './lexical-translation-parser' +import { validateMermaidTranslation } from './mermaid-translation-guard' + +export interface BlockTranslationSegments { + segments: TranslationSegment[] + propertySegments: PropertySegment[] +} + +export interface BackfillReusableBlockResult { + reusedBlockIds: string[] + skippedBlockIds: string[] +} + +export function groupSegmentsByBlock( + result: LexicalTranslationResult, +): Map { + const byBlock = new Map() + + const getBucket = (blockId: string) => { + let bucket = byBlock.get(blockId) + if (!bucket) { + bucket = { segments: [], propertySegments: [] } + byBlock.set(blockId, bucket) + } + return bucket + } + + for (const segment of result.segments) { + if (!segment.blockId || !segment.translatable) continue + getBucket(segment.blockId).segments.push(segment) + } + + for (const propertySegment of result.propertySegments) { + if (!propertySegment.blockId) continue + getBucket(propertySegment.blockId).propertySegments.push(propertySegment) + } + + return byBlock +} + +export function canReuseBlockTranslations( + currentBlock: BlockTranslationSegments, + translatedBlock: BlockTranslationSegments, +): boolean { + if (currentBlock.segments.length !== translatedBlock.segments.length) { + return false + } + + if ( + currentBlock.propertySegments.length !== + translatedBlock.propertySegments.length + ) { + return false + } + + return currentBlock.propertySegments.every((segment, index) => { + const translatedSegment = translatedBlock.propertySegments[index] + return ( + translatedSegment.property === segment.property && + translatedSegment.key === segment.key + ) + }) +} + +export function backfillReusableBlockTranslations( + currentResult: LexicalTranslationResult, + translatedResult: LexicalTranslationResult, 
+ unchangedBlockIds: Set, + output: Map, +): BackfillReusableBlockResult { + const currentBlocks = groupSegmentsByBlock(currentResult) + const translatedBlocks = groupSegmentsByBlock(translatedResult) + const reusedBlockIds: string[] = [] + const skippedBlockIds: string[] = [] + + for (const blockId of unchangedBlockIds) { + const currentBlock = currentBlocks.get(blockId) + const translatedBlock = translatedBlocks.get(blockId) + + if (!currentBlock || !translatedBlock) { + skippedBlockIds.push(blockId) + continue + } + if (!canReuseBlockTranslations(currentBlock, translatedBlock)) { + skippedBlockIds.push(blockId) + continue + } + + currentBlock.segments.forEach((segment, index) => { + output.set(segment.id, translatedBlock.segments[index].text) + }) + + currentBlock.propertySegments.forEach((propertySegment, index) => { + output.set( + propertySegment.id, + translatedBlock.propertySegments[index].text, + ) + }) + reusedBlockIds.push(blockId) + } + + return { reusedBlockIds, skippedBlockIds } +} + +export function guardMermaidTranslations( + parseResult: LexicalTranslationResult, + translations: Map, + onReject?: (message: string) => void, +): void { + for (const prop of parseResult.propertySegments) { + if (prop.property !== 'diagram' || prop.node?.type !== 'mermaid') continue + const translated = translations.get(prop.id) + if (translated === undefined) continue + if (translated === prop.text) continue + + const validation = validateMermaidTranslation(prop.text, translated) + if (!validation.ok) { + onReject?.( + `Mermaid translation rejected: reason=${validation.reason} sourceLen=${prop.text.length} translatedLen=${translated.length}`, + ) + translations.delete(prop.id) + } + } +} +``` + +- [ ] **Step 4: Refactor the Lexical strategy to use the shared helper** + +Modify `apps/core/src/modules/ai/ai-translation/strategies/lexical-translation.strategy.ts`: + +```ts +import { + backfillReusableBlockTranslations, + guardMermaidTranslations, +} from 
'../lexical-block-reuse' +``` + +Remove the local `BlockTranslationSegments` interface and delete the private methods `groupSegmentsByBlock()`, `canReuseBlockTranslations()`, and `backfillReusableBlockTranslations()`. + +Replace existing calls: + +```ts +const backfillResult = backfillReusableBlockTranslations( + parseResult, + translatedParseResult, + unchangedBlockIds, + allTranslations, +) +this.logger.log( + `Incremental reuse: reused=${backfillResult.reusedBlockIds.length} skipped=${backfillResult.skippedBlockIds.length}`, +) +``` + +Replace `this.guardMermaidTranslations(parseResult, allTranslations)` with: + +```ts +guardMermaidTranslations(parseResult, allTranslations, (message) => + this.logger.warn(message), +) +``` + +Delete the private `guardMermaidTranslations()` method from the strategy after both call sites use the shared helper. + +- [ ] **Step 5: Run helper tests and existing translation tests** + +Run: + +```bash +pnpm -C apps/core exec vitest run test/src/modules/ai/lexical-block-reuse.spec.ts test/src/modules/ai/ai-translation.service.spec.ts +``` + +Expected result: all tests pass. 
+ +- [ ] **Step 6: Commit the helper extraction** + +Run: + +```bash +git add apps/core/src/modules/ai/ai-translation/lexical-block-reuse.ts apps/core/src/modules/ai/ai-translation/strategies/lexical-translation.strategy.ts apps/core/test/src/modules/ai/lexical-block-reuse.spec.ts +git commit --no-verify -m "refactor(ai): share lexical block reuse helpers" +``` + +## Task 2: Add Partial Lexical Translation Builder + +**Files:** +- Create: `apps/core/src/modules/ai/ai-translation/lexical-partial-translation.builder.ts` +- Test: `apps/core/test/src/modules/ai/lexical-partial-translation.builder.spec.ts` + +- [ ] **Step 1: Add failing partial-builder tests** + +Create `apps/core/test/src/modules/ai/lexical-partial-translation.builder.spec.ts`: + +```ts +import { describe, expect, it, vi } from 'vitest' + +import { LexicalPartialTranslationBuilder } from '~/modules/ai/ai-translation/lexical-partial-translation.builder' +import type { AiTranslationRow } from '~/modules/ai/ai-translation/ai-translation.types' +import { ContentFormat } from '~/shared/types/content-format.type' + +const paragraph = (blockId: string | null, text: string) => ({ + type: 'paragraph', + version: 1, + children: [{ type: 'text', version: 1, text, format: 0 }], + ...(blockId ? 
{ $: { blockId } } : {}), +}) + +const editorState = (children: any[]) => + JSON.stringify({ root: { type: 'root', version: 1, children } }) + +const translationRow = (overrides: Partial = {}): AiTranslationRow => + ({ + id: 'translation-1' as any, + hash: 'old-hash', + refId: 'post-1' as any, + refType: 'post', + lang: 'en', + sourceLang: 'zh', + title: 'Old translated title', + text: 'Old translated markdown', + subtitle: 'Old translated subtitle', + summary: 'Old translated summary', + tags: ['old-tag'], + sourceModifiedAt: null, + aiModel: null, + aiProvider: null, + contentFormat: ContentFormat.Lexical, + content: editorState([ + paragraph('block-a', 'Translated A'), + paragraph('block-b', 'Translated B'), + paragraph('deleted-block', 'Deleted translation'), + ]), + sourceBlockSnapshots: [ + { id: 'block-a', fingerprint: 'fp-a', type: 'paragraph', index: 0 }, + { id: 'block-b', fingerprint: 'old-fp-b', type: 'paragraph', index: 1 }, + { id: 'deleted-block', fingerprint: 'fp-deleted', type: 'paragraph', index: 2 }, + ], + sourceMetaHashes: { + title: 'title-hash', + subtitle: 'subtitle-hash', + summary: 'summary-hash', + tags: 'tags-hash', + }, + createdAt: new Date('2026-05-24T00:00:00.000Z'), + ...overrides, + }) as AiTranslationRow + +const content = { + title: '当前标题', + text: '当前正文', + subtitle: '当前副标题', + summary: '当前摘要', + tags: ['当前标签'], + contentFormat: ContentFormat.Lexical, + content: editorState([ + paragraph('block-a', '原文 A'), + paragraph('block-b', '新的原文 B'), + ]), +} + +const createBuilder = () => { + const lexicalService = { + extractRootBlocks: vi.fn(() => [ + { id: 'block-a', type: 'paragraph', text: '原文 A', fingerprint: 'fp-a', index: 0 }, + { id: 'block-b', type: 'paragraph', text: '新的原文 B', fingerprint: 'new-fp-b', index: 1 }, + ]), + lexicalToMarkdown: vi.fn(() => 'Translated A\n\n新的原文 B'), + } + return { + lexicalService, + builder: new LexicalPartialTranslationBuilder(lexicalService as any), + } +} + 
+describe('LexicalPartialTranslationBuilder', () => { + it('reuses unchanged blocks, falls changed blocks back to source, and drops deleted blocks', () => { + const { builder, lexicalService } = createBuilder() + + const result = builder.build(content, translationRow()) + + expect(result).not.toBeNull() + expect(result?.stats).toEqual({ + totalBlockCount: 2, + changedBlockCount: 1, + reusedBlockCount: 1, + skippedReusableBlockCount: 0, + }) + const parsed = JSON.parse(result!.translation.content!) + expect(parsed.root.children.map((child: any) => child.children[0].text)).toEqual([ + 'Translated A', + '新的原文 B', + ]) + expect(result?.translation.text).toBe('Translated A\n\n新的原文 B') + expect(lexicalService.lexicalToMarkdown).toHaveBeenCalledWith( + result?.translation.content, + ) + }) + + it('falls changed meta fields back to source using the incremental md5 scheme', () => { + const { builder } = createBuilder() + + const result = builder.build(content, translationRow()) + + expect(result?.translation.title).toBe('当前标题') + expect(result?.translation.subtitle).toBe('当前副标题') + expect(result?.translation.summary).toBe('当前摘要') + expect(result?.translation.tags).toEqual(['当前标签']) + }) + + it('returns null when the existing translated content cannot be parsed', () => { + const { builder } = createBuilder() + + expect(builder.build(content, translationRow({ content: '{broken' }))).toBeNull() + }) + + it('treats a block without block id as changed', () => { + const lexicalService = { + extractRootBlocks: vi.fn(() => [ + { id: null, type: 'paragraph', text: '无 ID', fingerprint: 'fp-no-id', index: 0 }, + ]), + lexicalToMarkdown: vi.fn(() => '无 ID'), + } + const builder = new LexicalPartialTranslationBuilder(lexicalService as any) + const result = builder.build( + { + ...content, + content: editorState([paragraph(null, '无 ID')]), + }, + translationRow({ + content: editorState([paragraph('old-id', 'Translated old')]), + sourceBlockSnapshots: [{ id: 'old-id', fingerprint: 
'fp-no-id' }], + }), + ) + + const parsed = JSON.parse(result!.translation.content!) + expect(parsed.root.children[0].children[0].text).toBe('无 ID') + expect(result?.stats.changedBlockCount).toBe(1) + expect(result?.stats.reusedBlockCount).toBe(0) + }) +}) +``` + +- [ ] **Step 2: Run the failing partial-builder tests** + +Run: + +```bash +pnpm -C apps/core exec vitest run test/src/modules/ai/lexical-partial-translation.builder.spec.ts +``` + +Expected result: TypeScript module resolution fails because `lexical-partial-translation.builder.ts` does not exist. + +- [ ] **Step 3: Implement the partial builder** + +Create `apps/core/src/modules/ai/ai-translation/lexical-partial-translation.builder.ts`: + +```ts +import { Injectable, Logger } from '@nestjs/common' + +import { LexicalService } from '~/processors/helper/helper.lexical.service' +import { ContentFormat } from '~/shared/types/content-format.type' +import { md5 } from '~/utils/tool.util' + +import type { ArticleContent, AiTranslationRow } from './ai-translation.types' +import { + backfillReusableBlockTranslations, + guardMermaidTranslations, +} from './lexical-block-reuse' +import { + parseLexicalForTranslation, + restoreLexicalTranslation, +} from './lexical-translation-parser' + +interface LexicalSourceBlockSnapshot { + id: string + fingerprint: string + type?: string + index?: number +} + +interface LexicalSourceMetaHashes { + title?: string | null + subtitle?: string | null + summary?: string | null + tags?: string | null +} + +export interface PartialLexicalTranslationStats { + totalBlockCount: number + changedBlockCount: number + reusedBlockCount: number + skippedReusableBlockCount: number +} + +export interface PartialLexicalTranslationResult { + translation: AiTranslationRow + stats: PartialLexicalTranslationStats +} + +@Injectable() +export class LexicalPartialTranslationBuilder { + private readonly logger = new Logger(LexicalPartialTranslationBuilder.name) + + constructor(private readonly 
lexicalService: LexicalService) {} + + build( + content: ArticleContent, + existing: AiTranslationRow, + ): PartialLexicalTranslationResult | null { + if (content.contentFormat !== ContentFormat.Lexical || !content.content) { + return null + } + if ( + existing.contentFormat !== ContentFormat.Lexical || + !existing.content + ) { + return null + } + + const snapshots = this.readBlockSnapshots(existing.sourceBlockSnapshots) + if (!snapshots.length) return null + + const currentBlocks = this.lexicalService.extractRootBlocks(content.content) + if (!currentBlocks.length) return null + + const snapshotMap = new Map(snapshots.map((s) => [s.id, s.fingerprint])) + const unchangedBlockIds = new Set() + let changedBlockCount = 0 + + for (const block of currentBlocks) { + if ( + block.id && + snapshotMap.has(block.id) && + snapshotMap.get(block.id) === block.fingerprint + ) { + unchangedBlockIds.add(block.id) + } else { + changedBlockCount += 1 + } + } + + try { + const currentParseResult = parseLexicalForTranslation(content.content) + const translatedParseResult = parseLexicalForTranslation(existing.content) + const translations = new Map() + const backfill = backfillReusableBlockTranslations( + currentParseResult, + translatedParseResult, + unchangedBlockIds, + translations, + ) + + guardMermaidTranslations(currentParseResult, translations, (message) => + this.logger.warn(message), + ) + + const translatedContent = restoreLexicalTranslation( + currentParseResult, + translations, + ) + const text = this.lexicalService.lexicalToMarkdown(translatedContent) + const meta = this.composeMeta(content, existing) + + this.logger.log( + `Partial lexical translation: refId=${existing.refId} lang=${existing.lang} totalBlocks=${currentBlocks.length} changed=${changedBlockCount} reused=${backfill.reusedBlockIds.length} skipped=${backfill.skippedBlockIds.length}`, + ) + + return { + translation: { + ...existing, + ...meta, + text, + contentFormat: ContentFormat.Lexical, + content: 
translatedContent, + }, + stats: { + totalBlockCount: currentBlocks.length, + changedBlockCount, + reusedBlockCount: backfill.reusedBlockIds.length, + skippedReusableBlockCount: backfill.skippedBlockIds.length, + }, + } + } catch (error) { + this.logger.warn( + `Partial lexical translation failed: refId=${existing.refId} lang=${existing.lang} message=${(error as Error).message}`, + ) + return null + } + } + + private readBlockSnapshots(value: unknown): LexicalSourceBlockSnapshot[] { + if (!Array.isArray(value)) return [] + return value.filter((item): item is LexicalSourceBlockSnapshot => { + if (!item || typeof item !== 'object') return false + const row = item as Record<string, unknown> + return typeof row.id === 'string' && typeof row.fingerprint === 'string' + }) + } + + private readMetaHashes(value: unknown): LexicalSourceMetaHashes | null { + if (!value || typeof value !== 'object' || Array.isArray(value)) { + return null + } + return value as LexicalSourceMetaHashes + } + + private composeMeta(content: ArticleContent, existing: AiTranslationRow) { + const oldMetaHashes = this.readMetaHashes(existing.sourceMetaHashes) + const title = + oldMetaHashes?.title === md5(content.title) ? existing.title : content.title + + const subtitle = content.subtitle + ? oldMetaHashes?.subtitle === md5(content.subtitle) && existing.subtitle + ? existing.subtitle + : content.subtitle + : null + + const summary = content.summary + ? oldMetaHashes?.summary === md5(content.summary) && existing.summary + ? existing.summary + : content.summary + : null + + const tags = content.tags?.length + ? oldMetaHashes?.tags === md5(content.tags.join('|||')) && + existing.tags?.length + ? existing.tags + : content.tags + : (content.tags ?? []) + + return { title, subtitle, summary, tags } + } +} +``` + +- [ ] **Step 4: Run the partial-builder tests** + +Run: + +```bash +pnpm -C apps/core exec vitest run test/src/modules/ai/lexical-partial-translation.builder.spec.ts +``` + +Expected result: all tests pass. 
+ +- [ ] **Step 5: Commit the builder** + +Run: + +```bash +git add apps/core/src/modules/ai/ai-translation/lexical-partial-translation.builder.ts apps/core/test/src/modules/ai/lexical-partial-translation.builder.spec.ts +git commit --no-verify -m "feat(ai): build partial lexical translations" +``` + +## Task 3: Wire Partial Reads Into Translation Service + +**Files:** +- Modify: `apps/core/src/modules/ai/ai.module.ts` +- Modify: `apps/core/src/modules/ai/ai-translation/ai-translation.service.ts` +- Modify: `apps/core/test/src/modules/ai/ai-translation.service.spec.ts` + +- [ ] **Step 1: Add failing service tests** + +Append these tests to `apps/core/test/src/modules/ai/ai-translation.service.spec.ts`: + +```ts + it('returns a partial lexical translation for stale article translation and schedules regeneration', async () => { + const { databaseService, repository, service, partialBuilder, translationConsistencyService } = + createService() + const stale = row({ + contentFormat: ContentFormat.Lexical, + content: '{"root":{"children":[]}}', + }) + const partial = row({ + contentFormat: ContentFormat.Lexical, + content: '{"root":{"children":[{"type":"paragraph"}]}}', + text: 'partial markdown', + }) + + databaseService.findGlobalById.mockResolvedValue({ + id: 'post-1', + document: { + id: 'post-1', + title: '源标题', + text: '源正文', + contentFormat: ContentFormat.Lexical, + content: '{"root":{"children":[]}}', + meta: { lang: 'zh' }, + }, + }) + repository.findByRefAndLang.mockResolvedValue(stale) + translationConsistencyService.evaluateTranslationFreshness.mockReturnValue('stale') + partialBuilder.build.mockReturnValue({ + translation: partial, + stats: { + totalBlockCount: 2, + changedBlockCount: 1, + reusedBlockCount: 1, + skippedReusableBlockCount: 0, + }, + }) + const schedule = vi + .spyOn(service, 'scheduleRegenerationForStaleTranslations') + .mockResolvedValue(undefined) + + await expect(service.getTranslationForArticle('post-1', 'en')).resolves.toEqual(partial) + 
expect(repository.updateById).not.toHaveBeenCalled() + expect(repository.upsert).not.toHaveBeenCalled() + expect(schedule).toHaveBeenCalledWith(['post-1'], 'en') + }) + + it('keeps existing stale behavior when partial composition is unavailable', async () => { + const { databaseService, repository, service, partialBuilder, translationConsistencyService } = + createService() + const stale = row({ + contentFormat: ContentFormat.Lexical, + content: '{"root":{"children":[]}}', + }) + + databaseService.findGlobalById.mockResolvedValue({ + id: 'post-1', + document: { + id: 'post-1', + title: '源标题', + text: '源正文', + contentFormat: ContentFormat.Lexical, + content: '{"root":{"children":[]}}', + meta: { lang: 'zh' }, + }, + }) + repository.findByRefAndLang.mockResolvedValue(stale) + translationConsistencyService.evaluateTranslationFreshness.mockReturnValue('stale') + partialBuilder.build.mockReturnValue(null) + const schedule = vi + .spyOn(service, 'scheduleRegenerationForStaleTranslations') + .mockResolvedValue(undefined) + + await expect(service.getTranslationForArticle('post-1', 'en')).resolves.toBeNull() + expect(schedule).toHaveBeenCalledWith(['post-1'], 'en') + }) +``` + +Update `createService()` in the same test file so mocks expose the new dependencies: + +```ts + const translationConsistencyService = { + evaluateTranslationFreshness: vi.fn(), + partitionValidAndStaleTranslations: vi.fn(), + filterTrulyStaleTranslations: vi.fn(), + } + const partialBuilder = { build: vi.fn() } +``` + +Return `partialBuilder` and `translationConsistencyService` from `createService()`, and pass `partialBuilder as any` into the `AiTranslationService` constructor at the same position added in implementation. + +- [ ] **Step 2: Run the failing service tests** + +Run: + +```bash +pnpm -C apps/core exec vitest run test/src/modules/ai/ai-translation.service.spec.ts +``` + +Expected result: constructor mismatch or missing partial-builder integration failure. 
+ +- [ ] **Step 3: Register the builder provider** + +Modify `apps/core/src/modules/ai/ai.module.ts`: + +```ts +import { LexicalPartialTranslationBuilder } from './ai-translation/lexical-partial-translation.builder' +``` + +Add `LexicalPartialTranslationBuilder` to the `providers` array next to `LexicalTranslationStrategy`. + +- [ ] **Step 4: Inject the builder into `AiTranslationService`** + +Modify imports in `apps/core/src/modules/ai/ai-translation/ai-translation.service.ts`: + +```ts +import { LexicalPartialTranslationBuilder } from './lexical-partial-translation.builder' +``` + +Add the constructor parameter after `lexicalService`: + +```ts + private readonly lexicalService: LexicalService, + private readonly lexicalPartialTranslationBuilder: LexicalPartialTranslationBuilder, + private readonly aiTaskService: AiTaskService, +``` + +- [ ] **Step 5: Add a scheduler helper for stale read paths** + +Add this private method to `AiTranslationService` near `scheduleRegenerationForStaleTranslations()`: + +```ts + private scheduleStaleTranslationRegenerationBestEffort( + articleId: string, + targetLang: string, + ) { + this.scheduleRegenerationForStaleTranslations([articleId], targetLang).catch( + (err) => + this.logger.error( + 'Failed to schedule stale translation regeneration', + err, + ), + ) + } +``` + +- [ ] **Step 6: Use the partial builder in `getTranslationForArticle()`** + +Replace the end of `getTranslationForArticle()` with: + +```ts + if (status === 'valid') { + return translation + } + + if (status === 'stale') { + const partial = this.lexicalPartialTranslationBuilder.build( + this.toArticleContent(document), + translation, + ) + this.scheduleStaleTranslationRegenerationBestEffort(articleId, targetLang) + return partial?.translation ?? 
null + } + + return null +``` + +- [ ] **Step 7: Use the partial builder in `getTranslationAndAvailableLanguages()`** + +Modify the matched translation section: + +```ts + let matchedTranslation: AITranslationModel | null = null + if (targetLang) { + const direct = translations.find((t) => t.lang === targetLang) + if (direct) { + const directStatus = + this.translationConsistencyService.evaluateTranslationFreshness( + snapshot, + direct, + ) + if (directStatus === 'valid') { + matchedTranslation = direct + } else if (directStatus === 'stale') { + const partial = this.lexicalPartialTranslationBuilder.build( + this.toArticleContent(document), + direct, + ) + matchedTranslation = partial?.translation ?? null + } + } + } +``` + +Keep `availableTranslations` as `validLangs` unless product requirements later require exposing partial languages as available. This preserves existing language-list semantics while allowing a requested stale language to render a backend-composed response. + +- [ ] **Step 8: Run service tests** + +Run: + +```bash +pnpm -C apps/core exec vitest run test/src/modules/ai/ai-translation.service.spec.ts +``` + +Expected result: all tests pass. 
+ +- [ ] **Step 9: Commit service wiring** + +Run: + +```bash +git add apps/core/src/modules/ai/ai.module.ts apps/core/src/modules/ai/ai-translation/ai-translation.service.ts apps/core/test/src/modules/ai/ai-translation.service.spec.ts +git commit --no-verify -m "feat(ai): return partial lexical translation reads" +``` + +## Task 4: Complete Edge-Case Coverage and Verification + +**Files:** +- Modify: `apps/core/test/src/modules/ai/lexical-partial-translation.builder.spec.ts` +- Modify: `apps/core/test/src/modules/ai/lexical-block-reuse.spec.ts` +- Verify: `apps/core/src/modules/ai/ai-translation/lexical-partial-translation.builder.ts` + +- [ ] **Step 1: Add exact edge-case tests from the spec** + +Append these tests to `apps/core/test/src/modules/ai/lexical-partial-translation.builder.spec.ts`: + +```ts + it('returns an equivalent composed result when the document hash is stale but all blocks are unchanged', () => { + const lexicalService = { + extractRootBlocks: vi.fn(() => [ + { id: 'block-a', type: 'paragraph', text: '原文 A', fingerprint: 'fp-a', index: 0 }, + { id: 'block-b', type: 'paragraph', text: '原文 B', fingerprint: 'old-fp-b', index: 1 }, + ]), + lexicalToMarkdown: vi.fn(() => 'Translated A\n\nTranslated B'), + } + const builder = new LexicalPartialTranslationBuilder(lexicalService as any) + const result = builder.build( + { + ...content, + content: editorState([ + paragraph('block-a', '原文 A'), + paragraph('block-b', '原文 B'), + ]), + }, + translationRow(), + ) + + const parsed = JSON.parse(result!.translation.content!) 
+ expect(parsed.root.children.map((child: any) => child.children[0].text)).toEqual([ + 'Translated A', + 'Translated B', + ]) + expect(result?.stats.changedBlockCount).toBe(0) + expect(result?.stats.reusedBlockCount).toBe(2) + }) + + it('does not persist or leak an old translated block deleted from the source', () => { + const lexicalService = { + extractRootBlocks: vi.fn(() => [ + { id: 'block-a', type: 'paragraph', text: '原文 A', fingerprint: 'fp-a', index: 0 }, + ]), + lexicalToMarkdown: vi.fn(() => 'Translated A'), + } + const builder = new LexicalPartialTranslationBuilder(lexicalService as any) + const result = builder.build( + { + ...content, + content: editorState([paragraph('block-a', '原文 A')]), + }, + translationRow(), + ) + + const parsed = JSON.parse(result!.translation.content!) + expect(parsed.root.children).toHaveLength(1) + expect(parsed.root.children[0].children[0].text).toBe('Translated A') + expect(JSON.stringify(parsed)).not.toContain('Deleted translation') + }) +``` + +- [ ] **Step 2: Run focused tests** + +Run: + +```bash +pnpm -C apps/core exec vitest run test/src/modules/ai/lexical-block-reuse.spec.ts test/src/modules/ai/lexical-partial-translation.builder.spec.ts test/src/modules/ai/ai-translation.service.spec.ts +``` + +Expected result: all tests pass. + +- [ ] **Step 3: Run the broader core test command for the touched area** + +Run: + +```bash +pnpm -C apps/core run test -- test/src/modules/ai/lexical-block-reuse.spec.ts test/src/modules/ai/lexical-partial-translation.builder.spec.ts test/src/modules/ai/ai-translation.service.spec.ts +``` + +Expected result: all listed test files pass under the package test script. + +- [ ] **Step 4: Run type and lint checks for changed files** + +Run: + +```bash +pnpm -C apps/core run lint +``` + +Expected result: lint completes without errors. 
If this repository lint command is broader than the touched area and fails on unrelated existing files, record the unrelated failure text and run the focused Vitest commands from Step 2 as the verification floor. + +- [ ] **Step 5: Inspect git diff for persistence and cache invariants** + +Run: + +```bash +git diff -- apps/core/src/modules/ai/ai-translation apps/core/src/modules/ai/ai.module.ts +``` + +Expected inspection result: + +- No migration files are added. +- No repository `upsert()` or `updateById()` call writes a partial translation. +- No search indexing path consumes the partial builder result. +- `getTranslationForArticle()` schedules regeneration through `scheduleRegenerationForStaleTranslations()`. +- `LexicalTranslationStrategy` and `LexicalPartialTranslationBuilder` both use `lexical-block-reuse.ts`. + +- [ ] **Step 6: Commit final coverage and verification fixes** + +Run: + +```bash +git add apps/core/test/src/modules/ai/lexical-partial-translation.builder.spec.ts apps/core/test/src/modules/ai/lexical-block-reuse.spec.ts apps/core/src/modules/ai/ai-translation/lexical-partial-translation.builder.ts apps/core/src/modules/ai/ai-translation/lexical-block-reuse.ts apps/core/src/modules/ai/ai-translation/strategies/lexical-translation.strategy.ts apps/core/src/modules/ai/ai-translation/ai-translation.service.ts apps/core/src/modules/ai/ai.module.ts apps/core/test/src/modules/ai/ai-translation.service.spec.ts +git commit --no-verify -m "test(ai): cover lexical partial translation edges" +``` + +## Final Verification + +- [ ] **Run all focused tests** + +```bash +pnpm -C apps/core exec vitest run test/src/modules/ai/lexical-block-reuse.spec.ts test/src/modules/ai/lexical-partial-translation.builder.spec.ts test/src/modules/ai/ai-translation.service.spec.ts +``` + +Expected result: all focused tests pass. 
+ +- [ ] **Run lint** + +```bash +pnpm -C apps/core run lint +``` + +Expected result: lint passes, or unrelated pre-existing lint failures are documented with exact output. + +- [ ] **Check final status** + +```bash +git status --short +``` + +Expected result: clean working tree after the final commit, or only intentionally uncommitted files explicitly listed in the handoff. + +## Self-Review Checklist + +| Spec requirement | Covered by | +| --- | --- | +| Backend-composed partial translation | Task 2, Task 3 | +| Changed blocks fall back to source | Task 2 tests, Task 4 invariant tests | +| No frontend merge logic | Task 3 service returns translation-like object | +| No canonical persistence of partial rows | Task 3 tests, Task 4 diff inspection | +| Shared read/write block helpers | Task 1 | +| Mermaid guard on partial reuse | Task 1 helper, Task 2 builder | +| Identical meta hash scheme | Task 2 builder | +| Existing scheduler reuse | Task 3 | +| Cache/search non-pollution | Task 4 diff inspection | +| Deleted old translated block does not leak | Task 4 tests | +| Observability for partial read counts | Task 2 builder log | diff --git a/docs/superpowers/specs/2026-05-24-lexical-block-partial-translation-design.md b/docs/superpowers/specs/2026-05-24-lexical-block-partial-translation-design.md new file mode 100644 index 00000000000..d42bbfae6b6 --- /dev/null +++ b/docs/superpowers/specs/2026-05-24-lexical-block-partial-translation-design.md @@ -0,0 +1,304 @@ +# Lexical Block Partial Translation Design + +## Context + +AI translation currently persists article-like translated fields. A translated +row replaces the source `title`, `text`, `content`, `subtitle`, `summary`, and +`tags` with translated values before the frontend receives the response. The +frontend therefore cannot safely repair stale blocks: it no longer has a +structured source-plus-translation pair. 
+ +Lexical content already has block-level infrastructure: + +| Existing capability | Location | Use in this design | +| --- | --- | --- | +| Root block identifiers | `LexicalService.normalizeBlockIds()` | Stable block identity | +| Root block fingerprints | `LexicalService.extractRootBlocks()` | Block-level freshness comparison | +| Stored source snapshots | `ai_translations.source_block_snapshots` | Previous source block state | +| Stored meta hashes | `ai_translations.source_meta_hashes` | Field-level freshness comparison | +| Incremental translation restore | `restoreLexicalTranslation()` | Missing translations naturally fall back to source text | + +The current freshness model still treats the whole article as stale when +`translation.hash !== currentContentHash`. That behavior is correct for +complete translation cache hits, but too coarse for Lexical content whose +unchanged blocks can remain useful. + +## Goals + +- Return a backend-composed partial translation when only some Lexical blocks + changed. +- Preserve unchanged translated blocks. +- Fall back changed blocks to the current source text. +- Trigger regeneration for stale translations asynchronously. +- Avoid requiring frontend block-level merge logic. +- Avoid persisting partial translations as canonical translation rows. +- Keep read-path block reuse and write-path incremental reuse on the same + implementation primitives. + +## Non-Goals + +- Introducing a per-block translation table. +- Replacing the whole-document `hash` field. +- Changing frontend rendering semantics. +- Making non-Lexical formats partially reusable. +- Persisting mixed source-and-translation content as the definitive translation. + +## Freshness Model + +| Status | Meaning | Response behavior | Background behavior | +| --- | --- | --- | --- | +| `valid` | The stored translation hash matches the current source hash. | Return the stored translation row. | None. 
| +| `partial` | The full hash is stale, but Lexical block comparison can safely compose a usable result. | Return backend-composed content with stale blocks reverted to source. | Schedule regeneration for the requested language. | +| `stale` | The translation exists but cannot be safely composed. | Use the existing stale/miss behavior. | Schedule regeneration. | +| `missing` | No translation row exists. | Use the existing generation path. | Generate translation. | + +`partial` is primarily an internal response classification. The frontend should +not need this status to render content correctly, because the backend response +already contains the final `content` and `text` fields. + +## Read Path + +```text +┌──────────────────────┐ +│ Translation request │ +└──────────┬───────────┘ + ▼ +┌──────────────────────┐ +│ Find existing row │ +└──────────┬───────────┘ + ▼ + ◆ Full hash fresh? ◆ + / \ + ▼ ▼ +┌──────────────┐ ┌────────────────────┐ +│ Return row │ │ Try Lexical partial │ +└──────────────┘ └─────────┬──────────┘ + ▼ + ◆ Partial possible? ◆ + / \ + ▼ ▼ + ┌────────────────┐ ┌────────────────┐ + │ Return partial │ │ Existing stale │ + │ Schedule regen │ │ Schedule regen │ + └────────────────┘ └────────────────┘ +``` + +The partial path is available only when all of the following hold: + +| Requirement | Rationale | +| --- | --- | +| Current content format is Lexical. | Block snapshots are Lexical-specific. | +| Current source content exists and parses. | The backend must build the result from the latest source structure. | +| Stored translated content exists and parses. | Unchanged blocks are copied from this translated content. | +| Stored source block snapshots exist. | The backend needs a previous source fingerprint map. | +| Current blocks have stable block IDs for reuse. | Blocks without stable identity cannot be safely matched. 
| + +## Partial Composition + +The composition unit should be an internal backend helper, for example +`buildPartialLexicalTranslation()`. It receives the current article content and +an existing translation row, then returns either a composed translation-like +object or a failure result. + +The block reuse logic must be shared with the existing incremental write path +rather than duplicated. The current private helpers in +`lexical-translation.strategy.ts` are the reference behavior: + +| Existing helper | Required shared responsibility | +| --- | --- | +| `groupSegmentsByBlock()` | Build comparable block buckets from parsed Lexical translation segments. | +| `canReuseBlockTranslations()` | Enforce segment/property shape compatibility before reuse. | +| `backfillReusableBlockTranslations()` | Copy translated segment/property values only for unchanged reusable blocks. | + +Implementation should extract these behaviors into a shared internal module, +for example `lexical-block-reuse.ts`, and have both the incremental write path +and partial read path call the same functions. The design intentionally rejects +parallel read/write implementations because divergent block matching would make +freshness behavior non-deterministic. + +Composition rules: + +| Content region | Fresh | Stale | +| --- | --- | --- | +| Lexical root block | Reuse old translated block text/properties. | Keep the current source block. | +| `title` | Reuse old translated title. | Use current source title. | +| `subtitle` | Reuse old translated subtitle. | Use current source subtitle or `null`. | +| `summary` | Reuse old translated summary. | Use current source summary or `null`. | +| `tags` | Reuse old translated tags. | Use current source tags, an empty array, or `null` according to the source value. | +| `text` | Recompute from composed Lexical `content`. | Recompute from composed Lexical `content`. | + +Block reuse must require `blockId + fingerprint` equality. 
Index alone is not a +safe key because blocks may be reordered. + +Current meta hashes must be recomputed with the identical `md5` scheme used by +the incremental write path: + +| Field | Hash input | +| --- | --- | +| `title` | `md5(content.title)` | +| `subtitle` | `md5(content.subtitle)` when present | +| `summary` | `md5(content.summary)` when present | +| `tags` | `md5(content.tags.join('\|\|\|'))` when tags are present | + +The read path must not introduce an alternate hashing scheme. + +```text +┌────────────────────┐ +│ Current source JSON │ +└─────────┬──────────┘ + ▼ +┌────────────────────┐ +│ Parse source blocks │ +└─────────┬──────────┘ + ▼ +┌──────────────────────┐ +│ Parse translated JSON │ +└─────────┬────────────┘ + ▼ +┌────────────────────────────┐ +│ For each current block │ +│ compare id + fingerprint │ +└─────────┬──────────────────┘ + ▼ + ◆ Unchanged? ◆ + / \ + ▼ ▼ +┌──────────────┐ ┌──────────────┐ +│ Copy old │ │ Keep source │ +│ translations │ │ block text │ +└──────┬───────┘ └──────┬───────┘ + └──────────┬──────┘ + ▼ +┌────────────────────────────┐ +│ restoreLexicalTranslation() │ +└─────────┬──────────────────┘ + ▼ +┌────────────────────────────┐ +│ lexicalToMarkdown(content) │ +└────────────────────────────┘ +``` + +`restoreLexicalTranslation()` already preserves the source text when a segment +or property translation is absent. The partial builder should exploit that +property: + +- Populate the translation map only for unchanged blocks. +- Omit stale blocks from the translation map. +- Run the same Mermaid translation guard used by the write path before restore. +- Restore into the current source parse result. +- Recompute Markdown from the restored Lexical JSON. + +## Safety Rules + +| Rule | Consequence | +| --- | --- | +| A block without `blockId` is treated as stale. | It falls back to source text. | +| A block whose fingerprint differs is treated as stale. | It falls back to source text. 
| +| A reused block must have compatible segment/property shape. | Incompatible blocks fall back to source text. | +| Mermaid property translations reused from the old row must pass `validateMermaidTranslation()` against the current source diagram. | Invalid Mermaid translations are removed from the translation map and fall back to source. | +| If source Lexical parsing fails, partial composition fails. | Existing stale behavior applies. | +| If translated Lexical parsing fails, partial composition fails. | Existing stale behavior applies. | +| Mermaid, Excalidraw, poll, ruby, and other property segments must use the existing parser/restorer path. | Special node behavior remains centralized. | +| `text` must be generated from composed `content`. | `content` and `text` remain consistent. | + +The primary invariant is stronger than a display preference: any block whose +current fingerprint differs from the stored source snapshot must render as the +current source block verbatim. + +## Persistence + +Partial translations should not be persisted. + +| Reason | Explanation | +| --- | --- | +| Avoid canonical mixed-language rows. | The stored translation should remain a complete generated artifact. | +| Preserve existing hash semantics. | `hash` continues to represent the source snapshot used to generate the stored translation. | +| Avoid search-index pollution. | Search should not index a temporary source-plus-translation view as the canonical translation. | +| Keep regeneration straightforward. | The existing incremental generation path can overwrite the stale row with a fresh row. | + +The response may carry optional metadata such as `status: "partial"`, +`staleBlockCount`, and `regenerationScheduled`, but persisted translation rows +should remain unchanged until regeneration completes. + +## Regeneration + +When a partial response is returned, the backend should schedule regeneration +for the requested language asynchronously. 
+ +| Scenario | Scheduling behavior | +| --- | --- | +| Full translation is fresh. | Do not schedule. | +| Partial translation is returned. | Schedule stale regeneration for that language. | +| Partial composition fails. | Use existing stale scheduling behavior. | +| A task is already in flight. | Rely on existing task or in-flight deduplication. | +| Scheduling fails. | Log a warning; do not fail the read request. | + +The read path must not block on an LLM call. A successful partial response is +valid for immediate display because stale blocks have already fallen back to the +current source. + +The partial path should reuse the existing stale-regeneration scheduler, +`scheduleRegenerationForStaleTranslations()`, rather than creating a +per-request scheduling path. That scheduler already batches article IDs and +revalidates staleness through `filterTrulyStaleTranslations()` before task +creation. Reuse is required to avoid a thundering herd when repeated reads hit +the same stale translation. + +## Caching + +Partial responses are temporary read views and must not be cached as canonical +translation results. + +| Cache layer | Constraint | +| --- | --- | +| Translation row persistence | Do not write partial content into `ai_translations`. | +| Redis or response cache | Do not cache partial responses, unless a deliberately short TTL is chosen below the expected regeneration SLA. | +| Search index cache | Do not index partial content as a translation-language document. | + +Fresh regenerated translations may use existing cache and indexing behavior +after the canonical row has been updated. + +## Error Handling + +| Failure | Behavior | +| --- | --- | +| Source content is missing or invalid. | Skip partial composition. | +| Existing translated content is missing or invalid. | Skip partial composition. | +| Snapshot shape is invalid. | Skip partial composition. | +| Block shape mismatch occurs for a reused candidate. | Treat only that block as stale. 
| +| Markdown regeneration fails. | Treat partial composition as failed. | +| Regeneration scheduling fails. | Return partial response and log warning. | + +## Testing Strategy + +Behavior-oriented tests should cover observable outcomes rather than snapshotting +static internal tables. + +| Test case | Expected behavior | +| --- | --- | +| One Lexical block changes. | Unchanged blocks remain translated; changed block returns source text. | +| Blocks are reordered. | Reuse follows `blockId + fingerprint`, not root index. | +| A current block lacks `blockId`. | That block returns source text. | +| Existing translated content cannot be parsed. | Partial path is not used. | +| A meta field changes. | Only that field falls back to source; unchanged meta fields remain translated. | +| `text` is returned. | It is generated from the composed `content`. | +| Non-Lexical content is stale. | Existing whole-document stale behavior remains unchanged. | +| Regeneration scheduling fails. | The partial response still succeeds. | +| Whole-document hash differs but all block IDs and fingerprints match. | The composed result is equivalent to the stored translation row, except for metadata derived from changed meta fields. | +| Stored translation contains a block deleted from the current source. | The deleted block is absent from the response. | + +## Acceptance Criteria + +- A stale Lexical translation with unchanged blocks can still produce a readable + backend-composed response. +- Any block whose fingerprint differs from the stored source snapshot is + rendered as the current source block verbatim. +- The frontend can render the returned response without block-level merge logic. +- No partial result is written into `ai_translations`. +- The requested language is scheduled for regeneration after a partial response. +- Existing behavior for fresh, missing, and non-Lexical translations remains + compatible. 
+- Read-path partial reuse and write-path incremental reuse share the same block + grouping and compatibility helpers. +- Partial-read observability reports total, changed, and reused block counts in + the same spirit as the existing incremental diff log.