Skip to content
Merged
61 changes: 43 additions & 18 deletions apps/core/src/modules/ai/ai-translation/ai-translation.service.ts
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ import type {
} from './ai-translation.types'
import { AITranslationModel } from './ai-translation.types-model'
import { BaseTranslationService } from './base-translation.service'
import { LexicalPartialTranslationBuilder } from './lexical-partial-translation.builder'
import { TranslationConsistencyService } from './translation-consistency.service'
import type { TranslationSourceSnapshot } from './translation-consistency.types'
import type { ITranslationStrategy } from './translation-strategy.interface'
Expand Down Expand Up @@ -100,6 +101,7 @@ export class AiTranslationService
private readonly aiTranslationRepository: AiTranslationRepository,
private readonly databaseService: DatabaseService,
private readonly translationConsistencyService: TranslationConsistencyService,
private readonly lexicalPartialTranslationBuilder: LexicalPartialTranslationBuilder,
private readonly configService: ConfigsService,
private readonly aiService: AiService,
private readonly aiInFlightService: AiInFlightService,
Expand All @@ -121,6 +123,19 @@ export class AiTranslationService
: this.markdownStrategy
}

/**
 * Fire-and-forget wrapper around `scheduleRegenerationForStaleTranslations`
 * for a single article.
 *
 * The returned promise is intentionally not awaited: a rejection is logged
 * through `this.logger` and never propagated to the caller.
 *
 * @param articleId - Article whose stale translation should be regenerated.
 * @param targetLang - Language to schedule the regeneration for.
 */
private scheduleStaleTranslationRegenerationBestEffort(
  articleId: string,
  targetLang: string,
) {
  const articleIds = [articleId]

  // `void` marks the promise as deliberately un-awaited.
  void this.scheduleRegenerationForStaleTranslations(
    articleIds,
    targetLang,
  ).catch((failure) => {
    this.logger.error(
      'Failed to schedule stale translation regeneration',
      failure,
    )
  })
}

/**
 * Registers this service's task handlers by delegating to
 * `registerTaskHandlers()`.
 *
 * NOTE(review): presumably invoked by the framework's module-init lifecycle
 * hook — confirm against the class's `implements` clause.
 */
onModuleInit() {
this.registerTaskHandlers()
}
Expand Down Expand Up @@ -1074,7 +1089,22 @@ export class AiTranslationService
translation,
)

return status === 'valid' ? translation : null
if (status === 'valid') {
return translation
}

if (status !== 'stale') {
return null
}

this.scheduleStaleTranslationRegenerationBestEffort(articleId, targetLang)

const partial = this.lexicalPartialTranslationBuilder.build(
this.toArticleContent(document),
translation,
)

return partial?.translation ?? null
}

async getValidTranslationsForArticles(
Expand Down Expand Up @@ -1225,6 +1255,7 @@ export class AiTranslationService
const snapshot = this.buildSnapshotFromDocument(articleId, document)
const validLangs: string[] = []
const staleLangs: string[] = []
let matchedTranslation: AITranslationModel | null = null

for (const t of translations) {
const status =
Expand All @@ -1234,29 +1265,23 @@ export class AiTranslationService
)
if (status === 'valid') {
validLangs.push(t.lang)
if (targetLang === t.lang) {
matchedTranslation = t
}
} else if (status === 'stale') {
staleLangs.push(t.lang)
if (targetLang === t.lang) {
matchedTranslation =
this.lexicalPartialTranslationBuilder.build(
this.toArticleContent(document),
t,
)?.translation ?? null
}
}
}

const matchedTranslation =
targetLang && validLangs.includes(targetLang)
? await this.aiTranslationRepository.findByRefAndLang(
articleId,
targetLang,
)
: null

if (staleLangs.length && targetLang) {
this.scheduleRegenerationForStaleTranslations(
[articleId],
targetLang,
).catch((err) =>
this.logger.error(
'Failed to schedule stale translation regeneration',
err,
),
)
this.scheduleStaleTranslationRegenerationBestEffort(articleId, targetLang)
}

return {
Expand Down
128 changes: 128 additions & 0 deletions apps/core/src/modules/ai/ai-translation/lexical-block-reuse.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
import type {
LexicalTranslationResult,
PropertySegment,
TranslationSegment,
} from './lexical-translation-parser'
import { validateMermaidTranslation } from './mermaid-translation-guard'

/**
 * Segments that belong to one block, as bucketed by `groupSegmentsByBlock`.
 */
export interface BlockTranslationSegments {
// Text segments of the block; `groupSegmentsByBlock` only adds translatable ones.
segments: TranslationSegment[]
// Property segments of the block, kept in the order they were encountered.
propertySegments: PropertySegment[]
}

/**
 * Outcome of `backfillReusableBlockTranslations`: which of the requested
 * block ids had their earlier translations copied and which did not.
 */
export interface BackfillReusableBlockResult {
// Block ids whose segment and property texts were written to the output map.
reusedBlockIds: string[]
// Block ids missing on either side or rejected by `canReuseBlockTranslations`.
skippedBlockIds: string[]
}

/**
 * Buckets the segments of a parse result by the block they belong to.
 *
 * Text segments are included only when they carry a truthy `blockId` and are
 * flagged `translatable`; property segments only need a truthy `blockId`.
 * Buckets are created lazily, so map iteration order follows the first
 * appearance of each block id (text segments are scanned before property
 * segments).
 *
 * @param result - Parse result whose segments should be grouped.
 * @returns Map from block id to that block's segments.
 */
export function groupSegmentsByBlock(
  result: LexicalTranslationResult,
): Map<string, BlockTranslationSegments> {
  const grouped = new Map<string, BlockTranslationSegments>()

  // Fetch the bucket for a block, creating an empty one on first use.
  const bucketFor = (blockId: string): BlockTranslationSegments => {
    const existing = grouped.get(blockId)
    if (existing) {
      return existing
    }
    const created: BlockTranslationSegments = {
      segments: [],
      propertySegments: [],
    }
    grouped.set(blockId, created)
    return created
  }

  for (const textSegment of result.segments) {
    if (textSegment.blockId && textSegment.translatable) {
      bucketFor(textSegment.blockId).segments.push(textSegment)
    }
  }

  for (const propSegment of result.propertySegments) {
    if (propSegment.blockId) {
      bucketFor(propSegment.blockId).propertySegments.push(propSegment)
    }
  }

  return grouped
}

/**
 * Decides whether a previously translated block is structurally compatible
 * with the current block, so its translations can be copied positionally.
 *
 * Compatibility requires the same number of text segments, the same number
 * of property segments, and matching `property` / `key` values for the
 * property segments at each index. Segment text is not compared.
 *
 * @param currentBlock - Segments of the block in the current document.
 * @param translatedBlock - Segments of the same block from the earlier translation.
 * @returns `true` when positional reuse is possible.
 */
export function canReuseBlockTranslations(
  currentBlock: BlockTranslationSegments,
  translatedBlock: BlockTranslationSegments,
): boolean {
  const sameSegmentCount =
    currentBlock.segments.length === translatedBlock.segments.length
  if (!sameSegmentCount) {
    return false
  }

  const currentProps = currentBlock.propertySegments
  const translatedProps = translatedBlock.propertySegments
  if (currentProps.length !== translatedProps.length) {
    return false
  }

  // Reusable unless some position disagrees on the property name or key.
  const hasMismatch = currentProps.some((currentProp, position) => {
    const translatedProp = translatedProps[position]
    return (
      translatedProp.property !== currentProp.property ||
      translatedProp.key !== currentProp.key
    )
  })

  return !hasMismatch
}

/**
 * Copies earlier translations into `output` for every unchanged block whose
 * structure still matches.
 *
 * Both parse results are grouped by block id. For each id in
 * `unchangedBlockIds`, the block is reused only when it exists on both sides
 * and `canReuseBlockTranslations` accepts the pair. Its translated texts are
 * then written to `output`, keyed by the *current* segment ids and matched by
 * position. Any other block id is reported as skipped.
 *
 * @param currentResult - Parse result of the current document.
 * @param translatedResult - Parse result of the previously translated document.
 * @param unchangedBlockIds - Block ids considered unchanged by the caller.
 * @param output - Map from segment id to translated text; mutated in place.
 * @returns The block ids that were reused and those that were skipped.
 */
export function backfillReusableBlockTranslations(
  currentResult: LexicalTranslationResult,
  translatedResult: LexicalTranslationResult,
  unchangedBlockIds: Set<string>,
  output: Map<string, string>,
): BackfillReusableBlockResult {
  const currentByBlock = groupSegmentsByBlock(currentResult)
  const translatedByBlock = groupSegmentsByBlock(translatedResult)
  const outcome: BackfillReusableBlockResult = {
    reusedBlockIds: [],
    skippedBlockIds: [],
  }

  for (const blockId of unchangedBlockIds) {
    const source = currentByBlock.get(blockId)
    const previous = translatedByBlock.get(blockId)

    if (source && previous && canReuseBlockTranslations(source, previous)) {
      // Text segments first, then property segments, matched by position.
      for (let i = 0; i < source.segments.length; i++) {
        output.set(source.segments[i].id, previous.segments[i].text)
      }
      for (let i = 0; i < source.propertySegments.length; i++) {
        output.set(
          source.propertySegments[i].id,
          previous.propertySegments[i].text,
        )
      }
      outcome.reusedBlockIds.push(blockId)
    } else {
      outcome.skippedBlockIds.push(blockId)
    }
  }

  return outcome
}

/**
 * Removes translated Mermaid diagrams that fail validation.
 *
 * Only property segments with `property === 'diagram'` on a node of type
 * `'mermaid'` are inspected. A segment is left alone when it has no entry in
 * `translations` or when the entry equals the source text. Otherwise the
 * pair is passed to `validateMermaidTranslation`; on failure the entry is
 * deleted from `translations` and `onReject` (if given) receives a message
 * describing the rejection.
 *
 * @param parseResult - Parse result supplying the source property segments.
 * @param translations - Map from segment id to translated text; mutated in place.
 * @param onReject - Optional callback invoked with a message per rejected diagram.
 */
export function guardMermaidTranslations(
  parseResult: LexicalTranslationResult,
  translations: Map<string, string>,
  onReject?: (message: string) => void,
): void {
  for (const segment of parseResult.propertySegments) {
    const isMermaidDiagram =
      segment.property === 'diagram' && segment.node?.type === 'mermaid'
    if (!isMermaidDiagram) {
      continue
    }

    const candidate = translations.get(segment.id)
    // Nothing to check when untranslated or left identical to the source.
    if (candidate === undefined || candidate === segment.text) {
      continue
    }

    const verdict = validateMermaidTranslation(segment.text, candidate)
    if (verdict.ok) {
      continue
    }

    const message = `Mermaid translation rejected: reason=${verdict.reason} sourceLen=${segment.text.length} translatedLen=${candidate.length}`
    onReject?.(message)
    translations.delete(segment.id)
  }
}
Loading