From a74a94e1b97bf52b78624e81ca316c78a332ed77 Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Fri, 5 Jun 2026 07:11:32 +0200 Subject: [PATCH 01/16] feat(contract): define serializable PaperDTO schema and conversion function --- packages/yoastseo/package.json | 3 +- .../yoastseo/spec/contract/paperDtoSpec.js | 60 ++++++++++++++++ packages/yoastseo/src/contract/index.js | 1 + packages/yoastseo/src/contract/paperDto.js | 72 +++++++++++++++++++ 4 files changed, 135 insertions(+), 1 deletion(-) create mode 100644 packages/yoastseo/spec/contract/paperDtoSpec.js create mode 100644 packages/yoastseo/src/contract/index.js create mode 100644 packages/yoastseo/src/contract/paperDto.js diff --git a/packages/yoastseo/package.json b/packages/yoastseo/package.json index 2529b757103..eec1a44af86 100644 --- a/packages/yoastseo/package.json +++ b/packages/yoastseo/package.json @@ -70,7 +70,8 @@ "loglevel": "^1.9.2", "parse5": "^8.0.0", "tiny-segmenter": "^0.2.0", - "tokenizer2": "^2.0.1" + "tokenizer2": "^2.0.1", + "zod": "^3.25.76" }, "yoast": { "premiumConfiguration": "" diff --git a/packages/yoastseo/spec/contract/paperDtoSpec.js b/packages/yoastseo/spec/contract/paperDtoSpec.js new file mode 100644 index 00000000000..4a7faa7c939 --- /dev/null +++ b/packages/yoastseo/spec/contract/paperDtoSpec.js @@ -0,0 +1,60 @@ +import Paper from "../../src/values/Paper.js"; +import { paperDtoSchema, toPaper } from "../../src/contract"; + +describe( "the Paper input contract (PaperDTO)", function() { + describe( "toPaper", function() { + it( "maps a valid keyphrase-core DTO onto a Paper", function() { + const paper = toPaper( { + text: "A post about cats.", + keyphrase: "cat food", + synonyms: "kitten food", + locale: "en_US", + description: "The best cat food.", + } ); + + expect( paper ).toBeInstanceOf( Paper ); + expect( paper.getText() ).toBe( "A post about cats." ); + // `keyphrase` maps to the engine's `keyword`. 
+ expect( paper.getKeyword() ).toBe( "cat food" ); + expect( paper.getSynonyms() ).toBe( "kitten food" ); + expect( paper.getLocale() ).toBe( "en_US" ); + expect( paper.getDescription() ).toBe( "The best cat food." ); + } ); + + it( "leaves absent optional fields to Paper's defaults, without throwing", function() { + const paper = toPaper( { text: "Only text provided." } ); + + expect( paper.getKeyword() ).toBe( "" ); + // Engine default, not set by the DTO. + expect( paper.getLocale() ).toBe( "en_US" ); + expect( paper.getDescription() ).toBe( "" ); + } ); + + it( "stashes siteUrl and domain in customData as a placeholder", function() { + const paper = toPaper( { + text: "x", + siteUrl: "https://example.com", + domain: "example.com", + } ); + + expect( paper.getCustomData() ).toEqual( { + siteUrl: "https://example.com", + domain: "example.com", + } ); + } ); + + it( "throws on a structurally invalid payload (wrong type)", function() { + expect( () => toPaper( { text: 123 } ) ).toThrow(); + } ); + + it( "throws on unknown or typo'd keys (strict)", function() { + expect( () => toPaper( { text: "x", keyword: "typo" } ) ).toThrow(); + } ); + } ); + + describe( "paperDtoSchema", function() { + it( "accepts a minimal valid payload", function() { + expect( paperDtoSchema.parse( { text: "hi" } ) ).toEqual( { text: "hi" } ); + } ); + } ); +} ); diff --git a/packages/yoastseo/src/contract/index.js b/packages/yoastseo/src/contract/index.js new file mode 100644 index 00000000000..847d2a7f021 --- /dev/null +++ b/packages/yoastseo/src/contract/index.js @@ -0,0 +1 @@ +export { paperDtoSchema, toPaper } from "./paperDto.js"; diff --git a/packages/yoastseo/src/contract/paperDto.js b/packages/yoastseo/src/contract/paperDto.js new file mode 100644 index 00000000000..899b13d6d63 --- /dev/null +++ b/packages/yoastseo/src/contract/paperDto.js @@ -0,0 +1,72 @@ +import { isEmpty, isUndefined } from "lodash"; +import { z } from "zod"; +import Paper from "../values/Paper.js"; + +/** + * 
Serializable input contract for the analysis engine (keyphrase-core slice). + * + * Proof of concept for lingo-other-tasks#634. zod is the source of truth; a JSON + * Schema can be generated from it for non-JS / wire consumers. + * + * Two validation tiers (see the issue): structural validity is enforced here — + * wrong types, malformed payloads, and unknown keys throw at the boundary. Per + * assessment field needs are NOT enforced: every field except `text` is optional, + * so a consumer that omits e.g. `keyphrase` simply receives no keyphrase + * assessments, matching the engine's existing graceful-skip behaviour. + * + * `.strict()` encodes the "reject unknown keys" option from the issue's open + * question on unknown-key policy (it catches typos like `keyword` vs `keyphrase`); + * relax to `.strip()`/`.passthrough()` if that decision changes. + */ +export const paperDtoSchema = z.object( { + text: z.string().describe( "The content to analyse (HTML or plain text)." ), + keyphrase: z.string().optional().describe( "The focus keyphrase." ), + synonyms: z.string().optional().describe( "Comma-separated synonyms of the keyphrase." ), + locale: z.string().optional().describe( "Locale, e.g. \"en_US\". The engine defaults to \"en_US\" when absent." ), + description: z.string().optional().describe( "The SEO meta description." ), + siteUrl: z.string().optional().describe( "Full site URL including scheme, e.g. \"https://example.com\"." ), + domain: z.string().optional().describe( "Bare host without scheme, e.g. \"example.com\"." ), +} ).strict(); + +/** + * @typedef {import("zod").infer<typeof paperDtoSchema>} PaperDTO + */ + +/** + * Validates a PaperDTO and maps it onto the engine's internal Paper. + * + * This is the single place that knows how contract fields land on Paper attributes + * (notably `keyphrase` -> `keyword`); the engine, assessors, and researches are + * untouched. Throws a `ZodError` when the payload is structurally invalid.
Absent + * optional fields are left to Paper's own defaults, so missing inputs degrade + * gracefully rather than throwing. + * + * @param {PaperDTO} dto The serializable input contract. + * @returns {Paper} The constructed Paper, ready for `assessor.assess( paper )`. + */ +export function toPaper( dto ) { + const data = paperDtoSchema.parse( dto ); + + const attributes = { + keyword: data.keyphrase, + synonyms: data.synonyms, + locale: data.locale, + description: data.description, + }; + + // `siteUrl`/`domain` have no Paper attribute today — competing-links reads the + // site URL from WordPress context. Stash them in customData as a placeholder + // until that engine-side plumbing exists (lingo-other-tasks#634). + const customData = {}; + if ( ! isUndefined( data.siteUrl ) ) { + customData.siteUrl = data.siteUrl; + } + if ( ! isUndefined( data.domain ) ) { + customData.domain = data.domain; + } + if ( ! isEmpty( customData ) ) { + attributes.customData = customData; + } + + return new Paper( data.text, attributes ); +} From be722fa85d18eae7872679c6ec00693cfcab8c27 Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Fri, 5 Jun 2026 07:42:39 +0200 Subject: [PATCH 02/16] feat(contract): expose PaperDTO via the yoastseo/contract entry Adds a root `contract/` redirect mirroring `yoastseo/researcher` so consumers import `yoastseo/contract` instead of deep-requiring `build/`. Keeps zod off the package-root bundle (only pulled when the contract is imported) and avoids an `exports` map, which would break existing deep importers. 
Co-Authored-By: Claude Opus 4.8 --- packages/yoastseo/contract/index.js | 10 ++++++++++ packages/yoastseo/eslint.config.mjs | 2 +- packages/yoastseo/package.json | 3 ++- 3 files changed, 13 insertions(+), 2 deletions(-) create mode 100644 packages/yoastseo/contract/index.js diff --git a/packages/yoastseo/contract/index.js b/packages/yoastseo/contract/index.js new file mode 100644 index 00000000000..1608ee7fb9e --- /dev/null +++ b/packages/yoastseo/contract/index.js @@ -0,0 +1,10 @@ +/* + * Public entry point for the serializable Paper input contract: `require( "yoastseo/contract" )`. + * + * Deliberately shipped as its own entry, separate from the package root (`build/index.js`), so the + * contract's runtime dependency (zod) is pulled in only by consumers that import the contract — never by + * code that loads the package root as a bundler "external" (e.g. Yoast SEO for WordPress, which exposes + * the root as a shared global). Mirrors the `yoastseo/researcher` entry. Keeping it here also gives + * consumers a stable path without deep-requiring `build/...`. 
+ */ +module.exports = require( "../build/contract" ); diff --git a/packages/yoastseo/eslint.config.mjs b/packages/yoastseo/eslint.config.mjs index d824192065b..7019838c507 100644 --- a/packages/yoastseo/eslint.config.mjs +++ b/packages/yoastseo/eslint.config.mjs @@ -3,7 +3,7 @@ import yoastConfig from "@yoast/eslint-config"; /** @type {import('eslint').Linter.Config[]} */ export default [ - { ignores: [ "build", "vendor", "examples" ] }, + { ignores: [ "build", "vendor", "examples", "contract" ] }, ...yoastConfig, { languageOptions: { diff --git a/packages/yoastseo/package.json b/packages/yoastseo/package.json index eec1a44af86..662295abfc8 100644 --- a/packages/yoastseo/package.json +++ b/packages/yoastseo/package.json @@ -21,7 +21,8 @@ "build", "!*.map", "vendor", - "images" + "images", + "contract" ], "sideEffects": false, "scripts": { From 76b314f84c9d35e965880c28d7656a9cfdffe25d Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Fri, 5 Jun 2026 10:46:21 +0200 Subject: [PATCH 03/16] feat(contract): enhance serializable input contract with additional metadata fields --- .../yoastseo/spec/contract/paperDtoSpec.js | 21 +++++++++++++++++++ packages/yoastseo/src/contract/paperDto.js | 16 +++++++++++++- 2 files changed, 36 insertions(+), 1 deletion(-) diff --git a/packages/yoastseo/spec/contract/paperDtoSpec.js b/packages/yoastseo/spec/contract/paperDtoSpec.js index 4a7faa7c939..de8ca0aedef 100644 --- a/packages/yoastseo/spec/contract/paperDtoSpec.js +++ b/packages/yoastseo/spec/contract/paperDtoSpec.js @@ -21,6 +21,27 @@ describe( "the Paper input contract (PaperDTO)", function() { expect( paper.getDescription() ).toBe( "The best cat food." 
); } ); + it( "maps the full neutral metadata surface onto Paper", function() { + const paper = toPaper( { + text: "x", + title: "My SEO title", + slug: "my-slug", + permalink: "https://example.com/my-slug", + titleWidth: 400, + textTitle: "Article title", + date: "2024-01-01", + writingDirection: "RTL", + } ); + + expect( paper.getTitle() ).toBe( "My SEO title" ); + expect( paper.getSlug() ).toBe( "my-slug" ); + expect( paper.getPermalink() ).toBe( "https://example.com/my-slug" ); + expect( paper.getTitleWidth() ).toBe( 400 ); + expect( paper.getTextTitle() ).toBe( "Article title" ); + expect( paper.getDate() ).toBe( "2024-01-01" ); + expect( paper.getWritingDirection() ).toBe( "RTL" ); + } ); + it( "leaves absent optional fields to Paper's defaults, without throwing", function() { const paper = toPaper( { text: "Only text provided." } ); diff --git a/packages/yoastseo/src/contract/paperDto.js b/packages/yoastseo/src/contract/paperDto.js index 899b13d6d63..dc16b7cf995 100644 --- a/packages/yoastseo/src/contract/paperDto.js +++ b/packages/yoastseo/src/contract/paperDto.js @@ -3,7 +3,7 @@ import { z } from "zod"; import Paper from "../values/Paper.js"; /** - * Serializable input contract for the analysis engine (keyphrase-core slice). + * Serializable, platform-neutral input contract for the analysis engine. * * Proof of concept for lingo-other-tasks#634. zod is the source of truth; a JSON * Schema can be generated from it for non-JS / wire consumers. @@ -24,6 +24,13 @@ export const paperDtoSchema = z.object( { synonyms: z.string().optional().describe( "Comma-separated synonyms of the keyphrase." ), locale: z.string().optional().describe( "Locale, e.g. \"en_US\". The engine defaults to \"en_US\" when absent." ), description: z.string().optional().describe( "The SEO meta description." ), + title: z.string().optional().describe( "The SEO title." ), + slug: z.string().optional().describe( "The URL slug." 
), + permalink: z.string().optional().describe( "The full permalink URL of the content." ), + titleWidth: z.number().optional().describe( "Rendered width of the SEO title in pixels." ), + textTitle: z.string().optional().describe( "The title of the text or article itself." ), + date: z.string().optional().describe( "Publication date." ), + writingDirection: z.enum( [ "LTR", "RTL" ] ).optional().describe( "Writing direction of the content." ), siteUrl: z.string().optional().describe( "Full site URL including scheme, e.g. \"https://example.com\"." ), domain: z.string().optional().describe( "Bare host without scheme, e.g. \"example.com\"." ), } ).strict(); @@ -52,6 +59,13 @@ export function toPaper( dto ) { synonyms: data.synonyms, locale: data.locale, description: data.description, + title: data.title, + slug: data.slug, + permalink: data.permalink, + titleWidth: data.titleWidth, + textTitle: data.textTitle, + date: data.date, + writingDirection: data.writingDirection, }; // `siteUrl`/`domain` have no Paper attribute today — competing-links reads the From 2df246c25d6388d9fdbc8a797293a4d07684eddb Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Fri, 5 Jun 2026 10:50:32 +0200 Subject: [PATCH 04/16] feat(analyze): refactor to use paperFromRequest helper for input validation --- .../helpers/paper-from-request.js | 22 +++++ apps/content-analysis-api/routes/analyze.js | 87 +++++++++---------- apps/content-analysis-api/routes/research.js | 42 ++++----- 3 files changed, 85 insertions(+), 66 deletions(-) create mode 100644 apps/content-analysis-api/helpers/paper-from-request.js diff --git a/apps/content-analysis-api/helpers/paper-from-request.js b/apps/content-analysis-api/helpers/paper-from-request.js new file mode 100644 index 00000000000..da3da5057a4 --- /dev/null +++ b/apps/content-analysis-api/helpers/paper-from-request.js @@ -0,0 +1,22 @@ +const { toPaper } = require( "yoastseo/contract" ); + +/** + * Builds a Paper from the request body via the PaperDTO contract 
(`yoastseo/contract`). + * + * On a structurally invalid body (wrong types, unknown keys, missing `text`) it responds with a 400 and + * returns null, so callers should bail when the result is falsy. + * + * @param {Object} request The Express request. + * @param {Object} response The Express response. + * @returns {Object|null} The constructed Paper, or null when the body was rejected. + */ +const paperFromRequest = ( request, response ) => { + try { + return toPaper( request.body || {} ); + } catch ( error ) { + response.status( 400 ).json( { error: "Invalid request body", details: error.issues || String( error ) } ); + return null; + } +}; + +module.exports = { paperFromRequest }; diff --git a/apps/content-analysis-api/routes/analyze.js b/apps/content-analysis-api/routes/analyze.js index 30d58f99c1b..910f833e0fe 100644 --- a/apps/content-analysis-api/routes/analyze.js +++ b/apps/content-analysis-api/routes/analyze.js @@ -1,5 +1,6 @@ -const { Paper, assessments, assessors, interpreters } = require( "yoastseo" ); +const { assessments, assessors, interpreters } = require( "yoastseo" ); const { getResearcher } = require( "../helpers/get-researcher" ); +const { paperFromRequest } = require( "../helpers/paper-from-request" ); const express = require( "express" ), app = express(); @@ -32,9 +33,13 @@ const resultToVM = ( result ) => { module.exports = function( app ) { app.get( "/analyze", ( request, response ) => { + const paper = paperFromRequest( request, response ); + if ( ! 
paper ) { + return; + } + // Fetch the Researcher and set the morphology data for the given language (yes, this is a bit hacky) const language = request.body.locale || "en"; - const researcher = getResearcher( language ); const seoAssessor = new SEOAssessor( researcher ); @@ -46,11 +51,6 @@ module.exports = function( app ) { const relatedKeywordAssessor = new RelatedKeywordAssessor( researcher ); const inclusiveLanguageAssessor = new InclusiveLanguageAssessor( researcher ); - const paper = new Paper( - request.body.text || "", - request.body || {} - ); - seoAssessor.assess( paper ); contentAssessor.assess( paper ); relatedKeywordAssessor.assess( paper ); @@ -65,116 +65,113 @@ module.exports = function( app ) { } ); app.get( "/analyze/seo", ( request, response ) => { + const paper = paperFromRequest( request, response ); + if ( ! paper ) { + return; + } const language = request.body.locale || "en"; const researcher = getResearcher( language ); const assessor = new SEOAssessor( researcher ); assessor.addAssessment( "keyphraseDistribution", new KeyphraseDistributionAssessment() ); assessor.addAssessment( "TextTitleAssessment", new TextTitleAssessment() ); - const paper = new Paper( - request.body.text || "", - request.body || {} - ); assessor.assess( paper ); response.json( assessor.getValidResults().map( resultToVM ) ); } ); app.get( "/analyze/readability", ( request, response ) => { + const paper = paperFromRequest( request, response ); + if ( ! 
paper ) { + return; + } const language = request.body.locale || "en"; const researcher = getResearcher( language ); const assessor = new ContentAssessor( researcher ); assessor.addAssessment( "wordComplexity", new WordComplexityAssessment() ); assessor.addAssessment( "textAlignment", new TextAlignmentAssessment() ); - const paper = new Paper( - request.body.text || "", - request.body || {} - ); + assessor.assess( paper ); response.json( assessor.getValidResults().map( resultToVM ) ); } ); app.get( "/analyze/related-keyphrase", ( request, response ) => { + const paper = paperFromRequest( request, response ); + if ( ! paper ) { + return; + } const language = request.body.locale || "en"; const researcher = getResearcher( language ); const assessor = new RelatedKeywordAssessor( researcher ); - const paper = new Paper( - request.body.text || "", - request.body || {} - ); + assessor.assess( paper ); response.json( assessor.getValidResults().map( resultToVM ) ); } ); app.get( "/analyze/inclusive-language", ( request, response ) => { + const paper = paperFromRequest( request, response ); + if ( ! paper ) { + return; + } const language = request.body.locale || "en"; const researcher = getResearcher( language ); const assessor = new InclusiveLanguageAssessor( researcher ); - const paper = new Paper( - request.body.text || "", - request.body || {} - ); + assessor.assess( paper ); response.json( assessor.getValidResults().map( resultToVM ) ); } ); app.get( "/analyze/meta-description", ( request, response ) => { - if (! request.body.description) { + if ( ! request.body.description ) { return response.status( 400 ).json( { error: "Description is required" } ); } + const paper = paperFromRequest( request, response ); + if ( ! 
paper ) { + return; + } const language = request.body.locale || "en"; const researcher = getResearcher( language ); const assessor = new MetaDescriptionAssessor( researcher ); - const paper = new Paper( - request.body.text || "", - request.body || {} - ); assessor.assess( paper ); response.json( assessor.getValidResults().map( resultToVM ) ); } ); app.get( "/analyze/seo-title", ( request, response ) => { - if (! request.body.title) { + if ( ! request.body.title ) { return response.status( 400 ).json( { error: "Title is required" } ); } + const paper = paperFromRequest( request, response ); + if ( ! paper ) { + return; + } const language = request.body.locale || "en"; const researcher = getResearcher( language ); const assessor = new SeoTitleAssessor( researcher ); - const paper = new Paper( - request.body.text || "", - request.body || {} - ); assessor.assess( paper ); response.json( assessor.getValidResults().map( resultToVM ) ); } ); app.get( "/analyze/keyphrase", ( request, response ) => { - if (! request.body.keyword) { - return response.status( 400 ).json( { error: "Keyword is required" } ); + const paper = paperFromRequest( request, response ); + if ( ! paper ) { + return; } const language = request.body.locale || "en"; const researcher = getResearcher( language ); const assessor = new KeyphraseAssessor( researcher ); - const paper = new Paper( - request.body.text || "", - request.body || {} - ); assessor.assess( paper ); response.json( assessor.getValidResults().map( resultToVM ) ); } ); app.get( "/analyze/keyphrase-use", ( request, response ) => { - if (! request.body.keyword) { - return response.status( 400 ).json( { error: "Keyword is required" } ); + const paper = paperFromRequest( request, response ); + if ( ! 
paper ) { + return; } const language = request.body.locale || "en"; const researcher = getResearcher( language ); const assessor = new KeyphraseUseAssessor( researcher ); assessor.addAssessment( "keyphraseDistribution", new KeyphraseDistributionAssessment() ); - const paper = new Paper( - request.body.text || "", - request.body || {} - ); assessor.assess( paper ); response.json( assessor.getValidResults().map( resultToVM ) ); } ); diff --git a/apps/content-analysis-api/routes/research.js b/apps/content-analysis-api/routes/research.js index eb252c65c88..5fb698e74f0 100644 --- a/apps/content-analysis-api/routes/research.js +++ b/apps/content-analysis-api/routes/research.js @@ -1,52 +1,52 @@ -const { Paper } = require( "yoastseo" ); const { build } = require( "yoastseo/build/parse/build" ); const { LanguageProcessor } = require( "yoastseo/build/parse/language" ); const { getResearcher } = require( "../helpers/get-researcher" ); +const { paperFromRequest } = require( "../helpers/paper-from-request" ); module.exports = function( app ) { app.get( "/research/estimated-reading-time", ( request, response ) => { + const paper = paperFromRequest( request, response ); + if ( ! paper ) { + return; + } const language = request.body.locale || "en"; const researcher = getResearcher( language ); - const paper = new Paper( - request.body.text || "", - request.body || {} - ); researcher.setPaper( paper ); const estimatedReadingTime = researcher.getResearch( "readingTime" ); response.json( { time: estimatedReadingTime } ); } ); app.get( "/research/flesch-reading-ease", ( request, response ) => { + const paper = paperFromRequest( request, response ); + if ( ! 
paper ) { + return; + } const language = request.body.locale || "en"; const researcher = getResearcher( language ); - const paper = new Paper( - request.body.text || "", - request.body || {} - ); researcher.setPaper( paper ); const fleschReadingEaseScore = researcher.getResearch( "getFleschReadingScore" ); response.json( { score: fleschReadingEaseScore.score, difficulty: fleschReadingEaseScore.difficulty } ); } ); app.get( "/research/word-count", ( request, response ) => { + const paper = paperFromRequest( request, response ); + if ( ! paper ) { + return; + } const language = request.body.locale || "en"; const researcher = getResearcher( language ); - const paper = new Paper( - request.body.text || "", - request.body || {} - ); researcher.setPaper( paper ); const wordCount = researcher.getResearch( "wordCountInText" ); response.json( { count: wordCount.count, unit: wordCount.unit } ); } ); app.get( "/research/sentence-count", ( request, response ) => { + const paper = paperFromRequest( request, response ); + if ( ! paper ) { + return; + } const language = request.body.locale || "en"; const researcher = getResearcher( language ); - const paper = new Paper( - request.body.text || "", - request.body || {} - ); paper.setTree( build( paper, new LanguageProcessor( researcher ), paper._attributes && paper._attributes.shortcodes ) ); researcher.setPaper( paper ); const sentenceLengths = researcher.getResearch( "countSentencesFromText" ); @@ -56,12 +56,12 @@ module.exports = function( app ) { } ); app.get( "/research/paragraph-count", ( request, response ) => { + const paper = paperFromRequest( request, response ); + if ( ! 
paper ) { + return; + } const language = request.body.locale || "en"; const researcher = getResearcher( language ); - const paper = new Paper( - request.body.text || "", - request.body || {} - ); paper.setTree( build( paper, new LanguageProcessor( researcher ), paper._attributes && paper._attributes.shortcodes ) ); researcher.setPaper( paper ); const paragraphLengths = researcher.getResearch( "getParagraphLength" ); From dc05c91c07c65507add96f512f169c193851f587 Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Sat, 6 Jun 2026 05:05:29 +0200 Subject: [PATCH 05/16] feat(contract): add deprecated `keyword` alias for `keyphrase` in PaperDTO schema --- packages/yoastseo/spec/contract/paperDtoSpec.js | 12 +++++++++++- packages/yoastseo/src/contract/paperDto.js | 13 +++++++++---- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/packages/yoastseo/spec/contract/paperDtoSpec.js b/packages/yoastseo/spec/contract/paperDtoSpec.js index de8ca0aedef..71e3c45cfa5 100644 --- a/packages/yoastseo/spec/contract/paperDtoSpec.js +++ b/packages/yoastseo/spec/contract/paperDtoSpec.js @@ -69,7 +69,17 @@ describe( "the Paper input contract (PaperDTO)", function() { } ); it( "throws on unknown or typo'd keys (strict)", function() { - expect( () => toPaper( { text: "x", keyword: "typo" } ) ).toThrow(); + expect( () => toPaper( { text: "x", keyphrse: "typo" } ) ).toThrow(); + } ); + + it( "accepts the deprecated `keyword` alias and maps it to the keyphrase", function() { + const paper = toPaper( { text: "x", keyword: "cat food" } ); + expect( paper.getKeyword() ).toBe( "cat food" ); + } ); + + it( "prefers `keyphrase` over the deprecated `keyword` when both are supplied", function() { + const paper = toPaper( { text: "x", keyphrase: "preferred", keyword: "legacy" } ); + expect( paper.getKeyword() ).toBe( "preferred" ); } ); } ); diff --git a/packages/yoastseo/src/contract/paperDto.js b/packages/yoastseo/src/contract/paperDto.js index dc16b7cf995..5658dc5d0b6 100644 --- 
a/packages/yoastseo/src/contract/paperDto.js +++ b/packages/yoastseo/src/contract/paperDto.js @@ -14,13 +14,15 @@ import Paper from "../values/Paper.js"; * so a consumer that omits e.g. `keyphrase` simply receives no keyphrase * assessments, matching the engine's existing graceful-skip behaviour. * - * `.strict()` encodes the "reject unknown keys" option from the issue's open - * question on unknown-key policy (it catches typos like `keyword` vs `keyphrase`); - * relax to `.strip()`/`.passthrough()` if that decision changes. + * `.strict()` rejects unknown keys, catching typos (e.g. `keyphrse`). The one + * blessed exception is `keyword`: a deprecated alias for `keyphrase`, accepted so + * existing consumers (which speak the engine's `keyword`) can adopt the contract + * without renaming. Remove it at a future major once they migrate to `keyphrase`. */ export const paperDtoSchema = z.object( { text: z.string().describe( "The content to analyse (HTML or plain text)." ), keyphrase: z.string().optional().describe( "The focus keyphrase." ), + keyword: z.string().optional().describe( "Deprecated alias for `keyphrase`; prefer `keyphrase`." ), synonyms: z.string().optional().describe( "Comma-separated synonyms of the keyphrase." ), locale: z.string().optional().describe( "Locale, e.g. \"en_US\". The engine defaults to \"en_US\" when absent." ), description: z.string().optional().describe( "The SEO meta description." ), @@ -54,8 +56,11 @@ export const paperDtoSchema = z.object( { export function toPaper( dto ) { const data = paperDtoSchema.parse( dto ); + // `keyphrase` is canonical; `keyword` is a deprecated alias. Keyphrase wins when both are supplied. + const keyphrase = isUndefined( data.keyphrase ) ? 
data.keyword : data.keyphrase; + const attributes = { - keyword: data.keyphrase, + keyword: keyphrase, synonyms: data.synonyms, locale: data.locale, description: data.description, From b7d09cfe5d8e836b112bc58196c958c3d952f144 Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Mon, 8 Jun 2026 12:23:51 +0200 Subject: [PATCH 06/16] feat(contract): update PaperDTO to include open-ended customData field and remove siteUrl/domain placeholders --- .../yoastseo/spec/contract/paperDtoSpec.js | 23 +++++++++------- packages/yoastseo/src/contract/paperDto.js | 27 +++++++------------ 2 files changed, 23 insertions(+), 27 deletions(-) diff --git a/packages/yoastseo/spec/contract/paperDtoSpec.js b/packages/yoastseo/spec/contract/paperDtoSpec.js index 71e3c45cfa5..56fbabbd60f 100644 --- a/packages/yoastseo/spec/contract/paperDtoSpec.js +++ b/packages/yoastseo/spec/contract/paperDtoSpec.js @@ -51,17 +51,20 @@ describe( "the Paper input contract (PaperDTO)", function() { expect( paper.getDescription() ).toBe( "" ); } ); - it( "stashes siteUrl and domain in customData as a placeholder", function() { - const paper = toPaper( { - text: "x", - siteUrl: "https://example.com", - domain: "example.com", - } ); + it( "passes an open-ended customData object through unchanged", function() { + const customData = { hasGlobalIdentifier: false, productType: "variable", anything: [ 1, 2 ] }; + const paper = toPaper( { text: "x", customData } ); - expect( paper.getCustomData() ).toEqual( { - siteUrl: "https://example.com", - domain: "example.com", - } ); + expect( paper.getCustomData() ).toEqual( customData ); + } ); + + it( "rejects a non-object customData (shape is validated)", function() { + expect( () => toPaper( { text: "x", customData: "not an object" } ) ).toThrow(); + } ); + + it( "rejects siteUrl/domain for now (deferred to the competing-links refactor)", function() { + expect( () => toPaper( { text: "x", siteUrl: "https://example.com" } ) ).toThrow(); + expect( () => toPaper( { text: "x", 
domain: "example.com" } ) ).toThrow(); } ); it( "throws on a structurally invalid payload (wrong type)", function() { diff --git a/packages/yoastseo/src/contract/paperDto.js b/packages/yoastseo/src/contract/paperDto.js index 5658dc5d0b6..467d7fe49aa 100644 --- a/packages/yoastseo/src/contract/paperDto.js +++ b/packages/yoastseo/src/contract/paperDto.js @@ -1,4 +1,4 @@ -import { isEmpty, isUndefined } from "lodash"; +import { isUndefined } from "lodash"; import { z } from "zod"; import Paper from "../values/Paper.js"; @@ -33,8 +33,14 @@ export const paperDtoSchema = z.object( { textTitle: z.string().optional().describe( "The title of the text or article itself." ), date: z.string().optional().describe( "Publication date." ), writingDirection: z.enum( [ "LTR", "RTL" ] ).optional().describe( "Writing direction of the content." ), - siteUrl: z.string().optional().describe( "Full site URL including scheme, e.g. \"https://example.com\"." ), - domain: z.string().optional().describe( "Bare host without scheme, e.g. \"example.com\"." ), + // Open-ended extensibility bag (e.g. product identifiers/SKU data, read by the product assessments). + // Validated as an object only — its contents are intentionally unchecked, because typing the inner keys + // would couple the contract to platform-specific (product/Shopify) shapes. + customData: z.record( z.unknown() ).optional().describe( "Open-ended custom data; contents are not validated." ), + // `siteUrl` / `domain` are intentionally NOT in the contract yet: no consumer feeds them through Paper + // today and no assessment reads them. They belong to the competing-links assessment, which currently + // gets the site URL from context. Add them (full URL incl. scheme vs bare host — see #97) as part of + // that assessment's refactor, when there is a real reader to shape the semantics against. 
} ).strict(); /** @@ -71,21 +77,8 @@ export function toPaper( dto ) { textTitle: data.textTitle, date: data.date, writingDirection: data.writingDirection, + customData: data.customData, }; - // `siteUrl`/`domain` have no Paper attribute today — competing-links reads the - // site URL from WordPress context. Stash them in customData as a placeholder - // until that engine-side plumbing exists (lingo-other-tasks#634). - const customData = {}; - if ( ! isUndefined( data.siteUrl ) ) { - customData.siteUrl = data.siteUrl; - } - if ( ! isUndefined( data.domain ) ) { - customData.domain = data.domain; - } - if ( ! isEmpty( customData ) ) { - attributes.customData = customData; - } - return new Paper( data.text, attributes ); } From 79a8c00a7ce4c0ed2664c523b5decd019c7e38c9 Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Mon, 8 Jun 2026 12:49:34 +0200 Subject: [PATCH 07/16] feat(contract): add createToPaper function for consumer-defined input extensions --- .../yoastseo/spec/contract/paperDtoSpec.js | 28 ++++++- packages/yoastseo/src/contract/index.js | 2 +- packages/yoastseo/src/contract/paperDto.js | 82 +++++++++++++------ 3 files changed, 87 insertions(+), 25 deletions(-) diff --git a/packages/yoastseo/spec/contract/paperDtoSpec.js b/packages/yoastseo/spec/contract/paperDtoSpec.js index 56fbabbd60f..00384dc67bb 100644 --- a/packages/yoastseo/spec/contract/paperDtoSpec.js +++ b/packages/yoastseo/spec/contract/paperDtoSpec.js @@ -1,5 +1,6 @@ +import { z } from "zod"; import Paper from "../../src/values/Paper.js"; -import { paperDtoSchema, toPaper } from "../../src/contract"; +import { paperDtoSchema, toPaper, createToPaper } from "../../src/contract"; describe( "the Paper input contract (PaperDTO)", function() { describe( "toPaper", function() { @@ -91,4 +92,29 @@ describe( "the Paper input contract (PaperDTO)", function() { expect( paperDtoSchema.parse( { text: "hi" } ) ).toEqual( { text: "hi" } ); } ); } ); + + describe( "createToPaper (consumer extension)", function() 
{ + const extendedSchema = paperDtoSchema.extend( { customField: z.string() } ); + + it( "validates a consumer-defined field and passes it onto the Paper", function() { + const paper = createToPaper( extendedSchema )( { + text: "x", + keyphrase: "cat food", + customField: "consumer value", + } ); + + // Base mapping still applies. + expect( paper.getKeyword() ).toBe( "cat food" ); + // The extra field lands on the Paper's attributes for a custom assessment to read. + expect( paper._attributes.customField ).toBe( "consumer value" ); + } ); + + it( "type-checks the consumer-defined field", function() { + expect( () => createToPaper( extendedSchema )( { text: "x", customField: 123 } ) ).toThrow(); + } ); + + it( "still rejects genuinely unknown keys (strict is preserved through extend)", function() { + expect( () => createToPaper( extendedSchema )( { text: "x", customField: "v", bogus: 1 } ) ).toThrow(); + } ); + } ); } ); diff --git a/packages/yoastseo/src/contract/index.js b/packages/yoastseo/src/contract/index.js index 847d2a7f021..c40696f5097 100644 --- a/packages/yoastseo/src/contract/index.js +++ b/packages/yoastseo/src/contract/index.js @@ -1 +1 @@ -export { paperDtoSchema, toPaper } from "./paperDto.js"; +export { paperDtoSchema, toPaper, createToPaper } from "./paperDto.js"; diff --git a/packages/yoastseo/src/contract/paperDto.js b/packages/yoastseo/src/contract/paperDto.js index 467d7fe49aa..452b7977f3a 100644 --- a/packages/yoastseo/src/contract/paperDto.js +++ b/packages/yoastseo/src/contract/paperDto.js @@ -47,6 +47,61 @@ export const paperDtoSchema = z.object( { * @typedef {import("zod").infer} PaperDTO */ +/** + * The keys the base contract handles itself. Anything else a consumer adds via `paperDtoSchema.extend()` + * is treated as a pass-through extra by `createToPaper` and copied onto the Paper's attributes verbatim. + * Derived from the schema so it stays in sync automatically. 
+ * + * @type {Set} + */ +const BASE_KEYS = new Set( Object.keys( paperDtoSchema.shape ) ); + +/** + * Builds a `toPaper` mapper bound to a given schema, so consumers that register their own analyses/assessments can + * extend the contract with extra input fields and have those fields validated and passed through to Paper. + * + * Pass `paperDtoSchema.extend({ myField: z.string() })`: the extended schema validates the extra field + * (and, being a strict object, still rejects genuinely unknown keys), and any key not handled by the base + * contract is copied verbatim onto the Paper's attributes — where a consumer's custom assessment can read + * it via `paper._attributes.myField`. The base `keyphrase` -> `keyword` mapping and the rest of the neutral + * surface are applied exactly as in {@link toPaper}. + * + * @param {import("zod").ZodType} [schema] The schema to validate against. Defaults to the base contract. + * @returns {(dto: object) => Paper} A mapper that validates `dto` and returns the constructed Paper. + */ +export function createToPaper( schema = paperDtoSchema ) { + return function( dto ) { + const data = schema.parse( dto ); + + // `keyphrase` is canonical; `keyword` is a deprecated alias. Keyphrase wins when both are supplied. + const keyphrase = isUndefined( data.keyphrase ) ? data.keyword : data.keyphrase; + + const attributes = { + keyword: keyphrase, + synonyms: data.synonyms, + locale: data.locale, + description: data.description, + title: data.title, + slug: data.slug, + permalink: data.permalink, + titleWidth: data.titleWidth, + textTitle: data.textTitle, + date: data.date, + writingDirection: data.writingDirection, + customData: data.customData, + }; + + // Consumer-defined extra fields (validated by the extended schema) are passed through verbatim. + Object.keys( data ).forEach( ( key ) => { + if ( ! 
BASE_KEYS.has( key ) ) { + attributes[ key ] = data[ key ]; + } + } ); + + return new Paper( data.text, attributes ); + }; +} + /** * Validates a PaperDTO and maps it onto the engine's internal Paper. * @@ -56,29 +111,10 @@ export const paperDtoSchema = z.object( { * optional fields are left to Paper's own defaults, so missing inputs degrade * gracefully rather than throwing. * + * Consumers that need extra, validated input fields for their own assessments should build a mapper with + * {@link createToPaper} and an extended schema instead. + * * @param {PaperDTO} dto The serializable input contract. * @returns {Paper} The constructed Paper, ready for `assessor.assess( paper )`. */ -export function toPaper( dto ) { - const data = paperDtoSchema.parse( dto ); - - // `keyphrase` is canonical; `keyword` is a deprecated alias. Keyphrase wins when both are supplied. - const keyphrase = isUndefined( data.keyphrase ) ? data.keyword : data.keyphrase; - - const attributes = { - keyword: keyphrase, - synonyms: data.synonyms, - locale: data.locale, - description: data.description, - title: data.title, - slug: data.slug, - permalink: data.permalink, - titleWidth: data.titleWidth, - textTitle: data.textTitle, - date: data.date, - writingDirection: data.writingDirection, - customData: data.customData, - }; - - return new Paper( data.text, attributes ); -} +export const toPaper = createToPaper(); From d936fa534f1f5cc4bd8932da92cea9370d6949dc Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Mon, 8 Jun 2026 15:24:51 +0200 Subject: [PATCH 08/16] feat(contract): document PaperDTO as a serializable input contract for non-WordPress consumers --- packages/yoastseo/GLOSSARY.md | 22 ++++++++++++++++++++++ packages/yoastseo/README.md | 30 ++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+) diff --git a/packages/yoastseo/GLOSSARY.md b/packages/yoastseo/GLOSSARY.md index fe513b96e26..476ae274e7c 100644 --- a/packages/yoastseo/GLOSSARY.md +++ b/packages/yoastseo/GLOSSARY.md @@ 
-27,6 +27,28 @@ const paper = new Paper("

This is the main content

", { }); ``` +### PaperDTO +A documented, serializable, platform-neutral **input contract** for the engine, exposed via the opt-in `yoastseo/contract` entry. A non-WordPress consumer sends a `PaperDTO` (plain JSON) and the `toPaper` boundary validates it and constructs an internal [Paper](#paper). It is the *external* counterpart of `Paper`: where `Paper` is the engine's internal value object, `PaperDTO` is the stable shape consumers send. + +Key differences from `Paper`: +- Uses the canonical name **`keyphrase`** (mapped to the engine's `keyword`); `keyword` is accepted as a deprecated alias. +- Excludes WordPress-specific fields (`wpBlocks`, `shortcodes`, `isFrontPage`) — these stay on the WordPress side, not in the neutral contract. +- Authored in [zod](https://zod.dev); validates structure (wrong types / unknown keys throw) while leaving per-assessment fields optional (omitting one just skips that assessment). +- Extensible: consumers can `paperDtoSchema.extend({ … })` and build a mapper with `createToPaper` to validate custom fields for their own assessments. + +**Example:** +```javascript +import { toPaper } from "yoastseo/contract"; + +const paper = toPaper({ + text: "

This is the main content

", + keyphrase: "example", + description: "This is a meta description", + slug: "example-page", + locale: "en_US" +}); +``` + ### Assessment A single analysis unit that evaluates one specific aspect of content. Each assessment: - Has a specific purpose (e.g., the _keyword density_ assessment evaluates the number of keywords used in the content) diff --git a/packages/yoastseo/README.md b/packages/yoastseo/README.md index 6c78894e4fa..e8ad0617747 100644 --- a/packages/yoastseo/README.md +++ b/packages/yoastseo/README.md @@ -97,6 +97,36 @@ console.log( researcher.getResearch( "wordCountInText" ) ); There is a basic example of this setup [over here](https://github.com/Yoast/wordpress-seo/tree/trunk/apps/content-analysis-api). +### Serializable input contract (`yoastseo/contract`) + +Non-WordPress consumers (a web API, the Shopify app, the Google Docs extension, …) can send a documented, serializable input shape — a `PaperDTO` — instead of constructing a `Paper` by hand. The contract is a separate, opt-in entry point, so its validation dependency is only loaded by consumers that import it; the package root is unaffected. + +```js +import { toPaper } from "yoastseo/contract"; + +// `toPaper` validates the input and returns an engine `Paper`. +const paper = toPaper( { + text: "Text to analyze", + keyphrase: "analyze", + locale: "en_US", +} ); + +// `paper` can now be passed to `worker.analyze( paper )` or `assessor.assess( paper )`. +``` + +Notes: +- **Platform-neutral.** The contract covers the fields used by the analysis (`text`, `keyphrase`, `synonyms`, `locale`, `description`, `title`, `slug`, `permalink`, `titleWidth`, `textTitle`, `date`, `writingDirection`, and an open `customData` object). WordPress-specific fields (`wpBlocks`, `shortcodes`, `isFrontPage`) are intentionally **not** part of it. +- **`keyphrase` is the canonical field name.** `keyword` is accepted as a deprecated alias so existing consumers can adopt the contract without renaming. 
+- **Validation.** `toPaper` throws on structurally invalid input (wrong types, unknown keys). Omitting an optional field is fine — the assessments that need it are simply skipped, matching the engine's existing behaviour. +- **Extensible.** A consumer that registers its own assessments can validate extra fields by extending the schema: + ```js + import { z } from "zod"; + import { paperDtoSchema, createToPaper } from "yoastseo/contract"; + + const toPaper = createToPaper( paperDtoSchema.extend( { myField: z.string() } ) ); + const paper = toPaper( { text: "…", myField: "…" } ); // `myField` is validated and available on the Paper + ``` + ## Supported languages ### SEO analysis From 6115c62049c8bfb8a30ac50b83db0f095e02ed3d Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Mon, 8 Jun 2026 15:40:13 +0200 Subject: [PATCH 09/16] feat(contract): update PaperDTO to include optional, deprecated WordPress-transitional fields for analysis parity --- packages/yoastseo/GLOSSARY.md | 8 ++++---- packages/yoastseo/README.md | 2 +- .../yoastseo/spec/contract/paperDtoSpec.js | 19 +++++++++++++++++++ packages/yoastseo/src/contract/paperDto.js | 18 +++++++++++++++++- 4 files changed, 41 insertions(+), 6 deletions(-) diff --git a/packages/yoastseo/GLOSSARY.md b/packages/yoastseo/GLOSSARY.md index 476ae274e7c..d729fd2575e 100644 --- a/packages/yoastseo/GLOSSARY.md +++ b/packages/yoastseo/GLOSSARY.md @@ -28,11 +28,11 @@ const paper = new Paper("

This is the main content

", { ``` ### PaperDTO -A documented, serializable, platform-neutral **input contract** for the engine, exposed via the opt-in `yoastseo/contract` entry. A non-WordPress consumer sends a `PaperDTO` (plain JSON) and the `toPaper` boundary validates it and constructs an internal [Paper](#paper). It is the *external* counterpart of `Paper`: where `Paper` is the engine's internal value object, `PaperDTO` is the stable shape consumers send. +A documented, serializable **input contract** for the engine (neutral core + a few optional, deprecated WordPress-transitional fields), exposed via the opt-in `yoastseo/contract` entry. A non-WordPress consumer sends a `PaperDTO` (plain JSON) and the `toPaper` boundary validates it and constructs an internal [Paper](#paper). It is the *external* counterpart of `Paper`: where `Paper` is the engine's internal value object, `PaperDTO` is the stable shape consumers send. Key differences from `Paper`: - Uses the canonical name **`keyphrase`** (mapped to the engine's `keyword`); `keyword` is accepted as a deprecated alias. -- Excludes WordPress-specific fields (`wpBlocks`, `shortcodes`, `isFrontPage`) — these stay on the WordPress side, not in the neutral contract. +- Carries the WordPress-transitional fields (`wpBlocks`, `shortcodes`, `isFrontPage`) as **optional, deprecated** — they are real analysis inputs that change WordPress scores, so a remote/API analysis needs them for result parity. - Authored in [zod](https://zod.dev); validates structure (wrong types / unknown keys throw) while leaving per-assessment fields optional (omitting one just skips that assessment). - Extensible: consumers can `paperDtoSchema.extend({ … })` and build a mapper with `createToPaper` to validate custom fields for their own assessments. 
@@ -81,7 +81,7 @@ Types of assessors include: - ReadabilityAssessor: Analyzes text readability - CornerStoneAssessor: Applies stricter rules for important content -The diagram below shows an example hierarchy of assessors and assessments. +The diagram below shows an example hierarchy of assessors and assessments. ```mermaid graph TD @@ -201,4 +201,4 @@ Alternative words or phrases with similar meaning to the keyphrase. Used to: ``` Keyphrase: "car" Synonyms: "automobile", "vehicle", "motor vehicle" -``` \ No newline at end of file +``` diff --git a/packages/yoastseo/README.md b/packages/yoastseo/README.md index e8ad0617747..5296eb1deab 100644 --- a/packages/yoastseo/README.md +++ b/packages/yoastseo/README.md @@ -115,7 +115,7 @@ const paper = toPaper( { ``` Notes: -- **Platform-neutral.** The contract covers the fields used by the analysis (`text`, `keyphrase`, `synonyms`, `locale`, `description`, `title`, `slug`, `permalink`, `titleWidth`, `textTitle`, `date`, `writingDirection`, and an open `customData` object). WordPress-specific fields (`wpBlocks`, `shortcodes`, `isFrontPage`) are intentionally **not** part of it. +- **Covers the analysis inputs.** The neutral core is `text`, `keyphrase`, `synonyms`, `locale`, `description`, `title`, `slug`, `permalink`, `titleWidth`, `textTitle`, `date`, `writingDirection`, and an open `customData` object. The contract also carries optional, **deprecated** WordPress-transitional fields (`wpBlocks`, `shortcodes`, `isFrontPage`): they are real analysis inputs that change WordPress scores, so a remote/API analysis needs them to reproduce in-browser results. They are marked deprecated. Non-WordPress consumers simply omit them. - **`keyphrase` is the canonical field name.** `keyword` is accepted as a deprecated alias so existing consumers can adopt the contract without renaming. - **Validation.** `toPaper` throws on structurally invalid input (wrong types, unknown keys). 
Omitting an optional field is fine — the assessments that need it are simply skipped, matching the engine's existing behaviour. - **Extensible.** A consumer that registers its own assessments can validate extra fields by extending the schema: diff --git a/packages/yoastseo/spec/contract/paperDtoSpec.js b/packages/yoastseo/spec/contract/paperDtoSpec.js index 00384dc67bb..859bf25ae1f 100644 --- a/packages/yoastseo/spec/contract/paperDtoSpec.js +++ b/packages/yoastseo/spec/contract/paperDtoSpec.js @@ -63,6 +63,25 @@ describe( "the Paper input contract (PaperDTO)", function() { expect( () => toPaper( { text: "x", customData: "not an object" } ) ).toThrow(); } ); + it( "accepts the deprecated WP-transitional fields and maps them onto the Paper", function() { + const wpBlocks = [ { name: "core/paragraph" } ]; + const paper = toPaper( { + text: "x", + wpBlocks, + shortcodes: [ "gallery", "caption" ], + isFrontPage: true, + } ); + + expect( paper._attributes.wpBlocks ).toEqual( wpBlocks ); + expect( paper._attributes.shortcodes ).toEqual( [ "gallery", "caption" ] ); + expect( paper.isFrontPage() ).toBe( true ); + } ); + + it( "type-checks the WP-transitional fields (e.g. 
shortcodes must be strings)", function() { + expect( () => toPaper( { text: "x", shortcodes: "not-an-array" } ) ).toThrow(); + expect( () => toPaper( { text: "x", isFrontPage: "yes" } ) ).toThrow(); + } ); + it( "rejects siteUrl/domain for now (deferred to the competing-links refactor)", function() { expect( () => toPaper( { text: "x", siteUrl: "https://example.com" } ) ).toThrow(); expect( () => toPaper( { text: "x", domain: "example.com" } ) ).toThrow(); diff --git a/packages/yoastseo/src/contract/paperDto.js b/packages/yoastseo/src/contract/paperDto.js index 452b7977f3a..d99bbf8a10d 100644 --- a/packages/yoastseo/src/contract/paperDto.js +++ b/packages/yoastseo/src/contract/paperDto.js @@ -3,7 +3,14 @@ import { z } from "zod"; import Paper from "../values/Paper.js"; /** - * Serializable, platform-neutral input contract for the analysis engine. + * Serializable input contract for the analysis engine. + * + * The core surface is platform-neutral, but the contract also carries a few **optional, deprecated** + * WordPress-transitional fields (`wpBlocks`, `shortcodes`, `isFrontPage`). They're included because they + * are real analysis *inputs* — they change the resulting scores for WordPress content (e.g. shortcodes are + * stripped before word-counting/keyphrase matching; blocks drive tree construction). So a remote/API + * analysis of a WordPress page can only reproduce the in-browser scores if it can send them. They are + * marked deprecated: #264 introduces a neutral structured-content representation that will replace them. * * Proof of concept for lingo-other-tasks#634. zod is the source of truth; a JSON * Schema can be generated from it for non-JS / wire consumers. @@ -37,6 +44,12 @@ export const paperDtoSchema = z.object( { // Validated as an object only — its contents are intentionally unchecked, because typing the inner keys // would couple the contract to platform-specific (product/Shopify) shapes. 
customData: z.record( z.unknown() ).optional().describe( "Open-ended custom data; contents are not validated." ), + // WordPress-transitional fields — optional and DEPRECATED. They are real analysis inputs (they change + // WP scores), so they're in the contract for browser/remote result parity; #264's neutral structured + // content will replace them. Kept optional so non-WP consumers simply omit them. + wpBlocks: z.array( z.unknown() ).optional().describe( "Deprecated (WP-transitional, see #264): WordPress block-editor blocks." ), + shortcodes: z.array( z.string() ).optional().describe( "Deprecated (WP-transitional, see #264): shortcode tags present in the text." ), + isFrontPage: z.boolean().optional().describe( "Deprecated (WP-transitional, see #264): whether the page is the site front page." ), // `siteUrl` / `domain` are intentionally NOT in the contract yet: no consumer feeds them through Paper // today and no assessment reads them. They belong to the competing-links assessment, which currently // gets the site URL from context. Add them (full URL incl. scheme vs bare host — see #97) as part of @@ -89,6 +102,9 @@ export function createToPaper( schema = paperDtoSchema ) { date: data.date, writingDirection: data.writingDirection, customData: data.customData, + wpBlocks: data.wpBlocks, + shortcodes: data.shortcodes, + isFrontPage: data.isFrontPage, }; // Consumer-defined extra fields (validated by the extended schema) are passed through verbatim. 
From 9031a1d98092e4ebcdeb57965122f09ea441f80d Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Tue, 9 Jun 2026 08:03:01 +0200 Subject: [PATCH 10/16] Update doc --- packages/yoastseo/src/contract/paperDto.js | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/packages/yoastseo/src/contract/paperDto.js b/packages/yoastseo/src/contract/paperDto.js index d99bbf8a10d..6025944c382 100644 --- a/packages/yoastseo/src/contract/paperDto.js +++ b/packages/yoastseo/src/contract/paperDto.js @@ -9,11 +9,9 @@ import Paper from "../values/Paper.js"; * WordPress-transitional fields (`wpBlocks`, `shortcodes`, `isFrontPage`). They're included because they * are real analysis *inputs* — they change the resulting scores for WordPress content (e.g. shortcodes are * stripped before word-counting/keyphrase matching; blocks drive tree construction). So a remote/API - * analysis of a WordPress page can only reproduce the in-browser scores if it can send them. They are - * marked deprecated: #264 introduces a neutral structured-content representation that will replace them. - * - * Proof of concept for lingo-other-tasks#634. zod is the source of truth; a JSON - * Schema can be generated from it for non-JS / wire consumers. + * analysis of a WordPress page can only reproduce the in-browser scores if it can send them. + * They are marked deprecated as they will be removed once the engine's structured content (blocks, shortcodes) + * is fully neutral and optional, and the front page gets a proper context-aware assessment. * * Two validation tiers (see the issue): structural validity is enforced here — * wrong types, malformed payloads, and unknown keys throw at the boundary. Per @@ -21,10 +19,10 @@ import Paper from "../values/Paper.js"; * so a consumer that omits e.g. `keyphrase` simply receives no keyphrase * assessments, matching the engine's existing graceful-skip behaviour. * - * `.strict()` rejects unknown keys, catching typos (e.g. `keyphrse`). 
The one - * blessed exception is `keyword`: a deprecated alias for `keyphrase`, accepted so + * `.strict()` rejects unknown keys, catching typos (e.g. `keyphrse`). + * The one blessed exception is `keyword`: a deprecated alias for `keyphrase`, accepted so * existing consumers (which speak the engine's `keyword`) can adopt the contract - * without renaming. Remove it at a future major once they migrate to `keyphrase`. + * without renaming. They will be removed at a future major once they migrate to `keyphrase`. */ export const paperDtoSchema = z.object( { text: z.string().describe( "The content to analyse (HTML or plain text)." ), @@ -123,14 +121,14 @@ export function createToPaper( schema = paperDtoSchema ) { * * This is the single place that knows how contract fields land on Paper attributes * (notably `keyphrase` -> `keyword`); the engine, assessors, and researches are - * untouched. Throws a `ZodError` when the payload is structurally invalid. Absent - * optional fields are left to Paper's own defaults, so missing inputs degrade + * untouched. Throws a `ZodError` when the payload is structurally invalid. + * Absent optional fields are left to Paper's own defaults, so missing inputs degrade * gracefully rather than throwing. * * Consumers that need extra, validated input fields for their own assessments should build a mapper with * {@link createToPaper} and an extended schema instead. * * @param {PaperDTO} dto The serializable input contract. - * @returns {Paper} The constructed Paper, ready for `assessor.assess( paper )`. + * @returns {Paper} The constructed Paper. 
*/ export const toPaper = createToPaper(); From 3bfdf41048617dc24c818c7ce823e8f3a6857755 Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Tue, 9 Jun 2026 09:14:03 +0200 Subject: [PATCH 11/16] refactor(paper): replace Paper instantiation with toPaper contract and improve documentation --- apps/content-analysis-webworker/src/index.js | 8 ++++---- packages/yoastseo/src/contract/paperDto.js | 12 ++++-------- 2 files changed, 8 insertions(+), 12 deletions(-) diff --git a/apps/content-analysis-webworker/src/index.js b/apps/content-analysis-webworker/src/index.js index 80d5c42ae49..7f2b0e0b8f5 100644 --- a/apps/content-analysis-webworker/src/index.js +++ b/apps/content-analysis-webworker/src/index.js @@ -1,4 +1,5 @@ -import { AnalysisWorkerWrapper, Paper, interpreters } from "yoastseo"; +import { AnalysisWorkerWrapper, interpreters } from "yoastseo"; +import { toPaper } from "yoastseo/contract"; const loadWebWorker = ( language ) => { const workerUnwrapped = new Worker( new URL("./worker.js", import.meta.url) ); @@ -40,9 +41,8 @@ document.addEventListener("DOMContentLoaded", function(event) { logLevel: "TRACE", // Optional, see https://github.com/pimterry/loglevel#documentation } ).then( () => { // The worker has been configured, we can now analyze a Paper. - const paper = new Paper( paperText, { - keyword: keyphrase, - } ); + // Build the Paper through the serializable input contract (`yoastseo/contract`). + const paper = toPaper( { text: paperText, keyphrase } ); return worker.analyze( paper ); } ).then( ( results ) => { diff --git a/packages/yoastseo/src/contract/paperDto.js b/packages/yoastseo/src/contract/paperDto.js index 6025944c382..344ca7a94b1 100644 --- a/packages/yoastseo/src/contract/paperDto.js +++ b/packages/yoastseo/src/contract/paperDto.js @@ -14,9 +14,9 @@ import Paper from "../values/Paper.js"; * is fully neutral and optional, and the front page gets a proper context-aware assessment. 
* * Two validation tiers (see the issue): structural validity is enforced here — - * wrong types, malformed payloads, and unknown keys throw at the boundary. Per - * assessment field needs are NOT enforced: every field except `text` is optional, - * so a consumer that omits e.g. `keyphrase` simply receives no keyphrase + * wrong types, malformed payloads, and unknown keys throw at the boundary. + * Per-assessment field needs are NOT enforced: every field except `text` is optional, + * so a consumer that omits, e.g. `keyphrase` simply receives no keyphrase * assessments, matching the engine's existing graceful-skip behaviour. * * `.strict()` rejects unknown keys, catching typos (e.g. `keyphrse`). @@ -39,7 +39,7 @@ export const paperDtoSchema = z.object( { date: z.string().optional().describe( "Publication date." ), writingDirection: z.enum( [ "LTR", "RTL" ] ).optional().describe( "Writing direction of the content." ), // Open-ended extensibility bag (e.g. product identifiers/SKU data, read by the product assessments). - // Validated as an object only — its contents are intentionally unchecked, because typing the inner keys + // Validated as an object only — its contents are intentionally unchecked because typing the inner keys // would couple the contract to platform-specific (product/Shopify) shapes. customData: z.record( z.unknown() ).optional().describe( "Open-ended custom data; contents are not validated." ), // WordPress-transitional fields — optional and DEPRECATED. They are real analysis inputs (they change @@ -48,10 +48,6 @@ export const paperDtoSchema = z.object( { wpBlocks: z.array( z.unknown() ).optional().describe( "Deprecated (WP-transitional, see #264): WordPress block-editor blocks." ), shortcodes: z.array( z.string() ).optional().describe( "Deprecated (WP-transitional, see #264): shortcode tags present in the text." ), isFrontPage: z.boolean().optional().describe( "Deprecated (WP-transitional, see #264): whether the page is the site front page." 
), - // `siteUrl` / `domain` are intentionally NOT in the contract yet: no consumer feeds them through Paper - // today and no assessment reads them. They belong to the competing-links assessment, which currently - // gets the site URL from context. Add them (full URL incl. scheme vs bare host — see #97) as part of - // that assessment's refactor, when there is a real reader to shape the semantics against. } ).strict(); /** From 3b42d5190d4443268ac9ae6a570614c4f6c639a8 Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Tue, 9 Jun 2026 10:24:43 +0200 Subject: [PATCH 12/16] feat(analyze): enforce keyphrase requirement for keyphrase analysis endpoints --- apps/content-analysis-api/routes/analyze.js | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/apps/content-analysis-api/routes/analyze.js b/apps/content-analysis-api/routes/analyze.js index 910f833e0fe..1335e1a65e7 100644 --- a/apps/content-analysis-api/routes/analyze.js +++ b/apps/content-analysis-api/routes/analyze.js @@ -155,6 +155,10 @@ module.exports = function( app ) { if ( ! paper ) { return; } + // This endpoint is keyphrase analysis, so a keyphrase is required (the contract leaves it optional). + if ( ! paper.hasKeyword() ) { + return response.status( 400 ).json( { error: "A keyphrase is required" } ); + } const language = request.body.locale || "en"; const researcher = getResearcher( language ); const assessor = new KeyphraseAssessor( researcher ); @@ -167,6 +171,10 @@ module.exports = function( app ) { if ( ! paper ) { return; } + // This endpoint is keyphrase analysis, so a keyphrase is required (the contract leaves it optional). + if ( ! 
paper.hasKeyword() ) { + return response.status( 400 ).json( { error: "A keyphrase is required" } ); + } const language = request.body.locale || "en"; const researcher = getResearcher( language ); const assessor = new KeyphraseUseAssessor( researcher ); From 594d6c21c921d84363d08ef52b4dc69270e74429 Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Tue, 9 Jun 2026 10:52:42 +0200 Subject: [PATCH 13/16] refactor(contract): drop lodash for the keyphrase alias, document strict extension Addresses PR review: replace lodash.isUndefined with nullish coalescing for the keyphrase/keyword alias (no extra dependency, avoids the no-undefined rule), and note in createToPaper's JSDoc that .extend() preserves .strict() so open-ended extra keys need .passthrough(). Co-Authored-By: Claude Opus 4.8 --- packages/yoastseo/src/contract/paperDto.js | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/packages/yoastseo/src/contract/paperDto.js b/packages/yoastseo/src/contract/paperDto.js index 344ca7a94b1..04fc81c600b 100644 --- a/packages/yoastseo/src/contract/paperDto.js +++ b/packages/yoastseo/src/contract/paperDto.js @@ -1,4 +1,3 @@ -import { isUndefined } from "lodash"; import { z } from "zod"; import Paper from "../values/Paper.js"; @@ -43,11 +42,11 @@ export const paperDtoSchema = z.object( { // would couple the contract to platform-specific (product/Shopify) shapes. customData: z.record( z.unknown() ).optional().describe( "Open-ended custom data; contents are not validated." ), // WordPress-transitional fields — optional and DEPRECATED. They are real analysis inputs (they change - // WP scores), so they're in the contract for browser/remote result parity; #264's neutral structured - // content will replace them. Kept optional so non-WP consumers simply omit them. - wpBlocks: z.array( z.unknown() ).optional().describe( "Deprecated (WP-transitional, see #264): WordPress block-editor blocks." 
), - shortcodes: z.array( z.string() ).optional().describe( "Deprecated (WP-transitional, see #264): shortcode tags present in the text." ), - isFrontPage: z.boolean().optional().describe( "Deprecated (WP-transitional, see #264): whether the page is the site front page." ), + // WP scores), so they're in the contract for browser/remote result parity. + // Kept optional so non-WP consumers simply omit them. + wpBlocks: z.array( z.unknown() ).optional().describe( "Deprecated (WP-transitional): WordPress block-editor blocks." ), + shortcodes: z.array( z.string() ).optional().describe( "Deprecated (WP-transitional): shortcode tags present in the text." ), + isFrontPage: z.boolean().optional().describe( "Deprecated (WP-transitional: whether the page is the site front page." ), } ).strict(); /** @@ -73,6 +72,11 @@ const BASE_KEYS = new Set( Object.keys( paperDtoSchema.shape ) ); * it via `paper._attributes.myField`. The base `keyphrase` -> `keyword` mapping and the rest of the neutral * surface are applied exactly as in {@link toPaper}. * + * Note: `.extend()` preserves the base schema's `.strict()`, so an extended schema still rejects keys it + * doesn't declare. A consumer that genuinely needs open-ended, undeclared extra keys should call + * `.passthrough()` before `.extend()` (most cases are better served by declaring the fields, or by the + * open `customData` object). + * * @param {import("zod").ZodType} [schema] The schema to validate against. Defaults to the base contract. * @returns {(dto: object) => Paper} A mapper that validates `dto` and returns the constructed Paper. */ @@ -81,7 +85,7 @@ export function createToPaper( schema = paperDtoSchema ) { const data = schema.parse( dto ); // `keyphrase` is canonical; `keyword` is a deprecated alias. Keyphrase wins when both are supplied. - const keyphrase = isUndefined( data.keyphrase ) ? data.keyword : data.keyphrase; + const keyphrase = data.keyphrase ?? 
data.keyword; const attributes = { keyword: keyphrase, From 79060165e97b78bb11e8c1702478fecd0842f5d2 Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Tue, 9 Jun 2026 10:54:50 +0200 Subject: [PATCH 14/16] docs(contract): fix isFrontPage describe text (unmatched paren) Co-Authored-By: Claude Opus 4.8 --- packages/yoastseo/src/contract/paperDto.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/yoastseo/src/contract/paperDto.js b/packages/yoastseo/src/contract/paperDto.js index 04fc81c600b..fec4710e70a 100644 --- a/packages/yoastseo/src/contract/paperDto.js +++ b/packages/yoastseo/src/contract/paperDto.js @@ -46,7 +46,7 @@ export const paperDtoSchema = z.object( { // Kept optional so non-WP consumers simply omit them. wpBlocks: z.array( z.unknown() ).optional().describe( "Deprecated (WP-transitional): WordPress block-editor blocks." ), shortcodes: z.array( z.string() ).optional().describe( "Deprecated (WP-transitional): shortcode tags present in the text." ), - isFrontPage: z.boolean().optional().describe( "Deprecated (WP-transitional: whether the page is the site front page." ), + isFrontPage: z.boolean().optional().describe( "Deprecated (WP-transitional): whether the page is the site front page." 
), } ).strict(); /** From 5bbc12910f42e811deda314cb88f2d9c8546806b Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Mon, 15 Jun 2026 15:53:03 +0200 Subject: [PATCH 15/16] refactor(contract): update customData handling and improve documentation for consumer-defined assessments --- packages/yoastseo/GLOSSARY.md | 2 +- packages/yoastseo/README.md | 15 +-- .../yoastseo/spec/contract/paperDtoSpec.js | 32 +----- packages/yoastseo/src/contract/index.js | 2 +- packages/yoastseo/src/contract/paperDto.js | 101 ++++++------------ 5 files changed, 39 insertions(+), 113 deletions(-) diff --git a/packages/yoastseo/GLOSSARY.md b/packages/yoastseo/GLOSSARY.md index d729fd2575e..adaa747b033 100644 --- a/packages/yoastseo/GLOSSARY.md +++ b/packages/yoastseo/GLOSSARY.md @@ -34,7 +34,7 @@ Key differences from `Paper`: - Uses the canonical name **`keyphrase`** (mapped to the engine's `keyword`); `keyword` is accepted as a deprecated alias. - Carries the WordPress-transitional fields (`wpBlocks`, `shortcodes`, `isFrontPage`) as **optional, deprecated** — they are real analysis inputs that change WordPress scores, so a remote/API analysis needs them for result parity. - Authored in [zod](https://zod.dev); validates structure (wrong types / unknown keys throw) while leaving per-assessment fields optional (omitting one just skips that assessment). -- Extensible: consumers can `paperDtoSchema.extend({ … })` and build a mapper with `createToPaper` to validate custom fields for their own assessments. +- Consumers that register their own analysis (e.g., assessments) pass those inputs through the opaque `customData` object, whose contents are not validated. 
**Example:** ```javascript diff --git a/packages/yoastseo/README.md b/packages/yoastseo/README.md index ae3013370ad..3b740addcf3 100644 --- a/packages/yoastseo/README.md +++ b/packages/yoastseo/README.md @@ -145,17 +145,10 @@ const paper = toPaper( { ``` Notes: -- **Covers the analysis inputs.** The neutral core is `text`, `keyphrase`, `synonyms`, `locale`, `description`, `title`, `slug`, `permalink`, `titleWidth`, `textTitle`, `date`, `writingDirection`, and an open `customData` object. The contract also carries optional, **deprecated** WordPress-transitional fields (`wpBlocks`, `shortcodes`, `isFrontPage`): they are real analysis inputs that change WordPress scores, so a remote/API analysis needs them to reproduce in-browser results. They are marked deprecated. Non-WordPress consumers simply omit them. -- **`keyphrase` is the canonical field name.** `keyword` is accepted as a deprecated alias so existing consumers can adopt the contract without renaming. +- **Covers the analysis inputs.** The neutral core is `text`, `keyphrase`, `synonyms`, `locale`, `description`, `title`, `slug`, `permalink`, `titleWidth`, `textTitle`, `date`, `writingDirection`, and an opaque `customData` object. The contract also carries optional, **deprecated** WordPress-transitional fields (`wpBlocks`, `shortcodes`, `isFrontPage`): they are real analysis inputs that change WordPress scores, so a remote/API analysis needs them to reproduce in-browser results. They are marked deprecated. Non-WordPress consumers simply omit them. +- **`keyphrase` is the canonical field name.** `keyword` is accepted as a deprecated alias, so existing consumers can adopt the contract without renaming. - **Validation.** `toPaper` throws on structurally invalid input (wrong types, unknown keys). Omitting an optional field is fine — the assessments that need it are simply skipped, matching the engine's existing behaviour. 
-- **Extensible.** A consumer that registers its own assessments can validate extra fields by extending the schema: - ```js - import { z } from "zod"; - import { paperDtoSchema, createToPaper } from "yoastseo/contract"; - - const toPaper = createToPaper( paperDtoSchema.extend( { myField: z.string() } ) ); - const paper = toPaper( { text: "…", myField: "…" } ); // `myField` is validated and available on the Paper - ``` +- **Custom analysis.** A consumer that registers its own analysis (e.g., assessments or a researcher) can pass the inputs for that analysis through the opaque `customData` object. Its contents are not validated, so the consumer's own analysis is responsible for reading and validating them. ## Supported languages @@ -201,7 +194,7 @@ Hebrew, Farsi, Turkish, Norwegian, Czech, Slovak, Greek, Japanese 4 The Passive voice check for Japanese is not implemented since the structure is the same as the potential form and can additionally be used for an honorific purpose. Identifying whether a verb is in its passive, honorific or potential form is problematic without contextual information. 
The following readability assessments are available for all languages: -- sentence length (with a default upper limit of 20 words, see1 above ) +- sentence length (with a default upper limit of 20 words, see1 above) - paragraph length - subheading distribution - text presence diff --git a/packages/yoastseo/spec/contract/paperDtoSpec.js b/packages/yoastseo/spec/contract/paperDtoSpec.js index 859bf25ae1f..6a6704d7997 100644 --- a/packages/yoastseo/spec/contract/paperDtoSpec.js +++ b/packages/yoastseo/spec/contract/paperDtoSpec.js @@ -1,6 +1,5 @@ -import { z } from "zod"; import Paper from "../../src/values/Paper.js"; -import { paperDtoSchema, toPaper, createToPaper } from "../../src/contract"; +import { paperDtoSchema, toPaper } from "../../src/contract"; describe( "the Paper input contract (PaperDTO)", function() { describe( "toPaper", function() { @@ -52,8 +51,8 @@ describe( "the Paper input contract (PaperDTO)", function() { expect( paper.getDescription() ).toBe( "" ); } ); - it( "passes an open-ended customData object through unchanged", function() { - const customData = { hasGlobalIdentifier: false, productType: "variable", anything: [ 1, 2 ] }; + it( "passes an opaque customData object through unchanged for consumer-defined assessments", function() { + const customData = { someFlag: false, someLabel: "consumer value", anything: [ 1, 2 ] }; const paper = toPaper( { text: "x", customData } ); expect( paper.getCustomData() ).toEqual( customData ); @@ -111,29 +110,4 @@ describe( "the Paper input contract (PaperDTO)", function() { expect( paperDtoSchema.parse( { text: "hi" } ) ).toEqual( { text: "hi" } ); } ); } ); - - describe( "createToPaper (consumer extension)", function() { - const extendedSchema = paperDtoSchema.extend( { customField: z.string() } ); - - it( "validates a consumer-defined field and passes it onto the Paper", function() { - const paper = createToPaper( extendedSchema )( { - text: "x", - keyphrase: "cat food", - customField: "consumer value", - } 
); - - // Base mapping still applies. - expect( paper.getKeyword() ).toBe( "cat food" ); - // The extra field lands on the Paper's attributes for a custom assessment to read. - expect( paper._attributes.customField ).toBe( "consumer value" ); - } ); - - it( "type-checks the consumer-defined field", function() { - expect( () => createToPaper( extendedSchema )( { text: "x", customField: 123 } ) ).toThrow(); - } ); - - it( "still rejects genuinely unknown keys (strict is preserved through extend)", function() { - expect( () => createToPaper( extendedSchema )( { text: "x", customField: "v", bogus: 1 } ) ).toThrow(); - } ); - } ); } ); diff --git a/packages/yoastseo/src/contract/index.js b/packages/yoastseo/src/contract/index.js index c40696f5097..847d2a7f021 100644 --- a/packages/yoastseo/src/contract/index.js +++ b/packages/yoastseo/src/contract/index.js @@ -1 +1 @@ -export { paperDtoSchema, toPaper, createToPaper } from "./paperDto.js"; +export { paperDtoSchema, toPaper } from "./paperDto.js"; diff --git a/packages/yoastseo/src/contract/paperDto.js b/packages/yoastseo/src/contract/paperDto.js index fec4710e70a..64a19061012 100644 --- a/packages/yoastseo/src/contract/paperDto.js +++ b/packages/yoastseo/src/contract/paperDto.js @@ -37,10 +37,10 @@ export const paperDtoSchema = z.object( { textTitle: z.string().optional().describe( "The title of the text or article itself." ), date: z.string().optional().describe( "Publication date." ), writingDirection: z.enum( [ "LTR", "RTL" ] ).optional().describe( "Writing direction of the content." ), - // Open-ended extensibility bag (e.g. product identifiers/SKU data, read by the product assessments). - // Validated as an object only — its contents are intentionally unchecked because typing the inner keys - // would couple the contract to platform-specific (product/Shopify) shapes. - customData: z.record( z.unknown() ).optional().describe( "Open-ended custom data; contents are not validated." 
), + // Opaque data bag for consumers that register their own custom analysis, e.g., assessments. Validated as an object only — + // its contents are intentionally unchecked because typing the inner keys would couple the contract to + // consumer-specific shapes. + customData: z.record( z.unknown() ).optional().describe( "Opaque data for consumer-defined custom assessments; contents are not validated." ), // WordPress-transitional fields — optional and DEPRECATED. They are real analysis inputs (they change // WP scores), so they're in the contract for browser/remote result parity. // Kept optional so non-WP consumers simply omit them. @@ -53,69 +53,6 @@ export const paperDtoSchema = z.object( { * @typedef {import("zod").infer} PaperDTO */ -/** - * The keys the base contract handles itself. Anything else a consumer adds via `paperDtoSchema.extend()` - * is treated as a pass-through extra by `createToPaper` and copied onto the Paper's attributes verbatim. - * Derived from the schema so it stays in sync automatically. - * - * @type {Set} - */ -const BASE_KEYS = new Set( Object.keys( paperDtoSchema.shape ) ); - -/** - * Builds a `toPaper` mapper bound to a given schema, so consumers that register their own analyses/assessments can - * extend the contract with extra input fields and have those fields validated and passed through to Paper. - * - * Pass `paperDtoSchema.extend({ myField: z.string() })`: the extended schema validates the extra field - * (and, being a strict object, still rejects genuinely unknown keys), and any key not handled by the base - * contract is copied verbatim onto the Paper's attributes — where a consumer's custom assessment can read - * it via `paper._attributes.myField`. The base `keyphrase` -> `keyword` mapping and the rest of the neutral - * surface are applied exactly as in {@link toPaper}. - * - * Note: `.extend()` preserves the base schema's `.strict()`, so an extended schema still rejects keys it - * doesn't declare. 
A consumer that genuinely needs open-ended, undeclared extra keys should call - * `.passthrough()` before `.extend()` (most cases are better served by declaring the fields, or by the - * open `customData` object). - * - * @param {import("zod").ZodType} [schema] The schema to validate against. Defaults to the base contract. - * @returns {(dto: object) => Paper} A mapper that validates `dto` and returns the constructed Paper. - */ -export function createToPaper( schema = paperDtoSchema ) { - return function( dto ) { - const data = schema.parse( dto ); - - // `keyphrase` is canonical; `keyword` is a deprecated alias. Keyphrase wins when both are supplied. - const keyphrase = data.keyphrase ?? data.keyword; - - const attributes = { - keyword: keyphrase, - synonyms: data.synonyms, - locale: data.locale, - description: data.description, - title: data.title, - slug: data.slug, - permalink: data.permalink, - titleWidth: data.titleWidth, - textTitle: data.textTitle, - date: data.date, - writingDirection: data.writingDirection, - customData: data.customData, - wpBlocks: data.wpBlocks, - shortcodes: data.shortcodes, - isFrontPage: data.isFrontPage, - }; - - // Consumer-defined extra fields (validated by the extended schema) are passed through verbatim. - Object.keys( data ).forEach( ( key ) => { - if ( ! BASE_KEYS.has( key ) ) { - attributes[ key ] = data[ key ]; - } - } ); - - return new Paper( data.text, attributes ); - }; -} - /** * Validates a PaperDTO and maps it onto the engine's internal Paper. * @@ -125,10 +62,32 @@ export function createToPaper( schema = paperDtoSchema ) { * Absent optional fields are left to Paper's own defaults, so missing inputs degrade * gracefully rather than throwing. * - * Consumers that need extra, validated input fields for their own assessments should build a mapper with - * {@link createToPaper} and an extended schema instead. - * * @param {PaperDTO} dto The serializable input contract. * @returns {Paper} The constructed Paper. 
*/ -export const toPaper = createToPaper(); +export function toPaper( dto ) { + const data = paperDtoSchema.parse( dto ); + + // `keyphrase` is canonical; `keyword` is a deprecated alias. Keyphrase wins when both are supplied. + const keyphrase = data.keyphrase ?? data.keyword; + + const attributes = { + keyword: keyphrase, + synonyms: data.synonyms, + locale: data.locale, + description: data.description, + title: data.title, + slug: data.slug, + permalink: data.permalink, + titleWidth: data.titleWidth, + textTitle: data.textTitle, + date: data.date, + writingDirection: data.writingDirection, + customData: data.customData, + wpBlocks: data.wpBlocks, + shortcodes: data.shortcodes, + isFrontPage: data.isFrontPage, + }; + + return new Paper( data.text, attributes ); +} From deca983e172cde4179f9726cbfb9598cd1c04d54 Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Wed, 24 Jun 2026 15:46:32 +0200 Subject: [PATCH 16/16] refactor(analyze): replace locale handling with paperLanguage function for consistency --- .../helpers/paper-language.js | 13 ++++++++ apps/content-analysis-api/routes/analyze.js | 33 ++++++++++--------- apps/content-analysis-api/routes/research.js | 11 ++++--- packages/yoastseo/package.json | 2 +- packages/yoastseo/src/contract/paperDto.js | 8 ++++- 5 files changed, 45 insertions(+), 22 deletions(-) create mode 100644 apps/content-analysis-api/helpers/paper-language.js diff --git a/apps/content-analysis-api/helpers/paper-language.js b/apps/content-analysis-api/helpers/paper-language.js new file mode 100644 index 00000000000..d37727efdca --- /dev/null +++ b/apps/content-analysis-api/helpers/paper-language.js @@ -0,0 +1,13 @@ +/** + * Derives the language code from a Paper's validated locale. 
+ * + * The contract validates and normalises `locale` onto the Paper (defaulting to `en_US`), so the + * researcher language is taken from there rather than from the raw request body — keeping the routes + * consistent with the values that passed through the contract. + * + * @param {Object} paper The Paper constructed via the contract. + * @returns {string} The language code (the locale's prefix, e.g. `en` from `en_US`). + */ +const paperLanguage = ( paper ) => paper.getLocale().split( /[-_]/ )[ 0 ]; + +module.exports = { paperLanguage }; diff --git a/apps/content-analysis-api/routes/analyze.js b/apps/content-analysis-api/routes/analyze.js index 1335e1a65e7..211ceea0854 100644 --- a/apps/content-analysis-api/routes/analyze.js +++ b/apps/content-analysis-api/routes/analyze.js @@ -1,6 +1,7 @@ const { assessments, assessors, interpreters } = require( "yoastseo" ); const { getResearcher } = require( "../helpers/get-researcher" ); const { paperFromRequest } = require( "../helpers/paper-from-request" ); +const { paperLanguage } = require( "../helpers/paper-language" ); const express = require( "express" ), app = express(); @@ -39,7 +40,7 @@ module.exports = function( app ) { } // Fetch the Researcher and set the morphology data for the given language (yes, this is a bit hacky) - const language = request.body.locale || "en"; + const language = paperLanguage( paper ); const researcher = getResearcher( language ); const seoAssessor = new SEOAssessor( researcher ); @@ -69,7 +70,7 @@ module.exports = function( app ) { if ( ! paper ) { return; } - const language = request.body.locale || "en"; + const language = paperLanguage( paper ); const researcher = getResearcher( language ); const assessor = new SEOAssessor( researcher ); assessor.addAssessment( "keyphraseDistribution", new KeyphraseDistributionAssessment() ); @@ -84,7 +85,7 @@ module.exports = function( app ) { if ( ! 
paper ) { return; } - const language = request.body.locale || "en"; + const language = paperLanguage( paper ); const researcher = getResearcher( language ); const assessor = new ContentAssessor( researcher ); assessor.addAssessment( "wordComplexity", new WordComplexityAssessment() ); @@ -99,7 +100,7 @@ module.exports = function( app ) { if ( ! paper ) { return; } - const language = request.body.locale || "en"; + const language = paperLanguage( paper ); const researcher = getResearcher( language ); const assessor = new RelatedKeywordAssessor( researcher ); @@ -112,7 +113,7 @@ module.exports = function( app ) { if ( ! paper ) { return; } - const language = request.body.locale || "en"; + const language = paperLanguage( paper ); const researcher = getResearcher( language ); const assessor = new InclusiveLanguageAssessor( researcher ); @@ -121,14 +122,15 @@ module.exports = function( app ) { } ); app.get( "/analyze/meta-description", ( request, response ) => { - if ( ! request.body.description ) { - return response.status( 400 ).json( { error: "Description is required" } ); - } const paper = paperFromRequest( request, response ); if ( ! paper ) { return; } - const language = request.body.locale || "en"; + // This endpoint analyses the meta description, so one is required (the contract leaves it optional). + if ( ! paper.getDescription() ) { + return response.status( 400 ).json( { error: "Description is required" } ); + } + const language = paperLanguage( paper ); const researcher = getResearcher( language ); const assessor = new MetaDescriptionAssessor( researcher ); assessor.assess( paper ); @@ -136,14 +138,15 @@ module.exports = function( app ) { } ); app.get( "/analyze/seo-title", ( request, response ) => { - if ( ! request.body.title ) { - return response.status( 400 ).json( { error: "Title is required" } ); - } const paper = paperFromRequest( request, response ); if ( ! 
paper ) { return; } - const language = request.body.locale || "en"; + // This endpoint analyses the SEO title, so one is required (the contract leaves it optional). + if ( ! paper.getTitle() ) { + return response.status( 400 ).json( { error: "Title is required" } ); + } + const language = paperLanguage( paper ); const researcher = getResearcher( language ); const assessor = new SeoTitleAssessor( researcher ); assessor.assess( paper ); @@ -159,7 +162,7 @@ module.exports = function( app ) { if ( ! paper.hasKeyword() ) { return response.status( 400 ).json( { error: "A keyphrase is required" } ); } - const language = request.body.locale || "en"; + const language = paperLanguage( paper ); const researcher = getResearcher( language ); const assessor = new KeyphraseAssessor( researcher ); assessor.assess( paper ); @@ -175,7 +178,7 @@ module.exports = function( app ) { if ( ! paper.hasKeyword() ) { return response.status( 400 ).json( { error: "A keyphrase is required" } ); } - const language = request.body.locale || "en"; + const language = paperLanguage( paper ); const researcher = getResearcher( language ); const assessor = new KeyphraseUseAssessor( researcher ); assessor.addAssessment( "keyphraseDistribution", new KeyphraseDistributionAssessment() ); diff --git a/apps/content-analysis-api/routes/research.js b/apps/content-analysis-api/routes/research.js index 5fb698e74f0..97d0533266b 100644 --- a/apps/content-analysis-api/routes/research.js +++ b/apps/content-analysis-api/routes/research.js @@ -2,6 +2,7 @@ const { build } = require( "yoastseo/build/parse/build" ); const { LanguageProcessor } = require( "yoastseo/build/parse/language" ); const { getResearcher } = require( "../helpers/get-researcher" ); const { paperFromRequest } = require( "../helpers/paper-from-request" ); +const { paperLanguage } = require( "../helpers/paper-language" ); module.exports = function( app ) { app.get( "/research/estimated-reading-time", ( request, response ) => { @@ -9,7 +10,7 @@ 
module.exports = function( app ) { if ( ! paper ) { return; } - const language = request.body.locale || "en"; + const language = paperLanguage( paper ); const researcher = getResearcher( language ); researcher.setPaper( paper ); const estimatedReadingTime = researcher.getResearch( "readingTime" ); @@ -21,7 +22,7 @@ module.exports = function( app ) { if ( ! paper ) { return; } - const language = request.body.locale || "en"; + const language = paperLanguage( paper ); const researcher = getResearcher( language ); researcher.setPaper( paper ); const fleschReadingEaseScore = researcher.getResearch( "getFleschReadingScore" ); @@ -33,7 +34,7 @@ module.exports = function( app ) { if ( ! paper ) { return; } - const language = request.body.locale || "en"; + const language = paperLanguage( paper ); const researcher = getResearcher( language ); researcher.setPaper( paper ); const wordCount = researcher.getResearch( "wordCountInText" ); @@ -45,7 +46,7 @@ module.exports = function( app ) { if ( ! paper ) { return; } - const language = request.body.locale || "en"; + const language = paperLanguage( paper ); const researcher = getResearcher( language ); paper.setTree( build( paper, new LanguageProcessor( researcher ), paper._attributes && paper._attributes.shortcodes ) ); researcher.setPaper( paper ); @@ -60,7 +61,7 @@ module.exports = function( app ) { if ( ! 
paper ) { return; } - const language = request.body.locale || "en"; + const language = paperLanguage( paper ); const researcher = getResearcher( language ); paper.setTree( build( paper, new LanguageProcessor( researcher ), paper._attributes && paper._attributes.shortcodes ) ); researcher.setPaper( paper ); diff --git a/packages/yoastseo/package.json b/packages/yoastseo/package.json index 2be34d4e3cc..612c83deaa9 100644 --- a/packages/yoastseo/package.json +++ b/packages/yoastseo/package.json @@ -34,7 +34,7 @@ "pretest": "grunt get-premium-configuration", "export:inclusive-language": "jest exportInclusiveLanguage", "test": "jest", - "lint": "eslint . --max-warnings 19", + "lint": "eslint . --max-warnings 18", "watch": "yarn clean && yarn watch:js", "watch:js": "yarn build:js --watch", "watch:types": "tsc --watch" diff --git a/packages/yoastseo/src/contract/paperDto.js b/packages/yoastseo/src/contract/paperDto.js index 64a19061012..15ee35f9093 100644 --- a/packages/yoastseo/src/contract/paperDto.js +++ b/packages/yoastseo/src/contract/paperDto.js @@ -89,5 +89,11 @@ export function toPaper( dto ) { isFrontPage: data.isFrontPage, }; - return new Paper( data.text, attributes ); + // Omit absent optional fields entirely rather than passing explicit `undefined`, so Paper's own + // defaults apply and the constructed attributes only ever carry keys the consumer actually supplied. + const presentAttributes = Object.fromEntries( + Object.entries( attributes ).filter( ( [ , value ] ) => typeof value !== "undefined" ) + ); + + return new Paper( data.text, presentAttributes ); }