diff --git a/apps/content-analysis-api/helpers/paper-from-request.js b/apps/content-analysis-api/helpers/paper-from-request.js new file mode 100644 index 00000000000..da3da5057a4 --- /dev/null +++ b/apps/content-analysis-api/helpers/paper-from-request.js @@ -0,0 +1,22 @@ +const { toPaper } = require( "yoastseo/contract" ); + +/** + * Builds a Paper from the request body via the PaperDTO contract (`yoastseo/contract`). + * + * On a structurally invalid body (wrong types, unknown keys, missing `text`) it responds with a 400 and + * returns null, so callers should bail when the result is falsy. + * + * @param {Object} request The Express request. + * @param {Object} response The Express response. + * @returns {Object|null} The constructed Paper, or null when the body was rejected. + */ +const paperFromRequest = ( request, response ) => { + try { + return toPaper( request.body || {} ); + } catch ( error ) { + response.status( 400 ).json( { error: "Invalid request body", details: error.issues || String( error ) } ); + return null; + } +}; + +module.exports = { paperFromRequest }; diff --git a/apps/content-analysis-api/helpers/paper-language.js b/apps/content-analysis-api/helpers/paper-language.js new file mode 100644 index 00000000000..d37727efdca --- /dev/null +++ b/apps/content-analysis-api/helpers/paper-language.js @@ -0,0 +1,13 @@ +/** + * Derives the language code from a Paper's validated locale. + * + * The contract validates and normalises `locale` onto the Paper (defaulting to `en_US`), so the + * researcher language is taken from there rather than from the raw request body — keeping the routes + * consistent with the values that passed through the contract. + * + * @param {Object} paper The Paper constructed via the contract. + * @returns {string} The language code (the locale's prefix, e.g. `en` from `en_US`). 
+ */ +const paperLanguage = ( paper ) => paper.getLocale().split( /[-_]/ )[ 0 ]; + +module.exports = { paperLanguage }; diff --git a/apps/content-analysis-api/routes/analyze.js b/apps/content-analysis-api/routes/analyze.js index 30d58f99c1b..211ceea0854 100644 --- a/apps/content-analysis-api/routes/analyze.js +++ b/apps/content-analysis-api/routes/analyze.js @@ -1,5 +1,7 @@ -const { Paper, assessments, assessors, interpreters } = require( "yoastseo" ); +const { assessments, assessors, interpreters } = require( "yoastseo" ); const { getResearcher } = require( "../helpers/get-researcher" ); +const { paperFromRequest } = require( "../helpers/paper-from-request" ); +const { paperLanguage } = require( "../helpers/paper-language" ); const express = require( "express" ), app = express(); @@ -32,9 +34,13 @@ const resultToVM = ( result ) => { module.exports = function( app ) { app.get( "/analyze", ( request, response ) => { - // Fetch the Researcher and set the morphology data for the given language (yes, this is a bit hacky) - const language = request.body.locale || "en"; + const paper = paperFromRequest( request, response ); + if ( ! 
paper ) { + return; + } + // Fetch the Researcher and set the morphology data for the given language (yes, this is a bit hacky) + const language = paperLanguage( paper ); const researcher = getResearcher( language ); const seoAssessor = new SEOAssessor( researcher ); @@ -46,11 +52,6 @@ module.exports = function( app ) { const relatedKeywordAssessor = new RelatedKeywordAssessor( researcher ); const inclusiveLanguageAssessor = new InclusiveLanguageAssessor( researcher ); - const paper = new Paper( - request.body.text || "", - request.body || {} - ); - seoAssessor.assess( paper ); contentAssessor.assess( paper ); relatedKeywordAssessor.assess( paper ); @@ -65,116 +66,123 @@ module.exports = function( app ) { } ); app.get( "/analyze/seo", ( request, response ) => { - const language = request.body.locale || "en"; + const paper = paperFromRequest( request, response ); + if ( ! paper ) { + return; + } + const language = paperLanguage( paper ); const researcher = getResearcher( language ); const assessor = new SEOAssessor( researcher ); assessor.addAssessment( "keyphraseDistribution", new KeyphraseDistributionAssessment() ); assessor.addAssessment( "TextTitleAssessment", new TextTitleAssessment() ); - const paper = new Paper( - request.body.text || "", - request.body || {} - ); assessor.assess( paper ); response.json( assessor.getValidResults().map( resultToVM ) ); } ); app.get( "/analyze/readability", ( request, response ) => { - const language = request.body.locale || "en"; + const paper = paperFromRequest( request, response ); + if ( ! 
paper ) { + return; + } + const language = paperLanguage( paper ); const researcher = getResearcher( language ); const assessor = new ContentAssessor( researcher ); assessor.addAssessment( "wordComplexity", new WordComplexityAssessment() ); assessor.addAssessment( "textAlignment", new TextAlignmentAssessment() ); - const paper = new Paper( - request.body.text || "", - request.body || {} - ); + assessor.assess( paper ); response.json( assessor.getValidResults().map( resultToVM ) ); } ); app.get( "/analyze/related-keyphrase", ( request, response ) => { - const language = request.body.locale || "en"; + const paper = paperFromRequest( request, response ); + if ( ! paper ) { + return; + } + const language = paperLanguage( paper ); const researcher = getResearcher( language ); const assessor = new RelatedKeywordAssessor( researcher ); - const paper = new Paper( - request.body.text || "", - request.body || {} - ); + assessor.assess( paper ); response.json( assessor.getValidResults().map( resultToVM ) ); } ); app.get( "/analyze/inclusive-language", ( request, response ) => { - const language = request.body.locale || "en"; + const paper = paperFromRequest( request, response ); + if ( ! paper ) { + return; + } + const language = paperLanguage( paper ); const researcher = getResearcher( language ); const assessor = new InclusiveLanguageAssessor( researcher ); - const paper = new Paper( - request.body.text || "", - request.body || {} - ); + assessor.assess( paper ); response.json( assessor.getValidResults().map( resultToVM ) ); } ); app.get( "/analyze/meta-description", ( request, response ) => { - if (! request.body.description) { + const paper = paperFromRequest( request, response ); + if ( ! paper ) { + return; + } + // This endpoint analyses the meta description, so one is required (the contract leaves it optional). + if ( ! 
paper.getDescription() ) { return response.status( 400 ).json( { error: "Description is required" } ); } - const language = request.body.locale || "en"; + const language = paperLanguage( paper ); const researcher = getResearcher( language ); const assessor = new MetaDescriptionAssessor( researcher ); - const paper = new Paper( - request.body.text || "", - request.body || {} - ); assessor.assess( paper ); response.json( assessor.getValidResults().map( resultToVM ) ); } ); app.get( "/analyze/seo-title", ( request, response ) => { - if (! request.body.title) { + const paper = paperFromRequest( request, response ); + if ( ! paper ) { + return; + } + // This endpoint analyses the SEO title, so one is required (the contract leaves it optional). + if ( ! paper.getTitle() ) { return response.status( 400 ).json( { error: "Title is required" } ); } - const language = request.body.locale || "en"; + const language = paperLanguage( paper ); const researcher = getResearcher( language ); const assessor = new SeoTitleAssessor( researcher ); - const paper = new Paper( - request.body.text || "", - request.body || {} - ); assessor.assess( paper ); response.json( assessor.getValidResults().map( resultToVM ) ); } ); app.get( "/analyze/keyphrase", ( request, response ) => { - if (! request.body.keyword) { - return response.status( 400 ).json( { error: "Keyword is required" } ); + const paper = paperFromRequest( request, response ); + if ( ! paper ) { + return; + } + // This endpoint is keyphrase analysis, so a keyphrase is required (the contract leaves it optional). + if ( ! 
paper.hasKeyword() ) { + return response.status( 400 ).json( { error: "A keyphrase is required" } ); } - const language = request.body.locale || "en"; + const language = paperLanguage( paper ); const researcher = getResearcher( language ); const assessor = new KeyphraseAssessor( researcher ); - const paper = new Paper( - request.body.text || "", - request.body || {} - ); assessor.assess( paper ); response.json( assessor.getValidResults().map( resultToVM ) ); } ); app.get( "/analyze/keyphrase-use", ( request, response ) => { - if (! request.body.keyword) { - return response.status( 400 ).json( { error: "Keyword is required" } ); + const paper = paperFromRequest( request, response ); + if ( ! paper ) { + return; + } + // This endpoint is keyphrase analysis, so a keyphrase is required (the contract leaves it optional). + if ( ! paper.hasKeyword() ) { + return response.status( 400 ).json( { error: "A keyphrase is required" } ); } - const language = request.body.locale || "en"; + const language = paperLanguage( paper ); const researcher = getResearcher( language ); const assessor = new KeyphraseUseAssessor( researcher ); assessor.addAssessment( "keyphraseDistribution", new KeyphraseDistributionAssessment() ); - const paper = new Paper( - request.body.text || "", - request.body || {} - ); assessor.assess( paper ); response.json( assessor.getValidResults().map( resultToVM ) ); } ); diff --git a/apps/content-analysis-api/routes/research.js b/apps/content-analysis-api/routes/research.js index eb252c65c88..97d0533266b 100644 --- a/apps/content-analysis-api/routes/research.js +++ b/apps/content-analysis-api/routes/research.js @@ -1,52 +1,53 @@ -const { Paper } = require( "yoastseo" ); const { build } = require( "yoastseo/build/parse/build" ); const { LanguageProcessor } = require( "yoastseo/build/parse/language" ); const { getResearcher } = require( "../helpers/get-researcher" ); +const { paperFromRequest } = require( "../helpers/paper-from-request" ); +const { paperLanguage 
} = require( "../helpers/paper-language" ); module.exports = function( app ) { app.get( "/research/estimated-reading-time", ( request, response ) => { - const language = request.body.locale || "en"; + const paper = paperFromRequest( request, response ); + if ( ! paper ) { + return; + } + const language = paperLanguage( paper ); const researcher = getResearcher( language ); - const paper = new Paper( - request.body.text || "", - request.body || {} - ); researcher.setPaper( paper ); const estimatedReadingTime = researcher.getResearch( "readingTime" ); response.json( { time: estimatedReadingTime } ); } ); app.get( "/research/flesch-reading-ease", ( request, response ) => { - const language = request.body.locale || "en"; + const paper = paperFromRequest( request, response ); + if ( ! paper ) { + return; + } + const language = paperLanguage( paper ); const researcher = getResearcher( language ); - const paper = new Paper( - request.body.text || "", - request.body || {} - ); researcher.setPaper( paper ); const fleschReadingEaseScore = researcher.getResearch( "getFleschReadingScore" ); response.json( { score: fleschReadingEaseScore.score, difficulty: fleschReadingEaseScore.difficulty } ); } ); app.get( "/research/word-count", ( request, response ) => { - const language = request.body.locale || "en"; + const paper = paperFromRequest( request, response ); + if ( ! paper ) { + return; + } + const language = paperLanguage( paper ); const researcher = getResearcher( language ); - const paper = new Paper( - request.body.text || "", - request.body || {} - ); researcher.setPaper( paper ); const wordCount = researcher.getResearch( "wordCountInText" ); response.json( { count: wordCount.count, unit: wordCount.unit } ); } ); app.get( "/research/sentence-count", ( request, response ) => { - const language = request.body.locale || "en"; + const paper = paperFromRequest( request, response ); + if ( ! 
paper ) { + return; + } + const language = paperLanguage( paper ); const researcher = getResearcher( language ); - const paper = new Paper( - request.body.text || "", - request.body || {} - ); paper.setTree( build( paper, new LanguageProcessor( researcher ), paper._attributes && paper._attributes.shortcodes ) ); researcher.setPaper( paper ); const sentenceLengths = researcher.getResearch( "countSentencesFromText" ); @@ -56,12 +57,12 @@ module.exports = function( app ) { } ); app.get( "/research/paragraph-count", ( request, response ) => { - const language = request.body.locale || "en"; + const paper = paperFromRequest( request, response ); + if ( ! paper ) { + return; + } + const language = paperLanguage( paper ); const researcher = getResearcher( language ); - const paper = new Paper( - request.body.text || "", - request.body || {} - ); paper.setTree( build( paper, new LanguageProcessor( researcher ), paper._attributes && paper._attributes.shortcodes ) ); researcher.setPaper( paper ); const paragraphLengths = researcher.getResearch( "getParagraphLength" ); diff --git a/apps/content-analysis-webworker/src/index.js b/apps/content-analysis-webworker/src/index.js index 80d5c42ae49..7f2b0e0b8f5 100644 --- a/apps/content-analysis-webworker/src/index.js +++ b/apps/content-analysis-webworker/src/index.js @@ -1,4 +1,5 @@ -import { AnalysisWorkerWrapper, Paper, interpreters } from "yoastseo"; +import { AnalysisWorkerWrapper, interpreters } from "yoastseo"; +import { toPaper } from "yoastseo/contract"; const loadWebWorker = ( language ) => { const workerUnwrapped = new Worker( new URL("./worker.js", import.meta.url) ); @@ -40,9 +41,8 @@ document.addEventListener("DOMContentLoaded", function(event) { logLevel: "TRACE", // Optional, see https://github.com/pimterry/loglevel#documentation } ).then( () => { // The worker has been configured, we can now analyze a Paper. 
- const paper = new Paper( paperText, { - keyword: keyphrase, - } ); + // Build the Paper through the serializable input contract (`yoastseo/contract`). + const paper = toPaper( { text: paperText, keyphrase } ); return worker.analyze( paper ); } ).then( ( results ) => { diff --git a/packages/yoastseo/GLOSSARY.md b/packages/yoastseo/GLOSSARY.md index fe513b96e26..adaa747b033 100644 --- a/packages/yoastseo/GLOSSARY.md +++ b/packages/yoastseo/GLOSSARY.md @@ -27,6 +27,28 @@ const paper = new Paper("
<p>This is the main content</p>
", { }); ``` +### PaperDTO +A documented, serializable **input contract** for the engine (neutral core + a few optional, deprecated WordPress-transitional fields), exposed via the opt-in `yoastseo/contract` entry. A non-WordPress consumer sends a `PaperDTO` (plain JSON) and the `toPaper` boundary validates it and constructs an internal [Paper](#paper). It is the *external* counterpart of `Paper`: where `Paper` is the engine's internal value object, `PaperDTO` is the stable shape consumers send. + +Key differences from `Paper`: +- Uses the canonical name **`keyphrase`** (mapped to the engine's `keyword`); `keyword` is accepted as a deprecated alias. +- Carries the WordPress-transitional fields (`wpBlocks`, `shortcodes`, `isFrontPage`) as **optional, deprecated** — they are real analysis inputs that change WordPress scores, so a remote/API analysis needs them for result parity. +- Authored in [zod](https://zod.dev); validates structure (wrong types / unknown keys throw) while leaving per-assessment fields optional (omitting one just skips that assessment). +- Consumers that register their own analysis (e.g., assessments) pass those inputs through the opaque `customData` object, whose contents are not validated. + +**Example:** +```javascript +import { toPaper } from "yoastseo/contract"; + +const paper = toPaper({ + text: "
<p>This is the main content</p>
", + keyphrase: "example", + description: "This is a meta description", + slug: "example-page", + locale: "en_US" +}); +``` + ### Assessment A single analysis unit that evaluates one specific aspect of content. Each assessment: - Has a specific purpose (e.g., the _keyword density_ assessment evaluates the number of keywords used in the content) @@ -59,7 +81,7 @@ Types of assessors include: - ReadabilityAssessor: Analyzes text readability - CornerStoneAssessor: Applies stricter rules for important content -The diagram below shows an example hierarchy of assessors and assessments. +The diagram below shows an example hierarchy of assessors and assessments. ```mermaid graph TD @@ -179,4 +201,4 @@ Alternative words or phrases with similar meaning to the keyphrase. Used to: ``` Keyphrase: "car" Synonyms: "automobile", "vehicle", "motor vehicle" -``` \ No newline at end of file +``` diff --git a/packages/yoastseo/README.md b/packages/yoastseo/README.md index 5a474c59bf1..3b740addcf3 100644 --- a/packages/yoastseo/README.md +++ b/packages/yoastseo/README.md @@ -127,6 +127,29 @@ console.log( researcher.getResearch( "wordCountInText" ) ); There is a basic example of this setup [over here](https://github.com/Yoast/wordpress-seo/tree/trunk/apps/content-analysis-api). +### Serializable input contract (`yoastseo/contract`) + +Non-WordPress consumers (a web API, the Shopify app, the Google Docs extension, …) can send a documented, serializable input shape — a `PaperDTO` — instead of constructing a `Paper` by hand. The contract is a separate, opt-in entry point, so its validation dependency is only loaded by consumers that import it; the package root is unaffected. + +```js +import { toPaper } from "yoastseo/contract"; + +// `toPaper` validates the input and returns an engine `Paper`. +const paper = toPaper( { + text: "Text to analyze", + keyphrase: "analyze", + locale: "en_US", +} ); + +// `paper` can now be passed to `worker.analyze( paper )` or `assessor.assess( paper )`. 
+``` + +Notes: +- **Covers the analysis inputs.** The neutral core is `text`, `keyphrase`, `synonyms`, `locale`, `description`, `title`, `slug`, `permalink`, `titleWidth`, `textTitle`, `date`, `writingDirection`, and an opaque `customData` object. The contract also carries optional, **deprecated** WordPress-transitional fields (`wpBlocks`, `shortcodes`, `isFrontPage`): they are real analysis inputs that change WordPress scores, so a remote/API analysis needs them to reproduce in-browser results. They are marked deprecated. Non-WordPress consumers simply omit them. +- **`keyphrase` is the canonical field name.** `keyword` is accepted as a deprecated alias, so existing consumers can adopt the contract without renaming. +- **Validation.** `toPaper` throws on structurally invalid input (wrong types, unknown keys, or a missing `text`). Omitting an optional field is fine — the assessments that need it are simply skipped, matching the engine's existing behaviour. +- **Custom analysis.** A consumer that registers its own analysis (e.g., assessments, researcher) can pass the inputs for that analysis through the opaque `customData` object. Its contents are not validated, so the consumer's own analysis is responsible for reading and validating them. + ## Supported languages ### SEO analysis @@ -171,7 +194,7 @@ Hebrew, Farsi, Turkish, Norwegian, Czech, Slovak, Greek, Japanese <sup>4</sup> The Passive voice check for Japanese is not implemented since the structure is the same as the potential form and can additionally be used for an honorific purpose. Identifying whether a verb is in its passive, honorific or potential form is problematic without contextual information. 
The following readability assessments are available for all languages: -- sentence length (with a default upper limit of 20 words, see<sup>1</sup> above ) +- sentence length (with a default upper limit of 20 words, see<sup>1</sup> above) - paragraph length - subheading distribution - text presence diff --git a/packages/yoastseo/contract/index.js b/packages/yoastseo/contract/index.js new file mode 100644 index 00000000000..1608ee7fb9e --- /dev/null +++ b/packages/yoastseo/contract/index.js @@ -0,0 +1,10 @@ +/* + * Public entry point for the serializable Paper input contract: `require( "yoastseo/contract" )`. + * + * Deliberately shipped as its own entry, separate from the package root (`build/index.js`), so the + * contract's runtime dependency (zod) is pulled in only by consumers that import the contract — never by + * code that loads the package root as a bundler "external" (e.g. Yoast SEO for WordPress, which exposes + * the root as a shared global). Mirrors the `yoastseo/researcher` entry. Keeping it here also gives + * consumers a stable path without deep-requiring `build/...`. 
+ */ +module.exports = require( "../build/contract" ); diff --git a/packages/yoastseo/eslint.config.mjs b/packages/yoastseo/eslint.config.mjs index 4c872eda471..67698b9ac6d 100644 --- a/packages/yoastseo/eslint.config.mjs +++ b/packages/yoastseo/eslint.config.mjs @@ -3,7 +3,7 @@ import yoastConfig from "@yoast/eslint-config"; /** @type {import('eslint').Linter.Config[]} */ export default [ - { ignores: [ "build", "vendor", "examples", "researcher" ] }, + { ignores: [ "build", "vendor", "examples", "researcher", "contract" ] }, ...yoastConfig, { languageOptions: { diff --git a/packages/yoastseo/package.json b/packages/yoastseo/package.json index 9c0aa38b0cc..612c83deaa9 100644 --- a/packages/yoastseo/package.json +++ b/packages/yoastseo/package.json @@ -22,7 +22,8 @@ "!*.map", "vendor", "images", - "researcher" + "researcher", + "contract" ], "sideEffects": false, "scripts": { @@ -33,7 +34,7 @@ "pretest": "grunt get-premium-configuration", "export:inclusive-language": "jest exportInclusiveLanguage", "test": "jest", - "lint": "eslint . --max-warnings 19", + "lint": "eslint . 
--max-warnings 18", "watch": "yarn clean && yarn watch:js", "watch:js": "yarn build:js --watch", "watch:types": "tsc --watch" @@ -71,7 +72,8 @@ "loglevel": "^1.9.2", "parse5": "^8.0.0", "tiny-segmenter": "^0.2.0", - "tokenizer2": "^2.0.1" + "tokenizer2": "^2.0.1", + "zod": "^3.25.76" }, "yoast": { "premiumConfiguration": "" diff --git a/packages/yoastseo/spec/contract/paperDtoSpec.js b/packages/yoastseo/spec/contract/paperDtoSpec.js new file mode 100644 index 00000000000..6a6704d7997 --- /dev/null +++ b/packages/yoastseo/spec/contract/paperDtoSpec.js @@ -0,0 +1,113 @@ +import Paper from "../../src/values/Paper.js"; +import { paperDtoSchema, toPaper } from "../../src/contract"; + +describe( "the Paper input contract (PaperDTO)", function() { + describe( "toPaper", function() { + it( "maps a valid keyphrase-core DTO onto a Paper", function() { + const paper = toPaper( { + text: "A post about cats.", + keyphrase: "cat food", + synonyms: "kitten food", + locale: "en_US", + description: "The best cat food.", + } ); + + expect( paper ).toBeInstanceOf( Paper ); + expect( paper.getText() ).toBe( "A post about cats." ); + // `keyphrase` maps to the engine's `keyword`. + expect( paper.getKeyword() ).toBe( "cat food" ); + expect( paper.getSynonyms() ).toBe( "kitten food" ); + expect( paper.getLocale() ).toBe( "en_US" ); + expect( paper.getDescription() ).toBe( "The best cat food." 
); + } ); + + it( "maps the full neutral metadata surface onto Paper", function() { + const paper = toPaper( { + text: "x", + title: "My SEO title", + slug: "my-slug", + permalink: "https://example.com/my-slug", + titleWidth: 400, + textTitle: "Article title", + date: "2024-01-01", + writingDirection: "RTL", + } ); + + expect( paper.getTitle() ).toBe( "My SEO title" ); + expect( paper.getSlug() ).toBe( "my-slug" ); + expect( paper.getPermalink() ).toBe( "https://example.com/my-slug" ); + expect( paper.getTitleWidth() ).toBe( 400 ); + expect( paper.getTextTitle() ).toBe( "Article title" ); + expect( paper.getDate() ).toBe( "2024-01-01" ); + expect( paper.getWritingDirection() ).toBe( "RTL" ); + } ); + + it( "leaves absent optional fields to Paper's defaults, without throwing", function() { + const paper = toPaper( { text: "Only text provided." } ); + + expect( paper.getKeyword() ).toBe( "" ); + // Engine default, not set by the DTO. + expect( paper.getLocale() ).toBe( "en_US" ); + expect( paper.getDescription() ).toBe( "" ); + } ); + + it( "passes an opaque customData object through unchanged for consumer-defined assessments", function() { + const customData = { someFlag: false, someLabel: "consumer value", anything: [ 1, 2 ] }; + const paper = toPaper( { text: "x", customData } ); + + expect( paper.getCustomData() ).toEqual( customData ); + } ); + + it( "rejects a non-object customData (shape is validated)", function() { + expect( () => toPaper( { text: "x", customData: "not an object" } ) ).toThrow(); + } ); + + it( "accepts the deprecated WP-transitional fields and maps them onto the Paper", function() { + const wpBlocks = [ { name: "core/paragraph" } ]; + const paper = toPaper( { + text: "x", + wpBlocks, + shortcodes: [ "gallery", "caption" ], + isFrontPage: true, + } ); + + expect( paper._attributes.wpBlocks ).toEqual( wpBlocks ); + expect( paper._attributes.shortcodes ).toEqual( [ "gallery", "caption" ] ); + expect( paper.isFrontPage() ).toBe( true ); + } ); + 
+ it( "type-checks the WP-transitional fields (e.g. shortcodes must be strings)", function() { + expect( () => toPaper( { text: "x", shortcodes: "not-an-array" } ) ).toThrow(); + expect( () => toPaper( { text: "x", isFrontPage: "yes" } ) ).toThrow(); + } ); + + it( "rejects siteUrl/domain for now (deferred to the competing-links refactor)", function() { + expect( () => toPaper( { text: "x", siteUrl: "https://example.com" } ) ).toThrow(); + expect( () => toPaper( { text: "x", domain: "example.com" } ) ).toThrow(); + } ); + + it( "throws on a structurally invalid payload (wrong type)", function() { + expect( () => toPaper( { text: 123 } ) ).toThrow(); + } ); + + it( "throws on unknown or typo'd keys (strict)", function() { + expect( () => toPaper( { text: "x", keyphrse: "typo" } ) ).toThrow(); + } ); + + it( "accepts the deprecated `keyword` alias and maps it to the keyphrase", function() { + const paper = toPaper( { text: "x", keyword: "cat food" } ); + expect( paper.getKeyword() ).toBe( "cat food" ); + } ); + + it( "prefers `keyphrase` over the deprecated `keyword` when both are supplied", function() { + const paper = toPaper( { text: "x", keyphrase: "preferred", keyword: "legacy" } ); + expect( paper.getKeyword() ).toBe( "preferred" ); + } ); + } ); + + describe( "paperDtoSchema", function() { + it( "accepts a minimal valid payload", function() { + expect( paperDtoSchema.parse( { text: "hi" } ) ).toEqual( { text: "hi" } ); + } ); + } ); +} ); diff --git a/packages/yoastseo/src/contract/index.js b/packages/yoastseo/src/contract/index.js new file mode 100644 index 00000000000..847d2a7f021 --- /dev/null +++ b/packages/yoastseo/src/contract/index.js @@ -0,0 +1 @@ +export { paperDtoSchema, toPaper } from "./paperDto.js"; diff --git a/packages/yoastseo/src/contract/paperDto.js b/packages/yoastseo/src/contract/paperDto.js new file mode 100644 index 00000000000..15ee35f9093 --- /dev/null +++ b/packages/yoastseo/src/contract/paperDto.js @@ -0,0 +1,99 @@ +import { z } from 
"zod"; +import Paper from "../values/Paper.js"; + +/** + * Serializable input contract for the analysis engine. + * + * The core surface is platform-neutral, but the contract also carries a few **optional, deprecated** + * WordPress-transitional fields (`wpBlocks`, `shortcodes`, `isFrontPage`). They're included because they + * are real analysis *inputs* — they change the resulting scores for WordPress content (e.g. shortcodes are + * stripped before word-counting/keyphrase matching; blocks drive tree construction). So a remote/API + * analysis of a WordPress page can only reproduce the in-browser scores if it can send them. + * They are marked deprecated as they will be removed once the engine's structured content (blocks, shortcodes) + * is fully neutral and optional, and the front page gets a proper context-aware assessment. + * + * Two validation tiers (see the issue): structural validity is enforced here — + * wrong types, malformed payloads, and unknown keys throw at the boundary. + * Per-assessment field needs are NOT enforced: every field except `text` is optional, + * so a consumer that omits, e.g. `keyphrase` simply receives no keyphrase + * assessments, matching the engine's existing graceful-skip behaviour. + * + * `.strict()` rejects unknown keys, catching typos (e.g. `keyphrse`). + * The one blessed exception is `keyword`: a deprecated alias for `keyphrase`, accepted so + * existing consumers (which speak the engine's `keyword`) can adopt the contract + * without renaming. They will be removed at a future major once they migrate to `keyphrase`. + */ +export const paperDtoSchema = z.object( { + text: z.string().describe( "The content to analyse (HTML or plain text)." ), + keyphrase: z.string().optional().describe( "The focus keyphrase." ), + keyword: z.string().optional().describe( "Deprecated alias for `keyphrase`; prefer `keyphrase`." ), + synonyms: z.string().optional().describe( "Comma-separated synonyms of the keyphrase." 
), + locale: z.string().optional().describe( "Locale, e.g. \"en_US\". The engine defaults to \"en_US\" when absent." ), + description: z.string().optional().describe( "The SEO meta description." ), + title: z.string().optional().describe( "The SEO title." ), + slug: z.string().optional().describe( "The URL slug." ), + permalink: z.string().optional().describe( "The full permalink URL of the content." ), + titleWidth: z.number().optional().describe( "Rendered width of the SEO title in pixels." ), + textTitle: z.string().optional().describe( "The title of the text or article itself." ), + date: z.string().optional().describe( "Publication date." ), + writingDirection: z.enum( [ "LTR", "RTL" ] ).optional().describe( "Writing direction of the content." ), + // Opaque data bag for consumers that register their own custom analysis, e.g., assessments. Validated as an object only — + // its contents are intentionally unchecked because typing the inner keys would couple the contract to + // consumer-specific shapes. + customData: z.record( z.unknown() ).optional().describe( "Opaque data for consumer-defined custom assessments; contents are not validated." ), + // WordPress-transitional fields — optional and DEPRECATED. They are real analysis inputs (they change + // WP scores), so they're in the contract for browser/remote result parity. + // Kept optional so non-WP consumers simply omit them. + wpBlocks: z.array( z.unknown() ).optional().describe( "Deprecated (WP-transitional): WordPress block-editor blocks." ), + shortcodes: z.array( z.string() ).optional().describe( "Deprecated (WP-transitional): shortcode tags present in the text." ), + isFrontPage: z.boolean().optional().describe( "Deprecated (WP-transitional): whether the page is the site front page." ), +} ).strict(); + +/** + * @typedef {import("zod").infer