From c043858e7850384c18c70c78681e18f242010a9f Mon Sep 17 00:00:00 2001
From: Janpot <2109932+Janpot@users.noreply.github.com>
Date: Tue, 24 Mar 2026 13:34:30 +0100
Subject: [PATCH 01/16] [code-infra] Add optional HTML validation to broken
links checker
Add an `htmlValidate` option to the crawl config that validates the HTML
content of crawled pages using the html-validate library. The option
accepts either `true` (uses the recommended rules) or a config object,
which supports `extends: ['mui:recommended']` for the default preset. The
config is always static (never loaded from disk). Reports are printed per page.
Co-Authored-By: Claude Opus 4.6 (1M context)
---
packages/code-infra/package.json | 3 +-
.../__fixtures__/static-site/index.html | 1 +
.../static-site/invalid-html.html | 15 ++++
.../src/brokenLinksChecker/index.mjs | 81 ++++++++++++++++++-
.../src/brokenLinksChecker/index.test.ts | 30 ++++++-
pnpm-lock.yaml | 61 ++++++++++++++
6 files changed, 188 insertions(+), 3 deletions(-)
create mode 100644 packages/code-infra/src/brokenLinksChecker/__fixtures__/static-site/invalid-html.html
diff --git a/packages/code-infra/package.json b/packages/code-infra/package.json
index a3abb61b8..4ede00ac2 100644
--- a/packages/code-infra/package.json
+++ b/packages/code-infra/package.json
@@ -93,6 +93,7 @@
"clipboardy": "^5.3.1",
"content-type": "^1.0.5",
"env-ci": "^11.2.0",
+ "es-toolkit": "^1.45.1",
"eslint-config-prettier": "^10.1.8",
"eslint-import-resolver-typescript": "^4.4.4",
"eslint-module-utils": "^2.12.1",
@@ -104,11 +105,11 @@
"eslint-plugin-react-compiler": "^19.1.0-rc.2",
"eslint-plugin-react-hooks": "^7.0.1",
"eslint-plugin-testing-library": "^7.16.0",
- "es-toolkit": "^1.45.1",
"execa": "^9.6.1",
"git-url-parse": "^16.1.0",
"globals": "^17.4.0",
"globby": "^16.1.1",
+ "html-validate": "^10.11.2",
"minimatch": "^10.2.4",
"node-html-parser": "^7.1.0",
"open": "^11.0.0",
diff --git a/packages/code-infra/src/brokenLinksChecker/__fixtures__/static-site/index.html b/packages/code-infra/src/brokenLinksChecker/__fixtures__/static-site/index.html
index c9ba541e5..c79573ba0 100644
--- a/packages/code-infra/src/brokenLinksChecker/__fixtures__/static-site/index.html
+++ b/packages/code-infra/src/brokenLinksChecker/__fixtures__/static-site/index.html
@@ -22,6 +22,7 @@ Test Site Home
Page with API Links
Example Markdown
Page with Unclosed Tags
+ Invalid HTML Page
+ Invalid HTML
+
+ First
+ Second
+
+ Tom & Jerry
+
diff --git a/packages/code-infra/src/brokenLinksChecker/__fixtures__/static-site/invalid-html.html b/packages/code-infra/src/brokenLinksChecker/__fixtures__/static-site/invalid-html.html
new file mode 100644
index 000000000..fd4f01f73
--- /dev/null
+++ b/packages/code-infra/src/brokenLinksChecker/__fixtures__/static-site/invalid-html.html
@@ -0,0 +1,15 @@
+
+
+