-
Notifications
You must be signed in to change notification settings - Fork 9.7k
core(config): implement llms.txt check for AI agents #16970
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Some comments aren't visible on the classic Files Changed page.
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,111 @@ | ||
| /** | ||
| * @license | ||
| * Copyright 2026 Google LLC | ||
| * SPDX-License-Identifier: Apache-2.0 | ||
| */ | ||
|
|
||
| import {Audit} from '../audit.js'; | ||
| import * as i18n from '../../lib/i18n/i18n.js'; | ||
|
|
||
| const HTTP_CLIENT_ERROR_CODE_LOW = 400; | ||
| const HTTP_SERVER_ERROR_CODE_LOW = 500; | ||
|
|
||
| const UIStrings = { | ||
| /** Title of a Lighthouse audit that provides detail on the site's llms.txt file. Note: "llms.txt" is a canonical filename and should not be translated. This descriptive title is shown when the llms.txt file is present and configured correctly. */ | ||
| title: 'llms.txt is present', | ||
| /** Title of a Lighthouse audit that provides detail on the site's llms.txt file. Note: "llms.txt" is a canonical filename and should not be translated. This descriptive title is shown when the llms.txt file is misconfigured. */ | ||
| failureTitle: 'llms.txt is missing or incomplete', | ||
| /** Description of a Lighthouse audit that tells the user *why* they need to have a valid llms.txt file. Note: "llms.txt" is a canonical filename and should not be translated. This is displayed after a user expands the section to see more. No character length limits. */ | ||
| description: 'If your llms.txt file is malformed, large language models may not be able to ' + | ||
| 'understand how you want your website to be crawled or used for training. A valid ' + | ||
| 'llms.txt should be a Markdown file containing at least one H1 header. ' + | ||
| '[Learn more about llms.txt](https://llmstxt.org/).', | ||
| /** | ||
| * @description Label for the audit identifying that the request failed with a specific HTTP status code. | ||
| * @example {500} statusCode | ||
| * */ | ||
| displayValueHttpBadCode: 'Failed with HTTP status {statusCode}', | ||
| /** Explanatory message stating that there was a failure in an audit caused by Lighthouse not being able to download the llms.txt file for the site. Note: "llms.txt" is a canonical filename and should not be translated. */ | ||
| explanation: 'Fetch of llms.txt failed', | ||
| /** Message indicating that the file is missing a required H1 header. */ | ||
| missingH1: 'File is missing a required H1 header (e.g., "# Title").', | ||
| /** Message indicating that the file is suspiciously short. */ | ||
| tooShort: 'File is suspiciously short.', | ||
| /** Message indicating that the file is missing links. */ | ||
| missingLinks: 'File does not appear to contain any links.', | ||
| }; | ||
|
|
||
| const str_ = i18n.createIcuMessageFn(import.meta.url, UIStrings); | ||
|
|
||
| class LlmsTxt extends Audit { | ||
| /** | ||
| * @return {LH.Audit.Meta} | ||
| */ | ||
| static get meta() { | ||
| return { | ||
| id: 'llms-txt', | ||
| title: str_(UIStrings.title), | ||
| failureTitle: str_(UIStrings.failureTitle), | ||
| description: str_(UIStrings.description), | ||
| requiredArtifacts: ['LlmsTxt'], | ||
| }; | ||
| } | ||
|
|
||
| /** | ||
| * @param {LH.Artifacts} artifacts | ||
| * @return {LH.Audit.Product} | ||
| */ | ||
| static audit(artifacts) { | ||
| const { | ||
| status, | ||
| content, | ||
| } = artifacts.LlmsTxt; | ||
|
|
||
| if (!status) { | ||
| return { | ||
| score: 0, | ||
| explanation: str_(UIStrings.explanation), | ||
| }; | ||
| } | ||
|
|
||
| if (status >= HTTP_SERVER_ERROR_CODE_LOW) { | ||
| return { | ||
| score: 0, | ||
| displayValue: str_(UIStrings.displayValueHttpBadCode, {statusCode: status}), | ||
| }; | ||
| } else if (status >= HTTP_CLIENT_ERROR_CODE_LOW || content === '') { | ||
| return { | ||
| score: 1, | ||
| notApplicable: true, | ||
| }; | ||
| } | ||
|
|
||
| if (content === null) { | ||
| throw new Error(`Status ${status} was valid, but content was null`); | ||
| } | ||
|
|
||
| const hasH1 = /^#\s+.+/m.test(content); | ||
| const hasLink = /\[.+\]\(.+\)/.test(content); | ||
| const isTooShort = content.length < 50; | ||
|
|
||
| const errors = []; | ||
| if (!hasH1) errors.push(str_(UIStrings.missingH1)); | ||
| if (!hasLink) errors.push(str_(UIStrings.missingLinks)); | ||
| if (isTooShort) errors.push(str_(UIStrings.tooShort)); | ||
|
|
||
| /** @type {LH.Audit.Details.Table['headings']} */ | ||
| const headings = [ | ||
| {key: 'message', valueType: 'text', label: 'Error'}, | ||
| ]; | ||
|
|
||
| const details = Audit.makeTableDetails(headings, errors.map(m => ({message: m}))); | ||
|
|
||
| return { | ||
| score: Number(errors.length === 0), | ||
| details: errors.length ? details : undefined, | ||
| }; | ||
| } | ||
| } | ||
|
|
||
| export default LlmsTxt; | ||
| export {UIStrings}; | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,28 @@ | ||
| /** | ||
| * @license | ||
| * Copyright 2026 Google LLC | ||
| * SPDX-License-Identifier: Apache-2.0 | ||
| */ | ||
|
|
||
|
|
||
| import BaseGatherer from '../../base-gatherer.js'; | ||
|
|
||
| class LlmsTxt extends BaseGatherer { | ||
| /** @type {LH.Gatherer.GathererMeta} */ | ||
| meta = { | ||
| supportedModes: ['snapshot', 'navigation'], | ||
| }; | ||
|
|
||
| /** | ||
| * @param {LH.Gatherer.Context} passContext | ||
| * @return {Promise<LH.Artifacts['LlmsTxt']>} | ||
| */ | ||
| async getArtifact(passContext) { | ||
| const {finalDisplayedUrl} = passContext.baseArtifacts.URL; | ||
| const llmUrl = new URL('/llms.txt', finalDisplayedUrl).href; | ||
| return passContext.driver.fetcher.fetchResource(llmUrl) | ||
| .catch(err => ({status: null, content: null, errorMessage: err.message})); | ||
| } | ||
| } | ||
|
|
||
| export default LlmsTxt; |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,151 @@ | ||
| /** | ||
| * @license | ||
| * Copyright 2026 Google LLC | ||
| * SPDX-License-Identifier: Apache-2.0 | ||
| */ | ||
|
|
||
| import assert from 'assert/strict'; | ||
|
|
||
| import LlmsTxtAudit from '../../../audits/agentic/llms-txt.js'; | ||
|
|
||
| describe('Agentic: llms.txt audit', () => { | ||
| it('fails and reports error when no llms.txt was provided', () => { | ||
| const artifacts = { | ||
| LlmsTxt: { | ||
| status: null, | ||
| content: null, | ||
| }, | ||
| }; | ||
|
|
||
| const auditResult = LlmsTxtAudit.audit(artifacts); | ||
| assert.equal(auditResult.score, 0); | ||
| assert.ok(auditResult.explanation); | ||
| }); | ||
|
|
||
| it('fails when request for /llms.txt returns a HTTP500+ error', () => { | ||
| const testData = [ | ||
| { | ||
| status: 500, | ||
| content: null, | ||
| }, | ||
| { | ||
| status: 503, | ||
| content: 'There is some content', | ||
| }, | ||
| { | ||
| status: 599, | ||
| content: null, | ||
| }, | ||
| ]; | ||
|
|
||
| testData.forEach(LlmsTxt => { | ||
| const artifacts = { | ||
| LlmsTxt, | ||
| }; | ||
|
|
||
| const auditResult = LlmsTxtAudit.audit(artifacts); | ||
| assert.equal(auditResult.score, 0); | ||
| }); | ||
| }); | ||
|
|
||
| it('fails when llms.txt file is missing required elements', () => { | ||
| const testData = [ | ||
| { | ||
| LlmsTxt: { | ||
| status: 200, | ||
| content: 'Long enough file with a link [Link](https://example.com) but no H1.', | ||
| }, | ||
| expectedErrors: 1, // Missing H1 | ||
| }, | ||
| { | ||
| LlmsTxt: { | ||
| status: 200, | ||
| content: '# Title\nThis file is long enough and has an H1 header but no links.', | ||
| }, | ||
| expectedErrors: 1, // Missing links | ||
| }, | ||
| { | ||
| LlmsTxt: { | ||
| status: 200, | ||
| content: '# Title\n[Link](url)', | ||
| }, | ||
| expectedErrors: 1, // Too short | ||
| }, | ||
| { | ||
| LlmsTxt: { | ||
| status: 200, | ||
| content: 'Short text with no H1 and no links', | ||
| }, | ||
| expectedErrors: 3, // Missing H1, Missing links, Too short | ||
| }, | ||
| ]; | ||
|
|
||
| testData.forEach(({LlmsTxt, expectedErrors}) => { | ||
| const artifacts = { | ||
| LlmsTxt, | ||
| }; | ||
|
|
||
| const auditResult = LlmsTxtAudit.audit(artifacts); | ||
|
|
||
| assert.equal(auditResult.score, 0); | ||
| assert.equal(auditResult.details.items.length, expectedErrors); | ||
| }); | ||
| }); | ||
|
|
||
| it('not applicable when there is no llms.txt or it\'s empty', () => { | ||
| const testData = [ | ||
| { | ||
| status: 404, | ||
| content: 'invalid content', | ||
| }, | ||
| { | ||
| status: 401, | ||
| content: 'invalid content', | ||
| }, | ||
| { | ||
| status: 200, | ||
| content: '', | ||
| }, | ||
| ]; | ||
|
|
||
| testData.forEach(LlmsTxt => { | ||
| const artifacts = { | ||
| LlmsTxt, | ||
| }; | ||
|
|
||
| const auditResult = LlmsTxtAudit.audit(artifacts); | ||
| assert.equal(auditResult.score, 1); | ||
| assert.equal(auditResult.notApplicable, true); | ||
| }); | ||
| }); | ||
|
|
||
| it('passes when llms.txt is valid Markdown', () => { | ||
| const testData = [ | ||
| { | ||
| status: 200, | ||
| content: `# Title\nLong enough file with a link [Link](https://example.com) to pass.`, | ||
| }, | ||
| { | ||
| status: 201, | ||
| content: `# Another Title\n\nLong enough with a link [Link](https://example.com) as required.`, | ||
| }, | ||
| { | ||
| status: 200, | ||
| content: ` | ||
| # Title with spacing | ||
|
|
||
| This content is long enough to pass the length check and has a link [Here](https://example.com). | ||
| `, | ||
| }, | ||
| ]; | ||
|
|
||
| testData.forEach(LlmsTxt => { | ||
| const artifacts = { | ||
| LlmsTxt, | ||
| }; | ||
|
|
||
| const auditResult = LlmsTxtAudit.audit(artifacts); | ||
| assert.equal(auditResult.score, 1); | ||
| }); | ||
| }); | ||
| }); |
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.