diff --git a/packages/docs-infra/src/pipeline/loadServerTypes/typeHighlighting.test.ts b/packages/docs-infra/src/pipeline/loadServerTypes/typeHighlighting.test.ts
index 4576efe0e..6dd2de527 100644
--- a/packages/docs-infra/src/pipeline/loadServerTypes/typeHighlighting.test.ts
+++ b/packages/docs-infra/src/pipeline/loadServerTypes/typeHighlighting.test.ts
@@ -6,6 +6,7 @@ import {
   formatMultilineUnionHast,
   getShortTypeString,
   shouldShowDetailedType,
+  clearInlineTypeHastCache,
   DEFAULT_UNION_PRINT_WIDTH,
 } from './typeHighlighting';
 import { getHastTextContent } from './hastTypeUtils';
@@ -252,6 +253,53 @@ describe('typeHighlighting', () => {
         expect(text.includes('|')).toBe(true);
       });
     });
+
+    describe('memoization', () => {
+      it('should return equivalent HAST for repeated calls with the same input', async () => {
+        clearInlineTypeHastCache();
+        const input = 'string | number | null';
+        const first = await formatInlineTypeAsHast(input);
+        const second = await formatInlineTypeAsHast(input);
+        const third = await formatInlineTypeAsHast(input);
+        expect(second).toEqual(first);
+        expect(third).toEqual(first);
+      });
+
+      it('should return distinct object instances so downstream mutation cannot poison the cache', async () => {
+        clearInlineTypeHastCache();
+        const input = 'boolean';
+        const first = await formatInlineTypeAsHast(input);
+        const second = await formatInlineTypeAsHast(input);
+        // Each call must hand back its own object, never one shared reference.
+        expect(second).not.toBe(first);
+        // Emptying the first result in place must leave the second result intact.
+        first.children.length = 0;
+        expect(second.children.length).toBeGreaterThan(0);
+      });
+
+      it('should key memoization on both typeText and unionPrintWidth', async () => {
+        clearInlineTypeHastCache();
+        const longUnion = '"a" | "b" | "c" | "d" | "e"';
+        const inline = await formatInlineTypeAsHast(longUnion);
+        const multiline = await formatInlineTypeAsHast(longUnion, 10);
+        // Different unionPrintWidth values must produce different structures
+        // (multiline splits across lines, inline doesn't).
+        expect(multiline).not.toEqual(inline);
+      });
+
+      it('should be idempotent: mutating one result does not affect subsequent calls', async () => {
+        clearInlineTypeHastCache();
+        const input = 'Record';
+        const first = await formatInlineTypeAsHast(input);
+        // Mutate a nested node in place; results that shared references would replay this damage.
+        const firstCodeElement = first.children[0] as Element;
+        firstCodeElement.children = [];
+        firstCodeElement.properties = { className: ['corrupted'] };
+        const second = await formatInlineTypeAsHast(input);
+        expect(hasClassInHast(second, 'corrupted')).toBe(false);
+        expect(extractText(second)).toContain('Record');
+      });
+    });
   });
 
   describe('formatDetailedTypeAsHast', () => {
diff --git a/packages/docs-infra/src/pipeline/loadServerTypes/typeHighlighting.ts b/packages/docs-infra/src/pipeline/loadServerTypes/typeHighlighting.ts
index 251d38a71..2983ac4b5 100644
--- a/packages/docs-infra/src/pipeline/loadServerTypes/typeHighlighting.ts
+++ b/packages/docs-infra/src/pipeline/loadServerTypes/typeHighlighting.ts
@@ -158,27 +158,25 @@ export function formatMultilineUnionHast(hast: HastRoot): HastRoot {
 }
 
 /**
- * Formats an inline type string with syntax highlighting.
+ * Memoization cache for {@link formatInlineTypeAsHast}.
  *
- * This function transforms type strings (like `string`, `number | null`, etc.) into
- * syntax-highlighted HAST nodes. It ensures proper TypeScript context by prefixing
- * the type with `type _ =` before highlighting, then removes the prefix from the result.
+ * Large component prop graphs (e.g. mui-x DataGrid) reference a small number of
+ * shared nested types thousands of times. Without this cache the same type string
+ * is pushed through `transformHtmlCodeInline` → `parseSource` → Oniguruma's WASM
+ * tokenizer on every reference. Oniguruma runs in a fixed-size WebAssembly
+ * linear memory buffer, and the repeated scratch allocations fragment that buffer
+ * until it overruns with `RuntimeError: memory access out of bounds`.
  *
- * @param typeText - The type string to format (e.g., "string | number")
- * @param unionPrintWidth - Optional width threshold for multiline union formatting.
- *   When set, unions exceeding this width are split across lines.
- * @returns A promise that resolves to a HAST root containing highlighted nodes
- *
- * @example
- * ```ts
- * await formatInlineTypeAsHast('string | number')
- * // Returns HAST nodes with syntax highlighting for "string | number"
+ * Instrumenting a DataGrid extraction showed **1947 calls for 5 unique inputs**
+ * before the crash. Memoizing on `(unionPrintWidth, typeText)` collapses the
+ * redundant work to 5 calls and unblocks extraction.
  *
- * await formatInlineTypeAsHast('"a" | "b" | "c" | "d" | "e"', 20)
- * // Returns HAST nodes with multiline formatting for long unions
- * ```
+ * The cached HAST is deep-cloned on return so downstream mutations don't poison
+ * the cache.
  */
-export async function formatInlineTypeAsHast(
+const inlineTypeHastCache = new Map<string, Promise<HastRoot>>();
+
+async function computeInlineTypeAsHast(
   typeText: string,
   unionPrintWidth?: number,
 ): Promise<HastRoot> {
@@ -217,6 +215,73 @@ export async function formatInlineTypeAsHast(
   return result;
 }
 
+/**
+ * Formats an inline type string with syntax highlighting.
+ *
+ * This function transforms type strings (like `string`, `number | null`, etc.) into
+ * syntax-highlighted HAST nodes. It ensures proper TypeScript context by prefixing
+ * the type with `type _ =` before highlighting, then removes the prefix from the result.
+ *
+ * Memoized by `(typeText, unionPrintWidth)` — see {@link inlineTypeHastCache}.
+ * Concurrent calls for the same key share one in-flight computation, and every
+ * caller receives its own deep clone of the result. A computation that rejects
+ * is evicted from the cache, so a later call retries instead of replaying the
+ * stored failure.
+ *
+ * @param typeText - The type string to format (e.g., "string | number")
+ * @param unionPrintWidth - Optional width threshold for multiline union formatting.
+ *   When set, unions exceeding this width are split across lines.
+ * @returns A promise that resolves to a HAST root containing highlighted nodes
+ * @throws Rethrows whatever error the underlying computation rejects with.
+ *
+ * @example
+ * ```ts
+ * await formatInlineTypeAsHast('string | number')
+ * // Returns HAST nodes with syntax highlighting for "string | number"
+ *
+ * await formatInlineTypeAsHast('"a" | "b" | "c" | "d" | "e"', 20)
+ * // Returns HAST nodes with multiline formatting for long unions
+ * ```
+ */
+export async function formatInlineTypeAsHast(
+  typeText: string,
+  unionPrintWidth?: number,
+): Promise<HastRoot> {
+  const cacheKey = `${unionPrintWidth ?? ''}:${typeText}`;
+  let cached = inlineTypeHastCache.get(cacheKey);
+  if (!cached) {
+    // Cache the promise, not the resolved value, so concurrent callers share one run.
+    cached = computeInlineTypeAsHast(typeText, unionPrintWidth);
+    inlineTypeHastCache.set(cacheKey, cached);
+  }
+
+  let result: HastRoot;
+  try {
+    result = await cached;
+  } catch (error) {
+    // Never keep a failure memoized: drop the entry so the next call recomputes.
+    // The identity check avoids deleting a newer entry stored after the cache was
+    // cleared while this computation was still pending.
+    if (inlineTypeHastCache.get(cacheKey) === cached) {
+      inlineTypeHastCache.delete(cacheKey);
+    }
+    throw error;
+  }
+
+  // Deep clone so downstream mutations don't poison the cached entry.
+  return structuredClone(result);
+}
+
+/**
+ * Clears the inline-type HAST memoization cache. Intended for test isolation —
+ * production use of the pipeline should let the cache grow for the lifetime of
+ * the process, since it's bounded by the number of distinct type strings in the
+ * project and provides a large perf win on repeat invocations.
+ */
+export function clearInlineTypeHastCache(): void {
+  inlineTypeHastCache.clear();
+}
+
 /**
  * Wraps a HAST produced by formatInlineTypeAsHast in a <pre> element.
  * Converts root > code > [spans] into root > pre > code > [line-wrapped spans].