Drop Cheerio from renderProp and fastTextOnly (#60547)

heiskr · Copilot · web-flow · commit e1bb63e6cea8 · 2026-04-01T14:53:03.000Z
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
diff --git a/src/content-render/tests/text-only.ts b/src/content-render/tests/text-only.ts
@@ -0,0 +1,37 @@
+import { describe, expect, it } from 'vitest'
+
+import { fastTextOnly } from '@/content-render/unified/text-only'
+
+describe('fastTextOnly', () => {
+  it('returns empty string for falsy input', () => {
+    expect(fastTextOnly('')).toBe('')
+  })
+
+  it('strips a simple <p> wrapper and decodes entities', () => {
+    expect(fastTextOnly('<p>Foo &amp; bar</p>')).toBe('Foo & bar')
+  })
+
+  it('strips nested tags', () => {
+    expect(fastTextOnly('A <a href="#">link</a> and <code>code</code>')).toBe('A link and code')
+  })
+
+  it('handles multiple nested elements', () => {
+    expect(fastTextOnly('<p>text with <code>code</code> and <em>emphasis</em></p>')).toBe(
+      'text with code and emphasis',
+    )
+  })
+
+  it('decodes HTML entities', () => {
+    expect(fastTextOnly('<p>&lt;script&gt;alert(1)&lt;/script&gt;</p>')).toBe(
+      '<script>alert(1)</script>',
+    )
+  })
+
+  it('trims whitespace', () => {
+    expect(fastTextOnly('<p>  hello  </p>')).toBe('hello')
+  })
+
+  it('handles self-closing tags', () => {
+    expect(fastTextOnly('before<br/>after')).toBe('beforeafter')
+  })
+})
diff --git a/src/content-render/unified/text-only.ts b/src/content-render/unified/text-only.ts
@@ -1,18 +1,23 @@
-import { load } from 'cheerio'
 import { decode } from 'html-entities'
 
+// Matches one HTML tag (opening, closing, or self-closing); removing every match leaves only the text.
+// Nested markup such as `<p>text with <code>code</code></p>` works because each tag is matched on its own.
+const TAG_RE = /<[^>]+>/g
+
 // Given a piece of HTML return it without HTML. E.g.
 // `<p>Foo &amp; bar</p>` becomes `Foo & bar`
 // and `A <a href="">link</a> and <code>code</code>` becomes `A link and code`.
-// Take advantage of the subtle fact that a lot of the times, the html value
-// we get here is a single line that starts with `<p>` and ends with `</p>`
-// and contains no longer HTML tags.
+//
+// This operates on trusted rendered HTML from our own render pipeline,
+// not user-supplied input. The output is used for plain-text display only
+// (mini-TOC items, search descriptions, etc.).
 export function fastTextOnly(html: string): string {
   if (!html) return ''
+  // Fast path: simple `<p>text</p>` with no inner tags
   if (html.startsWith('<p>') && html.endsWith('</p>')) {
     const middle = html.slice(3, -4)
     if (!middle.includes('<')) return decode(middle.trim())
   }
-  const $ = load(html, { xmlMode: true })
-  return $.root().text().trim()
+  // Strip tags and trim before decoding, so escaped markup like `&lt;b&gt;` survives as literal text.
+  return decode(html.replace(TAG_RE, '').trim()) // lgtm[js/incomplete-multi-character-sanitization]
 }
diff --git a/src/frame/lib/page.ts b/src/frame/lib/page.ts
@@ -1,7 +1,7 @@
 import assert from 'assert'
 import path from 'path'
 import fs from 'fs/promises'
-import { load } from 'cheerio'
+import { stripOuterTag } from '@/frame/lib/strip-outer-tag'
 import getApplicableVersions from '@/versions/lib/get-applicable-versions'
 import generateRedirectsForPermalinks from '@/redirects/lib/permalinks'
 import getEnglishHeadings from '@/languages/lib/get-english-headings'
@@ -440,8 +440,7 @@ class Page {
     if (!opts.unwrap) return html
 
     // The unwrap option removes surrounding tags from a string, preserving any inner HTML
-    const $ = load(html, { xmlMode: true })
-    return $.root().contents().html() || ''
+    return stripOuterTag(html)
   }
 
   // infer current page's corresponding homepage
diff --git a/src/frame/lib/strip-outer-tag.ts b/src/frame/lib/strip-outer-tag.ts
@@ -0,0 +1,32 @@
+// Remove the outermost tag from an HTML string, preserving inner content.
+// E.g. `<p>Hello <strong>world</strong></p>` → `Hello <strong>world</strong>`
+// Only unwraps when the string is exactly one element with matching open/close tags.
+// Returns the original string unchanged for multiple top-level elements or malformed HTML.
+export function stripOuterTag(html: string): string {
+  if (!html) return ''
+
+  // Extract opening tag and tag name
+  const openMatch = html.match(/^<([a-z][a-z0-9]*)\b[^>]*>/i)
+  if (!openMatch) return html
+
+  const tagName = openMatch[1]
+  const closeTag = `</${tagName}>`
+
+  // Must end with matching close tag
+  if (html.slice(-closeTag.length).toLowerCase() !== closeTag.toLowerCase()) return html
+
+  // Verify there is a single top-level element by checking that same-name
+  // tags in the inner content are balanced. A negative depth means the outer
+  // element closed early, i.e. there are siblings (e.g. `<p>a</p><p>b</p>`).
+  const inner = html.slice(openMatch[0].length, html.length - closeTag.length)
+  const tagRe = new RegExp(`<(/?)(${tagName})\\b[^>]*>`, 'gi')
+  let depth = 0
+  let m
+  while ((m = tagRe.exec(inner)) !== null) {
+    depth += m[1] === '/' ? -1 : 1
+    if (depth < 0) return html
+  }
+  if (depth !== 0) return html
+
+  return inner
+}
diff --git a/src/frame/tests/strip-outer-tag.ts b/src/frame/tests/strip-outer-tag.ts
@@ -0,0 +1,39 @@
+import { describe, expect, it } from 'vitest'
+
+import { stripOuterTag } from '@/frame/lib/strip-outer-tag'
+
+describe('stripOuterTag', () => {
+  it('returns empty string for falsy input', () => {
+    expect(stripOuterTag('')).toBe('')
+  })
+
+  it('removes a <p> wrapper preserving inner HTML', () => {
+    expect(stripOuterTag('<p>Hello <strong>world</strong></p>')).toBe(
+      'Hello <strong>world</strong>',
+    )
+  })
+
+  it('removes a <div> wrapper', () => {
+    expect(stripOuterTag('<div class="foo">content</div>')).toBe('content')
+  })
+
+  it('returns original string for multiple top-level elements', () => {
+    expect(stripOuterTag('<p>a</p><p>b</p>')).toBe('<p>a</p><p>b</p>')
+  })
+
+  it('returns original string for plain text with no tags', () => {
+    expect(stripOuterTag('just text')).toBe('just text')
+  })
+
+  it('returns original string for malformed HTML', () => {
+    expect(stripOuterTag('<p>unclosed')).toBe('<p>unclosed')
+  })
+
+  it('returns original string when open and close tags do not match', () => {
+    expect(stripOuterTag('<p>content</div>')).toBe('<p>content</div>')
+  })
+
+  it('handles nested same-name tags', () => {
+    expect(stripOuterTag('<div><div>inner</div></div>')).toBe('<div>inner</div>')
+  })
+})