diff options
| author | Fuwn <[email protected]> | 2026-06-01 15:45:01 +0000 |
|---|---|---|
| committer | Fuwn <[email protected]> | 2026-06-01 15:45:01 +0000 |
| commit | 6a7228c06d7af2a28ead1f4ae1830a258c05afae (patch) | |
| tree | 26a1fc3cc8546bd15dac92910998afb8c2a67fd9 /src/lib | |
| parent | fix(security): allow-list web-push endpoints to stop SSRF (diff) | |
| download | due.moe-6a7228c06d7af2a28ead1f4ae1830a258c05afae.tar.xz due.moe-6a7228c06d7af2a28ead1f4ae1830a258c05afae.zip | |
fix(security): sanitize third-party RSS HTML before {@html}
The /updates page rendered manga/novel feed fields (content, titles,
series names) from mangaupdates/syosetu/wlnupdates via {@html} with no
sanitization. CSP already blocks script execution, but injected markup
could still phish, redirect, or track. Add sanitizeFeedHtml (DOMPurify
with a small safe allow-list) and apply it on ingest. A behaviour-gate
test plus a check against the live mangaupdates feed confirm legitimate
formatting (entities, <i>/<b>/<a href>) is preserved while <script>,
event handlers, <iframe>/<meta>/<style> and javascript: URLs are removed.
Diffstat (limited to 'src/lib')
| -rw-r--r-- | src/lib/Utility/sanitizeHtml.test.ts | 57 | ||||
| -rw-r--r-- | src/lib/Utility/sanitizeHtml.ts | 32 |
2 files changed, 89 insertions, 0 deletions
diff --git a/src/lib/Utility/sanitizeHtml.test.ts b/src/lib/Utility/sanitizeHtml.test.ts
new file mode 100644
index 00000000..1094635e
--- /dev/null
+++ src/lib/Utility/sanitizeHtml.test.ts
@@ -0,0 +1,57 @@
+// @vitest-environment jsdom
+import { describe, expect, it } from "vitest";
+import { sanitizeFeedHtml } from "./sanitizeHtml";
+
+describe("sanitizeFeedHtml", () => {
+  // Behaviour gate: the formatting real feeds use must survive untouched.
+  it("preserves entities, inline formatting and safe links", () => {
+    expect(sanitizeFeedHtml("Fruits &amp; Vegetables")).toBe(
+      "Fruits &amp; Vegetables",
+    );
+    expect(sanitizeFeedHtml("<i>italic</i> and <b>bold</b>")).toBe(
+      "<i>italic</i> and <b>bold</b>",
+    );
+    expect(sanitizeFeedHtml("Vol. 1 <em>Ch.</em> 5")).toBe(
+      "Vol. 1 <em>Ch.</em> 5",
+    );
+    expect(
+      sanitizeFeedHtml('<a href="https://example.com/x">link</a>'),
+    ).toContain('href="https://example.com/x"');
+    expect(sanitizeFeedHtml("line<br>break")).toContain("<br");
+  });
+
+  it("returns empty string for nullish input", () => {
+    expect(sanitizeFeedHtml(undefined)).toBe("");
+    expect(sanitizeFeedHtml(null)).toBe("");
+    expect(sanitizeFeedHtml("")).toBe("");
+  });
+
+  // The fix: scripts, handlers, dangerous tags and URLs must be removed.
+  it("strips scripts, event handlers and dangerous tags/urls", () => {
+    const script = sanitizeFeedHtml("<script>alert(1)</script>safe");
+    expect(script).not.toContain("script");
+    expect(script).toContain("safe");
+
+    const onerror = sanitizeFeedHtml("before<img src=x onerror=alert(1)>after");
+    expect(onerror).not.toContain("onerror");
+    expect(onerror).not.toContain("<img");
+    expect(onerror).toContain("before");
+    expect(onerror).toContain("after");
+
+    expect(
+      sanitizeFeedHtml('<a href="javascript:alert(1)">x</a>'),
+    ).not.toContain("javascript:");
+    expect(
+      sanitizeFeedHtml('<iframe src="https://evil.example.com"></iframe>'),
+    ).not.toContain("iframe");
+    expect(
+      sanitizeFeedHtml(
+        '<meta http-equiv="refresh" content="0;url=https://evil.example.com">',
+      ),
+    ).not.toContain("meta");
+    expect(sanitizeFeedHtml("<style>body{display:none}</style>")).not.toContain(
+      "style",
+    );
+    expect(sanitizeFeedHtml('<div onclick="steal()">text</div>')).toBe("text");
+  });
+});
diff --git a/src/lib/Utility/sanitizeHtml.ts b/src/lib/Utility/sanitizeHtml.ts
new file mode 100644
index 00000000..3d0229e4
--- /dev/null
+++ src/lib/Utility/sanitizeHtml.ts
@@ -0,0 +1,32 @@
+import DOMPurify from "dompurify";
+
+const feedConfig = {
+  ALLOWED_TAGS: [
+    "a",
+    "b",
+    "i",
+    "em",
+    "strong",
+    "u",
+    "s",
+    "br",
+    "p",
+    "span",
+    "small",
+    "sup",
+    "sub",
+    "code",
+  ],
+  ALLOWED_ATTR: ["href", "title"],
+  ALLOWED_URI_REGEXP: /^(?:https?|mailto):/i,
+};
+
+/**
+ * Sanitise HTML coming from third-party RSS feeds before it reaches an `{@html}`
+ * sink. Keeps the light formatting these feeds actually use (HTML entities,
+ * `<i>`/`<b>`/`<a href>`) and strips anything that could inject content or
+ * behaviour: `<script>`, event-handler attributes, `<iframe>`/`<meta>`/`<style>`,
+ * `javascript:` URLs, and so on. Browser-only — call it from client code.
+ */
+export const sanitizeFeedHtml = (html: string | undefined | null): string =>
+  html ? DOMPurify.sanitize(html, feedConfig) : "";