diff --git a/static/js/app.js b/static/js/app.js index 089fa13..27688ba 100644 --- a/static/js/app.js +++ b/static/js/app.js @@ -48,6 +48,30 @@ function escapeHtml(str) { return div.innerHTML; } +/** + * Render Markdown to HTML, then sanitise with DOMPurify before returning. + * + * Marked.js does NOT sanitise. Without DOMPurify, `[link](javascript:...)`, + * dangerous `data:` URIs, and inline event handlers all survive into the DOM + * — that's the XSS class fixed in issue #11. + * + * Fallback: if either marked or DOMPurify is missing (CDN failure, ad blocker, + * tests), return the plain-text-escaped string rather than ever emit raw or + * unsanitised HTML. Never fall through to a bare Marked call without sanitising. + */ +function renderMarkdownSafe(text) { + if (!text) return ''; + if (typeof marked === 'undefined' || typeof DOMPurify === 'undefined') { + return escapeHtml(text); + } + try { + const html = marked.parse(text, { breaks: true, gfm: true }); + return DOMPurify.sanitize(html); + } catch (e) { + return escapeHtml(text); + } +} + function formatDate(timestamp) { if (!timestamp) return ''; try { diff --git a/static/js/download.js b/static/js/download.js index 7504f4e..d571118 100644 --- a/static/js/download.js +++ b/static/js/download.js @@ -188,7 +188,10 @@ async function downloadAs(format) { } else if (format === 'html') { const md = convertChatToMarkdown(tab, true); - const htmlContent = marked.parse(md); + // Sanitise with DOMPurify before embedding in the download blob (issue #11). + // The downloaded file is opened in a browser and any payload would execute + // in the file:// origin, so XSS still applies. + const htmlContent = renderMarkdownSafe(md); const html = ` ${escapeHtml(tab.title || 'Chat')} diff --git a/templates/base.html b/templates/base.html index 20aaeb9..f3b17b0 100644 --- a/templates/base.html +++ b/templates/base.html @@ -10,6 +10,9 @@ + + + diff --git a/templates/workspace.html b/templates/workspace.html index 0fdb4e0..96f99cf 100644 --- a/templates/workspace.html +++ b/templates/workspace.html @@ -312,12 +312,10 @@

${escapeHtml(tab.title)}

} function renderMarkdown(text) { - if (!text) return ''; - try { - return marked.parse(text, { breaks: true, gfm: true }); - } catch (e) { - return escapeHtml(text); - } + // Delegate to the shared sanitised helper in static/js/app.js — every + // markdown render in this app must flow through DOMPurify before + // hitting innerHTML (issue #11). Do not re-add a raw marked.parse here. + return renderMarkdownSafe(text); } document.addEventListener('click', e => { diff --git a/tests/test_xss_sanitization.py b/tests/test_xss_sanitization.py new file mode 100644 index 0000000..aa26e95 --- /dev/null +++ b/tests/test_xss_sanitization.py @@ -0,0 +1,141 @@ +""" +Regression tests for issue #11 — XSS via unsanitised Marked.js output. + +The frontend must: + 1. Load DOMPurify alongside Marked.js in base.html. + 2. Provide a `renderMarkdownSafe(text)` helper in static/js/app.js that + wraps marked.parse(...) with DOMPurify.sanitize(...). + 3. Use that helper at every site where markdown HTML reaches the DOM + (workspace.html → innerHTML) or a downloadable HTML blob (download.js). + 4. Never call marked.parse(...) without a DOMPurify.sanitize(...) wrap. + +These checks are static-source assertions — there is no JS test runner in +this repo, but a future regression that re-introduces a bare marked.parse +call would slip past every dynamic test even if one existed. Source-grep +guards are the cheapest backstop. + +Run: + python -m unittest tests.test_xss_sanitization -v +""" + +import os +import re +import unittest + +REPO_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + + +def _read(rel_path): + with open(os.path.join(REPO_ROOT, rel_path), "r", encoding="utf-8") as f: + return f.read() + + +class TestDOMPurifyLoaded(unittest.TestCase): + + def test_base_html_includes_dompurify_cdn(self): + src = _read("templates/base.html") + self.assertIn("dompurify", src.lower(), + "templates/base.html must load DOMPurify before any page-level script") + + def test_base_html_loads_dompurify_after_marked(self): + # Order matters: DOMPurify must be loaded before any script that calls + # renderMarkdownSafe(). Loading it after Marked.js but before app.js + # is the conventional spot. + src = _read("templates/base.html") + marked_pos = src.lower().find("marked.min.js") + purify_pos = src.lower().find("purify.min.js") + app_js_pos = src.find("/static/js/app.js") + self.assertGreater(marked_pos, 0, "Marked.js must be loaded") + self.assertGreater(purify_pos, 0, "DOMPurify must be loaded") + self.assertGreater(app_js_pos, 0, "app.js must be loaded") + self.assertLess(purify_pos, app_js_pos, + "DOMPurify must load before app.js so renderMarkdownSafe can use it") + + +class TestRenderMarkdownSafeHelper(unittest.TestCase): + + def test_app_js_defines_render_markdown_safe(self): + src = _read("static/js/app.js") + self.assertIn("renderMarkdownSafe", src, + "static/js/app.js must define renderMarkdownSafe()") + + def test_render_markdown_safe_invokes_dompurify(self): + src = _read("static/js/app.js") + # Look for the function body — must call DOMPurify.sanitize. + self.assertIn("DOMPurify.sanitize", src, + "renderMarkdownSafe() must invoke DOMPurify.sanitize(...)") + + def test_render_markdown_safe_falls_back_safely(self): + """If DOMPurify or marked is unavailable, the helper must NOT call + marked.parse alone. It must fall back to escapeHtml or similar.""" + src = _read("static/js/app.js") + self.assertIn("escapeHtml", src, + "renderMarkdownSafe() must fall back to escapeHtml when libs are missing") + + +class TestCallSitesUseSafeHelper(unittest.TestCase): + + def test_workspace_html_uses_safe_helper(self): + src = _read("templates/workspace.html") + # Either the helper is called, or DOMPurify.sanitize is inlined. + self.assertTrue( + "renderMarkdownSafe" in src or "DOMPurify.sanitize" in src, + "templates/workspace.html must sanitise markdown before innerHTML" + ) + + def test_download_js_uses_safe_helper(self): + src = _read("static/js/download.js") + self.assertTrue( + "renderMarkdownSafe" in src or "DOMPurify.sanitize" in src, + "static/js/download.js must sanitise markdown before writing to download blob" + ) + + +class TestNoBareMarkedParse(unittest.TestCase): + """The class of bug we're fixing: a bare `marked.parse(...)` whose return + value is then injected into innerHTML or a download blob. If a future edit + reintroduces the pattern, this test fails. + + A `marked.parse(...)` IS allowed inside renderMarkdownSafe (because that + function then sanitises). We allow at most one such call across the + frontend — the one inside the helper itself.""" + + FRONTEND_FILES = [ + "templates/workspace.html", + "templates/index.html", + "templates/search.html", + "templates/config.html", + "static/js/app.js", + "static/js/download.js", + ] + + def test_marked_parse_appears_only_inside_safe_helper(self): + marked_call = re.compile(r"marked\.parse\s*\(") + total = 0 + per_file = {} + for rel in self.FRONTEND_FILES: + full = os.path.join(REPO_ROOT, rel) + if not os.path.exists(full): + continue + with open(full, "r", encoding="utf-8") as f: + src = f.read() + n = len(marked_call.findall(src)) + per_file[rel] = n + total += n + # Exactly one allowed — the call inside renderMarkdownSafe in app.js. + self.assertEqual(per_file.get("static/js/app.js", 0), 1, + "static/js/app.js should contain marked.parse exactly once " + "(inside renderMarkdownSafe). per_file=%s" % per_file) + # All other frontend files must have ZERO bare marked.parse calls. + for rel, n in per_file.items(): + if rel == "static/js/app.js": + continue + self.assertEqual( + n, 0, + "%s contains a bare marked.parse(...) call — wrap it via " + "renderMarkdownSafe() instead. per_file=%s" % (rel, per_file) + ) + + +if __name__ == "__main__": + unittest.main()