Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions static/js/app.js
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,30 @@ function escapeHtml(str) {
return div.innerHTML;
}

/**
* Render Markdown to HTML, then sanitise with DOMPurify before returning.
*
* Marked.js does NOT sanitise. Without DOMPurify, `[link](javascript:...)`,
* dangerous `data:` URIs, and inline event handlers all survive into the DOM
* — that's the XSS class fixed in issue #11.
*
* Fallback: if either marked or DOMPurify is missing (CDN failure, ad blocker,
* tests), return the plain-text-escaped string rather than ever emit raw or
* unsanitised HTML. Never fall through to a bare Marked call without sanitising.
*/
function renderMarkdownSafe(text) {
if (!text) return '';
if (typeof marked === 'undefined' || typeof DOMPurify === 'undefined') {
return escapeHtml(text);
}
try {
const html = marked.parse(text, { breaks: true, gfm: true });
return DOMPurify.sanitize(html);
} catch (e) {
return escapeHtml(text);
}
}

function formatDate(timestamp) {
if (!timestamp) return '';
try {
Expand Down
5 changes: 4 additions & 1 deletion static/js/download.js
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,10 @@ async function downloadAs(format) {
}
else if (format === 'html') {
const md = convertChatToMarkdown(tab, true);
const htmlContent = marked.parse(md);
// Sanitise with DOMPurify before embedding in the download blob (issue #11).
// The downloaded file is opened in a browser and any payload would execute
// in the file:// origin, so XSS still applies.
const htmlContent = renderMarkdownSafe(md);
const html = `<!DOCTYPE html>
<html><head><meta charset="UTF-8"><title>${escapeHtml(tab.title || 'Chat')}</title>
<style>body{max-width:800px;margin:40px auto;padding:0 20px;font-family:-apple-system,BlinkMacSystemFont,"Segoe UI",Roboto,"Helvetica Neue",Arial,sans-serif;line-height:1.6;color:#333}pre{background:#f5f5f5;padding:1em;overflow-x:auto;border-radius:4px;border:1px solid #ddd}code{font-family:ui-monospace,SFMono-Regular,Menlo,Monaco,Consolas,monospace;font-size:0.9em}hr{border:none;border-top:1px solid #ddd;margin:2em 0}h1,h2,h3{margin-top:2em;margin-bottom:1em}blockquote{border-left:4px solid #ddd;margin:0;padding-left:1em;color:#666}em{color:#666}@media(prefers-color-scheme:dark){body{background:#1a1a1a;color:#ddd}pre{background:#2d2d2d;border-color:#404040}blockquote{border-color:#404040;color:#999}em{color:#999}}</style>
Expand Down
3 changes: 3 additions & 0 deletions templates/base.html
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@
<script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/highlight.min.js"></script>
<!-- Marked.js for Markdown rendering -->
<script src="https://cdnjs.cloudflare.com/ajax/libs/marked/12.0.1/marked.min.js"></script>
<!-- DOMPurify — sanitises Marked.js output before any DOM insertion (issue #11). -->
<!-- Required: every marked.parse(...) call must flow through DOMPurify.sanitize(...). -->
<script src="https://cdnjs.cloudflare.com/ajax/libs/dompurify/3.2.4/purify.min.js"></script>
</head>
<body>
<!-- Navbar -->
Expand Down
10 changes: 4 additions & 6 deletions templates/workspace.html
Original file line number Diff line number Diff line change
Expand Up @@ -312,12 +312,10 @@ <h2>${escapeHtml(tab.title)}</h2>
}

function renderMarkdown(text) {
if (!text) return '';
try {
return marked.parse(text, { breaks: true, gfm: true });
} catch (e) {
return escapeHtml(text);
}
// Delegate to the shared sanitised helper in static/js/app.js — every
// markdown render in this app must flow through DOMPurify before
// hitting innerHTML (issue #11). Do not re-add a raw marked.parse here.
return renderMarkdownSafe(text);
}

document.addEventListener('click', e => {
Expand Down
141 changes: 141 additions & 0 deletions tests/test_xss_sanitization.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
"""
Regression tests for issue #11 — XSS via unsanitised Marked.js output.

The frontend must:
1. Load DOMPurify alongside Marked.js in base.html.
2. Provide a `renderMarkdownSafe(text)` helper in static/js/app.js that
wraps marked.parse(...) with DOMPurify.sanitize(...).
3. Use that helper at every site where markdown HTML reaches the DOM
(workspace.html → innerHTML) or a downloadable HTML blob (download.js).
4. Never call marked.parse(...) without a DOMPurify.sanitize(...) wrap.

These checks are static-source assertions — there is no JS test runner in
this repo, but a future regression that re-introduces a bare marked.parse
call would slip past every dynamic test even if one existed. Source-grep
guards are the cheapest backstop.

Run:
python -m unittest tests.test_xss_sanitization -v
"""

import os
import re
import unittest

REPO_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))


def _read(rel_path):
with open(os.path.join(REPO_ROOT, rel_path), "r", encoding="utf-8") as f:
return f.read()


class TestDOMPurifyLoaded(unittest.TestCase):

def test_base_html_includes_dompurify_cdn(self):
src = _read("templates/base.html")
self.assertIn("dompurify", src.lower(),
"templates/base.html must load DOMPurify before any page-level script")

def test_base_html_loads_dompurify_after_marked(self):
# Order matters: DOMPurify must be loaded before any script that calls
# renderMarkdownSafe(). Loading it after Marked.js but before app.js
# is the conventional spot.
src = _read("templates/base.html")
marked_pos = src.lower().find("marked.min.js")
purify_pos = src.lower().find("purify.min.js")
app_js_pos = src.find("/static/js/app.js")
self.assertGreater(marked_pos, 0, "Marked.js must be loaded")
self.assertGreater(purify_pos, 0, "DOMPurify must be loaded")
self.assertGreater(app_js_pos, 0, "app.js must be loaded")
self.assertLess(purify_pos, app_js_pos,
"DOMPurify must load before app.js so renderMarkdownSafe can use it")


class TestRenderMarkdownSafeHelper(unittest.TestCase):

def test_app_js_defines_render_markdown_safe(self):
src = _read("static/js/app.js")
self.assertIn("renderMarkdownSafe", src,
"static/js/app.js must define renderMarkdownSafe()")

def test_render_markdown_safe_invokes_dompurify(self):
src = _read("static/js/app.js")
# Look for the function body — must call DOMPurify.sanitize.
self.assertIn("DOMPurify.sanitize", src,
"renderMarkdownSafe() must invoke DOMPurify.sanitize(...)")

def test_render_markdown_safe_falls_back_safely(self):
"""If DOMPurify or marked is unavailable, the helper must NOT call
marked.parse alone. It must fall back to escapeHtml or similar."""
src = _read("static/js/app.js")
self.assertIn("escapeHtml", src,
"renderMarkdownSafe() must fall back to escapeHtml when libs are missing")


class TestCallSitesUseSafeHelper(unittest.TestCase):

def test_workspace_html_uses_safe_helper(self):
src = _read("templates/workspace.html")
# Either the helper is called, or DOMPurify.sanitize is inlined.
self.assertTrue(
"renderMarkdownSafe" in src or "DOMPurify.sanitize" in src,
"templates/workspace.html must sanitise markdown before innerHTML"
)

def test_download_js_uses_safe_helper(self):
src = _read("static/js/download.js")
self.assertTrue(
"renderMarkdownSafe" in src or "DOMPurify.sanitize" in src,
"static/js/download.js must sanitise markdown before writing to download blob"
)


class TestNoBareMarkedParse(unittest.TestCase):
"""The class of bug we're fixing: a bare `marked.parse(...)` whose return
value is then injected into innerHTML or a download blob. If a future edit
reintroduces the pattern, this test fails.

A `marked.parse(...)` IS allowed inside renderMarkdownSafe (because that
function then sanitises). We allow at most one such call across the
frontend — the one inside the helper itself."""

FRONTEND_FILES = [
"templates/workspace.html",
"templates/index.html",
"templates/search.html",
"templates/config.html",
"static/js/app.js",
"static/js/download.js",
]

def test_marked_parse_appears_only_inside_safe_helper(self):
marked_call = re.compile(r"marked\.parse\s*\(")
total = 0
per_file = {}
for rel in self.FRONTEND_FILES:
full = os.path.join(REPO_ROOT, rel)
if not os.path.exists(full):
continue
with open(full, "r", encoding="utf-8") as f:
src = f.read()
n = len(marked_call.findall(src))
per_file[rel] = n
total += n
# Exactly one allowed — the call inside renderMarkdownSafe in app.js.
self.assertEqual(per_file.get("static/js/app.js", 0), 1,
"static/js/app.js should contain marked.parse exactly once "
"(inside renderMarkdownSafe). per_file=%s" % per_file)
# All other frontend files must have ZERO bare marked.parse calls.
for rel, n in per_file.items():
if rel == "static/js/app.js":
continue
self.assertEqual(
n, 0,
"%s contains a bare marked.parse(...) call — wrap it via "
"renderMarkdownSafe() instead. per_file=%s" % (rel, per_file)
)


if __name__ == "__main__":
unittest.main()