vojo/apps/ai-bot/markdown_test.go

169 lines
6.4 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package main
import (
"strings"
"testing"
)
// TestMarkdownToHTML asserts the rich constructs render and plain text stays
// plain. It checks for the meaningful tags/escaping (Contains), not goldmark's
// exact byte output — the converter's precise formatting is its own contract, not
// ours to pin.
func TestMarkdownToHTML(t *testing.T) {
rich := []struct {
name string
in string
contains []string
}{
{"bold", "a **bold** b", []string{"<strong>bold</strong>"}},
{"italic star", "a *it* b", []string{"<em>it</em>"}},
{"italic underscore", "a _it_ b", []string{"<em>it</em>"}},
{"bold italic", "***x***", []string{"<strong>", "<em>", "x"}},
{"strikethrough", "~~gone~~", []string{"gone"}}, // <del> or <s>; both rich
{"inline code", "use `npm i`", []string{"<code>npm i</code>"}},
{"inline code keeps stars literal", "`a*b*c`", []string{"<code>a*b*c</code>"}},
{"heading h1", "# Title", []string{"<h1>", "Title", "</h1>"}},
{"hr", "---", []string{"<hr"}},
{"unordered list", "- one\n- two", []string{"<ul>", "<li>", "one", "two"}},
{"ordered list", "1. one\n2. two", []string{"<ol>", "<li>", "one"}},
{"blockquote", "> quoted", []string{"<blockquote>", "quoted"}},
{"link", "see [xAI](https://x.ai)", []string{`href="https://x.ai"`, "xAI"}},
{"fenced code", "```go\nfmt.Println()\n```", []string{"<pre>", "<code", "fmt.Println"}},
{"table", "| a | b |\n| - | - |\n| 1 | 2 |", []string{"<table>", "<th>", "a", "<td>", "1"}},
{"image as link", "![logo](https://x.ai/logo.png)", []string{`href="https://x.ai/logo.png"`, "logo"}},
{"autolink bare url", "visit https://x.ai now", []string{`href="https://x.ai"`}},
}
for _, c := range rich {
t.Run("rich/"+c.name, func(t *testing.T) {
got, formatted := markdownToHTML(c.in)
if !formatted {
t.Fatalf("markdownToHTML(%q) formatted=false, want true (got %q)", c.in, got)
}
for _, sub := range c.contains {
if !strings.Contains(got, sub) {
t.Fatalf("markdownToHTML(%q) = %q, missing %q", c.in, got, sub)
}
}
})
}
// Plain text (even multi-line or with stray punctuation) carries no
// formatting, so the bot sends only the bare body.
plain := []string{
"just a sentence",
"line one\nline two",
"a < b & c > d",
"2 * 3 * 4",
"snake_case_name",
айл_имя_тут",
"text with ! bang",
`path c:\users`,
"",
}
for _, in := range plain {
t.Run("plain", func(t *testing.T) {
if got, formatted := markdownToHTML(in); formatted {
t.Fatalf("markdownToHTML(%q) formatted=true, want false (got %q)", in, got)
}
})
}
}
// TestMarkdownNeverEmitsUnsafeScheme feeds markdownToHTML link syntax whose
// target uses a non-web scheme (including mixed-case and tab-split spellings of
// "javascript:") and fails if the output contains any href attribute at all.
func TestMarkdownNeverEmitsUnsafeScheme(t *testing.T) {
	unsafeLinks := []string{
		"[a](javascript:x)",
		"[a](data:text/html,x)",
		"[a](vbscript:x)",
		"[a](file:///etc)",
		"[a](JaVaScRiPt:x)",
		"[a](java\tscript:x)",
	}
	for _, link := range unsafeLinks {
		// Only the rendered HTML matters here; the formatted flag is ignored.
		out, _ := markdownToHTML(link)
		if !strings.Contains(out, "href=") {
			continue
		}
		t.Fatalf("emitted a link for unsafe scheme: %q -> %q", link, out)
	}
}
// TestMarkdownOversizeFallsBackToPlain checks the two size guards: both an
// oversized rendering and an oversized input must yield ("", false), so the bot
// sends only the plain body.
func TestMarkdownOversizeFallsBackToPlain(t *testing.T) {
	// A formatted reply whose HTML exceeds maxFormattedBytes must be dropped.
	longList := strings.Repeat("- item\n", 8000)
	html, formatted := markdownToHTML(longList)
	if html != "" || formatted {
		t.Fatalf("oversize formatted output should fall back to plain: formatted=%v len=%d", formatted, len(html))
	}

	// Input one byte past maxInputBytes is rejected outright.
	tooLong := strings.Repeat("a", maxInputBytes+1)
	html, formatted = markdownToHTML(tooLong)
	if html != "" || formatted {
		t.Fatalf("oversize input should fall back to plain: formatted=%v len=%d", formatted, len(html))
	}
}
// TestMarkdownAdversarialNoPanicNoInjection runs markdownToHTML over
// pathological inputs (deep nesting, huge delimiter runs, raw HTML, a broken
// link target) and requires that each call returns without panicking and that
// the output contains none of a fixed set of dangerous opening tags.
func TestMarkdownAdversarialNoPanicNoInjection(t *testing.T) {
	inputs := []string{
		strings.Repeat("[", 20000) + "x",
		"x" + strings.Repeat("](https://a)", 20000),
		strings.Repeat("*", 5000) + "x" + strings.Repeat("*", 5000),
		strings.Repeat("> ", 5000) + "deep",
		strings.Repeat(" ", 50) + "- nested",
		strings.Repeat("`", 4000) + "code",
		"| " + strings.Repeat("a |", 2000) + "\n| " + strings.Repeat("- |", 2000) + "\n| x |",
		"<script>alert(1)</script>\n**`<b>`**\n[x](\"><svg onload=alert(1)>)",
		strings.Repeat("***nest ", 200) + "x" + strings.Repeat(" nest***", 200),
	}
	// Every model '<' is escaped to &lt;, so a dangerous element can only exist if
	// the converter emitted it — and it emits none of these tag names. (Attribute
	// vectors like onload= can appear only as escaped literal text, which is safe;
	// the safe-href guarantee is covered by the unit + scheme tests.)
	dangerousTags := []string{"<script", "<svg", "<img", "<iframe", "<style", "<object", "<embed"}
	for i, in := range inputs {
		html, _ := markdownToHTML(in) // must not panic
		// Lowercase once per input: outputs can be large, and the result is the
		// same for every tag, so recomputing it per tag is wasted work.
		lowered := strings.ToLower(html)
		for _, tag := range dangerousTags {
			if strings.Contains(lowered, tag) {
				t.Fatalf("case %d emitted a dangerous tag %q: %.160q", i, tag, html)
			}
		}
	}
}
// TestBuildNoticeContentAttachesFormatted verifies that content built from a
// Markdown body carries format == matrixHTMLFormat, a formatted_body holding the
// rendered bold tag, and the untouched Markdown source in body.
func TestBuildNoticeContentAttachesFormatted(t *testing.T) {
	content := buildNoticeContent("$evt", "@u:vojo.chat", "", "Here is **bold**.")

	if got := content["format"]; got != matrixHTMLFormat {
		t.Fatalf("format = %v, want %v", got, matrixHTMLFormat)
	}

	// A missing or non-string formatted_body leaves rendered empty, which then
	// fails the Contains check below.
	rendered, _ := content["formatted_body"].(string)
	if !strings.Contains(rendered, "<strong>bold</strong>") {
		t.Fatalf("formatted_body missing bold: %q", rendered)
	}

	if body := content["body"]; body != "Here is **bold**." {
		t.Fatalf("plain body must be preserved, got %v", body)
	}
}
// TestBuildNoticeContentSkipsFormattedForPlain verifies that content built from
// a body without Markdown has neither a format nor a formatted_body key.
func TestBuildNoticeContentSkipsFormattedForPlain(t *testing.T) {
	content := buildNoticeContent("$evt", "@u:vojo.chat", "", "no markdown here")

	// Checked in the same order as before so the first failure reported is the same.
	for _, key := range []string{"format", "formatted_body"} {
		if _, present := content[key]; present {
			t.Fatalf("%s must be absent for plain text", key)
		}
	}
}
// TestMarkdownNoHangOnBangAndBackslash is a regression guard for an inline-parser
// infinite loop. A '!' that does not start an image, or a backslash that is not
// followed by ASCII punctuation (trailing, or before a letter/space/Cyrillic),
// used to reach a default branch that never advanced, spinning forever and
// freezing the whole bot under the transaction mutex. Every input below only has
// to RETURN; if the bug comes back this test hangs and `go test` times out
// instead of passing.
func TestMarkdownNoHangOnBangAndBackslash(t *testing.T) {
	inputs := []string{
		"Привет!",
		"Hello! How are you?",
		`path c:\users`,
		`trailing backslash \`,
		`что-то \ или вот это`,
		`\` + "д",
		"!",
		"!!! wow",
		"text with ! bang",
		strings.Repeat("a! ", 2000),
		strings.Repeat(`\`, 2000),
	}
	for i := range inputs {
		// Results are deliberately discarded: termination is the only assertion,
		// and a hang surfaces as a test timeout.
		markdownToHTML(inputs[i])
	}
}