169 lines
6.4 KiB
Go
169 lines
6.4 KiB
Go
package main
|
||
|
||
import (
|
||
"strings"
|
||
"testing"
|
||
)
|
||
|
||
// TestMarkdownToHTML asserts the rich constructs render and plain text stays
|
||
// plain. It checks for the meaningful tags/escaping (Contains), not goldmark's
|
||
// exact byte output — the converter's precise formatting is its own contract, not
|
||
// ours to pin.
|
||
func TestMarkdownToHTML(t *testing.T) {
|
||
rich := []struct {
|
||
name string
|
||
in string
|
||
contains []string
|
||
}{
|
||
{"bold", "a **bold** b", []string{"<strong>bold</strong>"}},
|
||
{"italic star", "a *it* b", []string{"<em>it</em>"}},
|
||
{"italic underscore", "a _it_ b", []string{"<em>it</em>"}},
|
||
{"bold italic", "***x***", []string{"<strong>", "<em>", "x"}},
|
||
{"strikethrough", "~~gone~~", []string{"gone"}}, // <del> or <s>; both rich
|
||
{"inline code", "use `npm i`", []string{"<code>npm i</code>"}},
|
||
{"inline code keeps stars literal", "`a*b*c`", []string{"<code>a*b*c</code>"}},
|
||
{"heading h1", "# Title", []string{"<h1>", "Title", "</h1>"}},
|
||
{"hr", "---", []string{"<hr"}},
|
||
{"unordered list", "- one\n- two", []string{"<ul>", "<li>", "one", "two"}},
|
||
{"ordered list", "1. one\n2. two", []string{"<ol>", "<li>", "one"}},
|
||
{"blockquote", "> quoted", []string{"<blockquote>", "quoted"}},
|
||
{"link", "see [xAI](https://x.ai)", []string{`href="https://x.ai"`, "xAI"}},
|
||
{"fenced code", "```go\nfmt.Println()\n```", []string{"<pre>", "<code", "fmt.Println"}},
|
||
{"table", "| a | b |\n| - | - |\n| 1 | 2 |", []string{"<table>", "<th>", "a", "<td>", "1"}},
|
||
{"image as link", "", []string{`href="https://x.ai/logo.png"`, "logo"}},
|
||
{"autolink bare url", "visit https://x.ai now", []string{`href="https://x.ai"`}},
|
||
}
|
||
for _, c := range rich {
|
||
t.Run("rich/"+c.name, func(t *testing.T) {
|
||
got, formatted := markdownToHTML(c.in)
|
||
if !formatted {
|
||
t.Fatalf("markdownToHTML(%q) formatted=false, want true (got %q)", c.in, got)
|
||
}
|
||
for _, sub := range c.contains {
|
||
if !strings.Contains(got, sub) {
|
||
t.Fatalf("markdownToHTML(%q) = %q, missing %q", c.in, got, sub)
|
||
}
|
||
}
|
||
})
|
||
}
|
||
|
||
// Plain text (even multi-line or with stray punctuation) carries no
|
||
// formatting, so the bot sends only the bare body.
|
||
plain := []string{
|
||
"just a sentence",
|
||
"line one\nline two",
|
||
"a < b & c > d",
|
||
"2 * 3 * 4",
|
||
"snake_case_name",
|
||
"файл_имя_тут",
|
||
"text with ! bang",
|
||
`path c:\users`,
|
||
"",
|
||
}
|
||
for _, in := range plain {
|
||
t.Run("plain", func(t *testing.T) {
|
||
if got, formatted := markdownToHTML(in); formatted {
|
||
t.Fatalf("markdownToHTML(%q) formatted=true, want false (got %q)", in, got)
|
||
}
|
||
})
|
||
}
|
||
}
|
||
|
||
func TestMarkdownNeverEmitsUnsafeScheme(t *testing.T) {
|
||
for _, bad := range []string{
|
||
"[a](javascript:x)", "[a](data:text/html,x)", "[a](vbscript:x)", "[a](file:///etc)",
|
||
"[a](JaVaScRiPt:x)", "[a](java\tscript:x)",
|
||
} {
|
||
if html, _ := markdownToHTML(bad); strings.Contains(html, "href=") {
|
||
t.Fatalf("emitted a link for unsafe scheme: %q -> %q", bad, html)
|
||
}
|
||
}
|
||
}
|
||
|
||
func TestMarkdownOversizeFallsBackToPlain(t *testing.T) {
|
||
// A formatted reply whose HTML exceeds maxFormattedBytes must return ("", false)
|
||
// so the bot sends only the plain body.
|
||
big := strings.Repeat("- item\n", 8000)
|
||
if html, formatted := markdownToHTML(big); formatted || html != "" {
|
||
t.Fatalf("oversize formatted output should fall back to plain: formatted=%v len=%d", formatted, len(html))
|
||
}
|
||
// Implausibly large input is rejected outright.
|
||
huge := strings.Repeat("a", maxInputBytes+1)
|
||
if html, formatted := markdownToHTML(huge); formatted || html != "" {
|
||
t.Fatalf("oversize input should fall back to plain: formatted=%v len=%d", formatted, len(html))
|
||
}
|
||
}
|
||
|
||
func TestMarkdownAdversarialNoPanicNoInjection(t *testing.T) {
|
||
inputs := []string{
|
||
strings.Repeat("[", 20000) + "x",
|
||
"x" + strings.Repeat("](https://a)", 20000),
|
||
strings.Repeat("*", 5000) + "x" + strings.Repeat("*", 5000),
|
||
strings.Repeat("> ", 5000) + "deep",
|
||
strings.Repeat(" ", 50) + "- nested",
|
||
strings.Repeat("`", 4000) + "code",
|
||
"| " + strings.Repeat("a |", 2000) + "\n| " + strings.Repeat("- |", 2000) + "\n| x |",
|
||
"<script>alert(1)</script>\n**`<b>`**\n[x](\"><svg onload=alert(1)>)",
|
||
strings.Repeat("***nest ", 200) + "x" + strings.Repeat(" nest***", 200),
|
||
}
|
||
// Every model '<' is escaped to <, so a dangerous element can only exist if
|
||
// the converter emitted it — and it emits none of these tag names. (Attribute
|
||
// vectors like onload= can appear only as escaped literal text, which is safe;
|
||
// the safe-href guarantee is covered by the unit + scheme tests.)
|
||
for i, in := range inputs {
|
||
html, _ := markdownToHTML(in) // must not panic
|
||
for _, tag := range []string{"<script", "<svg", "<img", "<iframe", "<style", "<object", "<embed"} {
|
||
if strings.Contains(strings.ToLower(html), tag) {
|
||
t.Fatalf("case %d emitted a dangerous tag %q: %.160q", i, tag, html)
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
func TestBuildNoticeContentAttachesFormatted(t *testing.T) {
|
||
c := buildNoticeContent("$evt", "@u:vojo.chat", nil, "Here is **bold**.")
|
||
if c["format"] != matrixHTMLFormat {
|
||
t.Fatalf("format = %v, want %v", c["format"], matrixHTMLFormat)
|
||
}
|
||
fb, _ := c["formatted_body"].(string)
|
||
if !strings.Contains(fb, "<strong>bold</strong>") {
|
||
t.Fatalf("formatted_body missing bold: %q", fb)
|
||
}
|
||
if c["body"] != "Here is **bold**." {
|
||
t.Fatalf("plain body must be preserved, got %v", c["body"])
|
||
}
|
||
}
|
||
|
||
func TestBuildNoticeContentSkipsFormattedForPlain(t *testing.T) {
|
||
c := buildNoticeContent("$evt", "@u:vojo.chat", nil, "no markdown here")
|
||
if _, ok := c["format"]; ok {
|
||
t.Fatalf("format must be absent for plain text")
|
||
}
|
||
if _, ok := c["formatted_body"]; ok {
|
||
t.Fatalf("formatted_body must be absent for plain text")
|
||
}
|
||
}
|
||
|
||
// TestMarkdownNoHangOnBangAndBackslash guards the inline-parser infinite loop: a
|
||
// '!' not starting an image, or a backslash not before ASCII punctuation
|
||
// (trailing, or before a letter/space/Cyrillic), used to fall through to a
|
||
// non-advancing default branch and spin forever — freezing the whole bot under
|
||
// the transaction mutex. These must all RETURN; if the bug returns this test
|
||
// hangs and `go test` times out instead of passing.
|
||
func TestMarkdownNoHangOnBangAndBackslash(t *testing.T) {
|
||
for _, in := range []string{
|
||
"Привет!",
|
||
"Hello! How are you?",
|
||
`path c:\users`,
|
||
`trailing backslash \`,
|
||
`что-то \ или вот это`,
|
||
`\` + "д",
|
||
"!",
|
||
"!!! wow",
|
||
"text with ! bang",
|
||
strings.Repeat("a! ", 2000),
|
||
strings.Repeat(`\`, 2000),
|
||
} {
|
||
_, _ = markdownToHTML(in) // a hang fails via test timeout
|
||
}
|
||
}
|