package main

import (
	"net/url"
	"strings"
	"unicode"

	"golang.org/x/net/idna"
)

// sources.go renders the user-facing "Sources" attribution for a web answer. It is built
// SERVER-SIDE and appended AFTER the model's prose — never handed to the model. The model
// was deliberately told to write "no URLs or links" (webSynthMessages) because instructing
// it to cite made it paste the opaque grounding-api-redirect links in an ugly way and
// mis-attribute them. Doing the attribution here keeps the format controlled and the links
// honest.
//
// Compliance notes (Gemini Grounding terms, verified against ai.google.dev/gemini-api/terms):
//   - We NEVER resolve the grounding redirect server-side ("no programmatic/automated access
//     to Grounded Results"). We emit the redirect as a link the END USER clicks — the
//     intended direct-access flow — and it lands them on the real article.
//   - We label with the publisher domain (web.title), which is stable and ToS-neutral.
//   - The strict terms also ask for the Search-Suggestions chip (searchEntryPoint), which a
//     sanitised Matrix bubble can't render; that gap is pre-existing (the bot already shows
//     grounded prose without it) and out of scope here.

// maxSourcesShown caps the appended attribution. A handful of domains is plenty and keeps
// the message tidy — gemini grounding routinely returns a dozen near-duplicate chunks.
const maxSourcesShown = 3

// sourcesFooter renders a compact, deduped "Sources" line from a web route's sources, or ""
// when there's nothing usable. Each entry is a markdown link whose LABEL is the publisher
// domain and whose HREF is the source link (markdownToHTML promotes it to a clickable link;
// the plain body keeps the readable "[domain](url)" fallback). Dedup is by domain so several
// chunks from one outlet collapse to one link. The label language follows the answer
// (Cyrillic → Russian), since the bot replies in the user's language.
func sourcesFooter(answer string, sources []WebSource) string { seen := make(map[string]bool, len(sources)) var links []string for _, s := range sources { dom := displayDomain(s.Title) u := strings.TrimSpace(s.URL) if dom == "" || u == "" { continue } key := strings.ToLower(dom) if seen[key] { continue } seen[key] = true links = append(links, "["+dom+"]("+u+")") if len(links) >= maxSourcesShown { break } } if len(links) == 0 { return "" } label := "Sources" if hasCyrillic(answer) { label = "Источники" } return "\n\n" + label + ": " + strings.Join(links, ", ") } // displayDomain turns a host/domain into a readable label: it trims a leading "www." and // surrounding space, then decodes a punycode IDN to its Unicode form. gemini grounding returns // the publisher domain in web.title, but for a non-ASCII host (e.g. a .рф site) that title is // punycode ("xn--…"), which renders as gibberish in the Sources footer. idna.ToUnicode is // punycode-only: ASCII domains pass through unchanged and a label that fails to decode keeps // its raw form (never worse than before). idna.Display was tried and gives byte-identical // output here — it adds no homograph protection over the basic decode (that lives in TR39 // script-mixing rules, not UTS#46), and the label isn't the click target anyway (the href is // the source URL), so the simpler profile is used. Shared by both citation label paths // (sourceDomain for gemini titles, hostOf for grok_web_search URLs). Returns "" for empty. func displayDomain(s string) string { s = strings.TrimSpace(s) s = strings.TrimPrefix(s, "www.") s = strings.TrimSpace(s) if s == "" { return "" } if u, err := idna.ToUnicode(s); err == nil && u != "" { s = u } return s } // hostOf extracts the readable host from a real URL — used to label grok_web_search citations, // which carry the actual publisher URL rather than a domain. Runs the host through // displayDomain so a "www." 
prefix is dropped and an IDN host decodes to Unicode, matching the // gemini-title path. Returns "" if the URL doesn't parse to a host. func hostOf(rawURL string) string { u, err := url.Parse(strings.TrimSpace(rawURL)) if err != nil || u.Host == "" { return "" } return displayDomain(u.Host) } // hasCyrillic reports whether s contains any Cyrillic letter — a cheap proxy for "the bot // answered in Russian", used only to localise the Sources label. func hasCyrillic(s string) bool { for _, r := range s { if unicode.Is(unicode.Cyrillic, r) { return true } } return false }