package main

import (
	"net/url"
	"strings"
	"unicode"
)

// sources.go renders the user-facing "Sources" attribution for a web answer. It is built
// SERVER-SIDE and appended AFTER the model's prose — never handed to the model. The model
// was deliberately told to write "no URLs or links" (webSynthMessages) because instructing
// it to cite made it paste the opaque grounding-api-redirect links uglily and mis-attribute
// them. Doing the attribution here keeps the format controlled and the links honest.
//
// Compliance notes (Gemini Grounding terms, verified against ai.google.dev/gemini-api/terms):
//   - We NEVER resolve the grounding redirect server-side ("no programmatic/automated access
//     to Grounded Results"). We emit the redirect as a link the END USER clicks — the
//     intended direct-access flow — and it lands them on the real article.
//   - We label with the publisher domain (web.title), which is stable and ToS-neutral.
//   - The strict terms also ask for the Search-Suggestions chip (searchEntryPoint), which a
//     sanitised Matrix bubble can't render; that gap is pre-existing (the bot already shows
//     grounded prose without it) and out of scope here.

// maxSourcesShown caps the appended attribution. A handful of domains is plenty and keeps
// the message tidy — gemini grounding routinely returns a dozen near-duplicate chunks.
const maxSourcesShown = 3

// sourcesFooter renders a compact, deduped "Sources" line from a web route's sources, or ""
// when there's nothing usable. Each entry is a markdown link whose LABEL is the publisher
// domain and whose HREF is the source link (markdownToHTML promotes it to a clickable link;
// the plain body keeps the readable "[domain](url)" fallback). Dedup is by domain so several
// chunks from one outlet collapse to one link. The label language follows the answer
// (Cyrillic → Russian), since the bot replies in the user's language.
func sourcesFooter(answer string, sources []WebSource) string { seen := make(map[string]bool, len(sources)) var links []string for _, s := range sources { dom := sourceDomain(s.Title) u := strings.TrimSpace(s.URL) if dom == "" || u == "" { continue } key := strings.ToLower(dom) if seen[key] { continue } seen[key] = true links = append(links, "["+dom+"]("+u+")") if len(links) >= maxSourcesShown { break } } if len(links) == 0 { return "" } label := "Sources" if hasCyrillic(answer) { label = "Источники" } return "\n\n" + label + ": " + strings.Join(links, ", ") } // sourceDomain normalises a citation's display label to a bare publisher domain: it trims a // leading "www." and surrounding space. gemini grounding already returns the domain in // web.title; this just tidies it. Returns "" for an empty/garbage label. func sourceDomain(title string) string { t := strings.TrimSpace(title) t = strings.TrimPrefix(t, "www.") return strings.TrimSpace(t) } // hostOf extracts the host (minus a leading "www.") from a real URL — used to label // grok_web_search citations, which carry the actual publisher URL rather than a domain. // Returns "" if the URL doesn't parse to a host. func hostOf(rawURL string) string { u, err := url.Parse(strings.TrimSpace(rawURL)) if err != nil || u.Host == "" { return "" } return strings.TrimPrefix(u.Host, "www.") } // hasCyrillic reports whether s contains any Cyrillic letter — a cheap proxy for "the bot // answered in Russian", used only to localise the Sources label. func hasCyrillic(s string) bool { for _, r := range s { if unicode.Is(unicode.Cyrillic, r) { return true } } return false }