fix(ai-bot): strip the bot mention from the search query and append a clickable sources footer to web answers
This commit is contained in:
parent
08456b63ad
commit
c12c228eb8
9 changed files with 279 additions and 14 deletions
|
|
@ -389,6 +389,14 @@ const unlimitedCap = 1 << 30
|
|||
|
||||
func (b *Bot) respond(ctx context.Context, roomID, threadRoot string, isDM bool, ev *Event, mc *MessageContent, history []bufferedMsg) {
|
||||
started := time.Now()
|
||||
// Clean the trigger ONCE, before it reaches the search query, the prompt, the buffer,
|
||||
// or telemetry. Two egress hazards both flow from the raw body: the bot's own mention
|
||||
// ("@ai:vojo.chat …", which the grounding provider mis-read as the subject "vojo.chat"),
|
||||
// and the Matrix rich-reply fallback (the quoted parent, "> <@ai> …"), which would
|
||||
// otherwise be googled verbatim on a reply-style follow-up. Both are stripped here so
|
||||
// every downstream consumer sees only what the user actually asked. Detection already
|
||||
// happened (m.mentions / replyParentIsBot), so this never affects whether we answer.
|
||||
mc.Body = stripBotMention(stripReplyFallback(mc.Body), b.cfg.BotMXID)
|
||||
// One telemetry row per request, populated as the flow decides its outcome and
|
||||
// emitted once via defer — so every exit (deny, error, empty, paid silence, success)
|
||||
// is recorded without scattering writes (F-FUNC-5). It starts as route=none/ok=false;
|
||||
|
|
@ -565,7 +573,11 @@ func (b *Bot) respond(ctx context.Context, roomID, threadRoot string, isDM bool,
|
|||
}
|
||||
b.log.InfoContext(ctx, "answered", "room", roomID, "sender", ev.Sender, "dm", isDM, "route", res.route,
|
||||
"usd", res.cost.Total(), "prompt_tokens", res.usage.PromptTokens, "completion_tokens", res.usage.CompletionTokens)
|
||||
if err := b.sendReply(ctx, roomID, threadRoot, ev, mc, text); err != nil {
|
||||
// Append the source attribution to the SENT message only — not to the buffered answer:
|
||||
// the gemini redirect links are ephemeral, so stale links must not pollute the history
|
||||
// that feeds later turns (sendReply buffers `text`, sends `text+footer`).
|
||||
footer := sourcesFooter(text, res.sources)
|
||||
if err := b.sendReply(ctx, roomID, threadRoot, ev, mc, text, footer); err != nil {
|
||||
// Paid silence (§8.1): the spend is real (USD is kept — refunding it would
|
||||
// under-count the ceiling), but the reply never landed. Refund the request SLOT
|
||||
// so the user can retry, and react ⚠️ so the failure isn't silent.
|
||||
|
|
@ -679,12 +691,15 @@ func (b *Bot) reactEncryptedOnce(ctx context.Context, roomID, eventID string) bo
|
|||
return true
|
||||
}
|
||||
|
||||
// sendReply sends the model's actual answer and records the completed exchange in the
|
||||
// conversation buffer so the next turn has context. It RETURNS the send error so the
|
||||
// caller can handle paid silence (§8.1): a billed answer that failed to deliver must
|
||||
// refund the slot and react, not vanish.
|
||||
func (b *Bot) sendReply(ctx context.Context, roomID, threadRoot string, trigger *Event, triggerMC *MessageContent, body string) error {
|
||||
if err := b.sendMessage(ctx, roomID, threadRoot, trigger, triggerMC, body); err != nil {
|
||||
// sendReply sends the answer (plus an optional source-attribution footer) and records the
|
||||
// completed exchange in the conversation buffer so the next turn has context. `body` is the
|
||||
// model's prose — what gets BUFFERED as the assistant turn; `footer` is the server-built
|
||||
// "Sources" line, appended only to the SENT message so its ephemeral grounding links never
|
||||
// enter the history that feeds later turns. It RETURNS the send error so the caller can
|
||||
// handle paid silence (§8.1): a billed answer that failed to deliver must refund the slot
|
||||
// and react, not vanish.
|
||||
func (b *Bot) sendReply(ctx context.Context, roomID, threadRoot string, trigger *Event, triggerMC *MessageContent, body, footer string) error {
|
||||
if err := b.sendMessage(ctx, roomID, threadRoot, trigger, triggerMC, body+footer); err != nil {
|
||||
return err
|
||||
}
|
||||
// Record the user trigger AND the assistant answer together, only AFTER the answer
|
||||
|
|
|
|||
|
|
@ -70,6 +70,29 @@ func TestStripReplyFallback(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func TestStripBotMention(t *testing.T) {
|
||||
cases := []struct{ in, want string }{
|
||||
// The headline regression: the full-mxid pill fallback cinny writes must not reach
|
||||
// the search query (it made the grounding provider search for "vojo.chat").
|
||||
{"@ai:vojo.chat мессенджер макс удалили из эппстора?", "мессенджер макс удалили из эппстора?"},
|
||||
// Bare "@localpart" fallback some clients write, with trailing address punctuation.
|
||||
{"@ai, какая погода в Москве", "какая погода в Москве"},
|
||||
// Mention mid-message is still removed (it is never user content).
|
||||
{"скажи @ai:vojo.chat кто выиграл", "скажи кто выиграл"},
|
||||
// No mention → unchanged (DMs, where the bot isn't addressed by name).
|
||||
{"кто выиграл вчера", "кто выиграл вчера"},
|
||||
// The product name in a real question must survive (we never strip the display name).
|
||||
{"@ai:vojo.chat что умеет Vojo AI", "что умеет Vojo AI"},
|
||||
// A longer handle that merely contains the localpart is kept.
|
||||
{"@ai:vojo.chat пинг @aibot", "пинг @aibot"},
|
||||
}
|
||||
for _, c := range cases {
|
||||
if got := stripBotMention(c.in, botID); got != c.want {
|
||||
t.Errorf("stripBotMention(%q) = %q, want %q", c.in, got, c.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestComputeUSD(t *testing.T) {
|
||||
const model = "grok-test"
|
||||
cfg := &Config{XAIModel: model, Prices: map[string]ModelPrice{
|
||||
|
|
|
|||
|
|
@ -38,6 +38,7 @@ type genResult struct {
|
|||
rewriteUsed bool
|
||||
webGrounded bool
|
||||
citationCount int
|
||||
sources []WebSource // user-facing source attribution (web route only; sources.go)
|
||||
}
|
||||
|
||||
func msSince(t time.Time) int { return int(time.Since(t).Milliseconds()) }
|
||||
|
|
@ -319,6 +320,7 @@ func (b *Bot) genWebThenGrok(ctx context.Context, body string, isDM bool, msgs [
|
|||
res.cost.WebTool += wc.Cost.WebTool
|
||||
res.citationCount = len(wc.Citations)
|
||||
res.webGrounded = len(wc.Citations) > 0
|
||||
res.sources = wc.Sources // carried to the user-facing "Sources" footer on success
|
||||
webUsage := wc.Usage
|
||||
if ferr != nil {
|
||||
if errors.Is(ferr, errGroundingCapped) {
|
||||
|
|
@ -366,10 +368,10 @@ func (b *Bot) genWebThenGrok(ctx context.Context, body string, isDM bool, msgs [
|
|||
// raw citation URLs into the prompt, nor ask Grok to "cite sources": gemini grounding
|
||||
// returns opaque vertexaisearch.../grounding-api-redirect/... redirect links (not publisher
|
||||
// URLs), and instructing Grok to cite made it paste those ugly redirects verbatim into the
|
||||
// reply and mis-attribute them ("ссылок из твоего сообщения"). The grounding already
|
||||
// happened (citation_count is recorded for telemetry); the user wants the answer, not
|
||||
// Google's internal redirect links. Real source attribution (resolving redirects to
|
||||
// domains) is a separate, deferred feature.
|
||||
// reply and mis-attribute them ("ссылок из твоего сообщения"). Source attribution is instead
|
||||
// built SERVER-SIDE and appended after the prose (sourcesFooter, sources.go) using the
|
||||
// citations' publisher-domain titles — controlled format, honest links — so the prompt keeps
|
||||
// telling Grok "no URLs or links".
|
||||
//
|
||||
// The note is also AUTHORITATIVE about the data being current and provided: the system
|
||||
// prompt's "don't claim you have internet access if you don't" rule otherwise wins on a
|
||||
|
|
|
|||
|
|
@ -46,6 +46,35 @@ func mentionsBot(mc *MessageContent, botMXID string, replyParentIsBot bool) bool
|
|||
return pillTargetsBot(mc.FormattedBody, botMXID)
|
||||
}
|
||||
|
||||
// stripBotMention removes the bot's own mention text from a trigger body before it is
|
||||
// used as a web-search query, a prompt turn, a buffer entry, or telemetry. cinny writes
|
||||
// the plain-text fallback of a mention pill as the bot's FULL mxid ("@ai:vojo.chat …"),
|
||||
// and that literal mxid, sent verbatim to the grounding provider as the search query, made
|
||||
// it treat "vojo.chat" as the SUBJECT entity — it searched "was the Vojo.chat messenger
|
||||
// removed?", found nothing, and confabulated "no, it's available", the exact first-ask
|
||||
// hallucination + same-question/different-answer the "Max" thread showed (the second ask
|
||||
// happened to anchor on "макс" instead, hence two opposite grounded answers). Mention
|
||||
// DETECTION already ran upstream via m.mentions (MSC3952), so dropping the body text never
|
||||
// changes routing. We strip only the UNAMBIGUOUS mxid forms — the full mxid and a
|
||||
// standalone "@localpart"; the human display name is deliberately left intact so a real
|
||||
// question that names the product ("что умеет Vojo AI") is never mangled.
|
||||
func stripBotMention(body, botMXID string) string {
|
||||
body = strings.ReplaceAll(body, botMXID, " ")
|
||||
at := "@" + localpartOf(botMXID)
|
||||
fields := strings.Fields(body)
|
||||
kept := fields[:0]
|
||||
for _, f := range fields {
|
||||
// Drop a standalone "@ai" pill fallback (with trailing address punctuation), but
|
||||
// keep "@aibot" or any word that merely contains it.
|
||||
if strings.EqualFold(strings.Trim(f, ",.:;!?–—-"), at) {
|
||||
continue
|
||||
}
|
||||
kept = append(kept, f)
|
||||
}
|
||||
out := strings.Join(kept, " ")
|
||||
return strings.TrimLeft(out, " ,:–—-") // leftover leading address punctuation ("@ai, …")
|
||||
}
|
||||
|
||||
// pillTargetsBot looks for an <a href> mention pill addressing the bot in the
|
||||
// HTML body. Matrix pills use either matrix.to/#/<mxid> or a matrix: URI.
|
||||
func pillTargetsBot(formattedBody, botMXID string) bool {
|
||||
|
|
|
|||
|
|
@ -78,7 +78,8 @@ func (c *geminiClient) Complete(ctx context.Context, req LLMRequest) (*LLMRespon
|
|||
|
||||
type geminiGroundResult struct {
|
||||
Digest string
|
||||
Citations []string
|
||||
Citations []string // redirect URIs — the verify-gate + citation_count
|
||||
Sources []WebSource // the same chunks with their publisher-domain titles (web.title)
|
||||
Usage Usage
|
||||
}
|
||||
|
||||
|
|
@ -170,9 +171,14 @@ func (c *geminiClient) groundedSearch(ctx context.Context, query string) (gemini
|
|||
sb.WriteString(p.Text)
|
||||
}
|
||||
var citations []string
|
||||
var sources []WebSource
|
||||
for _, ch := range out.Candidates[0].GroundingMetadata.GroundingChunks {
|
||||
if ch.Web.URI != "" {
|
||||
citations = append(citations, ch.Web.URI)
|
||||
// web.uri is the grounding-api-redirect (NOT the publisher URL — and Gemini's
|
||||
// terms forbid resolving it server-side); web.title is the publisher domain
|
||||
// ("rbc.ru"). Keep both: the user clicks the redirect to reach the real article.
|
||||
sources = append(sources, WebSource{Title: ch.Web.Title, URL: ch.Web.URI})
|
||||
}
|
||||
}
|
||||
// The verify-gate: no citations ⇒ not actually grounded ⇒ degrade.
|
||||
|
|
@ -182,6 +188,7 @@ func (c *geminiClient) groundedSearch(ctx context.Context, query string) (gemini
|
|||
return geminiGroundResult{
|
||||
Digest: strings.TrimSpace(sb.String()),
|
||||
Citations: citations,
|
||||
Sources: sources,
|
||||
Usage: Usage{
|
||||
PromptTokens: out.UsageMetadata.PromptTokenCount,
|
||||
CachedTokens: out.UsageMetadata.CachedContentTokenCount,
|
||||
|
|
|
|||
92
apps/ai-bot/sources.go
Normal file
92
apps/ai-bot/sources.go
Normal file
|
|
@ -0,0 +1,92 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"net/url"
|
||||
"strings"
|
||||
"unicode"
|
||||
)
|
||||
|
||||
// sources.go renders the user-facing "Sources" attribution for a web answer. It is built
|
||||
// SERVER-SIDE and appended AFTER the model's prose — never handed to the model. The model
|
||||
// was deliberately told to write "no URLs or links" (webSynthMessages) because instructing
|
||||
// it to cite made it paste the opaque grounding-api-redirect links verbatim and mis-attribute
|
||||
// them. Doing the attribution here keeps the format controlled and the links honest.
|
||||
//
|
||||
// Compliance notes (Gemini Grounding terms, verified against ai.google.dev/gemini-api/terms):
|
||||
// - We NEVER resolve the grounding redirect server-side ("no programmatic/automated access
|
||||
// to Grounded Results"). We emit the redirect as a link the END USER clicks — the
|
||||
// intended direct-access flow — and it lands them on the real article.
|
||||
// - We label with the publisher domain (web.title), which is stable and ToS-neutral.
|
||||
// - The strict terms also ask for the Search-Suggestions chip (searchEntryPoint), which a
|
||||
// sanitised Matrix bubble can't render; that gap is pre-existing (the bot already shows
|
||||
// grounded prose without it) and out of scope here.
|
||||
|
||||
// maxSourcesShown caps the appended attribution. A handful of domains is plenty and keeps
|
||||
// the message tidy — gemini grounding routinely returns a dozen near-duplicate chunks.
|
||||
const maxSourcesShown = 3
|
||||
|
||||
// sourcesFooter renders a compact, deduped "Sources" line from a web route's sources, or ""
|
||||
// when there's nothing usable. Each entry is a markdown link whose LABEL is the publisher
|
||||
// domain and whose HREF is the source link (markdownToHTML promotes it to a clickable <a>;
|
||||
// the plain body keeps the readable "[domain](url)" fallback). Dedup is by domain so several
|
||||
// chunks from one outlet collapse to one link. The label language follows the answer
|
||||
// (Cyrillic → Russian), since the bot replies in the user's language.
|
||||
func sourcesFooter(answer string, sources []WebSource) string {
|
||||
seen := make(map[string]bool, len(sources))
|
||||
var links []string
|
||||
for _, s := range sources {
|
||||
dom := sourceDomain(s.Title)
|
||||
u := strings.TrimSpace(s.URL)
|
||||
if dom == "" || u == "" {
|
||||
continue
|
||||
}
|
||||
key := strings.ToLower(dom)
|
||||
if seen[key] {
|
||||
continue
|
||||
}
|
||||
seen[key] = true
|
||||
links = append(links, "["+dom+"]("+u+")")
|
||||
if len(links) >= maxSourcesShown {
|
||||
break
|
||||
}
|
||||
}
|
||||
if len(links) == 0 {
|
||||
return ""
|
||||
}
|
||||
label := "Sources"
|
||||
if hasCyrillic(answer) {
|
||||
label = "Источники"
|
||||
}
|
||||
return "\n\n" + label + ": " + strings.Join(links, ", ")
|
||||
}
|
||||
|
||||
// sourceDomain tidies a citation's display label into a bare publisher domain: it trims
// surrounding whitespace and one leading "www.". The prefix is matched case-insensitively,
// so "WWW.tass.ru" and "tass.ru" end up with the same label; the rest of the label keeps
// its original case. The label is not validated as a domain — the result is "" only when
// nothing is left after trimming.
func sourceDomain(title string) string {
	const prefix = "www."
	t := strings.TrimSpace(title)
	// Byte-slicing is safe here: a slice that cuts a multi-byte rune can never fold-equal
	// the ASCII prefix.
	if len(t) >= len(prefix) && strings.EqualFold(t[:len(prefix)], prefix) {
		t = t[len(prefix):]
	}
	return strings.TrimSpace(t)
}
|
||||
|
||||
// hostOf extracts the display host from a real URL: the hostname without any port,
// lower-cased, and minus a leading "www.". It is used to label grok_web_search citations,
// which carry the actual publisher URL rather than a domain.
// Returns "" if the URL doesn't parse or has no host.
func hostOf(rawURL string) string {
	u, err := url.Parse(strings.TrimSpace(rawURL))
	if err != nil {
		return ""
	}
	// Hostname (not Host) drops ":port"; lower-casing first lets the "www." trim catch
	// "WWW." too and keeps the label stable for one outlet.
	host := strings.ToLower(u.Hostname())
	return strings.TrimPrefix(host, "www.")
}
|
||||
|
||||
// hasCyrillic reports whether s holds at least one rune from the Unicode Cyrillic script.
// It is a cheap stand-in for "the answer is in Russian" and is only meant for picking the
// language of the Sources label.
func hasCyrillic(s string) bool {
	isCyrillic := func(r rune) bool {
		return unicode.In(r, unicode.Cyrillic)
	}
	return strings.IndexFunc(s, isCyrillic) >= 0
}
|
||||
52
apps/ai-bot/sources_test.go
Normal file
52
apps/ai-bot/sources_test.go
Normal file
|
|
@ -0,0 +1,52 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestSourcesFooter(t *testing.T) {
|
||||
redirect := "https://vertexaisearch.cloud.google.com/grounding-api-redirect/abc"
|
||||
src := []WebSource{
|
||||
{Title: "rbc.ru", URL: redirect + "1"},
|
||||
{Title: "www.tass.ru", URL: redirect + "2"},
|
||||
{Title: "rbc.ru", URL: redirect + "3"}, // duplicate domain → collapsed
|
||||
{Title: "lenta.ru", URL: redirect + "4"},
|
||||
{Title: "vedomosti.ru", URL: redirect + "5"}, // beyond maxSourcesShown → dropped
|
||||
}
|
||||
|
||||
// Russian answer → Russian label, deduped, capped, www stripped, clickable.
|
||||
got := sourcesFooter("Да, удалили 3 июня.", src)
|
||||
want := "\n\nИсточники: [rbc.ru](" + redirect + "1), [tass.ru](" + redirect + "2), [lenta.ru](" + redirect + "4)"
|
||||
if got != want {
|
||||
t.Fatalf("sourcesFooter ru =\n %q\nwant\n %q", got, want)
|
||||
}
|
||||
|
||||
// English answer → English label.
|
||||
if got := sourcesFooter("Yes, removed on June 3.", src[:1]); !strings.HasPrefix(got, "\n\nSources: [rbc.ru](") {
|
||||
t.Fatalf("sourcesFooter en = %q", got)
|
||||
}
|
||||
|
||||
// No usable sources → empty (no trailing label on a grok_direct/empty answer).
|
||||
if got := sourcesFooter("привет", nil); got != "" {
|
||||
t.Fatalf("empty sources should yield no footer, got %q", got)
|
||||
}
|
||||
// A source missing a title or URL is skipped.
|
||||
if got := sourcesFooter("hi", []WebSource{{Title: "", URL: redirect}, {Title: "x.com", URL: ""}}); got != "" {
|
||||
t.Fatalf("incomplete sources should yield no footer, got %q", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestHostOf(t *testing.T) {
|
||||
cases := map[string]string{
|
||||
"https://www.reuters.com/world/article-123": "reuters.com",
|
||||
"https://rbc.ru/politics/03/06/2026": "rbc.ru",
|
||||
"not a url": "",
|
||||
"": "",
|
||||
}
|
||||
for in, want := range cases {
|
||||
if got := hostOf(in); got != want {
|
||||
t.Errorf("hostOf(%q) = %q, want %q", in, got, want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -53,6 +53,16 @@ const (
|
|||
// degrades (with a hedge) rather than paying past the cap.
|
||||
var errGroundingCapped = errors.New("web grounding daily cap reached")
|
||||
|
||||
// WebSource is one attributable source behind a web answer: a human label (the publisher
|
||||
// domain) and a link the END USER can open. For gemini grounding the URL is the
|
||||
// grounding-api-redirect (clicked by the user → the real article; never resolved
|
||||
// server-side, which Gemini's terms forbid); for grok_web_search it is the real publisher
|
||||
// URL. Surfaced to the user as a compact "Sources" footer (sources.go).
|
||||
type WebSource struct {
|
||||
Title string // publisher domain ("rbc.ru") — the citation's web.title / the URL host
|
||||
URL string // the link to open (gemini: redirect; grok: real article URL)
|
||||
}
|
||||
|
||||
// WebContext is the result of a web fetch: a factual digest to feed the final model,
|
||||
// the sources behind it, the fetch's own token usage, and the cost the fetch incurred
|
||||
// (kept separate from the final synthesis tokens so each books to its own ledger
|
||||
|
|
@ -60,7 +70,8 @@ var errGroundingCapped = errors.New("web grounding daily cap reached")
|
|||
// still billed — the caller books it before degrading (§8.1 partial cascade).
|
||||
type WebContext struct {
|
||||
Digest string
|
||||
Citations []string
|
||||
Citations []string // raw source URLs (the verify-gate + citation_count telemetry)
|
||||
Sources []WebSource // the same sources with display titles (the user-facing footer)
|
||||
Usage Usage
|
||||
Cost CostBreakdown
|
||||
}
|
||||
|
|
@ -160,6 +171,7 @@ func (p *grokWebSearch) Fetch(ctx context.Context, query string) (WebContext, er
|
|||
|
||||
var digest string
|
||||
var citations []string
|
||||
var sources []WebSource
|
||||
for _, item := range out.Output {
|
||||
if item.Type != "message" {
|
||||
continue
|
||||
|
|
@ -171,6 +183,8 @@ func (p *grokWebSearch) Fetch(ctx context.Context, query string) (WebContext, er
|
|||
for _, a := range c.Annotations {
|
||||
if a.Type == "url_citation" && a.URL != "" {
|
||||
citations = append(citations, a.URL)
|
||||
// grok returns real publisher URLs, so the host IS the display domain.
|
||||
sources = append(sources, WebSource{Title: hostOf(a.URL), URL: a.URL})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -187,6 +201,7 @@ func (p *grokWebSearch) Fetch(ctx context.Context, query string) (WebContext, er
|
|||
wc := WebContext{
|
||||
Digest: digest,
|
||||
Citations: citations,
|
||||
Sources: sources,
|
||||
Usage: usage,
|
||||
Cost: CostBreakdown{
|
||||
WebTool: computeUSD(p.model, usage, p.cfg) +
|
||||
|
|
@ -237,5 +252,5 @@ func (p *geminiGrounding) Fetch(ctx context.Context, query string) (WebContext,
|
|||
}
|
||||
return WebContext{Cost: cost, Usage: res.Usage}, err
|
||||
}
|
||||
return WebContext{Digest: res.Digest, Citations: res.Citations, Usage: res.Usage, Cost: cost}, nil
|
||||
return WebContext{Digest: res.Digest, Citations: res.Citations, Sources: res.Sources, Usage: res.Usage, Cost: cost}, nil
|
||||
}
|
||||
|
|
|
|||
|
|
@ -89,6 +89,36 @@ grounding is free under the daily RPD, guarded by `WEB_GROUNDING_DAILY_CAP`. `XA
|
|||
+ `GROK_REASONING_EFFORT=none` (4.3 otherwise reasons on every reply). Full flag table in the
|
||||
[README](../../apps/ai-bot/README.md).
|
||||
|
||||
## Trigger hygiene (what reaches the search query)
|
||||
|
||||
The raw event body is **cleaned once** at the top of `respond` ([bot.go](../../apps/ai-bot/bot.go),
|
||||
`stripBotMention(stripReplyFallback(...))`) before it is used as the web-search query, the prompt
|
||||
trigger, the buffer entry, or telemetry. Two egress hazards both rode the raw body: the bot's own
|
||||
mention pill fallback (cinny writes the **full mxid** `@ai:vojo.chat` into the plain `body`), and
|
||||
the rich-reply quoted parent. The mxid was the worse one — sent verbatim to gemini grounding it
|
||||
made the provider treat **`vojo.chat`** as the subject entity ("was the *Vojo.chat* messenger
|
||||
removed?") and confabulate a confident wrong answer; the same question without the mention (e.g. in
|
||||
a DM, which has no mention) grounded correctly. Mention **detection** is unaffected — it runs
|
||||
upstream on `m.mentions`/`replyParentIsBot` ([mentions.go](../../apps/ai-bot/mentions.go)), not on
|
||||
body text. The human display name is deliberately **not** stripped, so "что умеет Vojo AI" survives.
|
||||
|
||||
## Source attribution (the "Sources" footer)
|
||||
|
||||
Web answers append a compact, deduped **`Источники: [rbc.ru](…), …`** line built **server-side**
|
||||
after Grok's prose ([sources.go](../../apps/ai-bot/sources.go) `sourcesFooter`), never via the Grok
|
||||
prompt (the synth note still says "no URLs or links" — instructing Grok to cite made it paste ugly
|
||||
redirects and mis-attribute them). The label is the publisher **domain** (`web.title`); the link is
|
||||
the citation's URL — for `gemini_grounding` that is the opaque `grounding-api-redirect` URL, which
|
||||
the **end user clicks** to reach the real article. **Gemini Grounding terms** (verified against
|
||||
`ai.google.dev/gemini-api/terms`) constrain this: the redirect must **not** be resolved
|
||||
server-side (no "programmatic/automated access to Grounded Results"), and a strict reading also
|
||||
requires showing the **Search-Suggestions chip** (`searchEntryPoint.renderedContent`, HTML/CSS) —
|
||||
which a sanitised Matrix bubble can't render, so that part stays unmet (pre-existing gap; the bot
|
||||
already shows grounded prose without it). The footer is appended to the **sent** message only, not
|
||||
the buffered turn — the redirect links are ephemeral, so they must not pollute the history that
|
||||
feeds later prompts. `grok_web_search` returns **real** publisher URLs (no Google display ToS), so
|
||||
switching `WEB_PROVIDER` is the path to true article links — at ~17× the cost.
|
||||
|
||||
## Observability (logs + per-request trace)
|
||||
|
||||
`log/slog` to stderr (`LOG_LEVEL`, `LOG_FORMAT=text|json`). A context-aware handler
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue