diff --git a/apps/ai-bot/.gitignore b/apps/ai-bot/.gitignore index f6c38d7a..eeddd7c6 100644 --- a/apps/ai-bot/.gitignore +++ b/apps/ai-bot/.gitignore @@ -1,4 +1,5 @@ .env state/ ai-bot +/routereval *.local diff --git a/apps/ai-bot/README.md b/apps/ai-bot/README.md index aa76defc..5a28040a 100644 --- a/apps/ai-bot/README.md +++ b/apps/ai-bot/README.md @@ -162,11 +162,13 @@ offline-eval gate (misroute < 2-3% AND measured saving > the second provider's c | Env | Default | Meaning | |---|---|---| | `ROUTER_ENABLED` | false | Layer-0 heuristic router (else everything → grok_direct) | -| `ROUTER_CLASSIFIER_ENABLED` | false | Layer-1 Gemini classifier on uncertain cases (requires `ROUTER_ENABLED` + Gemini key) | +| `ROUTER_CLASSIFIER_ENABLED` | false | Layer-1 Gemini classifier — runs on **every** message when on (not just uncertain ones): it agreement-confirms trivial and, with `WEB_PARANOID`, raises checkable-fact lookups to web. Budget ~$0.00004/msg, reserved unconditionally. Requires `ROUTER_ENABLED` + Gemini key. | | `TRIVIAL_OFFLOAD_ENABLED` | false | answer trivial messages with Gemini (requires Gemini key) | | `WEB_ENABLED` | false | web_then_grok route (Gemini/Grok fetches fresh facts, **Grok stays the voice**) | | `WEB_PROVIDER` | `grok_web_search` | `grok_web_search` (xAI Agent Tools `web_search` on the Responses API, $5/1k calls, no Gemini key) or `gemini_grounding` (**cheapest**: Gemini does the fetch via native v1beta `google_search`, Grok voices it — ~$0.0013/query, validated on `gemini-2.5-flash-lite`; the F-EXT-3 "Gemini-3 only" caveat is the OpenAI-compat endpoint, native v1beta works on 2.5). Requires `GEMINI_API_KEY`. | -| `WEB_GROUNDING_DAILY_CAP` | 450 | durable per-day cap for `gemini_grounding` before degrading (keep < the 500/day free grounding RPD; guards the per-1k overage) | +| `WEB_PARANOID` | false | **the single switch that activates epistemic grounding.** Beyond freshness words, it unlocks the classifier-driven web arms (needs_web≥0.55, obscure entity, time-sensitive, lookup-hint) — i.e. it routes checkable-fact lookups (a film's cast, a date) to grounding instead of letting Grok answer from memory and hallucinate. With it off, web routing is freshness-only (= today), so turning on the classifier alone is web-routing-neutral. **Requires `WEB_PROVIDER=gemini_grounding`** (refuses to boot on `grok_web_search`, which has no daily cap). | +| `WEB_GROUNDING_DAILY_CAP` | 450 | durable per-day cap for `gemini_grounding` before degrading. Google gives **1,500 grounded requests/day free** (shared Flash/Flash-Lite, both free & paid tiers; verified ai.google.dev/pricing); keep the cap **under 1,500** so grounding stays free (token-only). Must be > 0 for `gemini_grounding` (a non-positive cap silently disables grounding → refuses to boot). | +| `GEMINI_GROUNDING_PER_PROMPT_USD` | 0.035 | the per-grounded-prompt FEE booked into the ledger so the `DAILY_USD_CEILING` accounts for it. The fee is **$35/1k = $0.035** but ONLY applies **above** the 1,500/day free allowance. So while `WEB_GROUNDING_DAILY_CAP ≤ 1,500` (e.g. the 450 default) grounding never hits the fee → **set `0`** (the bot then books only token cost, which is correct). Set `0.035` only if you raise the cap above 1,500/day, so the ceiling throttles before silently overrunning on requests #1501+. | | `REASONING_ENABLED` | false | manual "think harder" route on `REASONING_TRIGGER` | | `REASONING_TRIGGER` | `подумай глубже` | trigger phrase | | `REASONING_MODEL` | `grok-4.3` | a **reasoning-capable** model (the default `grok-4.20-non-reasoning` rejects `reasoning_effort`) | diff --git a/apps/ai-bot/bot.go b/apps/ai-bot/bot.go index 82982f5e..ee32368a 100644 --- a/apps/ai-bot/bot.go +++ b/apps/ai-bot/bot.go @@ -111,7 +111,7 @@ func NewBot(ctx context.Context, cfg *Config, logger *slog.Logger) (*Bot, error) } if cfg.WebEnabled { if cfg.WebProvider == webProviderGeminiGrounding { - b.web = &geminiGrounding{gem: gc, st: st, cfg: cfg} + b.web = &geminiGrounding{gem: gc, st: st, cfg: cfg, logger: logger} } else { b.web = newGrokWebSearch(cfg, logger) } @@ -466,7 +466,7 @@ func (b *Bot) respond(ctx context.Context, roomID, threadRoot string, isDM bool, defer cancel() msgs := buildContext(b.cfg.SystemPrompt, history, isDM, mc.Body, b.cfg.MaxCtxEvent, maxPromptTokens) - res, err := b.generate(genCtx, mc.Body, msgs, b.convID(roomID, threadRoot)) + res, err := b.generate(genCtx, mc.Body, msgs, b.convID(roomID, threadRoot), isDM) // Record what the routing + generation actually did, whatever the outcome. rl.Route = res.route @@ -487,6 +487,20 @@ func (b *Bot) respond(ctx context.Context, roomID, threadRoot string, isDM bool, if res.degraded != "" { rl.Degraded = res.degraded } + // Classifier signals + web outcome for the offline eval (§8). Booleans/counts are + // metadata (always recorded when telemetry is on); SearchQuery/AnswerText are content + // (stripped unless TELEMETRY_STORE_TEXT — see recordTelemetry). + rl.NeedsWeb = res.decision.NeedsWeb + rl.EntityObscure = res.decision.EntityObscure + rl.TimeSensitive = res.decision.TimeSensitive + rl.Verifiable = res.decision.Verifiable + rl.TrivialScore = res.decision.TrivialScore + rl.WebDecidedBy = res.decision.WebDecidedBy + rl.RewriteUsed = res.rewriteUsed + rl.WebGrounded = res.webGrounded + rl.CitationCount = res.citationCount + rl.SearchQuery = res.searchQuery + rl.AnswerText = res.text // The full routing/generation picture for one request, in one line: which route ran, // whether it was a fallback, the degrade reason (if any), the per-stage timings and @@ -494,7 +508,9 @@ func (b *Bot) respond(ctx context.Context, roomID, threadRoot string, isDM bool, b.log.DebugContext(ctx, "generation outcome", "route", res.route, "router_source", res.decision.Source, "router_confidence", res.decision.Confidence, "fallback", res.fallback, - "degraded", res.degraded, "stage_ms", res.stageMS, "usd", res.cost.Total()) + "degraded", res.degraded, "stage_ms", res.stageMS, "usd", res.cost.Total(), + "web_grounded", res.webGrounded, "citation_count", res.citationCount, + "grounding_fee_usd", res.cost.GroundingFee, "rewrite_used", res.rewriteUsed) if err != nil { // Terminal: even grok_direct failed. Settle whatever the cascade ACTUALLY spent diff --git a/apps/ai-bot/cascade.go b/apps/ai-bot/cascade.go index 809ea86c..e97b2cc6 100644 --- a/apps/ai-bot/cascade.go +++ b/apps/ai-bot/cascade.go @@ -30,6 +30,14 @@ type genResult struct { fallback bool // true if we degraded off the decided route degraded string // degrade reason for request_log stageMS map[string]int + + // Web-route outcome (for request_log §8): the resolved query actually sent to Fetch, + // whether the context-resolved rewrite was used (vs the bare body), and whether the + // fetch came back grounded with citations (a zero-citation synth is a silent false-web). + searchQuery string + rewriteUsed bool + webGrounded bool + citationCount int } func msSince(t time.Time) int { return int(time.Since(t).Milliseconds()) } @@ -47,33 +55,49 @@ func (b *Bot) reserveEstimate() float64 { // fetch can search several times and pull large context; reserve generously. est += float64(maxWebSearchCalls)*grokWebSearchPerCall + b.estimateUSD(b.cfg.XAIModel) } else { - est += b.estimateUSD(b.cfg.GeminiModel) + // gemini grounding: the fetch's tokens PLUS the per-grounded-prompt fee (§7 + // SG2), so the admission envelope is a true upper bound once the fee is booked. + est += b.estimateUSD(b.cfg.GeminiModel) + b.cfg.GeminiGroundingPerPrompt } } if b.cfg.ReasoningEnabled { // Higher reasoning effort can burn more output tokens; reserve double. est = max(est, 2*b.estimateUSD(b.cfg.ReasoningModel)) } + // The always-on Layer-1 classifier leg (§7 Finding 4): a cheap Gemini call on every + // message when the classifier is enabled, so reserved ≥ actual stays true. Added after + // the max() so it is never swallowed by the reasoning branch. + if b.cfg.RouterClassifierEnabled { + est += b.estimateUSD(b.cfg.GeminiModel) + } return est } // generate routes and produces an answer, degrading to grok_direct on any failure. // It returns a terminal error ONLY if even grok_direct fails; every other route falls // through to grok_direct rather than erroring. -func (b *Bot) generate(ctx context.Context, body string, msgs []Message, convID string) (genResult, error) { +func (b *Bot) generate(ctx context.Context, body string, msgs []Message, convID string, isDM bool) (genResult, error) { res := genResult{stageMS: map[string]int{}, finalModel: b.cfg.XAIModel} + // The privacy-minimised conversation window for the classifier + follow-up rewrite. + // DM-resolved (last ≤2 turns); bare trigger in groups (no cross-member subject bleed). + rcx := routerContext(msgs, isDM) + t0 := time.Now() - res.decision = b.classify(ctx, body, &res.cost) // accumulates cost.Router if Layer-1 runs + res.decision = b.classify(ctx, body, rcx, &res.cost) // accumulates cost.Router if Layer-1 runs res.stageMS["router"] = msSince(t0) res.route = res.decision.Route // The router's pre-dispatch verdict (what it chose, why, how sure). On a degrade the // route that actually runs differs from this — respond logs that final outcome — so - // the two lines together show "router wanted X, we ran Y". DEBUG: routing diagnostics. + // the two lines together show "router wanted X, we ran Y". DEBUG: routing diagnostics, + // content-free (the resolved search_query is NOT logged here — it's a gated path, §8). b.log.DebugContext(ctx, "route decided", "route", res.decision.Route, "source", res.decision.Source, "confidence", res.decision.Confidence, "needs_web", res.decision.NeedsWeb, + "web_decided_by", res.decision.WebDecidedBy, "verifiable", res.decision.Verifiable, + "entity_obscure", res.decision.EntityObscure, "time_sensitive", res.decision.TimeSensitive, + "trivial", res.decision.TrivialScore, "lookup_hint", res.decision.LookupHint, "reasoning_level", res.decision.ReasoningLevel) finalMsgs := msgs @@ -89,14 +113,21 @@ func (b *Bot) generate(ctx context.Context, body string, msgs []Message, convID } case routeWebThenGrok: if b.cfg.WebEnabled && b.web != nil { - if err := b.genWebThenGrok(ctx, body, msgs, convID, &res); err == nil { + if err := b.genWebThenGrok(ctx, body, isDM, msgs, convID, &res); err == nil { return res, nil } else { b.log.WarnContext(ctx, "web route failed; degrading to grok_direct", "err", err, "reason", res.degraded) b.degradeTo(&res, degradeWeb) - // The question wanted fresh facts but we have none — answer from training - // knowledge WITH an honest staleness caveat, not stale-as-current (§8.2.1). - finalMsgs = hedgeMessages(msgs) + // We have no fresh facts. For a RECENCY miss, hedge with an honest staleness + // caveat (§8.2.1). For a STATIC verifiable-fact miss (a film cast, a date), + // the staleness caveat is wrong — a stale caveat on a wrong cast still ships + // the wrong cast — so instruct Grok to ABSTAIN on specific names/dates/numbers + // instead of emitting a confident guess (§4.4). + if res.decision.factualMiss() { + finalMsgs = factualAbstainMessages(msgs) + } else { + finalMsgs = hedgeMessages(msgs) + } } } case routeReason: @@ -209,17 +240,38 @@ const webStageTimeout = 15 * time.Second // accounts for the spend before the caller degrades to grok_direct (the partial cascade // case, §8.1). The daily cap and per-stage deadline are applied here, uniformly for both // providers. -func (b *Bot) genWebThenGrok(ctx context.Context, body string, msgs []Message, convID string, res *genResult) error { +func (b *Bot) genWebThenGrok(ctx context.Context, body string, isDM bool, msgs []Message, convID string, res *genResult) error { + // DM-gated rewrite-with-fallback (§6): use the classifier's self-contained, + // follow-up-resolved query, but ONLY in a DM (a group buffer interleaves members' + // topics) and only when it's present and not over-long; otherwise the bare body — so + // the fetch is never worse than today. Sanitise before egress (it is model-authored + // text going to an external search API): collapse control chars/whitespace, cap length. + q := body + if isDM { + if sq := strings.TrimSpace(res.decision.SearchQuery); sq != "" && len([]rune(sq)) <= 200 { + q, res.rewriteUsed = sq, true + } + } + q = sanitizeSearchQuery(q) + if q == "" { + q, res.rewriteUsed = sanitizeSearchQuery(body), false // never send an empty query + } + res.searchQuery = q + // Per-stage web/grounding deadline, independent of the overall budget. wctx, cancelW := context.WithTimeout(ctx, webStageTimeout) tw := time.Now() - wc, ferr := b.web.Fetch(wctx, body) + wc, ferr := b.web.Fetch(wctx, q) cancelW() res.stageMS["web"] = msSince(tw) // Book the fetch's fee + tokens whether or not it produced a usable digest — the call - // was billed (the daily cap, if any, is enforced inside the provider). + // was billed (the daily cap, if any, is enforced inside the provider). GroundingFee is + // the per-grounded-prompt overage (§7 SG1), booked even on the error return. res.cost.Grounding += wc.Cost.Grounding + res.cost.GroundingFee += wc.Cost.GroundingFee res.cost.WebTool += wc.Cost.WebTool + res.citationCount = len(wc.Citations) + res.webGrounded = len(wc.Citations) > 0 webUsage := wc.Usage if ferr != nil { if errors.Is(ferr, errGroundingCapped) { @@ -227,6 +279,12 @@ func (b *Bot) genWebThenGrok(ctx context.Context, body string, msgs []Message, c } return ferr // web fee already booked; caller degrades to grok_direct (with hedge) } + // A non-empty digest with NO citations is a silent false-web (the answer is synthesised + // from an ungrounded fetch). gemini_grounding errors out before here; grok_web_search + // can reach this — surface it at WARN so it's visible at the default level (§8). + if len(wc.Citations) == 0 { + b.log.WarnContext(ctx, "web no-citation synth (ungrounded digest)", "provider", b.cfg.WebProvider) + } tf := time.Now() resp, err := b.llm.Complete(ctx, LLMRequest{ @@ -256,23 +314,70 @@ func (b *Bot) genWebThenGrok(ctx context.Context, body string, msgs []Message, c return nil } -// webSynthMessages inserts the fresh web digest (and its sources) as a system note just -// after the system prompt, so Grok answers in voice using current facts. +// webSynthMessages inserts the fresh web digest as a system note just after the system +// prompt, so Grok answers in voice using current facts. It deliberately does NOT pass the +// raw citation URLs into the prompt, nor ask Grok to "cite sources": gemini grounding +// returns opaque vertexaisearch.../grounding-api-redirect/... redirect links (not publisher +// URLs), and instructing Grok to cite made it paste those ugly redirects verbatim into the +// reply and mis-attribute them ("ссылок из твоего сообщения"). The grounding already +// happened (citation_count is recorded for telemetry); the user wants the answer, not +// Google's internal redirect links. Real source attribution (resolving redirects to +// domains) is a separate, deferred feature. func webSynthMessages(base []Message, wc WebContext) []Message { - facts := "Свежие данные из веба (используй их в ответе и сошлись на источники):\n" + wc.Digest - if len(wc.Citations) > 0 { - facts += "\nИсточники: " + strings.Join(wc.Citations, ", ") - } + facts := "Свежие данные из веба — ответь на их основе, кратко и по делу, без URL и ссылок:\n" + wc.Digest return insertSystemNote(base, facts) } -// hedgeMessages adds an honest staleness caveat for a web→grok_direct degrade: the user -// wanted fresh facts but we couldn't fetch them, so the model must flag that its answer -// is from training knowledge and may be out of date. +// hedgeMessages adds an honest staleness caveat for a web→grok_direct degrade on a +// RECENCY query: the user wanted fresh facts but we couldn't fetch them, so the model +// must flag that its answer is from training knowledge and may be out of date. func hedgeMessages(base []Message) []Message { return insertSystemNote(base, "Нет доступа к свежим источникам прямо сейчас — отвечай по знаниям на момент обучения и честно предупреди, что данные могут быть устаревшими.") } +// factualAbstainMessages is the degrade hedge for a STATIC verifiable-fact miss (§4.4): +// a staleness caveat is wrong here (the fact isn't stale, it's checkable and the model +// may simply not know it), so instruct Grok to ABSTAIN on specific names/dates/numbers +// rather than ship a confident guess — the exact failure (the hallucinated film cast) +// this redesign exists to stop. +func factualAbstainMessages(base []Message) []Message { + return insertSystemNote(base, "Не удалось проверить факты через веб. Если ответ зависит от конкретных имён, дат, годов, чисел или состава — честно скажи, что не уверен в точной фактуре и можешь ошибаться; НЕ выдавай догадку за факт.") +} + +// factualMiss reports whether a web degrade should use the abstain hedge (a static +// checkable-fact question) rather than the staleness hedge (a recency question). A +// recency signal (freshnessRe or the classifier's time_sensitive) always means +// staleness; otherwise a verifiable / obscure-entity question — OR any non-recency +// needs_web verdict (so an off-spec needs_web-only verdict still abstains rather than +// emit a confident guess) — means abstain. +func (d RouterDecision) factualMiss() bool { + if d.Freshness != "" || d.TimeSensitive { + return false + } + return d.Verifiable || d.EntityObscure || d.NeedsWeb +} + +// sanitizeSearchQuery prepares a (possibly model-authored) query for egress to an +// external search API: collapse newlines/control chars/runs of whitespace to single +// spaces and cap the rune length. Never trusts the model to have produced clean, +// bounded text. +func sanitizeSearchQuery(q string) string { + q = strings.Map(func(r rune) rune { + if r == '\n' || r == '\r' || r == '\t' { + return ' ' + } + if r < 0x20 || r == 0x7f { + return -1 // drop other control chars + } + return r + }, q) + q = strings.Join(strings.Fields(q), " ") // collapse whitespace runs + if r := []rune(q); len(r) > 200 { + q = strings.TrimSpace(string(r[:200])) + } + return q +} + // insertSystemNote inserts an extra system message right after the system prompt // (base[0] from buildContext), preserving the rest of the window. func insertSystemNote(base []Message, content string) []Message { diff --git a/apps/ai-bot/cascade_test.go b/apps/ai-bot/cascade_test.go index a801c574..238a177e 100644 --- a/apps/ai-bot/cascade_test.go +++ b/apps/ai-bot/cascade_test.go @@ -5,6 +5,7 @@ import ( "errors" "io" "log/slog" + "strings" "testing" ) @@ -29,13 +30,15 @@ func (f *fakeLLM) Complete(_ context.Context, req LLMRequest) (*LLMResponse, err } type fakeWeb struct { - wc WebContext - err error - calls int + wc WebContext + err error + calls int + lastQuery string } -func (f *fakeWeb) Fetch(_ context.Context, _ string) (WebContext, error) { +func (f *fakeWeb) Fetch(_ context.Context, q string) (WebContext, error) { f.calls++ + f.lastQuery = q if f.err != nil { return WebContext{}, f.err } @@ -71,7 +74,7 @@ func TestGenerateAllFlagsOffIsGrokDirect(t *testing.T) { cfg := cascadeCfg() b := &Bot{cfg: &cfg, llm: grok, gemini: gem, log: discardLog()} - res, err := b.generate(context.Background(), "привет", msgs("привет"), "") + res, err := b.generate(context.Background(), "привет", msgs("привет"), "", true) if err != nil { t.Fatalf("generate: %v", err) } @@ -93,7 +96,7 @@ func TestGenerateTrivialOffload(t *testing.T) { cfg.RouterEnabled, cfg.TrivialOffloadEnabled = true, true b := &Bot{cfg: &cfg, llm: grok, gemini: gem, log: discardLog()} - res, err := b.generate(context.Background(), "привет", msgs("привет"), "") + res, err := b.generate(context.Background(), "привет", msgs("привет"), "", true) if err != nil { t.Fatalf("generate: %v", err) } @@ -114,7 +117,7 @@ func TestGenerateTrivialDegradesToGrok(t *testing.T) { cfg.RouterEnabled, cfg.TrivialOffloadEnabled = true, true b := &Bot{cfg: &cfg, llm: grok, gemini: gem, log: discardLog()} - res, err := b.generate(context.Background(), "привет", msgs("привет"), "") + res, err := b.generate(context.Background(), "привет", msgs("привет"), "", true) if err != nil { t.Fatalf("generate: %v", err) } @@ -129,6 +132,8 @@ func TestGenerateTrivialDegradesToGrok(t *testing.T) { } } +// TestGenerateWebThenGrok: a freshness query (classifier off → Layer-0 web) fetches then +// has Grok synthesise, booking both calls' tokens + the web fee. func TestGenerateWebThenGrok(t *testing.T) { grok := &fakeLLM{text: "synthesised", usage: Usage{PromptTokens: 100, CompletionTokens: 50}} web := &fakeWeb{wc: WebContext{Digest: "fresh facts", Citations: []string{"http://src"}, Cost: CostBreakdown{WebTool: 0.1}}} @@ -136,7 +141,7 @@ func TestGenerateWebThenGrok(t *testing.T) { cfg.RouterEnabled, cfg.WebEnabled = true, true b := &Bot{cfg: &cfg, llm: grok, web: web, log: discardLog()} - res, err := b.generate(context.Background(), "какие новости сегодня", msgs("какие новости сегодня"), "") + res, err := b.generate(context.Background(), "какие новости сегодня", msgs("какие новости сегодня"), "", true) if err != nil { t.Fatalf("generate: %v", err) } @@ -146,13 +151,16 @@ func TestGenerateWebThenGrok(t *testing.T) { if res.cost.WebTool != 0.1 || res.cost.Token <= 0 { t.Fatalf("cost = %+v, want WebTool 0.1 + Token>0", res.cost) } + if !res.webGrounded || res.citationCount != 1 { + t.Fatalf("webGrounded=%v citations=%d, want true/1", res.webGrounded, res.citationCount) + } if web.calls != 1 || grok.calls != 1 { t.Fatalf("calls web=%d grok=%d, want 1/1", web.calls, grok.calls) } } -// TestGenerateWebDegradesToGrok: a web fetch failure (provider down or cap hit) degrades -// to grok_direct and books no web cost. +// TestGenerateWebDegradesToGrok: a web fetch failure (cap hit) degrades to grok_direct, +// books no web cost, and — being a RECENCY query — uses the staleness hedge, not abstain. func TestGenerateWebDegradesToGrok(t *testing.T) { grok := &fakeLLM{text: "grok fallback"} web := &fakeWeb{err: errGroundingCapped} @@ -160,7 +168,7 @@ func TestGenerateWebDegradesToGrok(t *testing.T) { cfg.RouterEnabled, cfg.WebEnabled = true, true b := &Bot{cfg: &cfg, llm: grok, web: web, log: discardLog()} - res, err := b.generate(context.Background(), "новости сегодня", msgs("новости сегодня"), "") + res, err := b.generate(context.Background(), "новости сегодня", msgs("новости сегодня"), "", true) if err != nil { t.Fatalf("generate: %v", err) } @@ -173,6 +181,10 @@ func TestGenerateWebDegradesToGrok(t *testing.T) { if res.cost.WebTool != 0 || res.cost.Grounding != 0 { t.Fatalf("web cost = %+v, want 0 (fetch failed before billing)", res.cost) } + // Recency miss → staleness hedge ("устаревшими"), not the factual-abstain hedge. + if !hedgeContains(grok.lastReq.Messages, "устаревш") { + t.Fatalf("freshness degrade should use the staleness hedge; messages = %+v", grok.lastReq.Messages) + } } // TestGenerateReasoningForced: the manual trigger routes to the reasoning model with @@ -183,7 +195,7 @@ func TestGenerateReasoningForced(t *testing.T) { cfg.ReasoningEnabled = true // ROUTER_ENABLED deliberately left off b := &Bot{cfg: &cfg, llm: grok, log: discardLog()} - res, err := b.generate(context.Background(), "подумай глубже про сознание", msgs("подумай глубже про сознание"), "") + res, err := b.generate(context.Background(), "подумай глубже про сознание", msgs("подумай глубже про сознание"), "", true) if err != nil { t.Fatalf("generate: %v", err) } @@ -195,29 +207,240 @@ func TestGenerateReasoningForced(t *testing.T) { } } -// TestClassifierConfidenceFloor: a Layer-1 classifier label that escalates off the safe -// floor (trivial/web) must clear the confidence floor, else the request stays on -// grok_direct — the false-trivial voice-leak guard (§8.6). -func TestClassifierConfidenceFloor(t *testing.T) { +// TestClassifyTrivialAgreementGate: a trivial route requires the Layer-0 candidate AND +// classifier.trivial AND confidence ≥ trivialFloor. A low-confidence "trivial" or a +// classifier that disagrees stays on grok_direct (no voice leak). +func TestClassifyTrivialAgreementGate(t *testing.T) { cfg := cascadeCfg() cfg.RouterEnabled, cfg.RouterClassifierEnabled = true, true gem := &fakeLLM{} b := &Bot{cfg: &cfg, gemini: gem, log: discardLog()} var cost CostBreakdown - const substantive = "напиши подробное эссе про историю римской империи" // Layer-0 → grok_direct - gem.text = `{"route":"trivial","confidence":0.2}` // low-confidence escalation - if d := b.classify(context.Background(), substantive, &cost); d.Route != routeGrokDirect { - t.Fatalf("low-confidence trivial must stay grok_direct (safe floor), got %q", d.Route) + gem.text = `{"trivial":true,"needs_web":false,"confidence":0.95}` + if d := b.classify(context.Background(), "привет", "USER: привет", &cost); d.Route != routeTrivial { + t.Fatalf("agreed high-confidence trivial = %q, want trivial", d.Route) } - gem.text = `{"route":"trivial","confidence":0.95}` // confident escalation is honoured - if d := b.classify(context.Background(), substantive, &cost); d.Route != routeTrivial { - t.Fatalf("high-confidence trivial should route trivial, got %q", d.Route) + gem.text = `{"trivial":true,"needs_web":false,"confidence":0.5}` + if d := b.classify(context.Background(), "привет", "USER: привет", &cost); d.Route != routeGrokDirect { + t.Fatalf("low-confidence trivial = %q, want grok_direct (no leak)", d.Route) } - // A classifier error degrades to the Layer-0 verdict (grok_direct), never silence. - gem.text, gem.err = "", errors.New("gemini down") - if d := b.classify(context.Background(), substantive, &cost); d.Route != routeGrokDirect { - t.Fatalf("classifier failure must fall back to heuristic grok_direct, got %q", d.Route) + // A non-trivial body can never be trivial even if the classifier claims so. + gem.text = `{"trivial":true,"needs_web":false,"confidence":0.99}` + const substantive = "напиши подробное эссе про историю римской империи" + if d := b.classify(context.Background(), substantive, "USER: …", &cost); d.Route != routeGrokDirect { + t.Fatalf("classifier.trivial on a substantive body = %q, want grok_direct", d.Route) + } +} + +// TestClassifyClassifierErrorFallsBackToLayer0: a classifier error/garbage degrades to the +// deterministic Layer-0 verdict — grok_direct for a substantive body, web for a freshness +// body — never an ungrounded confident answer, never a degrade-to-web. +func TestClassifyClassifierErrorFallsBackToLayer0(t *testing.T) { + cfg := cascadeCfg() + cfg.RouterEnabled, cfg.RouterClassifierEnabled, cfg.WebParanoid = true, true, true + gem := &fakeLLM{} + b := &Bot{cfg: &cfg, gemini: gem, log: discardLog()} + var cost CostBreakdown + + // Transport error → Layer-0. + gem.err = errors.New("gemini down") + if d := b.classify(context.Background(), "напиши эссе про рим", "USER: …", &cost); d.Route != routeGrokDirect { + t.Fatalf("classifier error on substantive body = %q, want grok_direct (Layer-0)", d.Route) + } + if d := b.classify(context.Background(), "новости сегодня", "USER: …", &cost); d.Route != routeWebThenGrok { + t.Fatalf("classifier error on freshness body = %q, want web (deterministic Layer-0 survives)", d.Route) + } + // Garbage JSON (no transport error) → also Layer-0. + gem.err, gem.text = nil, "not json at all" + if d := b.classify(context.Background(), "напиши эссе про рим", "USER: …", &cost); d.Route != routeGrokDirect { + t.Fatalf("garbage classifier JSON = %q, want grok_direct (Layer-0)", d.Route) + } +} + +// TestGenerateRoadHouseWebParanoidDM is the headline regression: an obscure-entity factual +// lookup in a DM, with the classifier + WEB_PARANOID on, routes to web AND the fetch uses +// the classifier's context-resolved search_query (the follow-up rewrite). With paranoid +// off it correctly stays grok_direct (the canary-neutral baseline). +func TestGenerateRoadHouseWebParanoidDM(t *testing.T) { + const verdict = `{"needs_web":true,"verifiable":true,"entity_obscure":true,"time_sensitive":false,"trivial":false,"search_query":"Дом у дороги 2024 фильм актёрский состав","confidence":0.7}` + mk := func(paranoid bool) (*fakeLLM, *fakeWeb, genResult) { + grok := &fakeLLM{text: "voiced", usage: Usage{PromptTokens: 10, CompletionTokens: 5}} + gem := &fakeLLM{text: verdict} + web := &fakeWeb{wc: WebContext{Digest: "cast: Patrick Swayze…", Citations: []string{"http://imdb"}}} + cfg := cascadeCfg() + cfg.RouterEnabled, cfg.RouterClassifierEnabled, cfg.WebEnabled, cfg.WebParanoid = true, true, true, paranoid + b := &Bot{cfg: &cfg, llm: grok, gemini: gem, web: web, log: discardLog()} + res, err := b.generate(context.Background(), "2024 года", []Message{ + {Role: "system", Content: "SYS"}, + {Role: "user", Content: "кто снимался в фильме дом у дороги"}, + {Role: "assistant", Content: "В фильме 1989 года…"}, + {Role: "user", Content: "2024 года"}, + }, "", true) + if err != nil { + t.Fatalf("generate: %v", err) + } + return grok, web, res + } + + _, web, res := mk(true) + if res.route != routeWebThenGrok { + t.Fatalf("paranoid DM road-house = %q, want web_then_grok (the fix)", res.route) + } + if !res.rewriteUsed || web.lastQuery != "Дом у дороги 2024 фильм актёрский состав" { + t.Fatalf("fetch should use the rewritten query: rewriteUsed=%v lastQuery=%q", res.rewriteUsed, web.lastQuery) + } + + _, _, resOff := mk(false) + if resOff.route != routeGrokDirect { + t.Fatalf("paranoid OFF road-house = %q, want grok_direct (baseline)", resOff.route) + } +} + +// TestGenerateFollowupGroupUsesBareBody: in a GROUP the context-resolved rewrite is +// suppressed — the fetch uses the bare (sanitised) body, never the classifier's +// search_query, so a member's follow-up can't ground the wrong prior subject. +func TestGenerateFollowupGroupUsesBareBody(t *testing.T) { + const verdict = `{"needs_web":true,"verifiable":true,"entity_obscure":true,"search_query":"какой-то чужой фильм 2024","confidence":0.7}` + grok := &fakeLLM{text: "voiced"} + gem := &fakeLLM{text: verdict} + web := &fakeWeb{wc: WebContext{Digest: "d", Citations: []string{"http://s"}}} + cfg := cascadeCfg() + cfg.RouterEnabled, cfg.RouterClassifierEnabled, cfg.WebEnabled, cfg.WebParanoid = true, true, true, true + b := &Bot{cfg: &cfg, llm: grok, gemini: gem, web: web, log: discardLog()} + + res, err := b.generate(context.Background(), "2024 года", msgs("2024 года"), "", false /* group */) + if err != nil { + t.Fatalf("generate: %v", err) + } + if res.route != routeWebThenGrok { + t.Fatalf("group route = %q, want web_then_grok", res.route) + } + if res.rewriteUsed || web.lastQuery != "2024 года" { + t.Fatalf("group must use the bare body, not the rewrite: rewriteUsed=%v lastQuery=%q", res.rewriteUsed, web.lastQuery) + } +} + +// TestGenerateWebEmptySearchQueryFallsBackToBody: the rewrite-with-fallback contract's +// empty arm (§6/§12). A DM web route whose classifier returned an empty search_query must +// fetch the bare (sanitised) body and report rewriteUsed=false — never an empty query. +func TestGenerateWebEmptySearchQueryFallsBackToBody(t *testing.T) { + // verifiable:true so it genuinely routes web (the needs_web arm requires verifiable); + // search_query empty is the point — the fetch must fall back to the bare body. + const verdict = `{"needs_web":true,"verifiable":true,"entity_obscure":false,"search_query":"","confidence":0.7}` + grok := &fakeLLM{text: "voiced"} + gem := &fakeLLM{text: verdict} + web := &fakeWeb{wc: WebContext{Digest: "d", Citations: []string{"http://s"}}} + cfg := cascadeCfg() + cfg.RouterEnabled, cfg.RouterClassifierEnabled, cfg.WebEnabled, cfg.WebParanoid = true, true, true, true + b := &Bot{cfg: &cfg, llm: grok, gemini: gem, web: web, log: discardLog()} + + const body = "в каком году основан Рим" + res, err := b.generate(context.Background(), body, msgs(body), "", true /* DM */) + if err != nil { + t.Fatalf("generate: %v", err) + } + if res.route != routeWebThenGrok { + t.Fatalf("route = %q, want web_then_grok", res.route) + } + if res.rewriteUsed || web.lastQuery != body { + t.Fatalf("empty search_query must fall back to the bare body: rewriteUsed=%v lastQuery=%q", res.rewriteUsed, web.lastQuery) + } +} + +// TestGenerateFreshnessTrapDesignedWeb: a freshness lexeme in a rumination +// ("сегодня…") still hard-routes to web (the accepted, designed cheap false-web, §14.1). +func TestGenerateFreshnessTrapDesignedWeb(t *testing.T) { + grok := &fakeLLM{text: "x"} + web := &fakeWeb{wc: WebContext{Digest: "d", Citations: []string{"http://s"}}} + cfg := cascadeCfg() + cfg.RouterEnabled, cfg.WebEnabled = true, true // classifier off — freshness alone routes + b := &Bot{cfg: &cfg, llm: grok, web: web, log: discardLog()} + + res, err := b.generate(context.Background(), "сегодня я думаю о смысле жизни", msgs("сегодня я думаю о смысле жизни"), "", true) + if err != nil { + t.Fatalf("generate: %v", err) + } + if res.route != routeWebThenGrok { + t.Fatalf("freshness rumination = %q, want web_then_grok (designed)", res.route) + } +} + +// TestGenerateWebDegradeFactualAbstain: a STATIC verifiable-fact web miss uses the +// factual-abstain hedge (not the staleness caveat), so Grok abstains on names/dates +// rather than shipping a confident guess. +func TestGenerateWebDegradeFactualAbstain(t *testing.T) { + const verdict = `{"needs_web":true,"verifiable":true,"entity_obscure":true,"time_sensitive":false,"search_query":"q","confidence":0.7}` + grok := &fakeLLM{text: "honest answer"} + gem := &fakeLLM{text: verdict} + web := &fakeWeb{err: errors.New("fetch boom")} + cfg := cascadeCfg() + cfg.RouterEnabled, cfg.RouterClassifierEnabled, cfg.WebEnabled, cfg.WebParanoid = true, true, true, true + b := &Bot{cfg: &cfg, llm: grok, gemini: gem, web: web, log: discardLog()} + + res, err := b.generate(context.Background(), "кто снимался в фильме дом у дороги", msgs("кто снимался в фильме дом у дороги"), "", true) + if err != nil { + t.Fatalf("generate: %v", err) + } + if res.route != routeGrokDirect || !res.fallback { + t.Fatalf("res route=%q fallback=%v, want grok_direct fallback", res.route, res.fallback) + } + if !hedgeContains(grok.lastReq.Messages, "Не удалось проверить") { + t.Fatalf("factual miss should use the abstain hedge; messages = %+v", grok.lastReq.Messages) + } + if hedgeContains(grok.lastReq.Messages, "устаревш") { + t.Fatalf("factual miss must NOT use the staleness hedge") + } +} + +// TestFactualMissHedge: the web-degrade hedge selection. A recency signal (Freshness or +// time_sensitive) → staleness (factualMiss=false); a static checkable-fact signal +// (verifiable / entity_obscure / a non-recency needs_web) → abstain (factualMiss=true). +func TestFactualMissHedge(t *testing.T) { + cases := []struct { + d RouterDecision + want bool // true => abstain hedge + }{ + {RouterDecision{Freshness: "recent"}, false}, + {RouterDecision{TimeSensitive: true}, false}, + {RouterDecision{Verifiable: true}, true}, + {RouterDecision{EntityObscure: true}, true}, + {RouterDecision{NeedsWeb: true}, true}, // off-spec needs_web-only → abstain (Q3) + {RouterDecision{NeedsWeb: true, TimeSensitive: true}, false}, // recency still wins + {RouterDecision{}, false}, + } + for _, c := range cases { + if got := c.d.factualMiss(); got != c.want { + t.Errorf("factualMiss(%+v) = %v, want %v", c.d, got, c.want) + } + } +} + +// TestReserveEstimate: flags off → exactly grok_direct's estimate; with gemini grounding + +// classifier on, it includes the per-prompt fee AND the always-on classifier leg (§7). +func TestReserveEstimate(t *testing.T) { + cfg := cascadeCfg() + b := &Bot{cfg: &cfg, log: discardLog()} + base := b.estimateUSD("grok-x") + if got := b.reserveEstimate(); !approxEq(got, base) { + t.Fatalf("flags-off reserve = %v, want grok_direct estimate %v", got, base) + } + + cfg2 := cascadeCfg() + cfg2.WebEnabled, cfg2.WebProvider = true, webProviderGeminiGrounding + cfg2.RouterEnabled, cfg2.RouterClassifierEnabled = true, true + cfg2.GeminiGroundingPerPrompt = 0.035 + b2 := &Bot{cfg: &cfg2, log: discardLog()} + want := b2.estimateUSD("grok-x") + b2.estimateUSD("gemini-x") + 0.035 + b2.estimateUSD("gemini-x") + if got := b2.reserveEstimate(); !approxEq(got, want) { + t.Fatalf("web+classifier reserve = %v, want %v (XAI + gemini fetch + $0.035 fee + classifier leg)", got, want) + } + // The fee must actually move the envelope (regression guard for an unbooked fee). + cfg3 := cfg2 + cfg3.GeminiGroundingPerPrompt = 0 + b3 := &Bot{cfg: &cfg3, log: discardLog()} + if b2.reserveEstimate()-b3.reserveEstimate() < 0.0349 { + t.Fatalf("the grounding fee must raise the reservation by ~0.035") } } @@ -225,36 +448,33 @@ func TestClassifierConfidenceFloor(t *testing.T) { // be kept fast with "none"), empty means not sent (compat with grok-4.20-non-reasoning), // and the reason route always overrides to "high" regardless. func TestGrokReasoningEffort(t *testing.T) { - // Configured effort reaches grok_direct. grok := &fakeLLM{text: "ok"} cfg := cascadeCfg() cfg.GrokReasoningEffort = "none" b := &Bot{cfg: &cfg, llm: grok, log: discardLog()} - if _, err := b.generate(context.Background(), "hello", msgs("hello"), ""); err != nil { + if _, err := b.generate(context.Background(), "hello", msgs("hello"), "", true); err != nil { t.Fatal(err) } if grok.lastReq.ReasoningEffort != "none" { t.Fatalf("grok_direct effort = %q, want none", grok.lastReq.ReasoningEffort) } - // Empty default → not sent (so grok-4.20-non-reasoning keeps working). grokDef := &fakeLLM{text: "ok"} cfgDef := cascadeCfg() // GrokReasoningEffort == "" bDef := &Bot{cfg: &cfgDef, llm: grokDef, log: discardLog()} - if _, err := bDef.generate(context.Background(), "hello", msgs("hello"), ""); err != nil { + if _, err := bDef.generate(context.Background(), "hello", msgs("hello"), "", true); err != nil { t.Fatal(err) } if grokDef.lastReq.ReasoningEffort != "" { t.Fatalf("default effort = %q, want empty (not sent)", grokDef.lastReq.ReasoningEffort) } - // The reason route ignores GROK_REASONING_EFFORT and always uses "high". grokR := &fakeLLM{text: "deep"} cfgR := cascadeCfg() cfgR.GrokReasoningEffort = "none" cfgR.ReasoningEnabled = true bR := &Bot{cfg: &cfgR, llm: grokR, log: discardLog()} - if _, err := bR.generate(context.Background(), "подумай глубже про X", msgs("подумай глубже про X"), ""); err != nil { + if _, err := bR.generate(context.Background(), "подумай глубже про X", msgs("подумай глубже про X"), "", true); err != nil { t.Fatal(err) } if grokR.lastReq.ReasoningEffort != "high" { @@ -269,7 +489,47 @@ func TestGenerateTerminalErrorPropagates(t *testing.T) { cfg := cascadeCfg() b := &Bot{cfg: &cfg, llm: grok, log: discardLog()} - if _, err := b.generate(context.Background(), "hello", msgs("hello"), ""); err == nil { + if _, err := b.generate(context.Background(), "hello", msgs("hello"), "", true); err == nil { t.Fatal("want terminal error when grok_direct fails, got nil") } } + +// TestWebSynthMessagesNoRawURLs guards the source-leak fix: the grounded digest is +// injected, but the raw gemini-grounding redirect URLs must NOT reach the synth prompt +// (Grok was pasting vertexaisearch.../grounding-api-redirect/... links into the reply). +func TestWebSynthMessagesNoRawURLs(t *testing.T) { + wc := WebContext{ + Digest: "Титаник вышел в 1997, режиссёр Джеймс Кэмерон.", + Citations: []string{"https://vertexaisearch.cloud.google.com/grounding-api-redirect/AUZIYQabc123"}, + } + out := webSynthMessages(msgs("в каком году титаник"), wc) + var note string + for _, m := range out { + if m.Role == "system" && strings.Contains(m.Content, "Свежие данные") { + note = m.Content + } + } + if note == "" { + t.Fatal("web synth note missing") + } + if !strings.Contains(note, "Титаник вышел в 1997") { + t.Fatalf("digest not injected: %q", note) + } + if strings.Contains(note, "vertexaisearch") || strings.Contains(note, "grounding-api-redirect") || strings.Contains(note, "http") { + t.Fatalf("raw citation URL leaked into the synth prompt: %q", note) + } +} + +func hedgeContains(ms []Message, sub string) bool { + for _, m := range ms { + if strings.Contains(m.Content, sub) { + return true + } + } + return false +} + +func approxEq(a, b float64) bool { + d := a - b + return d < 1e-9 && d > -1e-9 +} diff --git a/apps/ai-bot/cmd/routereval/golden_sample.json b/apps/ai-bot/cmd/routereval/golden_sample.json new file mode 100644 index 00000000..2bbcf58e --- /dev/null +++ b/apps/ai-bot/cmd/routereval/golden_sample.json @@ -0,0 +1,227 @@ +[ + { + "name": "road house first turn (obscure cast)", + "message": "кто снимался в фильме дом у дороги", + "verdict": { + "needs_web": true, + "verifiable": true, + "entity_obscure": true, + "time_sensitive": false, + "trivial": false, + "search_query": "Дом у дороги фильм актёрский состав", + "confidence": 0.7 + }, + "expected_route": "web_then_grok", + "factual": true + }, + { + "name": "road house follow-up (DM, resolved)", + "message": "2024 года", + "verdict": { + "needs_web": true, + "verifiable": true, + "entity_obscure": true, + "time_sensitive": false, + "trivial": false, + "search_query": "Дом у дороги 2024 фильм актёрский состав", + "confidence": 0.65 + }, + "expected_route": "web_then_grok", + "factual": true + }, + { + "name": "weather (freshness lexeme, forced web)", + "message": "погода сегодня в Москве", + "verdict": { + "needs_web": true, + "verifiable": false, + "entity_obscure": false, + "time_sensitive": true, + "trivial": false, + "search_query": "погода сегодня Москва", + "confidence": 0.95 + }, + "expected_route": "web_then_grok", + "factual": false + }, + { + "name": "freshness rumination (accepted designed false-web, §14.1)", + "message": "сегодня я думаю о смысле жизни", + "verdict": { + "needs_web": false, + "verifiable": false, + "entity_obscure": false, + "time_sensitive": false, + "trivial": false, + "search_query": "", + "confidence": 0.2 + }, + "expected_route": "web_then_grok", + "factual": false + }, + { + "name": "obscure entity founder (no freshness word)", + "message": "кто основал компанию Acme Widgets", + "verdict": { + "needs_web": true, + "verifiable": true, + "entity_obscure": true, + "time_sensitive": false, + "trivial": false, + "search_query": "Acme Widgets основатель компании", + "confidence": 0.6 + }, + "expected_route": "web_then_grok", + "factual": true + }, + { + "name": "static famous fact (author lookup)", + "message": "кто написал войну и мир", + "verdict": { + "needs_web": true, + "verifiable": true, + "entity_obscure": false, + "time_sensitive": false, + "trivial": false, + "search_query": "Война и мир автор", + "confidence": 0.62 + }, + "expected_route": "web_then_grok", + "factual": true + }, + { + "name": "current CEO (time-sensitive, sub-floor needs_web)", + "message": "кто возглавляет Tesla", + "verdict": { + "needs_web": true, + "verifiable": true, + "entity_obscure": false, + "time_sensitive": true, + "trivial": false, + "search_query": "Tesla CEO", + "confidence": 0.5 + }, + "expected_route": "web_then_grok", + "factual": false + }, + { + "name": "greeting (trivial, high confidence)", + "message": "привет", + "verdict": { + "needs_web": false, + "verifiable": false, + "entity_obscure": false, + "time_sensitive": false, + "trivial": true, + "search_query": "", + "confidence": 0.95 + }, + "expected_route": "trivial_direct", + "factual": false + }, + { + "name": "ack low-confidence trivial (no voice leak → grok)", + "message": "спасибо", + "verdict": { + "needs_web": false, + "verifiable": false, + "entity_obscure": false, + "time_sensitive": false, + "trivial": true, + "search_query": "", + "confidence": 0.5 + }, + "expected_route": "grok_direct", + "factual": false + }, + { + "name": "opinion / recommendation (safe floor)", + "message": "посоветуй фильм на вечер", + "verdict": { + "needs_web": false, + "verifiable": false, + "entity_obscure": false, + "time_sensitive": false, + "trivial": false, + "search_query": "", + "confidence": 0.82 + }, + "expected_route": "grok_direct", + "factual": false + }, + { + "name": "code help (safe floor)", + "message": "напиши функцию сортировки на python", + "verdict": { + "needs_web": false, + "verifiable": false, + "entity_obscure": false, + "time_sensitive": false, + "trivial": false, + "search_query": "", + "confidence": 0.9 + }, + "expected_route": "grok_direct", + "factual": false + }, + { + "name": "vague needs_web below floor (stays grok)", + "message": "что ты думаешь о криптовалютах", + "verdict": { + "needs_web": true, + "verifiable": false, + "entity_obscure": false, + "time_sensitive": false, + "trivial": false, + "search_query": "", + "confidence": 0.4 + }, + "expected_route": "grok_direct", + "factual": false + }, + { + "name": "explanation over-flagged needs_web but NOT verifiable (false-web fix)", + "message": "объясни как работают горутины в Go", + "verdict": { + "needs_web": true, + "verifiable": false, + "entity_obscure": false, + "time_sensitive": false, + "trivial": false, + "search_query": "", + "confidence": 0.9 + }, + "expected_route": "grok_direct", + "factual": false + }, + { + "name": "ack-prefixed long real question (not trivial, safe floor)", + "message": "спасибо, а теперь подробно объясни квантовую запутанность", + "verdict": { + "needs_web": false, + "verifiable": false, + "entity_obscure": false, + "time_sensitive": false, + "trivial": false, + "search_query": "", + "confidence": 0.85 + }, + "expected_route": "grok_direct", + "factual": false + }, + { + "name": "bare follow-up in a GROUP (no resolvable subject → grok)", + "message": "2024 года", + "verdict": { + "needs_web": false, + "verifiable": false, + "entity_obscure": false, + "time_sensitive": false, + "trivial": false, + "search_query": "", + "confidence": 0.3 + }, + "expected_route": "grok_direct", + "factual": false + } +] diff --git a/apps/ai-bot/cmd/routereval/main.go b/apps/ai-bot/cmd/routereval/main.go new file mode 100644 index 00000000..8711aa48 --- /dev/null +++ b/apps/ai-bot/cmd/routereval/main.go @@ -0,0 +1,188 @@ +// Command routereval is the OFFLINE router-replay harness for the §11 P1 gate. It reads +// a golden set of (message, recorded classifier verdict, expected route, factual flag), +// replays each item through the REAL decision functions (routedecide.ClassifyLayer0 + +// CombineWithFloors — the same code package main uses, never a copy), and reports the +// confusion matrix + the four P1 metrics: false-grok-on-factual (the lie metric), +// false-web, trivial-leak, misroute. It is fully deterministic and needs no network: it +// measures the ROUTING LAYER given a verdict, so you can sweep WEB_PARANOID and the +// floors instantly. (Classifier accuracy itself is a separate LIVE check — §11 P2.) +// +// The lie label on the web path uses the citation-presence proxy by convention: a golden +// item's `factual:true` + `expected_route:web_then_grok` marks "this MUST ground"; an +// LLM-judge over query+answer is the higher-fidelity option to wire later (§14.6/§15). +// +// Usage: +// +// go run ./cmd/routereval -golden cmd/routereval/golden_sample.json +// go run ./cmd/routereval -golden set.json -web-floor 0.7 # sweep the needs_web floor +// +// NOTE: golden_sample.json is labelled for the PRODUCTION config (paranoid ON) — its +// expected_route values assume the epistemic web arms are active. Running -paranoid=false +// against it is a what-if sweep that WILL report NO-GO (the entity facts fall to grok by +// design); it is NOT a passing baseline. To evaluate the paranoid-off behaviour, label a +// separate set whose expected_route reflects freshness-only web routing. +package main + +import ( + "encoding/json" + "flag" + "fmt" + "os" + + rd "vojo.chat/ai-bot/internal/routedecide" +) + +// goldenItem is one labelled row. Message drives the free Layer-0; Verdict is the +// recorded classifier output; ExpectedRoute + Factual are the ground-truth labels. +type goldenItem struct { + Name string `json:"name"` + Message string `json:"message"` + Verdict rd.Verdict `json:"verdict"` + ExpectedRoute string `json:"expected_route"` + Factual bool `json:"factual"` // a checkable-fact query that MUST ground +} + +func main() { + goldenPath := flag.String("golden", "cmd/routereval/golden_sample.json", "path to the golden-set JSON") + paranoid := flag.Bool("paranoid", true, "apply the WEB_PARANOID classifier-driven web arms") + webFloor := flag.Float64("web-floor", rd.WebNeedsWebFloor, "needs_web confidence floor to sweep") + trivialFloor := flag.Float64("trivial-floor", rd.TrivialFloor, "trivial confidence floor") + verbose := flag.Bool("v", false, "print every item, not just the mismatches") + flag.Parse() + + raw, err := os.ReadFile(*goldenPath) + if err != nil { + fmt.Fprintf(os.Stderr, "read golden set: %v\n", err) + os.Exit(2) + } + var items []goldenItem + if err := json.Unmarshal(raw, &items); err != nil { + fmt.Fprintf(os.Stderr, "parse golden set: %v\n", err) + os.Exit(2) + } + if len(items) == 0 { + fmt.Fprintln(os.Stderr, "golden set is empty") + os.Exit(2) + } + + floors := rd.Floors{WebNeedsWeb: *webFloor, Trivial: *trivialFloor} + fmt.Printf("routereval: %d items | paranoid=%v web-floor=%.2f trivial-floor=%.2f\n\n", + len(items), *paranoid, *webFloor, *trivialFloor) + + var ( + correct int + factualWeb, factualWebMissed int // denominator/numerator of false-grok-on-factual + nonWebExpected, falseWeb int + nonTrivialExpected, trivialLeak int + ) + roadHouseSeen := false + roadHousePass := true + for _, it := range items { + l0 := rd.ClassifyLayer0(it.Message) + got := rd.CombineWithFloors(l0, it.Verdict, *paranoid, floors).Route + ok := got == it.ExpectedRoute + if ok { + correct++ + } + if it.Factual && it.ExpectedRoute == rd.RouteWeb { + factualWeb++ + if got == rd.RouteGrokDirect { + factualWebMissed++ // a confident-lie risk: a checkable fact answered from memory + } + } + if it.ExpectedRoute != rd.RouteWeb { + nonWebExpected++ + if got == rd.RouteWeb { + falseWeb++ + } + } + if it.ExpectedRoute != rd.RouteTrivial { + nonTrivialExpected++ + if got == rd.RouteTrivial { + trivialLeak++ + } + } + // The Road House regression pair must pass (its name carries "road house"). + if contains(it.Name, "road house") { + roadHouseSeen = true + if !ok { + roadHousePass = false + } + } + if *verbose || !ok { + flag := "ok " + if !ok { + flag = "MISS" + } + fmt.Printf(" [%s] %-40s want=%-16s got=%-16s\n", flag, trunc(it.Name, 40), it.ExpectedRoute, got) + } + } + + rate := func(num, den int) float64 { + if den == 0 { + return 0 + } + return float64(num) / float64(den) + } + misroute := 1 - rate(correct, len(items)) + lie := rate(factualWebMissed, factualWeb) + fw := rate(falseWeb, nonWebExpected) + leak := rate(trivialLeak, nonTrivialExpected) + + fmt.Printf("\n— metrics (§11 P1 gates) —\n") + fmt.Printf(" false-grok-on-FACTUAL : %5.1f%% (%d/%d) gate < 5%% %s\n", lie*100, factualWebMissed, factualWeb, pass(lie < 0.05)) + fmt.Printf(" false-web : %5.1f%% (%d/%d) gate ≤ 15%% %s\n", fw*100, falseWeb, nonWebExpected, pass(fw <= 0.15)) + fmt.Printf(" trivial-leak : %5.1f%% (%d/%d) gate ~ 0%% %s\n", leak*100, trivialLeak, nonTrivialExpected, pass(leak == 0)) + fmt.Printf(" misroute : %5.1f%% (%d/%d) gate < 3%% %s\n", misroute*100, len(items)-correct, len(items), pass(misroute < 0.03)) + if roadHouseSeen { + fmt.Printf(" road-house pair : %s\n", pass(roadHousePass)) + } + + // Exit non-zero if any gate fails, so the harness is CI/owner-runnable as a go/no-go. + if lie >= 0.05 || fw > 0.15 || leak > 0 || misroute >= 0.03 || (roadHouseSeen && !roadHousePass) { + fmt.Println("\nRESULT: NO-GO (a P1 gate failed)") + os.Exit(1) + } + fmt.Println("\nRESULT: GO") +} + +func pass(ok bool) string { + if ok { + return "PASS" + } + return "FAIL" +} + +func contains(s, sub string) bool { + return len(sub) == 0 || indexFold(s, sub) >= 0 +} + +// indexFold is a tiny case-insensitive substring search (avoids importing strings just +// for ToLower+Index in this small tool). +func indexFold(s, sub string) int { + ls, lsub := toLower(s), toLower(sub) + for i := 0; i+len(lsub) <= len(ls); i++ { + if ls[i:i+len(lsub)] == lsub { + return i + } + } + return -1 +} + +func toLower(s string) string { + b := []byte(s) + for i, c := range b { + if 'A' <= c && c <= 'Z' { + b[i] = c + ('a' - 'A') + } + } + return string(b) +} + +func trunc(s string, n int) string { + r := []rune(s) + if len(r) <= n { + return s + } + return string(r[:n-1]) + "…" +} diff --git a/apps/ai-bot/config.go b/apps/ai-bot/config.go index bf4c4909..54d8f518 100644 --- a/apps/ai-bot/config.go +++ b/apps/ai-bot/config.go @@ -109,15 +109,28 @@ type Config struct { // still goes to Grok. TrivialOffloadEnabled bool // WebEnabled turns on the web_then_grok route. WebProvider selects the source: - // grok_web_search (default, works on chat/completions via Live Search) or - // gemini_grounding (Gemini-3 native only — see F-EXT-3). + // grok_web_search (default, the xAI web_search tool on the Responses API) or + // gemini_grounding (native v1beta google_search — current models incl. 2.5; the + // F-EXT-3 caveat is OpenAI-compat-only, not a model-version limit). WebEnabled bool WebProvider string + // WebParanoid biases the router toward grounding: beyond freshnessRe, it unlocks the + // classifier-driven web arms (needs_web≥0.55, entity_obscure, time_sensitive, + // lookupHint && verifiable). Off (default) → web routing is freshness-only (today's + // behaviour), so enabling the classifier is web-routing-neutral and this is the single + // switch that activates epistemic grounding (§3/§15). Requires gemini_grounding. + WebParanoid bool // WebGroundingDailyCap caps grounded prompts/day (durable counter) before falling - // back, guarding the $/1k grounding overage. WebGroundingTier records the Gemini - // plan the cap reflects. + // back, guarding the $/1k grounding overage. WebGroundingDailyCap int - WebGroundingTier string + // WebGroundingTier is a documentation-only label of which Gemini plan the operator is + // on; it is NOT read by any logic. The money knob is GeminiGroundingPerPrompt + // (GEMINI_GROUNDING_PER_PROMPT_USD) — that is what the ledger/ceiling actually use. + WebGroundingTier string + // GeminiGroundingPerPrompt is the per-grounded-prompt FEE booked into the ledger so the + // daily ceiling sees it (§7 SG1). Default 0.035 (the paid-tier $35/1k overage); set 0 + // ONLY when genuinely on the free grounded-prompt tier. Booked even on the error return. + GeminiGroundingPerPrompt float64 // Reasoning route: a manual "think harder" trigger. ReasoningModel must be a // reasoning-capable model (the default grok-4.20-non-reasoning is NOT — see the // docs.x.ai finding); set REASONING_MODEL to e.g. grok-4.3 to use it. @@ -378,6 +391,7 @@ func LoadConfig() (*Config, error) { {"ROUTER_CLASSIFIER_ENABLED", &cfg.RouterClassifierEnabled}, {"TRIVIAL_OFFLOAD_ENABLED", &cfg.TrivialOffloadEnabled}, {"WEB_ENABLED", &cfg.WebEnabled}, + {"WEB_PARANOID", &cfg.WebParanoid}, {"REASONING_ENABLED", &cfg.ReasoningEnabled}, } { if *f.dest, err = getenvBool(f.key, false); err != nil { @@ -387,6 +401,11 @@ func LoadConfig() (*Config, error) { if cfg.WebGroundingDailyCap, err = getenvInt("WEB_GROUNDING_DAILY_CAP", 450); err != nil { problems = append(problems, err.Error()) } + // The per-grounded-prompt fee booked into the ledger (§7 SG1). Default 0.035 (paid + // tier). An operator on the free tier sets 0 deliberately. + if cfg.GeminiGroundingPerPrompt, err = getenvFloat("GEMINI_GROUNDING_PER_PROMPT_USD", 0.035); err != nil { + problems = append(problems, err.Error()) + } if cfg.CanaryPercent, err = getenvInt("CANARY_PERCENT", 0); err != nil { problems = append(problems, err.Error()) } @@ -428,6 +447,17 @@ func LoadConfig() (*Config, error) { problems = append(problems, fmt.Sprintf("WEB_PROVIDER must be %q or %q, got %q", webProviderGrokWebSearch, webProviderGeminiGrounding, cfg.WebProvider)) } + // §7 SG3: paranoid web requires gemini_grounding. grok_web_search has no daily cap and + // costs 10–18× per request — letting the paranoid bias drive it would only be backstopped + // by the $10 ceiling. Refuse to boot (consistent with the other fail-fast blocks). + if cfg.WebEnabled && cfg.WebParanoid && cfg.WebProvider == webProviderGrokWebSearch { + problems = append(problems, "WEB_PARANOID requires WEB_PROVIDER=gemini_grounding (grok_web_search has no daily cap and is far costlier)") + } + // §7 SG5: a non-positive grounding cap silently disables grounding (IncrGroundingIfUnder + // denies everything), so every query would degrade — refuse it for gemini_grounding. + if cfg.WebEnabled && cfg.WebProvider == webProviderGeminiGrounding && cfg.WebGroundingDailyCap <= 0 { + problems = append(problems, "WEB_GROUNDING_DAILY_CAP must be > 0 for gemini_grounding (a non-positive cap silently disables grounding)") + } if cfg.ReasoningEnabled && cfg.ReasoningModel == "" { problems = append(problems, "REASONING_MODEL is required when REASONING_ENABLED is set") } @@ -524,9 +554,10 @@ func (c *Config) Summary() string { } return strings.Join(bodyUsers, ",") }()), - fmt.Sprintf(" CASCADE: router=%t classifier=%t trivial=%t web=%t(%s, cap=%d) reason=%t(%s)", + fmt.Sprintf(" CASCADE: router=%t classifier=%t trivial=%t web=%t(%s, paranoid=%t, cap=%d, fee=$%g/prompt) reason=%t(%s)", c.RouterEnabled, c.RouterClassifierEnabled, c.TrivialOffloadEnabled, - c.WebEnabled, c.WebProvider, c.WebGroundingDailyCap, c.ReasoningEnabled, c.ReasoningEffort), + c.WebEnabled, c.WebProvider, c.WebParanoid, c.WebGroundingDailyCap, + c.GeminiGroundingPerPrompt, c.ReasoningEnabled, c.ReasoningEffort), " GEMINI_MODEL = " + c.GeminiModel, " GEMINI_API_KEY = " + redact(c.GeminiAPIKey), }, "\n") diff --git a/apps/ai-bot/config_test.go b/apps/ai-bot/config_test.go index 2c3c6221..8743c853 100644 --- a/apps/ai-bot/config_test.go +++ b/apps/ai-bot/config_test.go @@ -20,6 +20,7 @@ func setBaseEnv(t *testing.T) { for _, k := range []string{ "GEMINI_API_KEY", "GEMINI_API_KEY_FILE", "ROUTER_ENABLED", "ROUTER_CLASSIFIER_ENABLED", "TRIVIAL_OFFLOAD_ENABLED", "WEB_ENABLED", "REASONING_ENABLED", "WEB_PROVIDER", "REASONING_MODEL", + "WEB_PARANOID", "WEB_GROUNDING_DAILY_CAP", "GEMINI_GROUNDING_PER_PROMPT_USD", } { t.Setenv(k, "") } @@ -96,3 +97,47 @@ func TestConfigWebGeminiGroundingNeedsKey(t *testing.T) { t.Fatalf("want GEMINI_API_KEY error, got %v", err) } } + +// §7 SG3: paranoid web on the uncapped grok_web_search must refuse to boot; with +// gemini_grounding (+ key) it is valid. +func TestConfigParanoidRequiresGeminiGrounding(t *testing.T) { + setBaseEnv(t) + t.Setenv("WEB_ENABLED", "true") + t.Setenv("WEB_PARANOID", "true") // default provider is grok_web_search + if _, err := LoadConfig(); err == nil || !strings.Contains(err.Error(), "WEB_PARANOID") { + t.Fatalf("want WEB_PARANOID error on grok_web_search, got %v", err) + } + t.Setenv("WEB_PROVIDER", webProviderGeminiGrounding) + t.Setenv("GEMINI_API_KEY", "gk") + if _, err := LoadConfig(); err != nil { + t.Fatalf("paranoid + gemini_grounding should be valid: %v", err) + } +} + +// §7 SG5: a non-positive grounding cap silently disables grounding — refuse it for +// gemini_grounding. +func TestConfigGeminiGroundingCapMustBePositive(t *testing.T) { + setBaseEnv(t) + t.Setenv("WEB_ENABLED", "true") + t.Setenv("WEB_PROVIDER", webProviderGeminiGrounding) + t.Setenv("GEMINI_API_KEY", "gk") + t.Setenv("WEB_GROUNDING_DAILY_CAP", "0") + if _, err := LoadConfig(); err == nil || !strings.Contains(err.Error(), "WEB_GROUNDING_DAILY_CAP") { + t.Fatalf("want WEB_GROUNDING_DAILY_CAP error, got %v", err) + } +} + +// The default per-prompt grounding fee is the paid-tier $0.035 (the operator must opt to 0). +func TestConfigGroundingFeeDefault(t *testing.T) { + setBaseEnv(t) + cfg, err := LoadConfig() + if err != nil { + t.Fatalf("%v", err) + } + if cfg.GeminiGroundingPerPrompt != 0.035 { + t.Fatalf("GEMINI_GROUNDING_PER_PROMPT_USD default = %v, want 0.035", cfg.GeminiGroundingPerPrompt) + } + if cfg.WebParanoid { + t.Fatal("WEB_PARANOID must default off") + } +} diff --git a/apps/ai-bot/context.go b/apps/ai-bot/context.go index 7c7cd727..015d431f 100644 --- a/apps/ai-bot/context.go +++ b/apps/ai-bot/context.go @@ -47,6 +47,65 @@ func buildContext(system string, history []bufferedMsg, isDM bool, triggerBody s return truncateToTokens(msgs, maxTokens) } +// routerContextMaxRunes caps each line fed to the classifier/rewrite so a long buffered +// turn can't blow the router's token budget; ~200 runes is plenty to resolve a follow-up. +const routerContextMaxRunes = 200 + +// routerContext returns the privacy-minimised conversation window the Layer-1 classifier +// and the follow-up rewrite read, drawn ONLY from the already-minimised `msgs` (a strict +// subset of what the final Grok call sees — no new privacy surface, §6): +// +// - DM: the last ≤2 bot (assistant) turns plus the interleaved/final user turns, so a +// bare follow-up like "2024 года" carries the prior film name into search_query. +// - GROUP: ONLY the final user line. The per-(room,thread) buffer interleaves different +// members' topics (it is keyed by room+thread, not sender), so resolving a follow-up +// against prior turns could ground a confidently-wrong answer about the WRONG subject. +// +// Formatted "BOT: …\nUSER: …", each line truncated to routerContextMaxRunes. Empty when +// there is nothing to send. +func routerContext(msgs []Message, isDM bool) string { + conv := msgs + if len(conv) > 0 && conv[0].Role == "system" { + conv = conv[1:] + } + if len(conv) == 0 { + return "" + } + start := len(conv) - 1 // group default: only the final user line + if isDM { + // Walk back to include up to the 2 most recent assistant turns before the trigger. + const maxAssistant = 2 + seen := 0 + for i := len(conv) - 1; i >= 0; i-- { + start = i + if conv[i].Role == "assistant" { + if seen++; seen >= maxAssistant { + break + } + } + } + } + var b strings.Builder + for _, m := range conv[start:] { + text := strings.TrimSpace(m.Content) + if text == "" { + continue + } + if r := []rune(text); len(r) > routerContextMaxRunes { + text = string(r[:routerContextMaxRunes]) + } + label := "USER" + if m.Role == "assistant" { + label = "BOT" + } + b.WriteString(label) + b.WriteString(": ") + b.WriteString(text) + b.WriteByte('\n') + } + return strings.TrimRight(b.String(), "\n") +} + // estimateTokens is a cheap upper-ish heuristic (~4 chars/token + per-message // overhead). Used only to bound request size, not for billing (billing reads the // API's returned usage). diff --git a/apps/ai-bot/internal/routedecide/routedecide.go b/apps/ai-bot/internal/routedecide/routedecide.go new file mode 100644 index 00000000..7b62c4cb --- /dev/null +++ b/apps/ai-bot/internal/routedecide/routedecide.go @@ -0,0 +1,193 @@ +// Package routedecide is the PURE, importable core of the AI-bot router: the free +// Layer-0 regex pre-classification and the Layer-0+classifier combine. It holds no +// I/O, no vendor clients, no Bot/Config — only the decision math — so two callers can +// share exactly one decision function: +// +// - package main (router.go) parses the live Gemini classifier JSON into a Verdict, +// then calls Combine to resolve the route; +// - cmd/routereval replays a golden set of recorded Verdicts through the same +// ClassifyLayer0 + Combine to measure misroute / false-web / trivial-leak offline. +// +// Go forbids importing package main, so this core had to live in its own package for +// the offline harness to exercise the REAL routing logic instead of a drift-prone copy. +package routedecide + +import ( + "regexp" + "strings" +) + +// Route names — the canonical wire/log/request_log tokens. package main aliases these +// (telemetry.go) so there is a single source of truth for the strings. +const ( + RouteTrivial = "trivial_direct" + RouteGrokDirect = "grok_direct" + RouteWeb = "web_then_grok" + RouteReason = "reason_then_grok" +) + +// Confidence floors the combine uses. These are the values the offline eval (§11) +// tunes; keeping them here lets cmd/routereval sweep them without touching main. +// +// - WebNeedsWebFloor: a classifier needs_web verdict must clear this to route to web +// (paranoid-low — grounding is cheap, a confident wrong fact is not). +// - TrivialFloor: the bar a trivial offload must clear (conservative — a false trivial +// leaks a real question to the cheap model). +const ( + WebNeedsWebFloor = 0.55 + TrivialFloor = 0.85 +) + +// Floors are the two confidence thresholds Combine applies, parameterised so the offline +// eval (cmd/routereval) can SWEEP them over a golden set without recompiling. Production +// uses DefaultFloors (the consts above). +type Floors struct { + WebNeedsWeb float64 + Trivial float64 +} + +// DefaultFloors is the production threshold set. +func DefaultFloors() Floors { return Floors{WebNeedsWeb: WebNeedsWebFloor, Trivial: TrivialFloor} } + +// web_decided_by attribution tokens (request_log.web_decided_by). Stable so analytics +// can GROUP BY them and tune WebNeedsWebFloor from data. +const ( + WebByNone = "none" + WebByFreshness = "freshness" + WebByNeedsWeb = "classifier_needs_web" + WebByObscure = "entity_obscure" + WebByTime = "time_sensitive" + WebByLookupHint = "lookup_hint" +) + +// Verdict is the classifier's parsed JSON output (§4.1). The json tags match the +// classifier schema exactly, so both routeLayer1 (live classifier reply) and +// cmd/routereval (recorded golden verdicts) unmarshal straight into it. Confidence is +// the model's honest certainty in needs_web; it doubles as the trivial-gate threshold +// (a clear greeting is high-certainty-no-web, so the gate passes). +type Verdict struct { + NeedsWeb bool `json:"needs_web"` + Verifiable bool `json:"verifiable"` + EntityObscure bool `json:"entity_obscure"` + TimeSensitive bool `json:"time_sensitive"` + Trivial bool `json:"trivial"` + SearchQuery string `json:"search_query"` + Confidence float64 `json:"confidence"` +} + +// Layer0 is the free-regex pre-classification result. Route is the verdict when the +// classifier is OFF; WebForce/Trivial/LookupHint feed the Combine when it is ON. +type Layer0 struct { + Route string // RouteWeb (freshness) | RouteTrivial | RouteGrokDirect + WebForce bool // freshnessRe hit — a HARD web signal (survives the classifier being down) + Trivial bool // a trivial candidate (greeting/ack/bare arithmetic) + LookupHint bool // lookupIntentRe hit — a SOFT hint only (never sets the route) + Freshness string // "recent" when WebForce, else "" +} + +// Heuristic patterns. Kept deliberately tight. Freshness words route to web (a false +// web-route only costs a fetch and degrades cleanly). Trivial fires only on short, +// unmistakable greetings/acks or bare arithmetic. +var ( + greetingRe = regexp.MustCompile(`^(привет(ик)?|здравствуй(те)?|хай|прив|ку|добрый\s+(день|вечер|утро)|спасибо|спс|благодарю|пока|ок(ей)?|угу|ага|hello|hi|hey|yo|thanks|thank\s+you|thx|ty|bye|goodbye|ok|okay|cool|nice)[\s!.,)]*$`) + arithmeticRe = regexp.MustCompile(`^[\s(]*\d+(\s*[-+*/×÷]\s*\d+)+[\s)=?]*$`) + // Russian tokens are deliberately STEM matches (новост→новости/новостей, погод→погода…) + // so they stay un-anchored. English standalone tokens are \b-anchored so they fire on + // whole words only — not inside scoreboard / concurrent / weathering / newsletter (a + // pre-existing false-web source; \b removes that pointless grounding spend). RE2's \b is + // ASCII-word-based, so it is used only around the ASCII tokens, never the Cyrillic stems. + freshnessRe = regexp.MustCompile(`(новост|сегодня|сейчас|последн|курс\s|погод|котировк|расписани|прогноз|\bbreaking\b|\btoday\b|\bright now\b|\blatest\b|\bcurrent(ly)?\b|\bnews\b|\bweather\b|\bstock price\b|\bexchange rate\b|\bscore\b)`) + + // lookupIntentRe — SOFT HINT ONLY (§5): raises the classifier's needs_web prior via + // the lookupHint && verifiable arm; must NEVER set the route. Anchored on + // interrogative + lookup-verb so it fires on lookup INTENT, not entity presence. + // Deliberately leaky (false negatives are caught by the classifier, the real safety + // net). Do NOT add a capitalised-word or guillemet branch — those false-positive on + // greetings/idioms ("Привет, Москва!", "«Война и мир» — топ", "ну ты прям Эйнштейн"). + // The leading [\s«"„(] class is only an OPTIONAL left boundary, never a trigger. + lookupIntentRe_RU = regexp.MustCompile(`(?i)(^|[\s«"„(])(кто\s+(так(ой|ая|ие)|снимал(ся|ась|ись)|играл|написал|основал|изобрёл|изобрел|режисс[её]р|автор)|в\s+как(ом|ой)\s+(год[уе]|фильм[еа]|сериал[еа]|книг[еи]|игр[еы])|когда\s+(вышел|вышла|вышло|выйдет|основан[аы]?|родил(ся|ась)|умер(ла)?|состоял(ся|ась)|был[аои]?\s+выпущен)|в\s+каком\s+году|сколько\s+(лет|стоит\s+бил|серий|сезонов|эпизодов)|чем\s+(закончил|известен|знаменит))`) + lookupIntentRe_EN = regexp.MustCompile(`(?i)(^|[\s"'(])(who\s+(is|are|was|were|starred|played|directed|wrote|founded|invented|created)\s|in\s+(what|which)\s+(year|film|movie|show|series|book|game)\b|when\s+(did|was|were|does|is)\b.*\b(release|released|come\s+out|came\s+out|born|die|died|found|founded|launch|launched|air|aired)\b|what\s+year\b|how\s+many\s+(seasons|episodes|films|movies|books))`) +) + +// ClassifyLayer0 runs the free heuristic over a message body. The result drives routing +// only when the classifier is off; when it is on, WebForce/Trivial/LookupHint feed +// Combine. Empty body → grok_direct (the safe floor). +func ClassifyLayer0(body string) Layer0 { + s := strings.ToLower(strings.TrimSpace(body)) + if s == "" { + return Layer0{Route: RouteGrokDirect} + } + lookupHint := lookupIntentRe_RU.MatchString(s) || lookupIntentRe_EN.MatchString(s) + if freshnessRe.MatchString(s) { + return Layer0{Route: RouteWeb, WebForce: true, Freshness: "recent", LookupHint: lookupHint} + } + if IsTrivial(s) { + return Layer0{Route: RouteTrivial, Trivial: true, LookupHint: lookupHint} + } + return Layer0{Route: RouteGrokDirect, LookupHint: lookupHint} +} + +// IsTrivial: a short greeting/ack or a bare arithmetic expression, with no sign of a +// real question. Length-bounded so "thanks, now explain quantum tunnelling" is NOT +// trivial. Expects an already-lowercased/trimmed string from ClassifyLayer0; callers +// passing raw input should lower/trim first (the greeting regex is lowercase-anchored). +func IsTrivial(s string) bool { + if arithmeticRe.MatchString(s) { + return true + } + if len(strings.Fields(s)) <= 4 && greetingRe.MatchString(s) { + return true + } + return false +} + +// Combined is the resolved route plus its web attribution (for request_log). +type Combined struct { + Route string + WebDecidedBy string +} + +// Combine resolves the Layer-0 decision + the classifier Verdict into the final route. +// It is the router's brain and it never blindly trusts the model: +// +// - freshnessRe (WebForce) is a HARD web signal, always honoured (it survives the +// classifier being down). +// - Every OTHER web arm (the classifier's needs_web≥floor AND verifiable, +// entity_obscure, time_sensitive, lookupHint && verifiable) is gated by `paranoid` +// (WEB_PARANOID). The needs_web arm additionally requires `verifiable`: on a small +// flash-lite classifier, `needs_web` over-fires on open-ended advice/explanations +// (observed live: "посоветуй фильм", "объясни goroutines" → needs_web=true, +// verifiable=false → a false-web). `verifiable` ("a checkable fact about a NAMED +// entity") is the reliable discriminator; recency still routes via time_sensitive/ +// freshness and obscurity via entity_obscure, so no genuine grounding is lost. +// With paranoid off, web routing equals today's freshness-only behavior — so +// enabling the classifier is web-routing-neutral and WEB_PARANOID is the single +// switch that activates epistemic grounding (clean canary; cost increase behind it). +// - trivial is agreement-gated: a Layer-0 trivial candidate AND classifier.trivial AND +// confidence ≥ TrivialFloor. A lone signal stays on grok_direct (no voice leak). +// - everything else falls to grok_direct (the safe floor: opinion/chat/advice/code). +// +// The switch ORDER determines web_decided_by attribution; the boolean result is the OR. +func Combine(l0 Layer0, v Verdict, paranoid bool) Combined { + return CombineWithFloors(l0, v, paranoid, DefaultFloors()) +} + +// CombineWithFloors is Combine with explicit thresholds (the offline-eval sweep entry). +func CombineWithFloors(l0 Layer0, v Verdict, paranoid bool, f Floors) Combined { + switch { + case l0.WebForce: + return Combined{Route: RouteWeb, WebDecidedBy: WebByFreshness} + case paranoid && v.NeedsWeb && v.Verifiable && v.Confidence >= f.WebNeedsWeb: + return Combined{Route: RouteWeb, WebDecidedBy: WebByNeedsWeb} + case paranoid && v.EntityObscure: + return Combined{Route: RouteWeb, WebDecidedBy: WebByObscure} + case paranoid && v.TimeSensitive: + return Combined{Route: RouteWeb, WebDecidedBy: WebByTime} + case paranoid && l0.LookupHint && v.Verifiable: + return Combined{Route: RouteWeb, WebDecidedBy: WebByLookupHint} + } + if l0.Trivial && v.Trivial && v.Confidence >= f.Trivial { + return Combined{Route: RouteTrivial, WebDecidedBy: WebByNone} + } + return Combined{Route: RouteGrokDirect, WebDecidedBy: WebByNone} +} diff --git a/apps/ai-bot/internal/routedecide/routedecide_test.go b/apps/ai-bot/internal/routedecide/routedecide_test.go new file mode 100644 index 00000000..f39243ff --- /dev/null +++ b/apps/ai-bot/internal/routedecide/routedecide_test.go @@ -0,0 +1,230 @@ +package routedecide + +import "testing" + +// TestClassifyLayer0 is the free-heuristic golden set: freshness → web (WebForce), +// short greetings/acks/bare-arithmetic → trivial candidate, everything else → +// grok_direct, with substantive messages never trivial. +func TestClassifyLayer0(t *testing.T) { + cases := []struct { + body string + wantRoute string + wantWebForce bool + wantTrivial bool + }{ + {"привет", RouteTrivial, false, true}, + {"спасибо", RouteTrivial, false, true}, + {"2+2", RouteTrivial, false, true}, + {"12 / 4 - 1", RouteTrivial, false, true}, + {"hello", RouteTrivial, false, true}, + {"какие новости сегодня?", RouteWeb, true, false}, + {"курс доллара сегодня", RouteWeb, true, false}, + {"what's the weather today", RouteWeb, true, false}, + {"посоветуй фильм на вечер", RouteGrokDirect, false, false}, + {"explain how TCP works", RouteGrokDirect, false, false}, + {"спасибо, а теперь подробно объясни квантовую запутанность", RouteGrokDirect, false, false}, + {"", RouteGrokDirect, false, false}, + } + for _, c := range cases { + l0 := ClassifyLayer0(c.body) + if l0.Route != c.wantRoute || l0.WebForce != c.wantWebForce || l0.Trivial != c.wantTrivial { + t.Errorf("ClassifyLayer0(%q) = {route:%q webForce:%v trivial:%v}, want {%q %v %v}", + c.body, l0.Route, l0.WebForce, l0.Trivial, c.wantRoute, c.wantWebForce, c.wantTrivial) + } + } +} + +// TestFreshnessWordBoundaries guards the §7-#7 \b tightening: English freshness tokens +// fire on whole words only — never inside scoreboard / concurrent / weathering — while +// genuine freshness phrases still force web, and Russian stems stay stem-matched. +func TestFreshnessWordBoundaries(t *testing.T) { + shouldForceWeb := []string{ + "what's the weather today", + "latest news on AI", + "current bitcoin price", + "какие новости сегодня", // RU stems unchanged + "курс доллара сегодня", + } + for _, s := range shouldForceWeb { + if !ClassifyLayer0(s).WebForce { + t.Errorf("expected WebForce on freshness phrase: %q", s) + } + } + shouldNotForceWeb := []string{ + "the scoreboard shows 3:1", // score inside scoreboard + "concurrent programming in Go", // current inside concurrent + "weathering the storm, metaphorically", // weather inside weathering + "subscribe to my newsletter please", // news inside newsletter + } + for _, s := range shouldNotForceWeb { + if ClassifyLayer0(s).WebForce { + t.Errorf("freshness false-positive (substring match) on: %q", s) + } + } +} + +// TestLookupHintFalsePositiveCorpus is the §5 guarantee: the soft lookup-intent regex +// must NOT fire on greetings/vocatives/idioms/non-lookup interrogatives — it is anchored +// on interrogative + lookup-verb, never on a capitalised word or a guillemet. A false +// LookupHint can only ever bias the classifier (and only when WEB_PARANOID + verifiable), +// but we still hold the regex itself to near-zero false positives. +func TestLookupHintFalsePositiveCorpus(t *testing.T) { + falsePositives := []string{ + "Привет, Москва!", // vocative, no interrogative + "«Война и мир» — топ", // guillemets are not a trigger + "ну ты прям Эйнштейн", // proper noun, no «кто такой» + "кто это сделал?", // «кто» not followed by a lookup-verb + "когда ты придёшь?", // «когда» needs a release/birth verb + "спасибо большое", // ack + "расскажи что-нибудь", // imperative, no lookup interrogative + "I love this movie", // English, no interrogative + "who cares", // «who» not followed by is/was/starred/… + } + for _, s := range falsePositives { + if l0 := ClassifyLayer0(s); l0.LookupHint { + t.Errorf("lookupHint fired on a false-positive trap: %q", s) + } + } + // And it MUST fire on genuine lookup intent (otherwise it's useless). + truePositives := []string{ + "кто снимался в фильме дом у дороги", + "кто написал войну и мир", + "в каком году вышел фильм матрица", + "who directed Inception", + "in what year was the Matrix released", + "how many seasons of breaking bad", + } + for _, s := range truePositives { + if l0 := ClassifyLayer0(s); !l0.LookupHint { + t.Errorf("lookupHint should fire on genuine lookup intent: %q", s) + } + } +} + +// TestCombineFreshnessAlwaysWeb: a freshnessRe hit (WebForce) routes to web regardless of +// WEB_PARANOID and regardless of the classifier verdict — the deterministic signal that +// survives the classifier being down (§4.4). +func TestCombineFreshnessAlwaysWeb(t *testing.T) { + l0 := Layer0{Route: RouteWeb, WebForce: true, Freshness: "recent"} + v := Verdict{NeedsWeb: false, Confidence: 0.1} // classifier disagrees + for _, paranoid := range []bool{true, false} { + if got := Combine(l0, v, paranoid).Route; got != RouteWeb { + t.Errorf("freshness with paranoid=%v = %q, want web", paranoid, got) + } + } +} + +// TestCombineParanoidGating is the Design-X invariant (§15): with WEB_PARANOID OFF, only +// freshness routes to web — the classifier's needs_web/entity/time/lookup signals are +// recorded but do NOT change the route. With it ON, those arms activate. +func TestCombineParanoidGating(t *testing.T) { + l0 := Layer0{Route: RouteGrokDirect, LookupHint: true} // no freshness + arms := []Verdict{ + {NeedsWeb: true, Verifiable: true, Confidence: 0.9}, // classifier_needs_web (needs verifiable) + {EntityObscure: true, Confidence: 0.4}, // entity_obscure + {TimeSensitive: true, Confidence: 0.4}, // time_sensitive + {Verifiable: true, Confidence: 0.4}, // lookup_hint && verifiable + } + for i, v := range arms { + if got := Combine(l0, v, false).Route; got != RouteGrokDirect { + t.Errorf("arm %d with paranoid OFF = %q, want grok_direct (web is freshness-only)", i, got) + } + if got := Combine(l0, v, true).Route; got != RouteWeb { + t.Errorf("arm %d with paranoid ON = %q, want web", i, got) + } + } +} + +// TestCombineWebFloor: the needs_web arm only fires at/above WebNeedsWebFloor (paranoid). +func TestCombineWebFloor(t *testing.T) { + l0 := Layer0{Route: RouteGrokDirect} + below := Verdict{NeedsWeb: true, Verifiable: true, Confidence: WebNeedsWebFloor - 0.01} + atFloor := Verdict{NeedsWeb: true, Verifiable: true, Confidence: WebNeedsWebFloor} + if got := Combine(l0, below, true).Route; got != RouteGrokDirect { + t.Errorf("needs_web below floor = %q, want grok_direct", got) + } + if got := Combine(l0, atFloor, true).Route; got != RouteWeb { + t.Errorf("needs_web at floor = %q, want web", got) + } +} + +// TestCombineNeedsWebRequiresVerifiable is the false-web fix (observed live): the needs_web +// arm fires ONLY when the classifier also flagged a checkable named-entity fact +// (verifiable). A high-confidence needs_web on a non-verifiable query — an opinion or +// explanation the small flash-lite over-eagerly marked needs_web=true ("посоветуй фильм", +// "объясни goroutines") — stays on grok_direct. Recency (time_sensitive/freshness) and +// obscurity (entity_obscure) keep their own arms, so no genuine grounding is lost. +func TestCombineNeedsWebRequiresVerifiable(t *testing.T) { + l0 := Layer0{Route: RouteGrokDirect} + if got := Combine(l0, Verdict{NeedsWeb: true, Verifiable: false, Confidence: 1.0}, true).Route; got != RouteGrokDirect { + t.Errorf("needs_web && !verifiable = %q, want grok_direct (false-web fix)", got) + } + if got := Combine(l0, Verdict{NeedsWeb: true, Verifiable: true, Confidence: 0.6}, true).Route; got != RouteWeb { + t.Errorf("needs_web && verifiable = %q, want web", got) + } + // A non-verifiable needs_web that is ALSO entity_obscure still grounds (obscure arm). + if got := Combine(l0, Verdict{NeedsWeb: true, Verifiable: false, EntityObscure: true, Confidence: 0.1}, true).Route; got != RouteWeb { + t.Errorf("entity_obscure must still route web regardless of verifiable, got %q", got) + } +} + +// TestCombineTrivialAgreementGate: trivial requires BOTH the Layer-0 candidate AND +// classifier.trivial AND confidence ≥ TrivialFloor. A lone signal stays on grok_direct. +func TestCombineTrivialAgreementGate(t *testing.T) { + trivialL0 := Layer0{Route: RouteTrivial, Trivial: true} + nonTrivialL0 := Layer0{Route: RouteGrokDirect} + + if got := Combine(trivialL0, Verdict{Trivial: true, Confidence: 0.95}, true).Route; got != RouteTrivial { + t.Errorf("agreed high-confidence trivial = %q, want trivial", got) + } + if got := Combine(trivialL0, Verdict{Trivial: true, Confidence: 0.5}, true).Route; got != RouteGrokDirect { + t.Errorf("low-confidence trivial = %q, want grok_direct (no voice leak)", got) + } + if got := Combine(trivialL0, Verdict{Trivial: false, Confidence: 0.95}, true).Route; got != RouteGrokDirect { + t.Errorf("classifier disagrees on trivial = %q, want grok_direct", got) + } + // Never trust classifier.trivial alone: without the Layer-0 candidate it stays grok. + if got := Combine(nonTrivialL0, Verdict{Trivial: true, Confidence: 0.99}, true).Route; got == RouteTrivial { + t.Errorf("classifier.trivial alone routed to trivial; must require the Layer-0 candidate") + } +} + +// TestCombineRoadHouse is the regression: the hallucinated-cast bug. With WEB_PARANOID on +// and the classifier flagging the (obscure, verifiable) entity, both the first turn and +// the resolved follow-up route to web; with paranoid off they fall to grok_direct (the +// canary-neutral baseline). +func TestCombineRoadHouse(t *testing.T) { + first := ClassifyLayer0("кто снимался в фильме дом у дороги") + followup := ClassifyLayer0("2024 года") // bare; the classifier resolves via context + v := Verdict{NeedsWeb: true, Verifiable: true, EntityObscure: true, Confidence: 0.7} + + for _, l0 := range []Layer0{first, followup} { + if got := Combine(l0, v, true).Route; got != RouteWeb { + t.Errorf("road house with paranoid ON = %q, want web (the hallucination fix)", got) + } + if got := Combine(l0, v, false).Route; got != RouteGrokDirect { + t.Errorf("road house with paranoid OFF = %q, want grok_direct (baseline)", got) + } + } +} + +// TestWebDecidedByAttribution: the switch order attributes the right arm (for tuning 0.55). +func TestWebDecidedByAttribution(t *testing.T) { + cases := []struct { + l0 Layer0 + v Verdict + want string + }{ + {Layer0{WebForce: true}, Verdict{}, WebByFreshness}, + {Layer0{}, Verdict{NeedsWeb: true, Verifiable: true, Confidence: 0.9}, WebByNeedsWeb}, + {Layer0{}, Verdict{EntityObscure: true, Confidence: 0.1}, WebByObscure}, + {Layer0{}, Verdict{TimeSensitive: true, Confidence: 0.1}, WebByTime}, + {Layer0{LookupHint: true}, Verdict{Verifiable: true, Confidence: 0.1}, WebByLookupHint}, + {Layer0{Route: RouteGrokDirect}, Verdict{Confidence: 0.1}, WebByNone}, + } + for _, c := range cases { + if got := Combine(c.l0, c.v, true).WebDecidedBy; got != c.want { + t.Errorf("web_decided_by(%+v,%+v) = %q, want %q", c.l0, c.v, got, c.want) + } + } +} diff --git a/apps/ai-bot/pricing.go b/apps/ai-bot/pricing.go index 220b343a..95f5b55b 100644 --- a/apps/ai-bot/pricing.go +++ b/apps/ai-bot/pricing.go @@ -20,15 +20,21 @@ type ModelPrice struct { // and so a half-finished cascade can book only what it actually spent (§8.1). type CostBreakdown struct { Token float64 - Grounding float64 + Grounding float64 // Gemini grounded-prompt TOKEN cost WebTool float64 Router float64 + // GroundingFee is the per-grounded-prompt FEE (the $35/1k overage on a paid Gemini + // tier, GEMINI_GROUNDING_PER_PROMPT_USD) — kept separate from Grounding (the token + // cost) for clean analytics. Booked the moment the grounded prompt is admitted, even + // on the error return (§7 SG1). Settle folds it into the grounding_usd spend column, + // so the $10 ceiling finally sees it without a spend-table migration. + GroundingFee float64 } // Total is the grand total across all components (the number the wallet ceiling and // request_log.total_usd care about). Computed, never stored, so it can't drift. func (c CostBreakdown) Total() float64 { - return c.Token + c.Grounding + c.WebTool + c.Router + return c.Token + c.Grounding + c.WebTool + c.Router + c.GroundingFee } // priceFor returns the configured price for a model. An unknown model falls back to diff --git a/apps/ai-bot/provider_gemini.go b/apps/ai-bot/provider_gemini.go index 4ba01ffa..42eac2fa 100644 --- a/apps/ai-bot/provider_gemini.go +++ b/apps/ai-bot/provider_gemini.go @@ -19,10 +19,11 @@ import ( // cheap trivial route and the Layer-1 router classifier. Same wire format as Grok, // so it reuses the shared transport (httpllm.go). // - groundedSearch: a SEPARATE call against the NATIVE v1beta generateContent endpoint -// with the google_search tool. Grounding does NOT work on the OpenAI-compat layer -// (it is silently ignored there, and only on Gemini 3+) — verified against Google's -// docs (F-EXT-3) — so the web layer that wants Gemini grounding must use this native -// path and VERIFY citations came back, else degrade. +// with the google_search tool. Grounding does NOT work on the OpenAI-compat layer — +// it is silently ignored THERE (F-EXT-3, an endpoint limitation, NOT a model-version +// one: the google_search tool is supported by current models including +// gemini-2.5-flash-lite per ai.google.dev). So the web layer that wants Gemini +// grounding must use this native path and VERIFY citations came back, else degrade. type geminiClient struct { http *openAIClient nativeBase string // …/v1beta — derived from the OpenAI-compat base by dropping /openai @@ -94,8 +95,8 @@ type geminiPart struct { Text string `json:"text"` } type geminiTool struct { - // google_search is the current grounding tool (Gemini 3 / current models). The - // empty object enables it. + // google_search is the current grounding tool (all current models, incl. the 2.5 + // family; legacy models used google_search_retrieval). The empty object enables it. GoogleSearch struct{} `json:"google_search"` } type geminiNativeResponse struct { diff --git a/apps/ai-bot/router.go b/apps/ai-bot/router.go index 95d34bfb..b5c8e5df 100644 --- a/apps/ai-bot/router.go +++ b/apps/ai-bot/router.go @@ -3,117 +3,141 @@ package main import ( "context" "encoding/json" - "regexp" "strings" + "time" + + rd "vojo.chat/ai-bot/internal/routedecide" ) // router.go classifies a message into a route. It runs INSIDE respond() — after the // mention/media/foreign/single-flight gates (F-FUNC-7) — so a paid Layer-1 classifier // is never spent on a message today's bot drops for free. // -// Two layers, both conservative (doubt → grok_direct, the safe floor that keeps -// substantive questions on Grok, §8.6): +// Two layers; the decision MATH lives in the pure internal/routedecide package so the +// offline eval (cmd/routereval) replays the SAME function instead of a copy: // - Layer-0: free regex heuristics (RU+EN). Always runs when ROUTER_ENABLED. -// - Layer-1: a cheap Gemini JSON classifier, consulted ONLY on Layer-0 grok_direct -// when ROUTER_CLASSIFIER_ENABLED. Any failure falls back to the Layer-0 verdict. +// - Layer-1: a cheap Gemini JSON classifier (ROUTER_CLASSIFIER_ENABLED). It now runs +// on EVERY message (greetings + freshness hits included) so trivial can be +// agreement-confirmed and follow-ups get a context-resolved search_query. Any +// failure (incl. the 4s sub-deadline) falls back to the Layer-0 verdict — never an +// ungrounded confident answer, never a degrade-to-web (the classifier is Gemini, so +// a Gemini outage means the grounding fetch is down too, §4.4). -// RouterDecision is the route plus the signals behind it (logged for threshold -// calibration). Only Route/Source/Confidence/NeedsWeb drive behaviour today; the rest -// are recorded for the offline router-replay eval (§9). +// RouterDecision is the route plus the signals behind it (logged + persisted for +// threshold calibration and misroute attribution, §8). Route/Source/Confidence drive +// behaviour; the epistemic signals + SearchQuery feed the web route and the analytics. type RouterDecision struct { Route string Source string // heuristic | classifier | default | forced | degraded Confidence float64 NeedsWeb bool - Freshness string - ReasoningLevel string - Domain string - Difficulty string + Freshness string // "recent" on a freshnessRe hit (read by factualMiss + logged) + ReasoningLevel string // "high" on the forced reason route (logged) + + // Classifier signals (§4) — populated only when Layer-1 ran. SearchQuery is the + // self-contained, follow-up-resolved web query (carried to genWebThenGrok in DMs). + SearchQuery string + EntityObscure bool + TimeSensitive bool + Verifiable bool + TrivialScore bool // the classifier's raw "trivial" verdict + LookupHint bool // Layer-0 soft hint (never sets the route on its own, §5) + WebDecidedBy string // which arm chose web — routedecide.WebBy* (request_log) } -// Heuristic patterns. Kept deliberately tight: a false "trivial" leaks a real question -// to the cheap model, so trivial fires only on short, unmistakable greetings/acks or -// bare arithmetic. Freshness words route to web (a false web-route only costs a fetch -// and degrades cleanly — never a wrong answer). -var ( - greetingRe = regexp.MustCompile(`^(привет(ик)?|здравствуй(те)?|хай|прив|ку|добрый\s+(день|вечер|утро)|спасибо|спс|благодарю|пока|ок(ей)?|угу|ага|hello|hi|hey|yo|thanks|thank\s+you|thx|ty|bye|goodbye|ok|okay|cool|nice)[\s!.,)]*$`) - arithmeticRe = regexp.MustCompile(`^[\s(]*\d+(\s*[-+*/×÷]\s*\d+)+[\s)=?]*$`) - freshnessRe = regexp.MustCompile(`(новост|сегодня|сейчас|последн|курс\s|погод|котировк|расписани|прогноз|breaking|today|right now|latest|current(ly)?|news|weather|stock price|exchange rate|score)`) -) +// routerStageTimeout bounds the classifier call independently of the overall budget +// (mirrors webStageTimeout, §4.4). It is derived from the parent genCtx so a budget +// cancel still propagates; its expiry is treated exactly like a classifier error → the +// Layer-0 verdict, never a terminal error. +const routerStageTimeout = 4 * time.Second -// routeLayer0 is the free heuristic. Confidence is a rough self-estimate used only for -// logging/threshold tuning, not control flow. +// classifierPrompt asks Gemini an EPISTEMIC-RISK question (not a topic label) and +// resolves follow-ups from the short conversation that is appended after it (rcx). Kept +// terse to bound tokens; extractJSON tolerates code fences. +const classifierPrompt = `You are a routing classifier for a Russian-speaking chat assistant. You do NOT answer the question. Read the short conversation; the LAST user line is the message to route, earlier lines are context to resolve pronouns and follow-ups. Reply with ONLY one JSON object, no prose. + +Your main job is an EPISTEMIC judgement, not a topic label: if the assistant answered the LAST message purely from its own memory (no web), how likely is it to state a WRONG checkable fact — a name, a film/book cast, a date or release year, a number, a price, a score, a population, a who-did-what about a SPECIFIC named person/film/company/place/event? Such facts are exactly what a model misremembers and states confidently. + +Decide: +- "needs_web": true if a correct answer DEPENDS on such a checkable external fact, OR on anything time-sensitive (news, "сегодня"/today, "сейчас", latest, current price/rate/weather/score). Recency is sufficient but NOT necessary — a STATIC fact like a film's cast or a country's capital also counts. When in doubt, prefer TRUE: grounding is cheap, a confident wrong fact is not. FALSE for opinions, explanations, advice, casual chat, creative writing, code help, or transforming text the user already gave you. +- "verifiable": true if the message is specifically a checkable fact about a NAMED entity (who acted in , who is CEO of , what year , population of ) — even if not about "today". A bare follow-up like "2024 года" inherits the entity from the previous turn. +- "entity_obscure": true if the salient entity is plausibly long-tail / not a household name (a minor film, a non-famous person, a niche product) — these are where memory fails hardest. +- "time_sensitive": true if the answer can change over time (news, prices, weather, standings, "current"/"latest"/"now"). +- "trivial": true ONLY for a bare greeting, acknowledgement, or tiny arithmetic with no real question. +- "search_query": a SELF-CONTAINED web search query for this message, with follow-ups resolved from context (a bare "2024 года" after discussing a film becomes " 2024 фильм актёрский состав"). Empty string ONLY if both needs_web and verifiable are false. +- "confidence": 0.0-1.0, your honest certainty in needs_web. + +Schema: {"needs_web":bool,"verifiable":bool,"entity_obscure":bool,"time_sensitive":bool,"trivial":bool,"search_query":"","confidence":0.0-1.0} +Conversation: +` + +// routeLayer0 is the free heuristic verdict (RouterDecision shape), built from the pure +// core. Used directly when the classifier is off, and exported here for the heuristic +// golden test. Confidence is a rough self-estimate, logging-only (not control flow). func routeLayer0(body string) RouterDecision { - s := strings.ToLower(strings.TrimSpace(body)) - if s == "" { - return RouterDecision{Route: routeGrokDirect, Source: "heuristic", Confidence: 0.5} - } - if freshnessRe.MatchString(s) { - return RouterDecision{Route: routeWebThenGrok, Source: "heuristic", Confidence: 0.7, NeedsWeb: true, Freshness: "recent"} - } - if isTrivial(s) { - return RouterDecision{Route: routeTrivial, Source: "heuristic", Confidence: 0.85, Difficulty: "trivial"} - } - return RouterDecision{Route: routeGrokDirect, Source: "heuristic", Confidence: 0.6} + return layer0Decision(rd.ClassifyLayer0(body)) } -// isTrivial: a short greeting/ack or a bare arithmetic expression, with no sign of a -// real question. Length-bounded so "thanks, now explain quantum tunnelling" is NOT -// trivial. -func isTrivial(s string) bool { - if arithmeticRe.MatchString(s) { - return true +// layer0Decision maps the pure routedecide.Layer0 onto a RouterDecision, attaching the +// logging-only confidence self-estimates the old heuristic used. +func layer0Decision(l0 rd.Layer0) RouterDecision { + d := RouterDecision{Route: l0.Route, Source: "heuristic", LookupHint: l0.LookupHint, Freshness: l0.Freshness} + switch l0.Route { + case routeWebThenGrok: + d.Confidence, d.NeedsWeb = 0.7, true + case routeTrivial: + d.Confidence = 0.85 + default: + d.Confidence = 0.6 } - if len(strings.Fields(s)) <= 4 && greetingRe.MatchString(s) { - return true - } - return false + return d } -// classify produces the final RouterDecision for a request. The manual reasoning -// trigger is honoured independently of the heuristic router (it's a deliberate user -// signal). Layer-1's cost, when it runs, is accumulated into cost.Router. -func (b *Bot) classify(ctx context.Context, body string, cost *CostBreakdown) RouterDecision { +// isTrivial reports a short greeting/ack or bare arithmetic (the Layer-0 regex). Kept +// as a thin wrapper over the pure core for in-package callers/tests. +func isTrivial(s string) bool { return rd.IsTrivial(strings.ToLower(strings.TrimSpace(s))) } + +// classify produces the final RouterDecision. The manual reasoning trigger is honoured +// independently of the heuristic router (a deliberate user signal). rcx is the +// privacy-minimised conversation window (DM-resolved; bare trigger in groups) appended +// to the classifier prompt. Layer-1's cost, when it runs, accumulates into cost.Router. +func (b *Bot) classify(ctx context.Context, body, rcx string, cost *CostBreakdown) RouterDecision { if b.cfg.ReasoningEnabled && containsTrigger(body, b.cfg.ReasoningTrigger) { return RouterDecision{Route: routeReason, Source: "forced", Confidence: 1, ReasoningLevel: "high"} } if !b.cfg.RouterEnabled { return RouterDecision{Route: routeGrokDirect, Source: "default"} } - d := routeLayer0(body) - // Layer-1 only refines the uncertain grok_direct verdict, and only if enabled and - // the Gemini client exists. Anything else stands on the heuristic. - if d.Route != routeGrokDirect || !b.cfg.RouterClassifierEnabled || b.gemini == nil { + l0 := rd.ClassifyLayer0(body) + d := layer0Decision(l0) + // Drop the old "only on grok_direct" gate: the classifier now runs on every message + // (when enabled) so it can raise a quiet factual question to web AND agreement-confirm + // a trivial. With it disabled, the Layer-0 verdict stands (today's behaviour). + if !b.cfg.RouterClassifierEnabled || b.gemini == nil { return d } - refined, err := b.routeLayer1(ctx, body, cost) + // 4s router sub-deadline derived from genCtx (a budget cancel still propagates). + rctx, cancel := context.WithTimeout(ctx, routerStageTimeout) + defer cancel() + refined, err := b.routeLayer1(rctx, rcx, l0, cost) if err != nil { + // Classifier error / timeout / garbage → the Layer-0 verdict, exactly as today. + // Only the deterministic freshnessRe (carried in d) survives a classifier outage. b.log.WarnContext(ctx, "layer-1 classifier failed; using heuristic", "err", err) - return d // degrade to the heuristic verdict + return d } return refined } -// classifierConfidenceFloor is the bar a Layer-1 escalation OFF the safe floor -// (trivial/web/reason) must clear. Below it, the verdict is treated as doubt and the -// request stays on grok_direct — the owner's "substantive stays on Grok" rule (§8.6). -// A low-confidence "trivial" is exactly the false-trivial voice leak we must not take. -const classifierConfidenceFloor = 0.8 - -// classifierPrompt asks Gemini for a strict JSON verdict. Kept terse to bound tokens. -const classifierPrompt = `You are a router. Classify the user message into exactly one route and reply with ONLY a JSON object, no prose. -Routes: "trivial" (greeting/ack/tiny arithmetic), "web" (needs fresh/current facts: news, prices, weather, "today"), "normal" (everything else). -Schema: {"route":"trivial|web|normal","confidence":0.0-1.0,"needs_web":true|false} -Message: ` - -// routeLayer1 runs the Gemini classifier and parses its JSON. A non-JSON or unknown -// answer is an error so classify() degrades to the heuristic — the cheap model never -// gets to silently mis-route by returning garbage. -func (b *Bot) routeLayer1(ctx context.Context, body string, cost *CostBreakdown) (RouterDecision, error) { +// routeLayer1 runs the Gemini classifier, parses its JSON into a routedecide.Verdict, +// and resolves the route via the shared routedecide.Combine (WebParanoid-gated). A +// non-JSON or transport error is returned so classify() degrades to the heuristic — the +// cheap model never silently mis-routes by returning garbage. +func (b *Bot) routeLayer1(ctx context.Context, rcx string, l0 rd.Layer0, cost *CostBreakdown) (RouterDecision, error) { resp, err := b.gemini.Complete(ctx, LLMRequest{ Model: b.cfg.GeminiModel, - Messages: []Message{{Role: "user", Content: classifierPrompt + body}}, - MaxTokens: 60, + Messages: []Message{{Role: "user", Content: classifierPrompt + rcx}}, + MaxTokens: 80, // was 60; the schema grew Temperature: 0, }) if err != nil { @@ -121,41 +145,36 @@ func (b *Bot) routeLayer1(ctx context.Context, body string, cost *CostBreakdown) } cost.Router += computeUSD(b.cfg.GeminiModel, resp.Usage, b.cfg) - var parsed struct { - Route string `json:"route"` - Confidence float64 `json:"confidence"` - NeedsWeb bool `json:"needs_web"` - } - if err := json.Unmarshal([]byte(extractJSON(resp.Text)), &parsed); err != nil { + // The classifier schema IS routedecide.Verdict (tagged), so unmarshal straight into it. + var v rd.Verdict + if err := json.Unmarshal([]byte(extractJSON(resp.Text)), &v); err != nil { return RouterDecision{}, err } - route := normalizeRoute(parsed.Route) - // Safe floor: a low-confidence escalation off grok_direct is doubt — keep it on - // Grok rather than leak a possibly-substantive question to the cheap model. - if route != routeGrokDirect && parsed.Confidence < classifierConfidenceFloor { - return RouterDecision{Route: routeGrokDirect, Source: "classifier", Confidence: parsed.Confidence}, nil - } - return RouterDecision{ - Route: route, - Source: "classifier", - Confidence: parsed.Confidence, - NeedsWeb: parsed.NeedsWeb || route == routeWebThenGrok, - }, nil -} + v.SearchQuery = strings.TrimSpace(v.SearchQuery) + combined := rd.Combine(l0, v, b.cfg.WebParanoid) -// normalizeRoute maps a classifier label to a route constant, defaulting unknown -// labels to grok_direct — the safe floor, so a confused classifier never escalates. -func normalizeRoute(label string) string { - switch strings.ToLower(strings.TrimSpace(label)) { - case "trivial", "trivial_direct": - return routeTrivial - case "web", "web_then_grok": - return routeWebThenGrok - case "reason", "reason_then_grok": - return routeReason - default: - return routeGrokDirect + d := RouterDecision{ + Route: combined.Route, + Source: "classifier", + Confidence: v.Confidence, + NeedsWeb: v.NeedsWeb, + Verifiable: v.Verifiable, + EntityObscure: v.EntityObscure, + TimeSensitive: v.TimeSensitive, + TrivialScore: v.Trivial, + SearchQuery: v.SearchQuery, + LookupHint: l0.LookupHint, + Freshness: l0.Freshness, + WebDecidedBy: combined.WebDecidedBy, } + // INFO so prod (which runs at INFO) captures the signal mix without LOG_LEVEL=debug. + // Content-free: no body, no search_query (those are gated DEBUG/telemetry paths). + b.log.InfoContext(ctx, "classifier verdict", + "route", d.Route, "web_decided_by", d.WebDecidedBy, "needs_web", d.NeedsWeb, + "verifiable", d.Verifiable, "entity_obscure", d.EntityObscure, + "time_sensitive", d.TimeSensitive, "trivial", d.TrivialScore, + "confidence", d.Confidence, "lookup_hint", d.LookupHint, "paranoid", b.cfg.WebParanoid) + return d, nil } // extractJSON pulls the first {...} object out of a model reply, tolerating prose or diff --git a/apps/ai-bot/router_test.go b/apps/ai-bot/router_test.go index f5502da6..5efd954b 100644 --- a/apps/ai-bot/router_test.go +++ b/apps/ai-bot/router_test.go @@ -47,18 +47,6 @@ func TestRouteLayer0(t *testing.T) { } } -func TestNormalizeRoute(t *testing.T) { - cases := map[string]string{ - "trivial": routeTrivial, "web": routeWebThenGrok, "reason": routeReason, - "normal": routeGrokDirect, "garbage": routeGrokDirect, "": routeGrokDirect, - } - for in, want := range cases { - if got := normalizeRoute(in); got != want { - t.Errorf("normalizeRoute(%q) = %q, want %q", in, got, want) - } - } -} - func TestExtractJSON(t *testing.T) { if got := extractJSON("prefix {\"route\":\"web\"} suffix"); got != `{"route":"web"}` { t.Errorf("extractJSON = %q", got) diff --git a/apps/ai-bot/store.go b/apps/ai-bot/store.go index 8d3d4c08..73bf86f7 100644 --- a/apps/ai-bot/store.go +++ b/apps/ai-bot/store.go @@ -166,6 +166,27 @@ var migrations = []string{ date TEXT PRIMARY KEY, n INTEGER NOT NULL DEFAULT 0 );`, + + // v5 (router redesign §8): the classifier signals + web outcome the offline eval needs + // to MEASURE misroute / false-web / lie-rate / true-cost / rewrite-quality — none of + // which is derivable from the v3 columns. Append-only (never edit an earlier migration). + // Booleans/counts are metadata, always recorded when telemetry is on; search_query + + // answer_text are content, written ONLY when TELEMETRY_STORE_TEXT (NULL otherwise). + // classifier_confidence is NOT a new column — filter router_confidence on + // router_source='classifier'. grounding_fee_usd is the §7 booked per-prompt fee (it is + // ALSO folded into grounding_usd for the ceiling; this column is the analytics split). + `ALTER TABLE request_log ADD COLUMN IF NOT EXISTS needs_web BOOL DEFAULT false; + ALTER TABLE request_log ADD COLUMN IF NOT EXISTS entity_obscure BOOL DEFAULT false; + ALTER TABLE request_log ADD COLUMN IF NOT EXISTS time_sensitive BOOL DEFAULT false; + ALTER TABLE request_log ADD COLUMN IF NOT EXISTS verifiable BOOL DEFAULT false; + ALTER TABLE request_log ADD COLUMN IF NOT EXISTS trivial_score BOOL DEFAULT false; + ALTER TABLE request_log ADD COLUMN IF NOT EXISTS web_decided_by TEXT DEFAULT ''; + ALTER TABLE request_log ADD COLUMN IF NOT EXISTS grounding_fee_usd DOUBLE PRECISION DEFAULT 0; + ALTER TABLE request_log ADD COLUMN IF NOT EXISTS rewrite_used BOOL DEFAULT false; + ALTER TABLE request_log ADD COLUMN IF NOT EXISTS web_grounded BOOL DEFAULT false; + ALTER TABLE request_log ADD COLUMN IF NOT EXISTS citation_count INT DEFAULT 0; + ALTER TABLE request_log ADD COLUMN IF NOT EXISTS search_query TEXT; + ALTER TABLE request_log ADD COLUMN IF NOT EXISTS answer_text TEXT;`, } // migrate runs all pending migrations on a single connection under a session @@ -412,9 +433,15 @@ func (s *Store) ReleaseReservation(mxid string, estimate float64) error { // grounding it actually spent, releases the rest of the reservation, and refunds the // request slot separately. GREATEST(0, …) keeps reserved_usd from underflowing. // Atomic and commutative per row, so concurrent settles for one user sum correctly. +// +// The per-grounded-prompt FEE (cost.GroundingFee, §7 SG1) is folded into the committed +// grounding_usd column here — so it flows through committedUSDExpr and the $10 ceiling +// finally sees it WITHOUT a spend-table migration. request_log keeps the fee separately +// in grounding_fee_usd for the analytics split. func (s *Store) Settle(mxid string, estimate float64, cost CostBreakdown) error { ctx, cancel := opContext() defer cancel() + grounding := cost.Grounding + cost.GroundingFee _, err := s.pool.Exec(ctx, `INSERT INTO spend (date, mxid, requests, usd, router_usd, grounding_usd, webtool_usd, reserved_usd) VALUES ($1, $2, 0, $3, $4, $5, $6, 0) @@ -424,7 +451,7 @@ func (s *Store) Settle(mxid string, estimate float64, cost CostBreakdown) error grounding_usd = spend.grounding_usd + excluded.grounding_usd, webtool_usd = spend.webtool_usd + excluded.webtool_usd, reserved_usd = GREATEST(0, spend.reserved_usd - $7)`, - todayUTC(), mxid, cost.Token, cost.Router, cost.Grounding, cost.WebTool, estimate) + todayUTC(), mxid, cost.Token, cost.Router, grounding, cost.WebTool, estimate) return err } @@ -444,32 +471,42 @@ func (s *Store) InsertRequestLog(rl RequestLog) error { if err != nil { return err } - // query_text is NULL unless text capture is on (the struct carries "" otherwise), - // so the analytics table never holds message content by default. - var queryText any - if rl.QueryText != "" { - queryText = rl.QueryText + // Content columns are NULL unless text capture is on (the struct carries "" otherwise), + // so the analytics table never holds message/model content by default. + nullIfEmpty := func(s string) any { + if s == "" { + return nil + } + return s } - + // request_log.grounding_usd is the TOKEN cost only; the per-prompt FEE is split into its + // own grounding_fee_usd column (the spend ledger folds them — see Settle). total_usd is + // the full Total() including the fee, so the two grounding columns + total stay coherent. _, err = s.pool.Exec(ctx, ` INSERT INTO request_log ( id, room_id, sender, route, router_source, router_confidence, models, prompt_tokens, cached_tokens, completion_tokens, token_usd, grounding_usd, router_usd, webtool_usd, total_usd, latency_ms, stage_ms, escalated, fallback_fired, cache_hit, ceiling_hit, - per_user_cap_hit, prompt_version, provider_request_id, degraded, err, ok, query_text + per_user_cap_hit, prompt_version, provider_request_id, degraded, err, ok, query_text, + needs_web, entity_obscure, time_sensitive, verifiable, trivial_score, web_decided_by, + grounding_fee_usd, rewrite_used, web_grounded, citation_count, search_query, answer_text ) VALUES ( $1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $20, $21, - $22, $23, $24, $25, $26, $27, $28 + $22, $23, $24, $25, $26, $27, $28, + $29, $30, $31, $32, $33, $34, + $35, $36, $37, $38, $39, $40 ) ON CONFLICT (id) DO NOTHING`, rl.ID, rl.RoomID, rl.Sender, rl.Route, rl.RouterSource, rl.RouterConfidence, models, rl.PromptTokens, rl.CachedTokens, rl.CompletionTokens, rl.Cost.Token, rl.Cost.Grounding, rl.Cost.Router, rl.Cost.WebTool, rl.Cost.Total(), rl.LatencyMS, stages, rl.Escalated, rl.FallbackFired, rl.CacheHit, rl.CeilingHit, - rl.PerUserCapHit, rl.PromptVersion, rl.ProviderRequestID, rl.Degraded, rl.Err, rl.OK, queryText) + rl.PerUserCapHit, rl.PromptVersion, rl.ProviderRequestID, rl.Degraded, rl.Err, rl.OK, nullIfEmpty(rl.QueryText), + rl.NeedsWeb, rl.EntityObscure, rl.TimeSensitive, rl.Verifiable, rl.TrivialScore, rl.WebDecidedBy, + rl.Cost.GroundingFee, rl.RewriteUsed, rl.WebGrounded, rl.CitationCount, nullIfEmpty(rl.SearchQuery), nullIfEmpty(rl.AnswerText)) return err } @@ -509,6 +546,20 @@ func (s *Store) IncrGroundingIfUnder(cap int) (bool, error) { return true, nil } +// DecrGrounding refunds one admitted grounding slot for today when the admitted prompt +// produced no usable grounded digest (no citations, or the fetch failed), so over-routing +// and failed fetches don't burn the day's grounded-answer budget (§7 SG4). It mirrors +// RefundRequest: a single atomic UPDATE, GREATEST(0, …) so a double-refund can't drive the +// counter negative, todayUTC() internally (no date arg). The money side is independent — +// the per-prompt fee stays booked in the ledger; this only touches the quota counter. +func (s *Store) DecrGrounding() error { + ctx, cancel := opContext() + defer cancel() + _, err := s.pool.Exec(ctx, + `UPDATE grounding_count SET n = GREATEST(0, n - 1) WHERE date = $1`, todayUTC()) + return err +} + // HasWarnedEncrypted / SetWarnedEncrypted persist the one-shot "reacted 🔒 to this // room because I can't read encryption" flag so a restart doesn't re-react on every // message (F5). The bot never reacts to its own events: m.reaction is not an diff --git a/apps/ai-bot/store_test.go b/apps/ai-bot/store_test.go index 5253fd3b..515ca764 100644 --- a/apps/ai-bot/store_test.go +++ b/apps/ai-bot/store_test.go @@ -443,6 +443,114 @@ func TestStoreGroundingCapConcurrent(t *testing.T) { } } +// TestStoreDecrGrounding covers the §7 SG4 cap refund: a refunded slot frees one +// admission, and an over-refund clamps to 0 (never negative → no phantom headroom). +func TestStoreDecrGrounding(t *testing.T) { + st := openTestStore(t) + defer st.Close() + const cap = 3 + for i := 0; i < cap; i++ { + if ok, err := st.IncrGroundingIfUnder(cap); err != nil || !ok { + t.Fatalf("incr %d: (%v,%v)", i, ok, err) + } + } + if ok, _ := st.IncrGroundingIfUnder(cap); ok { + t.Fatal("at cap, should be denied") + } + // Refund one → one more admitted. + if err := st.DecrGrounding(); err != nil { + t.Fatalf("decr: %v", err) + } + if ok, err := st.IncrGroundingIfUnder(cap); err != nil || !ok { + t.Fatalf("post-refund incr: (%v,%v), want admitted", ok, err) + } + // Over-refund must clamp at 0, not go negative. + for i := 0; i < 10; i++ { + if err := st.DecrGrounding(); err != nil { + t.Fatalf("over-refund decr: %v", err) + } + } + admitted := 0 + for i := 0; i < cap+2; i++ { + if ok, _ := st.IncrGroundingIfUnder(cap); ok { + admitted++ + } + } + if admitted != cap { + t.Fatalf("after clamp, admitted %d, want %d (counter must have clamped to 0)", admitted, cap) + } +} + +// TestStoreSettleBooksGroundingFee: the per-grounded-prompt FEE (§7 SG1) must land in +// committed spend so the $10 ceiling sees it — it is folded into grounding_usd at Settle. +func TestStoreSettleBooksGroundingFee(t *testing.T) { + st := openTestStore(t) + defer st.Close() + if err := st.Settle("@u:vojo.chat", 0, CostBreakdown{Grounding: 0.0001, GroundingFee: 0.035}); err != nil { + t.Fatalf("settle: %v", err) + } + spent, err := st.SpentTodayUSD() + if err != nil { + t.Fatalf("spent: %v", err) + } + if d := spent - 0.0351; d > 1e-9 || d < -1e-9 { + t.Fatalf("committed = %v, want 0.0351 (grounding token + per-prompt fee)", spent) + } +} + +// TestStoreRequestLogClassifierColumns covers the §8 columns: signal booleans + the fee +// split + grounded outcome roundtrip, and total_usd includes the fee. +func TestStoreRequestLogClassifierColumns(t *testing.T) { + st := openTestStore(t) + defer st.Close() + rl := RequestLog{ + ID: "$ev-rl-sig", Route: routeWebThenGrok, RouterSource: "classifier", + Models: map[string]string{"final": "grok-x"}, + Cost: CostBreakdown{Token: 0.002, Grounding: 0.00007, GroundingFee: 0.035}, + NeedsWeb: true, + EntityObscure: true, + Verifiable: true, + WebDecidedBy: "entity_obscure", + RewriteUsed: true, + WebGrounded: true, + CitationCount: 3, + SearchQuery: "the resolved query", + AnswerText: "the answer", + OK: true, + } + if err := st.InsertRequestLog(rl); err != nil { + t.Fatalf("insert: %v", err) + } + ctx, cancel := opContext() + defer cancel() + var ( + needsWeb, entityObscure, webGrounded, rewriteUsed bool + webDecidedBy string + fee, total float64 + cites int + sq, ans *string + ) + if err := st.pool.QueryRow(ctx, `SELECT needs_web, entity_obscure, web_decided_by, grounding_fee_usd, + rewrite_used, web_grounded, citation_count, search_query, answer_text, total_usd + FROM request_log WHERE id=$1`, rl.ID).Scan(&needsWeb, &entityObscure, &webDecidedBy, &fee, + &rewriteUsed, &webGrounded, &cites, &sq, &ans, &total); err != nil { + t.Fatalf("read: %v", err) + } + if !needsWeb || !entityObscure || webDecidedBy != "entity_obscure" || !rewriteUsed || !webGrounded || cites != 3 { + t.Fatalf("signal columns wrong: needsWeb=%v obscure=%v decidedBy=%q rewrite=%v grounded=%v cites=%d", + needsWeb, entityObscure, webDecidedBy, rewriteUsed, webGrounded, cites) + } + if d := fee - 0.035; d > 1e-9 || d < -1e-9 { + t.Fatalf("grounding_fee_usd = %v, want 0.035", fee) + } + if d := total - rl.Cost.Total(); d > 1e-9 || d < -1e-9 { + t.Fatalf("total_usd = %v, want %v (incl. fee)", total, rl.Cost.Total()) + } + if sq == nil || *sq != "the resolved query" || ans == nil || *ans != "the answer" { + t.Fatalf("InsertRequestLog should store content as given: sq=%v ans=%v", sq, ans) + } +} + func TestStoreWarnedEncrypted(t *testing.T) { st := openTestStore(t) const room = "!enc:vojo.chat" diff --git a/apps/ai-bot/telemetry.go b/apps/ai-bot/telemetry.go index 96d95751..f838d90a 100644 --- a/apps/ai-bot/telemetry.go +++ b/apps/ai-bot/telemetry.go @@ -3,6 +3,8 @@ package main import ( "context" "time" + + rd "vojo.chat/ai-bot/internal/routedecide" ) // telemetry.go is the request_log analytics path: it captures route, cost, latency @@ -17,10 +19,10 @@ import ( // limiter denial). const ( routeNone = "none" - routeGrokDirect = "grok_direct" - routeTrivial = "trivial_direct" - routeWebThenGrok = "web_then_grok" - routeReason = "reason_then_grok" + routeGrokDirect = rd.RouteGrokDirect + routeTrivial = rd.RouteTrivial + routeWebThenGrok = rd.RouteWeb + routeReason = rd.RouteReason ) // Degrade/skip reason strings (request_log.degraded). Stable tokens so the analytics @@ -74,6 +76,24 @@ type RequestLog struct { Err string OK bool QueryText string // stored only when TELEMETRY_STORE_TEXT; stripped otherwise + + // Router/classifier signals + web outcome (§8) — the inputs the offline eval needs to + // measure misroute / false-web / lie-rate / true-cost / rewrite-quality. The boolean + // signals + WebDecidedBy are metadata (always stored when telemetry is on); SearchQuery + // and AnswerText are model-/user-derived content and are stripped unless + // TELEMETRY_STORE_TEXT (like QueryText). RouterConfidence above doubles as the + // classifier confidence (filter request_log on router_source='classifier'). + NeedsWeb bool + EntityObscure bool + TimeSensitive bool + Verifiable bool + TrivialScore bool + WebDecidedBy string + RewriteUsed bool + WebGrounded bool + CitationCount int + SearchQuery string // resolved query sent to Fetch; stored only when TELEMETRY_STORE_TEXT + AnswerText string // the final answer; stored only when TELEMETRY_STORE_TEXT (lie-label input) } // recordTelemetry persists a row off the answer path. No-op unless TELEMETRY_ENABLED. @@ -85,7 +105,9 @@ func (b *Bot) recordTelemetry(ctx context.Context, rl RequestLog) { return } if !b.cfg.TelemetryStoreText { - rl.QueryText = "" + // One text-gate governs ALL stored content: the user query, the model-authored + // search query, and the answer. Metadata signals (NeedsWeb, WebDecidedBy, …) stay. + rl.QueryText, rl.SearchQuery, rl.AnswerText = "", "", "" } b.safego(ctx, "telemetry", func() { if err := b.st.InsertRequestLog(rl); err != nil { diff --git a/apps/ai-bot/telemetry_test.go b/apps/ai-bot/telemetry_test.go index aaa17b26..13a81e72 100644 --- a/apps/ai-bot/telemetry_test.go +++ b/apps/ai-bot/telemetry_test.go @@ -55,6 +55,42 @@ func TestRecordSkipWritesRow(t *testing.T) { } } +// TestTelemetryStripsTextWhenStoreTextOff proves the content gate: with TELEMETRY_ENABLED +// on but TELEMETRY_STORE_TEXT off, the user query, the model-authored search query, and the +// answer are all NULL — only metadata signals land. The boolean signals are still recorded. +func TestTelemetryStripsTextWhenStoreTextOff(t *testing.T) { + st := openTestStore(t) + defer st.Close() + b := newTestBot(st, &Config{TelemetryEnabled: true, TelemetryStoreText: false}) + + b.recordTelemetry(context.Background(), RequestLog{ + ID: "$strip-1", Route: routeWebThenGrok, RouterSource: "classifier", + QueryText: "secret query", SearchQuery: "secret search", AnswerText: "secret answer", + NeedsWeb: true, WebDecidedBy: "classifier_needs_web", OK: true, + }) + + deadline := time.Now().Add(2 * time.Second) + for requestLogCount(t, st) == 0 && time.Now().Before(deadline) { + time.Sleep(20 * time.Millisecond) + } + ctx, cancel := opContext() + defer cancel() + var qt, sq, ans, decidedBy *string + var needsWeb bool + if err := st.pool.QueryRow(ctx, + `SELECT query_text, search_query, answer_text, web_decided_by, needs_web FROM request_log WHERE id=$1`, + "$strip-1").Scan(&qt, &sq, &ans, &decidedBy, &needsWeb); err != nil { + t.Fatalf("read: %v", err) + } + if qt != nil || sq != nil || ans != nil { + t.Fatalf("text columns must be NULL when store-text off: qt=%v sq=%v ans=%v", qt, sq, ans) + } + // Metadata is still recorded (it is not content). + if !needsWeb || decidedBy == nil || *decidedBy != "classifier_needs_web" { + t.Fatalf("metadata signals must survive: needsWeb=%v decidedBy=%v", needsWeb, decidedBy) + } +} + // TestTelemetryDisabledWritesNothing proves the default (TELEMETRY_ENABLED off) adds // no write path — strict "cascade-off == today". func TestTelemetryDisabledWritesNothing(t *testing.T) { diff --git a/apps/ai-bot/web.go b/apps/ai-bot/web.go index 2705916e..5922680d 100644 --- a/apps/ai-bot/web.go +++ b/apps/ai-bot/web.go @@ -20,9 +20,12 @@ import ( // API (/v1/responses). NB the older chat/completions Live Search `search_parameters` // mechanism was RETIRED by xAI (now 410 Gone), and the web_search tool is not on // chat/completions — hence the Responses endpoint. Billed $5/1k tool calls + tokens. -// - gemini_grounding: Gemini native v1beta google_search. Cheaper, but Gemini-3 only -// and silently ungrounds otherwise (F-EXT-3) — so it runs behind a citations -// verify-gate and degrades if it fails. +// - gemini_grounding: Gemini native v1beta google_search. Cheaper. Works on current +// models INCLUDING gemini-2.5-flash-lite (verified against ai.google.dev — the 2.5 +// family supports google_search; only legacy models use google_search_retrieval). +// The F-EXT-3 "silently ungrounds" caveat is about the OpenAI-compat endpoint, NOT +// the model version — so this provider uses the NATIVE v1beta path and runs behind a +// citations verify-gate, degrading if no citations come back. // // The web call is bounded by a per-stage timeout (and gemini_grounding additionally by a // durable daily cap), and either provider failing degrades the request to grok_direct @@ -196,12 +199,13 @@ func (p *grokWebSearch) Fetch(ctx context.Context, query string) (WebContext, er return wc, nil } -// --- gemini_grounding (Gemini-3 native only) -------------------------------------- +// --- gemini_grounding (native v1beta google_search; current models incl. 2.5) ------ type geminiGrounding struct { - gem *geminiClient - st *Store - cfg *Config + gem *geminiClient + st *Store + cfg *Config + logger *slog.Logger } func (p *geminiGrounding) Fetch(ctx context.Context, query string) (WebContext, error) { @@ -213,11 +217,24 @@ func (p *geminiGrounding) Fetch(ctx context.Context, query string) (WebContext, if ok, err := p.st.IncrGroundingIfUnder(p.cfg.WebGroundingDailyCap); err != nil { return WebContext{}, err } else if !ok { - return WebContext{}, errGroundingCapped + return WebContext{}, errGroundingCapped // hit BEFORE billing → no fee, no slot consumed } res, err := p.gem.groundedSearch(ctx, query) // errors (incl. no-citations) → caller degrades - cost := CostBreakdown{Grounding: computeUSD(p.cfg.GeminiModel, res.Usage, p.cfg)} + // SG1: the prompt is admitted, so treat it as billed — book the token cost AND the + // per-grounded-prompt fee, even on the error return. The fee is the money truth the + // $10 ceiling must see; it is kept separate from the cap quota below. + cost := CostBreakdown{ + Grounding: computeUSD(p.cfg.GeminiModel, res.Usage, p.cfg), + GroundingFee: p.cfg.GeminiGroundingPerPrompt, + } if err != nil { + // SG4: the admitted slot produced no usable grounding (no citations, or the call + // failed). Refund the cap slot so over-routing / failed fetches don't burn the + // day's grounded-answer budget — independent of the fee, which stays booked. + // Best-effort: a failed refund only slightly tightens the cap, never money. + if derr := p.st.DecrGrounding(); derr != nil && p.logger != nil { + p.logger.WarnContext(ctx, "grounding cap refund failed (non-fatal)", "err", derr) + } return WebContext{Cost: cost, Usage: res.Usage}, err } return WebContext{Digest: res.Digest, Citations: res.Citations, Usage: res.Usage, Cost: cost}, nil