feat(ai-bot): paranoid epistemic-risk router that grounds checkable facts behind default-off flags, with booked grounding fee and offline eval harness

2026-06-02 21:08:27 +03:00 · 2026-06-02 21:08:27 +03:00 · 7ae77da2d0
commit 7ae77da2d0
parent 6f19feac91
21 changed files with 1818 additions and 213 deletions
--- a/apps/ai-bot/.gitignore
+++ b/apps/ai-bot/.gitignore
@ -1,4 +1,5 @@
 .env
 state/
 ai-bot
 /routereval
 *.local
--- a/apps/ai-bot/README.md
+++ b/apps/ai-bot/README.md
@ -162,11 +162,13 @@ offline-eval gate (misroute < 2-3% AND measured saving > the second provider's c
 | Env | Default | Meaning |
 |---|---|---|
 | `ROUTER_ENABLED` | false | Layer-0 heuristic router (else everything → grok_direct) |
-| `ROUTER_CLASSIFIER_ENABLED` | false | Layer-1 Gemini classifier on uncertain cases (requires `ROUTER_ENABLED` + Gemini key) |
+| `ROUTER_CLASSIFIER_ENABLED` | false | Layer-1 Gemini classifier — runs on **every** message when on (not just uncertain ones): a message is routed as trivial only when the classifier agrees with the Layer-0 candidate and, with `WEB_PARANOID`, the classifier escalates checkable-fact lookups to the web route. Budget ~$0.00004/msg, reserved unconditionally. Requires `ROUTER_ENABLED` + Gemini key. |
 | `TRIVIAL_OFFLOAD_ENABLED` | false | answer trivial messages with Gemini (requires Gemini key) |
 | `WEB_ENABLED` | false | web_then_grok route (Gemini/Grok fetches fresh facts, **Grok stays the voice**) |
 | `WEB_PROVIDER` | `grok_web_search` | `grok_web_search` (xAI Agent Tools `web_search` on the Responses API, $5/1k calls, no Gemini key) or `gemini_grounding` (**cheapest**: Gemini does the fetch via native v1beta `google_search`, Grok voices it — ~$0.0013/query, validated on `gemini-2.5-flash-lite`; the F-EXT-3 "Gemini-3 only" caveat is the OpenAI-compat endpoint, native v1beta works on 2.5). Requires `GEMINI_API_KEY`. |
-| `WEB_GROUNDING_DAILY_CAP` | 450 | durable per-day cap for `gemini_grounding` before degrading (keep < the 500/day free grounding RPD; guards the per-1k overage) |
+| `WEB_PARANOID` | false | **the single switch that activates epistemic grounding.** Beyond freshness words, it unlocks the classifier-driven web arms (needs_web≥0.55, obscure entity, time-sensitive, lookup-hint) — i.e. it routes checkable-fact lookups (a film's cast, a date) to grounding instead of letting Grok answer from memory and hallucinate. With it off, web routing is freshness-only (i.e. unchanged from the current behaviour), so turning on the classifier alone is web-routing-neutral. **Requires `WEB_PROVIDER=gemini_grounding`** (refuses to boot on `grok_web_search`, which has no daily cap). |
 | `WEB_GROUNDING_DAILY_CAP` | 450 | durable per-day cap for `gemini_grounding` before degrading. Google gives **1,500 grounded requests/day free** (shared Flash/Flash-Lite, both free & paid tiers; verified ai.google.dev/pricing); keep the cap **under 1,500** so grounding stays free (token-only). Must be > 0 for `gemini_grounding` (a non-positive cap silently disables grounding → refuses to boot). |
 | `GEMINI_GROUNDING_PER_PROMPT_USD` | 0.035 | the per-grounded-prompt FEE booked into the ledger so the `DAILY_USD_CEILING` accounts for it. The fee is **$35/1k = $0.035** but ONLY applies **above** the 1,500/day free allowance. So while `WEB_GROUNDING_DAILY_CAP ≤ 1,500` (e.g. the 450 default) grounding never hits the fee → **set `0`** (the bot then books only token cost, which is correct). Set `0.035` only if you raise the cap above 1,500/day, so the ceiling throttles before silently overrunning on requests #1501+. |
 | `REASONING_ENABLED` | false | manual "think harder" route on `REASONING_TRIGGER` |
 | `REASONING_TRIGGER` | `подумай глубже` | trigger phrase |
 | `REASONING_MODEL` | `grok-4.3` | a **reasoning-capable** model (the default `grok-4.20-non-reasoning` rejects `reasoning_effort`) |
--- a/apps/ai-bot/bot.go
+++ b/apps/ai-bot/bot.go
@ -111,7 +111,7 @@ func NewBot(ctx context.Context, cfg *Config, logger *slog.Logger) (*Bot, error)
 	}
 	if cfg.WebEnabled {
 		if cfg.WebProvider == webProviderGeminiGrounding {
-			b.web = &geminiGrounding{gem: gc, st: st, cfg: cfg}
+			b.web = &geminiGrounding{gem: gc, st: st, cfg: cfg, logger: logger}
 		} else {
 			b.web = newGrokWebSearch(cfg, logger)
 		}
@ -466,7 +466,7 @@ func (b *Bot) respond(ctx context.Context, roomID, threadRoot string, isDM bool,
 	defer cancel()
 	msgs := buildContext(b.cfg.SystemPrompt, history, isDM, mc.Body, b.cfg.MaxCtxEvent, maxPromptTokens)
-	res, err := b.generate(genCtx, mc.Body, msgs, b.convID(roomID, threadRoot))
+	res, err := b.generate(genCtx, mc.Body, msgs, b.convID(roomID, threadRoot), isDM)
 	// Record what the routing + generation actually did, whatever the outcome.
 	rl.Route = res.route
@ -487,6 +487,20 @@ func (b *Bot) respond(ctx context.Context, roomID, threadRoot string, isDM bool,
 	if res.degraded != "" {
 		rl.Degraded = res.degraded
 	}
 	// Classifier signals + web outcome for the offline eval (§8). Booleans/counts are
 	// metadata (always recorded when telemetry is on); SearchQuery/AnswerText are content
 	// (stripped unless TELEMETRY_STORE_TEXT — see recordTelemetry).
 	rl.NeedsWeb = res.decision.NeedsWeb
 	rl.EntityObscure = res.decision.EntityObscure
 	rl.TimeSensitive = res.decision.TimeSensitive
 	rl.Verifiable = res.decision.Verifiable
 	rl.TrivialScore = res.decision.TrivialScore
 	rl.WebDecidedBy = res.decision.WebDecidedBy
 	rl.RewriteUsed = res.rewriteUsed
 	rl.WebGrounded = res.webGrounded
 	rl.CitationCount = res.citationCount
 	rl.SearchQuery = res.searchQuery
 	rl.AnswerText = res.text
 	// The full routing/generation picture for one request, in one line: which route ran,
 	// whether it was a fallback, the degrade reason (if any), the per-stage timings and
@ -494,7 +508,9 @@ func (b *Bot) respond(ctx context.Context, roomID, threadRoot string, isDM bool,
 	b.log.DebugContext(ctx, "generation outcome",
 		"route", res.route, "router_source", res.decision.Source,
 		"router_confidence", res.decision.Confidence, "fallback", res.fallback,
-		"degraded", res.degraded, "stage_ms", res.stageMS, "usd", res.cost.Total())
+		"degraded", res.degraded, "stage_ms", res.stageMS, "usd", res.cost.Total(),
 		"web_grounded", res.webGrounded, "citation_count", res.citationCount,
 		"grounding_fee_usd", res.cost.GroundingFee, "rewrite_used", res.rewriteUsed)
 	if err != nil {
 		// Terminal: even grok_direct failed. Settle whatever the cascade ACTUALLY spent
--- a/apps/ai-bot/cascade.go
+++ b/apps/ai-bot/cascade.go
@ -30,6 +30,14 @@ type genResult struct {
 	fallback   bool   // true if we degraded off the decided route
 	degraded   string // degrade reason for request_log
 	stageMS    map[string]int
 	// Web-route outcome (for request_log §8): the resolved query actually sent to Fetch,
 	// whether the context-resolved rewrite was used (vs the bare body), and whether the
 	// fetch came back grounded with citations (a zero-citation synth is a silent false-web).
 	searchQuery   string
 	rewriteUsed   bool
 	webGrounded   bool
 	citationCount int
 }
 // msSince reports the time elapsed since t in whole milliseconds, as the int value
 // stored in the per-stage timing map.
 func msSince(t time.Time) int {
 	elapsed := time.Since(t)
 	return int(elapsed.Milliseconds())
 }
@ -47,33 +55,49 @@ func (b *Bot) reserveEstimate() float64 {
 			// fetch can search several times and pull large context; reserve generously.
 			est += float64(maxWebSearchCalls)*grokWebSearchPerCall + b.estimateUSD(b.cfg.XAIModel)
 		} else {
-			est += b.estimateUSD(b.cfg.GeminiModel)
+			// gemini grounding: the fetch's tokens PLUS the per-grounded-prompt fee (§7
 			// SG2), so the admission envelope is a true upper bound once the fee is booked.
 			est += b.estimateUSD(b.cfg.GeminiModel) + b.cfg.GeminiGroundingPerPrompt
 		}
 	}
 	if b.cfg.ReasoningEnabled {
 		// Higher reasoning effort can burn more output tokens; reserve double.
 		est = max(est, 2*b.estimateUSD(b.cfg.ReasoningModel))
 	}
 	// The always-on Layer-1 classifier leg (§7 Finding 4): a cheap Gemini call on every
 	// message when the classifier is enabled, so reserved ≥ actual stays true. Added after
 	// the max() so it is never swallowed by the reasoning branch.
 	if b.cfg.RouterClassifierEnabled {
 		est += b.estimateUSD(b.cfg.GeminiModel)
 	}
 	return est
 }
 // generate routes and produces an answer, degrading to grok_direct on any failure.
 // It returns a terminal error ONLY if even grok_direct fails; every other route falls
 // through to grok_direct rather than erroring.
-func (b *Bot) generate(ctx context.Context, body string, msgs []Message, convID string) (genResult, error) {
+func (b *Bot) generate(ctx context.Context, body string, msgs []Message, convID string, isDM bool) (genResult, error) {
 	res := genResult{stageMS: map[string]int{}, finalModel: b.cfg.XAIModel}
 	// The privacy-minimised conversation window for the classifier + follow-up rewrite.
 	// DM-resolved (last ≤2 turns); bare trigger in groups (no cross-member subject bleed).
 	rcx := routerContext(msgs, isDM)
 	t0 := time.Now()
-	res.decision = b.classify(ctx, body, &res.cost) // accumulates cost.Router if Layer-1 runs
+	res.decision = b.classify(ctx, body, rcx, &res.cost) // accumulates cost.Router if Layer-1 runs
 	res.stageMS["router"] = msSince(t0)
 	res.route = res.decision.Route
 	// The router's pre-dispatch verdict (what it chose, why, how sure). On a degrade the
 	// route that actually runs differs from this — respond logs that final outcome — so
-	// the two lines together show "router wanted X, we ran Y". DEBUG: routing diagnostics.
+	// the two lines together show "router wanted X, we ran Y". DEBUG: routing diagnostics,
 	// content-free (the resolved search_query is NOT logged here — it's a gated path, §8).
 	b.log.DebugContext(ctx, "route decided",
 		"route", res.decision.Route, "source", res.decision.Source,
 		"confidence", res.decision.Confidence, "needs_web", res.decision.NeedsWeb,
 		"web_decided_by", res.decision.WebDecidedBy, "verifiable", res.decision.Verifiable,
 		"entity_obscure", res.decision.EntityObscure, "time_sensitive", res.decision.TimeSensitive,
 		"trivial", res.decision.TrivialScore, "lookup_hint", res.decision.LookupHint,
 		"reasoning_level", res.decision.ReasoningLevel)
 	finalMsgs := msgs
@ -89,14 +113,21 @@ func (b *Bot) generate(ctx context.Context, body string, msgs []Message, convID
 		}
 	case routeWebThenGrok:
 		if b.cfg.WebEnabled && b.web != nil {
-			if err := b.genWebThenGrok(ctx, body, msgs, convID, &res); err == nil {
+			if err := b.genWebThenGrok(ctx, body, isDM, msgs, convID, &res); err == nil {
 				return res, nil
 			} else {
 				b.log.WarnContext(ctx, "web route failed; degrading to grok_direct", "err", err, "reason", res.degraded)
 				b.degradeTo(&res, degradeWeb)
-				// The question wanted fresh facts but we have none — answer from training
+				// We have no fresh facts. For a RECENCY miss, hedge with an honest staleness
-				// knowledge WITH an honest staleness caveat, not stale-as-current (§8.2.1).
+				// caveat (§8.2.1). For a STATIC verifiable-fact miss (a film cast, a date),
-				finalMsgs = hedgeMessages(msgs)
+				// the staleness caveat is wrong — a stale caveat on a wrong cast still ships
 				// the wrong cast — so instruct Grok to ABSTAIN on specific names/dates/numbers
 				// instead of emitting a confident guess (§4.4).
 				if res.decision.factualMiss() {
 					finalMsgs = factualAbstainMessages(msgs)
 				} else {
 					finalMsgs = hedgeMessages(msgs)
 				}
 			}
 		}
 	case routeReason:
@ -209,17 +240,38 @@ const webStageTimeout = 15 * time.Second
 // accounts for the spend before the caller degrades to grok_direct (the partial cascade
 // case, §8.1). The daily cap and per-stage deadline are applied here, uniformly for both
 // providers.
-func (b *Bot) genWebThenGrok(ctx context.Context, body string, msgs []Message, convID string, res *genResult) error {
+func (b *Bot) genWebThenGrok(ctx context.Context, body string, isDM bool, msgs []Message, convID string, res *genResult) error {
 	// DM-gated rewrite-with-fallback (§6): use the classifier's self-contained,
 	// follow-up-resolved query, but ONLY in a DM (a group buffer interleaves members'
 	// topics) and only when it's present and not over-long; otherwise the bare body — so
 	// the fetch is never worse than today. Sanitise before egress (it is model-authored
 	// text going to an external search API): collapse control chars/whitespace, cap length.
 	q := body
 	if isDM {
 		if sq := strings.TrimSpace(res.decision.SearchQuery); sq != "" && len([]rune(sq)) <= 200 {
 			q, res.rewriteUsed = sq, true
 		}
 	}
 	q = sanitizeSearchQuery(q)
 	if q == "" {
 		q, res.rewriteUsed = sanitizeSearchQuery(body), false // never send an empty query
 	}
 	res.searchQuery = q
 	// Per-stage web/grounding deadline, independent of the overall budget.
 	wctx, cancelW := context.WithTimeout(ctx, webStageTimeout)
 	tw := time.Now()
-	wc, ferr := b.web.Fetch(wctx, body)
+	wc, ferr := b.web.Fetch(wctx, q)
 	cancelW()
 	res.stageMS["web"] = msSince(tw)
 	// Book the fetch's fee + tokens whether or not it produced a usable digest — the call
-	// was billed (the daily cap, if any, is enforced inside the provider).
+	// was billed (the daily cap, if any, is enforced inside the provider). GroundingFee is
 	// the per-grounded-prompt overage (§7 SG1), booked even on the error return.
 	res.cost.Grounding += wc.Cost.Grounding
 	res.cost.GroundingFee += wc.Cost.GroundingFee
 	res.cost.WebTool += wc.Cost.WebTool
 	res.citationCount = len(wc.Citations)
 	res.webGrounded = len(wc.Citations) > 0
 	webUsage := wc.Usage
 	if ferr != nil {
 		if errors.Is(ferr, errGroundingCapped) {
@ -227,6 +279,12 @@ func (b *Bot) genWebThenGrok(ctx context.Context, body string, msgs []Message, c
 		}
 		return ferr // web fee already booked; caller degrades to grok_direct (with hedge)
 	}
 	// A non-empty digest with NO citations is a silent false-web (the answer is synthesised
 	// from an ungrounded fetch). gemini_grounding errors out before here; grok_web_search
 	// can reach this — surface it at WARN so it's visible at the default level (§8).
 	if len(wc.Citations) == 0 {
 		b.log.WarnContext(ctx, "web no-citation synth (ungrounded digest)", "provider", b.cfg.WebProvider)
 	}
 	tf := time.Now()
 	resp, err := b.llm.Complete(ctx, LLMRequest{
@ -256,23 +314,70 @@ func (b *Bot) genWebThenGrok(ctx context.Context, body string, msgs []Message, c
 	return nil
 }
-// webSynthMessages inserts the fresh web digest (and its sources) as a system note just
+// webSynthMessages inserts the fresh web digest as a system note just after the system
-// after the system prompt, so Grok answers in voice using current facts.
+// prompt, so Grok answers in voice using current facts. It deliberately does NOT pass the
 // raw citation URLs into the prompt, nor ask Grok to "cite sources": gemini grounding
 // returns opaque vertexaisearch.../grounding-api-redirect/... redirect links (not publisher
 // URLs), and instructing Grok to cite made it paste those ugly redirects verbatim into the
 // reply and mis-attribute them ("ссылок из твоего сообщения"). The grounding already
 // happened (citation_count is recorded for telemetry); the user wants the answer, not
 // Google's internal redirect links. Real source attribution (resolving redirects to
 // domains) is a separate, deferred feature.
 func webSynthMessages(base []Message, wc WebContext) []Message {
 	// Only the digest goes into the prompt. wc.Citations is intentionally left out:
 	// the note tells the model to answer without URLs or links, and appending the
 	// citation list would hand it exactly the links it is told not to echo.
 	facts := "Свежие данные из веба — ответь на их основе, кратко и по делу, без URL и ссылок:\n" + wc.Digest
 	return insertSystemNote(base, facts)
 }
-// hedgeMessages adds an honest staleness caveat for a web→grok_direct degrade: the user
+// hedgeMessages adds an honest staleness caveat for a web→grok_direct degrade on a
-// wanted fresh facts but we couldn't fetch them, so the model must flag that its answer
+// RECENCY query: the user wanted fresh facts but we couldn't fetch them, so the model
-// is from training knowledge and may be out of date.
+// must flag that its answer is from training knowledge and may be out of date.
 func hedgeMessages(base []Message) []Message {
 	// The staleness caveat handed to insertSystemNote as the extra system message.
 	const staleNote = "Нет доступа к свежим источникам прямо сейчас — отвечай по знаниям на момент обучения и честно предупреди, что данные могут быть устаревшими."
 	return insertSystemNote(base, staleNote)
 }
 // factualAbstainMessages builds the degrade prompt for a STATIC verifiable-fact miss
 // (§4.4). A staleness caveat does not fit that case: the fact is checkable rather than
 // stale, and the model may simply not know it. The inserted system note therefore tells
 // Grok to admit uncertainty about specific names, dates, years, numbers or cast instead
 // of presenting a guess as fact.
 func factualAbstainMessages(base []Message) []Message {
 	const abstainNote = "Не удалось проверить факты через веб. Если ответ зависит от конкретных имён, дат, годов, чисел или состава — честно скажи, что не уверен в точной фактуре и можешь ошибаться; НЕ выдавай догадку за факт."
 	return insertSystemNote(base, abstainNote)
 }
 // factualMiss selects the hedge used after a web degrade. It returns true when the
 // abstain hedge applies (a static, checkable-fact question) and false when the
 // staleness hedge applies (a recency question). Any recency signal — a non-empty
 // Freshness or TimeSensitive — always yields false. Without one, a Verifiable,
 // EntityObscure or NeedsWeb verdict yields true, so even a needs_web-only verdict
 // abstains rather than emitting a confident guess.
 func (d RouterDecision) factualMiss() bool {
 	recency := d.Freshness != "" || d.TimeSensitive
 	checkable := d.Verifiable || d.EntityObscure || d.NeedsWeb
 	return !recency && checkable
 }
 // sanitizeSearchQuery prepares a (possibly model-authored) query for egress to an
 // external search API. Line breaks, tabs and NEL (U+0085) become spaces; every other
 // C0 control char, DEL and the C1 control range (U+0080–U+009F) is dropped; runs of
 // whitespace collapse to a single space with leading/trailing whitespace removed; and
 // the result is capped at 200 runes. Never trusts the model to have produced clean,
 // bounded text. Returns "" when nothing printable remains.
 func sanitizeSearchQuery(q string) string {
 	const maxRunes = 200
 	q = strings.Map(func(r rune) rune {
 		switch {
 		case r == '\n' || r == '\r' || r == '\t' || r == 0x85:
 			return ' ' // separators become a space so neighbouring words are not glued together
 		case r < 0x20 || (r >= 0x7f && r <= 0x9f):
 			return -1 // drop remaining C0 controls, DEL and C1 controls (e.g. CSI U+009B)
 		}
 		return r
 	}, q)
 	q = strings.Join(strings.Fields(q), " ") // collapse whitespace runs, trim both ends
 	if r := []rune(q); len(r) > maxRunes {
 		// Trim again: the cut can land right after a space.
 		q = strings.TrimSpace(string(r[:maxRunes]))
 	}
 	return q
 }
 // insertSystemNote inserts an extra system message right after the system prompt
 // (base[0] from buildContext), preserving the rest of the window.
 func insertSystemNote(base []Message, content string) []Message {
--- a/apps/ai-bot/cascade_test.go
+++ b/apps/ai-bot/cascade_test.go
@ -5,6 +5,7 @@ import (
 	"errors"
 	"io"
 	"log/slog"
 	"strings"
 	"testing"
 )
@ -29,13 +30,15 @@ func (f *fakeLLM) Complete(_ context.Context, req LLMRequest) (*LLMResponse, err
 }
 type fakeWeb struct {
-	wc    WebContext
+	wc        WebContext
-	err   error
+	err       error
-	calls int
+	calls     int
 	lastQuery string
 }
-func (f *fakeWeb) Fetch(_ context.Context, _ string) (WebContext, error) {
+func (f *fakeWeb) Fetch(_ context.Context, q string) (WebContext, error) {
 	f.calls++
 	f.lastQuery = q
 	if f.err != nil {
 		return WebContext{}, f.err
 	}
@ -71,7 +74,7 @@ func TestGenerateAllFlagsOffIsGrokDirect(t *testing.T) {
 	cfg := cascadeCfg()
 	b := &Bot{cfg: &cfg, llm: grok, gemini: gem, log: discardLog()}
-	res, err := b.generate(context.Background(), "привет", msgs("привет"), "")
+	res, err := b.generate(context.Background(), "привет", msgs("привет"), "", true)
 	if err != nil {
 		t.Fatalf("generate: %v", err)
 	}
@ -93,7 +96,7 @@ func TestGenerateTrivialOffload(t *testing.T) {
 	cfg.RouterEnabled, cfg.TrivialOffloadEnabled = true, true
 	b := &Bot{cfg: &cfg, llm: grok, gemini: gem, log: discardLog()}
-	res, err := b.generate(context.Background(), "привет", msgs("привет"), "")
+	res, err := b.generate(context.Background(), "привет", msgs("привет"), "", true)
 	if err != nil {
 		t.Fatalf("generate: %v", err)
 	}
@ -114,7 +117,7 @@ func TestGenerateTrivialDegradesToGrok(t *testing.T) {
 	cfg.RouterEnabled, cfg.TrivialOffloadEnabled = true, true
 	b := &Bot{cfg: &cfg, llm: grok, gemini: gem, log: discardLog()}
-	res, err := b.generate(context.Background(), "привет", msgs("привет"), "")
+	res, err := b.generate(context.Background(), "привет", msgs("привет"), "", true)
 	if err != nil {
 		t.Fatalf("generate: %v", err)
 	}
@ -129,6 +132,8 @@ func TestGenerateTrivialDegradesToGrok(t *testing.T) {
 	}
 }
 // TestGenerateWebThenGrok: a freshness query (classifier off → Layer-0 web) fetches then
 // has Grok synthesise, booking both calls' tokens + the web fee.
 func TestGenerateWebThenGrok(t *testing.T) {
 	grok := &fakeLLM{text: "synthesised", usage: Usage{PromptTokens: 100, CompletionTokens: 50}}
 	web := &fakeWeb{wc: WebContext{Digest: "fresh facts", Citations: []string{"http://src"}, Cost: CostBreakdown{WebTool: 0.1}}}
@ -136,7 +141,7 @@ func TestGenerateWebThenGrok(t *testing.T) {
 	cfg.RouterEnabled, cfg.WebEnabled = true, true
 	b := &Bot{cfg: &cfg, llm: grok, web: web, log: discardLog()}
-	res, err := b.generate(context.Background(), "какие новости сегодня", msgs("какие новости сегодня"), "")
+	res, err := b.generate(context.Background(), "какие новости сегодня", msgs("какие новости сегодня"), "", true)
 	if err != nil {
 		t.Fatalf("generate: %v", err)
 	}
@ -146,13 +151,16 @@ func TestGenerateWebThenGrok(t *testing.T) {
 	if res.cost.WebTool != 0.1 || res.cost.Token <= 0 {
 		t.Fatalf("cost = %+v, want WebTool 0.1 + Token>0", res.cost)
 	}
 	if !res.webGrounded || res.citationCount != 1 {
 		t.Fatalf("webGrounded=%v citations=%d, want true/1", res.webGrounded, res.citationCount)
 	}
 	if web.calls != 1 || grok.calls != 1 {
 		t.Fatalf("calls web=%d grok=%d, want 1/1", web.calls, grok.calls)
 	}
 }
-// TestGenerateWebDegradesToGrok: a web fetch failure (provider down or cap hit) degrades
+// TestGenerateWebDegradesToGrok: a web fetch failure (cap hit) degrades to grok_direct,
-// to grok_direct and books no web cost.
+// books no web cost, and — being a RECENCY query — uses the staleness hedge, not abstain.
 func TestGenerateWebDegradesToGrok(t *testing.T) {
 	grok := &fakeLLM{text: "grok fallback"}
 	web := &fakeWeb{err: errGroundingCapped}
@ -160,7 +168,7 @@ func TestGenerateWebDegradesToGrok(t *testing.T) {
 	cfg.RouterEnabled, cfg.WebEnabled = true, true
 	b := &Bot{cfg: &cfg, llm: grok, web: web, log: discardLog()}
-	res, err := b.generate(context.Background(), "новости сегодня", msgs("новости сегодня"), "")
+	res, err := b.generate(context.Background(), "новости сегодня", msgs("новости сегодня"), "", true)
 	if err != nil {
 		t.Fatalf("generate: %v", err)
 	}
@ -173,6 +181,10 @@ func TestGenerateWebDegradesToGrok(t *testing.T) {
 	if res.cost.WebTool != 0 || res.cost.Grounding != 0 {
 		t.Fatalf("web cost = %+v, want 0 (fetch failed before billing)", res.cost)
 	}
 	// Recency miss → staleness hedge ("устаревшими"), not the factual-abstain hedge.
 	if !hedgeContains(grok.lastReq.Messages, "устаревш") {
 		t.Fatalf("freshness degrade should use the staleness hedge; messages = %+v", grok.lastReq.Messages)
 	}
 }
 // TestGenerateReasoningForced: the manual trigger routes to the reasoning model with
@ -183,7 +195,7 @@ func TestGenerateReasoningForced(t *testing.T) {
 	cfg.ReasoningEnabled = true // ROUTER_ENABLED deliberately left off
 	b := &Bot{cfg: &cfg, llm: grok, log: discardLog()}
-	res, err := b.generate(context.Background(), "подумай глубже про сознание", msgs("подумай глубже про сознание"), "")
+	res, err := b.generate(context.Background(), "подумай глубже про сознание", msgs("подумай глубже про сознание"), "", true)
 	if err != nil {
 		t.Fatalf("generate: %v", err)
 	}
@ -195,29 +207,240 @@ func TestGenerateReasoningForced(t *testing.T) {
 	}
 }
-// TestClassifierConfidenceFloor: a Layer-1 classifier label that escalates off the safe
+// TestClassifyTrivialAgreementGate: a trivial route requires the Layer-0 candidate AND
-// floor (trivial/web) must clear the confidence floor, else the request stays on
+// classifier.trivial AND confidence ≥ trivialFloor. A low-confidence "trivial" or a
-// grok_direct — the false-trivial voice-leak guard (§8.6).
+// classifier that disagrees stays on grok_direct (no voice leak).
-func TestClassifierConfidenceFloor(t *testing.T) {
+func TestClassifyTrivialAgreementGate(t *testing.T) {
 	cfg := cascadeCfg()
 	cfg.RouterEnabled, cfg.RouterClassifierEnabled = true, true
 	gem := &fakeLLM{}
 	b := &Bot{cfg: &cfg, gemini: gem, log: discardLog()}
 	var cost CostBreakdown
 	const substantive = "напиши подробное эссе про историю римской империи" // Layer-0 → grok_direct
-	gem.text = `{"route":"trivial","confidence":0.2}` // low-confidence escalation
+	gem.text = `{"trivial":true,"needs_web":false,"confidence":0.95}`
-	if d := b.classify(context.Background(), substantive, &cost); d.Route != routeGrokDirect {
+	if d := b.classify(context.Background(), "привет", "USER: привет", &cost); d.Route != routeTrivial {
-		t.Fatalf("low-confidence trivial must stay grok_direct (safe floor), got %q", d.Route)
+		t.Fatalf("agreed high-confidence trivial = %q, want trivial", d.Route)
 	}
-	gem.text = `{"route":"trivial","confidence":0.95}` // confident escalation is honoured
+	gem.text = `{"trivial":true,"needs_web":false,"confidence":0.5}`
-	if d := b.classify(context.Background(), substantive, &cost); d.Route != routeTrivial {
+	if d := b.classify(context.Background(), "привет", "USER: привет", &cost); d.Route != routeGrokDirect {
-		t.Fatalf("high-confidence trivial should route trivial, got %q", d.Route)
+		t.Fatalf("low-confidence trivial = %q, want grok_direct (no leak)", d.Route)
 	}
-	// A classifier error degrades to the Layer-0 verdict (grok_direct), never silence.
+	// A non-trivial body can never be trivial even if the classifier claims so.
-	gem.text, gem.err = "", errors.New("gemini down")
+	gem.text = `{"trivial":true,"needs_web":false,"confidence":0.99}`
-	if d := b.classify(context.Background(), substantive, &cost); d.Route != routeGrokDirect {
+	const substantive = "напиши подробное эссе про историю римской империи"
-		t.Fatalf("classifier failure must fall back to heuristic grok_direct, got %q", d.Route)
+	if d := b.classify(context.Background(), substantive, "USER: …", &cost); d.Route != routeGrokDirect {
 		t.Fatalf("classifier.trivial on a substantive body = %q, want grok_direct", d.Route)
 	}
 }
 // TestClassifyClassifierErrorFallsBackToLayer0 checks that a failed Layer-1 classifier
 // (a transport error or unparseable output) leaves the deterministic Layer-0 verdict in
 // force: grok_direct for a substantive body, web for a freshness body.
 func TestClassifyClassifierErrorFallsBackToLayer0(t *testing.T) {
 	ctx := context.Background()
 	conf := cascadeCfg()
 	conf.RouterEnabled = true
 	conf.RouterClassifierEnabled = true
 	conf.WebParanoid = true
 	classifier := &fakeLLM{}
 	bot := &Bot{cfg: &conf, gemini: classifier, log: discardLog()}
 	var spent CostBreakdown
 	// Phase 1: the classifier call itself fails.
 	classifier.err = errors.New("gemini down")
 	essay := bot.classify(ctx, "напиши эссе про рим", "USER: …", &spent)
 	if essay.Route != routeGrokDirect {
 		t.Fatalf("classifier error on substantive body = %q, want grok_direct (Layer-0)", essay.Route)
 	}
 	news := bot.classify(ctx, "новости сегодня", "USER: …", &spent)
 	if news.Route != routeWebThenGrok {
 		t.Fatalf("classifier error on freshness body = %q, want web (deterministic Layer-0 survives)", news.Route)
 	}
 	// Phase 2: the call succeeds but returns text that is not JSON.
 	classifier.err = nil
 	classifier.text = "not json at all"
 	garbage := bot.classify(ctx, "напиши эссе про рим", "USER: …", &spent)
 	if garbage.Route != routeGrokDirect {
 		t.Fatalf("garbage classifier JSON = %q, want grok_direct (Layer-0)", garbage.Route)
 	}
 }
 // TestGenerateRoadHouseWebParanoidDM is the headline regression: an obscure-entity factual
 // lookup in a DM, with the classifier + WEB_PARANOID on, routes to web AND the fetch uses
 // the classifier's context-resolved search_query (the follow-up rewrite). With paranoid
 // off it correctly stays grok_direct (the canary-neutral baseline).
 func TestGenerateRoadHouseWebParanoidDM(t *testing.T) {
 	const verdict = `{"needs_web":true,"verifiable":true,"entity_obscure":true,"time_sensitive":false,"trivial":false,"search_query":"Дом у дороги 2024 фильм актёрский состав","confidence":0.7}`
 	mk := func(paranoid bool) (*fakeLLM, *fakeWeb, genResult) {
 		grok := &fakeLLM{text: "voiced", usage: Usage{PromptTokens: 10, CompletionTokens: 5}}
 		gem := &fakeLLM{text: verdict}
 		web := &fakeWeb{wc: WebContext{Digest: "cast: Patrick Swayze…", Citations: []string{"http://imdb"}}}
 		cfg := cascadeCfg()
 		cfg.RouterEnabled, cfg.RouterClassifierEnabled, cfg.WebEnabled, cfg.WebParanoid = true, true, true, paranoid
 		b := &Bot{cfg: &cfg, llm: grok, gemini: gem, web: web, log: discardLog()}
 		res, err := b.generate(context.Background(), "2024 года", []Message{
 			{Role: "system", Content: "SYS"},
 			{Role: "user", Content: "кто снимался в фильме дом у дороги"},
 			{Role: "assistant", Content: "В фильме 1989 года…"},
 			{Role: "user", Content: "2024 года"},
 		}, "", true)
 		if err != nil {
 			t.Fatalf("generate: %v", err)
 		}
 		return grok, web, res
 	}
 	_, web, res := mk(true)
 	if res.route != routeWebThenGrok {
 		t.Fatalf("paranoid DM road-house = %q, want web_then_grok (the fix)", res.route)
 	}
 	if !res.rewriteUsed || web.lastQuery != "Дом у дороги 2024 фильм актёрский состав" {
 		t.Fatalf("fetch should use the rewritten query: rewriteUsed=%v lastQuery=%q", res.rewriteUsed, web.lastQuery)
 	}
 	_, _, resOff := mk(false)
 	if resOff.route != routeGrokDirect {
 		t.Fatalf("paranoid OFF road-house = %q, want grok_direct (baseline)", resOff.route)
 	}
 }
 // TestGenerateFollowupGroupUsesBareBody checks the group-chat arm of the rewrite gate:
 // outside a DM the classifier's search_query is ignored and the fetch receives the bare
 // (sanitised) body, so a member's follow-up cannot ground the wrong prior subject.
 func TestGenerateFollowupGroupUsesBareBody(t *testing.T) {
 	const (
 		body    = "2024 года"
 		verdict = `{"needs_web":true,"verifiable":true,"entity_obscure":true,"search_query":"какой-то чужой фильм 2024","confidence":0.7}`
 		inDM    = false // group
 	)
 	voice := &fakeLLM{text: "voiced"}
 	classifier := &fakeLLM{text: verdict}
 	search := &fakeWeb{wc: WebContext{Digest: "d", Citations: []string{"http://s"}}}
 	conf := cascadeCfg()
 	conf.RouterEnabled = true
 	conf.RouterClassifierEnabled = true
 	conf.WebEnabled = true
 	conf.WebParanoid = true
 	bot := &Bot{cfg: &conf, llm: voice, gemini: classifier, web: search, log: discardLog()}
 	out, err := bot.generate(context.Background(), body, msgs(body), "", inDM)
 	if err != nil {
 		t.Fatalf("generate: %v", err)
 	}
 	if out.route != routeWebThenGrok {
 		t.Fatalf("group route = %q, want web_then_grok", out.route)
 	}
 	if out.rewriteUsed || search.lastQuery != body {
 		t.Fatalf("group must use the bare body, not the rewrite: rewriteUsed=%v lastQuery=%q", out.rewriteUsed, search.lastQuery)
 	}
 }
 // TestGenerateWebEmptySearchQueryFallsBackToBody covers the empty arm of the
 // rewrite-with-fallback contract (§6/§12). When a DM web route gets an empty
 // search_query from the classifier, the fetch must receive the bare (sanitised) body and
 // rewriteUsed must be false — an empty query is never sent.
 func TestGenerateWebEmptySearchQueryFallsBackToBody(t *testing.T) {
 	const (
 		body = "в каком году основан Рим"
 		// verifiable:true makes the verdict genuinely route to web; the empty
 		// search_query is the case under test.
 		verdict = `{"needs_web":true,"verifiable":true,"entity_obscure":false,"search_query":"","confidence":0.7}`
 		inDM    = true
 	)
 	voice := &fakeLLM{text: "voiced"}
 	classifier := &fakeLLM{text: verdict}
 	search := &fakeWeb{wc: WebContext{Digest: "d", Citations: []string{"http://s"}}}
 	conf := cascadeCfg()
 	conf.RouterEnabled = true
 	conf.RouterClassifierEnabled = true
 	conf.WebEnabled = true
 	conf.WebParanoid = true
 	bot := &Bot{cfg: &conf, llm: voice, gemini: classifier, web: search, log: discardLog()}
 	out, err := bot.generate(context.Background(), body, msgs(body), "", inDM)
 	if err != nil {
 		t.Fatalf("generate: %v", err)
 	}
 	if out.route != routeWebThenGrok {
 		t.Fatalf("route = %q, want web_then_grok", out.route)
 	}
 	if out.rewriteUsed || search.lastQuery != body {
 		t.Fatalf("empty search_query must fall back to the bare body: rewriteUsed=%v lastQuery=%q", out.rewriteUsed, search.lastQuery)
 	}
 }
 // TestGenerateFreshnessTrapDesignedWeb: a freshness lexeme in a rumination
 // ("сегодня…") still hard-routes to web (the accepted, designed cheap false-web, §14.1).
 func TestGenerateFreshnessTrapDesignedWeb(t *testing.T) {
 	grok := &fakeLLM{text: "x"}
 	web := &fakeWeb{wc: WebContext{Digest: "d", Citations: []string{"http://s"}}}
 	cfg := cascadeCfg()
 	cfg.RouterEnabled, cfg.WebEnabled = true, true // classifier off — freshness alone routes
 	b := &Bot{cfg: &cfg, llm: grok, web: web, log: discardLog()}
 	res, err := b.generate(context.Background(), "сегодня я думаю о смысле жизни", msgs("сегодня я думаю о смысле жизни"), "", true)
 	if err != nil {
 		t.Fatalf("generate: %v", err)
 	}
 	if res.route != routeWebThenGrok {
 		t.Fatalf("freshness rumination = %q, want web_then_grok (designed)", res.route)
 	}
 }
 // TestGenerateWebDegradeFactualAbstain: when the web fetch fails for a STATIC
 // verifiable fact, the degrade path must inject the factual-abstain hedge and not the
 // staleness caveat, so Grok abstains on names/dates instead of guessing confidently.
 func TestGenerateWebDegradeFactualAbstain(t *testing.T) {
 	const (
 		body       = "кто снимался в фильме дом у дороги"
 		classified = `{"needs_web":true,"verifiable":true,"entity_obscure":true,"time_sensitive":false,"search_query":"q","confidence":0.7}`
 	)
 	voice := &fakeLLM{text: "honest answer"}
 	conf := cascadeCfg()
 	conf.RouterEnabled = true
 	conf.RouterClassifierEnabled = true
 	conf.WebEnabled = true
 	conf.WebParanoid = true
 	bot := &Bot{
 		cfg:    &conf,
 		llm:    voice,
 		gemini: &fakeLLM{text: classified},
 		web:    &fakeWeb{err: errors.New("fetch boom")}, // every fetch fails
 		log:    discardLog(),
 	}
 	out, err := bot.generate(context.Background(), body, msgs(body), "", true)
 	if err != nil {
 		t.Fatalf("generate: %v", err)
 	}
 	degraded := out.route == routeGrokDirect && out.fallback
 	if !degraded {
 		t.Fatalf("res route=%q fallback=%v, want grok_direct fallback", out.route, out.fallback)
 	}
 	// Inspect the prompt that actually reached the voicing model.
 	sent := voice.lastReq.Messages
 	if !hedgeContains(sent, "Не удалось проверить") {
 		t.Fatalf("factual miss should use the abstain hedge; messages = %+v", sent)
 	}
 	if hedgeContains(sent, "устаревш") {
 		t.Fatalf("factual miss must NOT use the staleness hedge")
 	}
 }
 // TestFactualMissHedge checks which hedge the web-degrade path selects. Recency
 // signals (Freshness or time_sensitive) yield the staleness hedge
 // (factualMiss=false); static checkable-fact signals (verifiable / entity_obscure /
 // a non-recency needs_web) yield the abstain hedge (factualMiss=true).
 func TestFactualMissHedge(t *testing.T) {
 	type hedgeCase struct {
 		decision RouterDecision
 		abstain  bool // expected factualMiss(): true selects the abstain hedge
 	}
 	table := []hedgeCase{
 		{decision: RouterDecision{Freshness: "recent"}, abstain: false},
 		{decision: RouterDecision{TimeSensitive: true}, abstain: false},
 		{decision: RouterDecision{Verifiable: true}, abstain: true},
 		{decision: RouterDecision{EntityObscure: true}, abstain: true},
 		// off-spec needs_web-only → abstain (Q3)
 		{decision: RouterDecision{NeedsWeb: true}, abstain: true},
 		// recency still wins
 		{decision: RouterDecision{NeedsWeb: true, TimeSensitive: true}, abstain: false},
 		{decision: RouterDecision{}, abstain: false},
 	}
 	for i := range table {
 		tc := table[i]
 		got := tc.decision.factualMiss()
 		if got == tc.abstain {
 			continue
 		}
 		t.Errorf("factualMiss(%+v) = %v, want %v", tc.decision, got, tc.abstain)
 	}
 }
 // TestReserveEstimate pins the reservation envelope (§7): with every flag off it
 // equals grok_direct's estimate exactly; with gemini grounding and the classifier on
 // it also carries the per-prompt fee AND the always-on classifier leg.
 func TestReserveEstimate(t *testing.T) {
 	// Baseline: untouched cascade config.
 	plainCfg := cascadeCfg()
 	plain := &Bot{cfg: &plainCfg, log: discardLog()}
 	grokOnly := plain.estimateUSD("grok-x")
 	if got := plain.reserveEstimate(); !approxEq(got, grokOnly) {
 		t.Fatalf("flags-off reserve = %v, want grok_direct estimate %v", got, grokOnly)
 	}
 	// Everything on: web via gemini grounding, router + classifier, paid-tier fee.
 	const fee = 0.035
 	fullCfg := cascadeCfg()
 	fullCfg.WebEnabled = true
 	fullCfg.WebProvider = webProviderGeminiGrounding
 	fullCfg.RouterEnabled = true
 	fullCfg.RouterClassifierEnabled = true
 	fullCfg.GeminiGroundingPerPrompt = fee
 	full := &Bot{cfg: &fullCfg, log: discardLog()}
 	want := full.estimateUSD("grok-x") + full.estimateUSD("gemini-x") + fee + full.estimateUSD("gemini-x")
 	if got := full.reserveEstimate(); !approxEq(got, want) {
 		t.Fatalf("web+classifier reserve = %v, want %v (XAI + gemini fetch + $0.035 fee + classifier leg)", got, want)
 	}
 	// Regression guard for an unbooked fee: zeroing it must shrink the envelope.
 	freeCfg := fullCfg
 	freeCfg.GeminiGroundingPerPrompt = 0
 	free := &Bot{cfg: &freeCfg, log: discardLog()}
 	if delta := full.reserveEstimate() - free.reserveEstimate(); delta < 0.0349 {
 		t.Fatalf("the grounding fee must raise the reservation by ~0.035")
 	}
 }
@ -225,36 +448,33 @@ func TestClassifierConfidenceFloor(t *testing.T) {
 // be kept fast with "none"), empty means not sent (compat with grok-4.20-non-reasoning),
 // and the reason route always overrides to "high" regardless.
 func TestGrokReasoningEffort(t *testing.T) {
 	// Configured effort reaches grok_direct.
 	grok := &fakeLLM{text: "ok"}
 	cfg := cascadeCfg()
 	cfg.GrokReasoningEffort = "none"
 	b := &Bot{cfg: &cfg, llm: grok, log: discardLog()}
-	if _, err := b.generate(context.Background(), "hello", msgs("hello"), ""); err != nil {
+	if _, err := b.generate(context.Background(), "hello", msgs("hello"), "", true); err != nil {
 		t.Fatal(err)
 	}
 	if grok.lastReq.ReasoningEffort != "none" {
 		t.Fatalf("grok_direct effort = %q, want none", grok.lastReq.ReasoningEffort)
 	}
 	// Empty default → not sent (so grok-4.20-non-reasoning keeps working).
 	grokDef := &fakeLLM{text: "ok"}
 	cfgDef := cascadeCfg() // GrokReasoningEffort == ""
 	bDef := &Bot{cfg: &cfgDef, llm: grokDef, log: discardLog()}
-	if _, err := bDef.generate(context.Background(), "hello", msgs("hello"), ""); err != nil {
+	if _, err := bDef.generate(context.Background(), "hello", msgs("hello"), "", true); err != nil {
 		t.Fatal(err)
 	}
 	if grokDef.lastReq.ReasoningEffort != "" {
 		t.Fatalf("default effort = %q, want empty (not sent)", grokDef.lastReq.ReasoningEffort)
 	}
 	// The reason route ignores GROK_REASONING_EFFORT and always uses "high".
 	grokR := &fakeLLM{text: "deep"}
 	cfgR := cascadeCfg()
 	cfgR.GrokReasoningEffort = "none"
 	cfgR.ReasoningEnabled = true
 	bR := &Bot{cfg: &cfgR, llm: grokR, log: discardLog()}
-	if _, err := bR.generate(context.Background(), "подумай глубже про X", msgs("подумай глубже про X"), ""); err != nil {
+	if _, err := bR.generate(context.Background(), "подумай глубже про X", msgs("подумай глубже про X"), "", true); err != nil {
 		t.Fatal(err)
 	}
 	if grokR.lastReq.ReasoningEffort != "high" {
@ -269,7 +489,47 @@ func TestGenerateTerminalErrorPropagates(t *testing.T) {
 	cfg := cascadeCfg()
 	b := &Bot{cfg: &cfg, llm: grok, log: discardLog()}
-	if _, err := b.generate(context.Background(), "hello", msgs("hello"), ""); err == nil {
+	if _, err := b.generate(context.Background(), "hello", msgs("hello"), "", true); err == nil {
 		t.Fatal("want terminal error when grok_direct fails, got nil")
 	}
 }
 // TestWebSynthMessagesNoRawURLs is the regression guard for the source-leak fix. The
 // grounded digest has to be injected into the synth prompt, while the raw
 // gemini-grounding redirect URLs must stay out of it (Grok was pasting
 // vertexaisearch.../grounding-api-redirect/... links into the reply).
 func TestWebSynthMessagesNoRawURLs(t *testing.T) {
 	grounded := WebContext{
 		Digest:    "Титаник вышел в 1997, режиссёр Джеймс Кэмерон.",
 		Citations: []string{"https://vertexaisearch.cloud.google.com/grounding-api-redirect/AUZIYQabc123"},
 	}
 	synth := webSynthMessages(msgs("в каком году титаник"), grounded)
 	// Pick the system note carrying the fresh-data marker (the last one if several).
 	note := ""
 	for i := range synth {
 		if synth[i].Role != "system" {
 			continue
 		}
 		if strings.Contains(synth[i].Content, "Свежие данные") {
 			note = synth[i].Content
 		}
 	}
 	if note == "" {
 		t.Fatal("web synth note missing")
 	}
 	if !strings.Contains(note, "Титаник вышел в 1997") {
 		t.Fatalf("digest not injected: %q", note)
 	}
 	// None of these URL fragments may survive into the prompt.
 	for _, leak := range []string{"vertexaisearch", "grounding-api-redirect", "http"} {
 		if strings.Contains(note, leak) {
 			t.Fatalf("raw citation URL leaked into the synth prompt: %q", note)
 		}
 	}
 }
 // hedgeContains reports whether the Content of any message in ms contains sub.
 func hedgeContains(ms []Message, sub string) bool {
 	found := false
 	for i := 0; i < len(ms) && !found; i++ {
 		found = strings.Index(ms[i].Content, sub) >= 0
 	}
 	return found
 }
 // approxEq reports whether a and b differ by strictly less than 1e-9.
 func approxEq(a, b float64) bool {
 	const eps = 1e-9
 	gap := a - b
 	if gap < 0 {
 		gap = -gap
 	}
 	return gap < eps
 }
--- a/apps/ai-bot/cmd/routereval/golden_sample.json
+++ b/apps/ai-bot/cmd/routereval/golden_sample.json
@ -0,0 +1,227 @@
 [
  {
    "name": "road house first turn (obscure cast)",
    "message": "кто снимался в фильме дом у дороги",
    "verdict": {
      "needs_web": true,
      "verifiable": true,
      "entity_obscure": true,
      "time_sensitive": false,
      "trivial": false,
      "search_query": "Дом у дороги фильм актёрский состав",
      "confidence": 0.7
    },
    "expected_route": "web_then_grok",
    "factual": true
  },
  {
    "name": "road house follow-up (DM, resolved)",
    "message": "2024 года",
    "verdict": {
      "needs_web": true,
      "verifiable": true,
      "entity_obscure": true,
      "time_sensitive": false,
      "trivial": false,
      "search_query": "Дом у дороги 2024 фильм актёрский состав",
      "confidence": 0.65
    },
    "expected_route": "web_then_grok",
    "factual": true
  },
  {
    "name": "weather (freshness lexeme, forced web)",
    "message": "погода сегодня в Москве",
    "verdict": {
      "needs_web": true,
      "verifiable": false,
      "entity_obscure": false,
      "time_sensitive": true,
      "trivial": false,
      "search_query": "погода сегодня Москва",
      "confidence": 0.95
    },
    "expected_route": "web_then_grok",
    "factual": false
  },
  {
    "name": "freshness rumination (accepted designed false-web, §14.1)",
    "message": "сегодня я думаю о смысле жизни",
    "verdict": {
      "needs_web": false,
      "verifiable": false,
      "entity_obscure": false,
      "time_sensitive": false,
      "trivial": false,
      "search_query": "",
      "confidence": 0.2
    },
    "expected_route": "web_then_grok",
    "factual": false
  },
  {
    "name": "obscure entity founder (no freshness word)",
    "message": "кто основал компанию Acme Widgets",
    "verdict": {
      "needs_web": true,
      "verifiable": true,
      "entity_obscure": true,
      "time_sensitive": false,
      "trivial": false,
      "search_query": "Acme Widgets основатель компании",
      "confidence": 0.6
    },
    "expected_route": "web_then_grok",
    "factual": true
  },
  {
    "name": "static famous fact (author lookup)",
    "message": "кто написал войну и мир",
    "verdict": {
      "needs_web": true,
      "verifiable": true,
      "entity_obscure": false,
      "time_sensitive": false,
      "trivial": false,
      "search_query": "Война и мир автор",
      "confidence": 0.62
    },
    "expected_route": "web_then_grok",
    "factual": true
  },
  {
    "name": "current CEO (time-sensitive, sub-floor needs_web)",
    "message": "кто возглавляет Tesla",
    "verdict": {
      "needs_web": true,
      "verifiable": true,
      "entity_obscure": false,
      "time_sensitive": true,
      "trivial": false,
      "search_query": "Tesla CEO",
      "confidence": 0.5
    },
    "expected_route": "web_then_grok",
    "factual": false
  },
  {
    "name": "greeting (trivial, high confidence)",
    "message": "привет",
    "verdict": {
      "needs_web": false,
      "verifiable": false,
      "entity_obscure": false,
      "time_sensitive": false,
      "trivial": true,
      "search_query": "",
      "confidence": 0.95
    },
    "expected_route": "trivial_direct",
    "factual": false
  },
  {
    "name": "ack low-confidence trivial (no voice leak → grok)",
    "message": "спасибо",
    "verdict": {
      "needs_web": false,
      "verifiable": false,
      "entity_obscure": false,
      "time_sensitive": false,
      "trivial": true,
      "search_query": "",
      "confidence": 0.5
    },
    "expected_route": "grok_direct",
    "factual": false
  },
  {
    "name": "opinion / recommendation (safe floor)",
    "message": "посоветуй фильм на вечер",
    "verdict": {
      "needs_web": false,
      "verifiable": false,
      "entity_obscure": false,
      "time_sensitive": false,
      "trivial": false,
      "search_query": "",
      "confidence": 0.82
    },
    "expected_route": "grok_direct",
    "factual": false
  },
  {
    "name": "code help (safe floor)",
    "message": "напиши функцию сортировки на python",
    "verdict": {
      "needs_web": false,
      "verifiable": false,
      "entity_obscure": false,
      "time_sensitive": false,
      "trivial": false,
      "search_query": "",
      "confidence": 0.9
    },
    "expected_route": "grok_direct",
    "factual": false
  },
  {
    "name": "vague needs_web below floor (stays grok)",
    "message": "что ты думаешь о криптовалютах",
    "verdict": {
      "needs_web": true,
      "verifiable": false,
      "entity_obscure": false,
      "time_sensitive": false,
      "trivial": false,
      "search_query": "",
      "confidence": 0.4
    },
    "expected_route": "grok_direct",
    "factual": false
  },
  {
    "name": "explanation over-flagged needs_web but NOT verifiable (false-web fix)",
    "message": "объясни как работают горутины в Go",
    "verdict": {
      "needs_web": true,
      "verifiable": false,
      "entity_obscure": false,
      "time_sensitive": false,
      "trivial": false,
      "search_query": "",
      "confidence": 0.9
    },
    "expected_route": "grok_direct",
    "factual": false
  },
  {
    "name": "ack-prefixed long real question (not trivial, safe floor)",
    "message": "спасибо, а теперь подробно объясни квантовую запутанность",
    "verdict": {
      "needs_web": false,
      "verifiable": false,
      "entity_obscure": false,
      "time_sensitive": false,
      "trivial": false,
      "search_query": "",
      "confidence": 0.85
    },
    "expected_route": "grok_direct",
    "factual": false
  },
  {
    "name": "bare follow-up in a GROUP (no resolvable subject → grok)",
    "message": "2024 года",
    "verdict": {
      "needs_web": false,
      "verifiable": false,
      "entity_obscure": false,
      "time_sensitive": false,
      "trivial": false,
      "search_query": "",
      "confidence": 0.3
    },
    "expected_route": "grok_direct",
    "factual": false
  }
 ]
--- a/apps/ai-bot/cmd/routereval/main.go
+++ b/apps/ai-bot/cmd/routereval/main.go
@ -0,0 +1,188 @@
 // Command routereval is the OFFLINE router-replay harness for the §11 P1 gate. It reads
 // a golden set of (message, recorded classifier verdict, expected route, factual flag),
 // replays each item through the REAL decision functions (routedecide.ClassifyLayer0 +
 // CombineWithFloors — the same code package main uses, never a copy), and reports the
 // confusion matrix + the four P1 metrics: false-grok-on-factual (the lie metric),
 // false-web, trivial-leak, misroute. It is fully deterministic and needs no network: it
 // measures the ROUTING LAYER given a verdict, so you can sweep WEB_PARANOID and the
 // floors instantly. (Classifier accuracy itself is a separate LIVE check — §11 P2.)
 //
 // The lie label on the web path uses the citation-presence proxy by convention: a golden
 // item's `factual:true` + `expected_route:web_then_grok` marks "this MUST ground"; an
 // LLM-judge over query+answer is the higher-fidelity option to wire later (§14.6/§15).
 //
 // Usage:
 //
 //	go run ./cmd/routereval -golden cmd/routereval/golden_sample.json
 //	go run ./cmd/routereval -golden set.json -web-floor 0.7         # sweep the needs_web floor
 //
 // NOTE: golden_sample.json is labelled for the PRODUCTION config (paranoid ON) — its
 // expected_route values assume the epistemic web arms are active. Running -paranoid=false
 // against it is a what-if sweep that WILL report NO-GO (the entity facts fall to grok by
 // design); it is NOT a passing baseline. To evaluate the paranoid-off behaviour, label a
 // separate set whose expected_route reflects freshness-only web routing.
 package main
 import (
 	"encoding/json"
 	"flag"
 	"fmt"
 	"os"
 	rd "vojo.chat/ai-bot/internal/routedecide"
 )
 // goldenItem is one labelled row. Message drives the free Layer-0; Verdict is the
 // recorded classifier output; ExpectedRoute + Factual are the ground-truth labels.
 type goldenItem struct {
 	// Name is the human-readable label printed in the report. main also uses it to
 	// recognise the Road House regression pair: any item whose name contains
 	// "road house" (ASCII case-insensitive) must route as expected.
 	Name          string     `json:"name"`
 	// Message is the raw user text fed to rd.ClassifyLayer0.
 	Message       string     `json:"message"`
 	// Verdict is replayed as-is into rd.CombineWithFloors; no live classifier call
 	// is made by this tool.
 	Verdict       rd.Verdict `json:"verdict"`
 	// ExpectedRoute is compared for exact string equality against the resolved route.
 	ExpectedRoute string     `json:"expected_route"`
 	// Factual items whose ExpectedRoute is the web route form the denominator of the
 	// false-grok-on-factual metric.
 	Factual       bool       `json:"factual"` // a checkable-fact query that MUST ground
 }
 // main replays a golden set through the real routing functions and prints the §11 P1
 // metrics.
 //
 // Flow: parse flags → read and decode the golden JSON → for every item resolve the
 // route with rd.ClassifyLayer0 + rd.CombineWithFloors → accumulate the four metric
 // counters and the Road House regression flag → print the report.
 //
 // Exit codes: 2 when the golden set cannot be read, cannot be parsed, or is empty;
 // 1 when any gate fails (NO-GO); 0 otherwise (GO).
 func main() {
 	goldenPath := flag.String("golden", "cmd/routereval/golden_sample.json", "path to the golden-set JSON")
 	paranoid := flag.Bool("paranoid", true, "apply the WEB_PARANOID classifier-driven web arms")
 	webFloor := flag.Float64("web-floor", rd.WebNeedsWebFloor, "needs_web confidence floor to sweep")
 	trivialFloor := flag.Float64("trivial-floor", rd.TrivialFloor, "trivial confidence floor")
 	verbose := flag.Bool("v", false, "print every item, not just the mismatches")
 	flag.Parse()
 	raw, err := os.ReadFile(*goldenPath)
 	if err != nil {
 		fmt.Fprintf(os.Stderr, "read golden set: %v\n", err)
 		os.Exit(2)
 	}
 	var items []goldenItem
 	if err := json.Unmarshal(raw, &items); err != nil {
 		fmt.Fprintf(os.Stderr, "parse golden set: %v\n", err)
 		os.Exit(2)
 	}
 	// An empty set would make every rate 0 and report a vacuous GO; refuse it.
 	if len(items) == 0 {
 		fmt.Fprintln(os.Stderr, "golden set is empty")
 		os.Exit(2)
 	}
 	floors := rd.Floors{WebNeedsWeb: *webFloor, Trivial: *trivialFloor}
 	fmt.Printf("routereval: %d items | paranoid=%v web-floor=%.2f trivial-floor=%.2f\n\n",
 		len(items), *paranoid, *webFloor, *trivialFloor)
 	var (
 		correct                         int
 		factualWeb, factualWebMissed    int // denominator/numerator of false-grok-on-factual
 		nonWebExpected, falseWeb        int
 		nonTrivialExpected, trivialLeak int
 	)
 	roadHouseSeen := false
 	roadHousePass := true
 	for _, it := range items {
 		l0 := rd.ClassifyLayer0(it.Message)
 		got := rd.CombineWithFloors(l0, it.Verdict, *paranoid, floors).Route
 		ok := got == it.ExpectedRoute
 		if ok {
 			correct++
 		}
 		if it.Factual && it.ExpectedRoute == rd.RouteWeb {
 			factualWeb++
 			if got == rd.RouteGrokDirect {
 				factualWebMissed++ // a confident-lie risk: a checkable fact answered from memory
 			}
 		}
 		if it.ExpectedRoute != rd.RouteWeb {
 			nonWebExpected++
 			if got == rd.RouteWeb {
 				falseWeb++
 			}
 		}
 		if it.ExpectedRoute != rd.RouteTrivial {
 			nonTrivialExpected++
 			if got == rd.RouteTrivial {
 				trivialLeak++
 			}
 		}
 		// The Road House regression pair must pass (its name carries "road house").
 		if contains(it.Name, "road house") {
 			roadHouseSeen = true
 			if !ok {
 				roadHousePass = false
 			}
 		}
 		if *verbose || !ok {
 			// Named mark, not flag: a local called flag would shadow the flag package
 			// for the rest of this scope.
 			mark := "ok "
 			if !ok {
 				mark = "MISS"
 			}
 			fmt.Printf("  [%s] %-40s want=%-16s got=%-16s\n", mark, trunc(it.Name, 40), it.ExpectedRoute, got)
 		}
 	}
 	// rate is num/den, with a zero denominator reported as 0 (the gate then passes
 	// vacuously for a metric that has no applicable items).
 	rate := func(num, den int) float64 {
 		if den == 0 {
 			return 0
 		}
 		return float64(num) / float64(den)
 	}
 	misroute := 1 - rate(correct, len(items))
 	lie := rate(factualWebMissed, factualWeb)
 	fw := rate(falseWeb, nonWebExpected)
 	leak := rate(trivialLeak, nonTrivialExpected)
 	fmt.Printf("\n— metrics (§11 P1 gates) —\n")
 	fmt.Printf("  false-grok-on-FACTUAL : %5.1f%%  (%d/%d)   gate < 5%%   %s\n", lie*100, factualWebMissed, factualWeb, pass(lie < 0.05))
 	fmt.Printf("  false-web             : %5.1f%%  (%d/%d)   gate ≤ 15%%  %s\n", fw*100, falseWeb, nonWebExpected, pass(fw <= 0.15))
 	fmt.Printf("  trivial-leak          : %5.1f%%  (%d/%d)   gate ~ 0%%   %s\n", leak*100, trivialLeak, nonTrivialExpected, pass(leak == 0))
 	fmt.Printf("  misroute              : %5.1f%%  (%d/%d)   gate < 3%%   %s\n", misroute*100, len(items)-correct, len(items), pass(misroute < 0.03))
 	if roadHouseSeen {
 		fmt.Printf("  road-house pair       : %s\n", pass(roadHousePass))
 	}
 	// Exit non-zero if any gate fails, so the harness is CI/owner-runnable as a go/no-go.
 	// The thresholds here must stay in step with the pass(...) conditions printed above.
 	if lie >= 0.05 || fw > 0.15 || leak > 0 || misroute >= 0.03 || (roadHouseSeen && !roadHousePass) {
 		fmt.Println("\nRESULT: NO-GO (a P1 gate failed)")
 		os.Exit(1)
 	}
 	fmt.Println("\nRESULT: GO")
 }
 // pass renders a gate outcome as the report token: "PASS" when ok, "FAIL" otherwise.
 func pass(ok bool) string {
 	verdict := "FAIL"
 	if ok {
 		verdict = "PASS"
 	}
 	return verdict
 }
 // contains reports whether sub occurs in s, ignoring ASCII letter case. An empty sub
 // always matches.
 func contains(s, sub string) bool {
 	if sub == "" {
 		return true
 	}
 	return indexFold(s, sub) >= 0
 }
 // indexFold returns the byte offset of the first occurrence of sub in s, comparing
 // after toLower has been applied to both, or -1 when there is none. It is hand-rolled
 // so this small tool does not import strings just for ToLower+Index.
 func indexFold(s, sub string) int {
 	hay, needle := toLower(s), toLower(sub)
 	width := len(needle)
 	// last is the final offset at which needle still fits inside hay.
 	last := len(hay) - width
 	for at := 0; at <= last; at++ {
 		if hay[at:at+width] == needle {
 			return at
 		}
 	}
 	return -1
 }
 // toLower lower-cases the ASCII letters A–Z in s byte by byte; every other byte
 // (including the bytes of multi-byte UTF-8 sequences) is copied through unchanged.
 func toLower(s string) string {
 	out := make([]byte, len(s))
 	for i := 0; i < len(s); i++ {
 		ch := s[i]
 		if ch >= 'A' && ch <= 'Z' {
 			ch += 'a' - 'A'
 		}
 		out[i] = ch
 	}
 	return string(out)
 }
 // trunc shortens s to at most n runes for column-aligned output. A string that
 // already fits is returned unchanged; otherwise the first n-1 runes are kept and a
 // single "…" is appended, so the result is exactly n runes long. It counts runes,
 // not bytes, so multi-byte text is never cut mid-character. A non-positive n yields
 // "" (there is no room even for the ellipsis) instead of slicing with a negative bound.
 func trunc(s string, n int) string {
 	if n <= 0 {
 		return ""
 	}
 	r := []rune(s)
 	if len(r) <= n {
 		return s
 	}
 	return string(r[:n-1]) + "…"
 }
--- a/apps/ai-bot/config.go
+++ b/apps/ai-bot/config.go
@ -109,15 +109,28 @@ type Config struct {
 	// still goes to Grok.
 	TrivialOffloadEnabled bool
 	// WebEnabled turns on the web_then_grok route. WebProvider selects the source:
-	// grok_web_search (default, works on chat/completions via Live Search) or
+	// grok_web_search (default, the xAI web_search tool on the Responses API) or
-	// gemini_grounding (Gemini-3 native only — see F-EXT-3).
+	// gemini_grounding (native v1beta google_search — current models incl. 2.5; the
 	// F-EXT-3 caveat is OpenAI-compat-only, not a model-version limit).
 	WebEnabled  bool
 	WebProvider string
 	// WebParanoid biases the router toward grounding: beyond freshnessRe, it unlocks the
 	// classifier-driven web arms (needs_web≥0.55, entity_obscure, time_sensitive,
 	// lookupHint && verifiable). Off (default) → web routing is freshness-only (today's
 	// behaviour), so enabling the classifier is web-routing-neutral and this is the single
 	// switch that activates epistemic grounding (§3/§15). Requires gemini_grounding.
 	WebParanoid bool
 	// WebGroundingDailyCap caps grounded prompts/day (durable counter) before falling
-	// back, guarding the $/1k grounding overage. WebGroundingTier records the Gemini
+	// back, guarding the $/1k grounding overage. Keep it below the daily grounding
 	// quota of the Gemini plan in use.
 	WebGroundingDailyCap int
-	WebGroundingTier     string
+	// WebGroundingTier is a documentation-only label of which Gemini plan the operator is
 	// on; it is NOT read by any logic. The money knob is GeminiGroundingPerPrompt
 	// (GEMINI_GROUNDING_PER_PROMPT_USD) — that is what the ledger/ceiling actually use.
 	WebGroundingTier string
 	// GeminiGroundingPerPrompt is the per-grounded-prompt FEE booked into the ledger so the
 	// daily ceiling sees it (§7 SG1). Default 0.035 (the paid-tier $35/1k overage); set 0
 	// ONLY when genuinely on the free grounded-prompt tier. Booked even on the error return.
 	GeminiGroundingPerPrompt float64
 	// Reasoning route: a manual "think harder" trigger. ReasoningModel must be a
 	// reasoning-capable model (the default grok-4.20-non-reasoning is NOT — see the
 	// docs.x.ai finding); set REASONING_MODEL to e.g. grok-4.3 to use it.
@ -378,6 +391,7 @@ func LoadConfig() (*Config, error) {
 		{"ROUTER_CLASSIFIER_ENABLED", &cfg.RouterClassifierEnabled},
 		{"TRIVIAL_OFFLOAD_ENABLED", &cfg.TrivialOffloadEnabled},
 		{"WEB_ENABLED", &cfg.WebEnabled},
 		{"WEB_PARANOID", &cfg.WebParanoid},
 		{"REASONING_ENABLED", &cfg.ReasoningEnabled},
 	} {
 		if *f.dest, err = getenvBool(f.key, false); err != nil {
@ -387,6 +401,11 @@ func LoadConfig() (*Config, error) {
 	if cfg.WebGroundingDailyCap, err = getenvInt("WEB_GROUNDING_DAILY_CAP", 450); err != nil {
 		problems = append(problems, err.Error())
 	}
 	// The per-grounded-prompt fee booked into the ledger (§7 SG1). Default 0.035 (paid
 	// tier). An operator on the free tier sets 0 deliberately.
 	if cfg.GeminiGroundingPerPrompt, err = getenvFloat("GEMINI_GROUNDING_PER_PROMPT_USD", 0.035); err != nil {
 		problems = append(problems, err.Error())
 	}
 	if cfg.CanaryPercent, err = getenvInt("CANARY_PERCENT", 0); err != nil {
 		problems = append(problems, err.Error())
 	}
@ -428,6 +447,17 @@ func LoadConfig() (*Config, error) {
 		problems = append(problems, fmt.Sprintf("WEB_PROVIDER must be %q or %q, got %q",
 			webProviderGrokWebSearch, webProviderGeminiGrounding, cfg.WebProvider))
 	}
 	// §7 SG3: paranoid web requires gemini_grounding. grok_web_search has no daily cap and
 	// costs 10–18× per request — letting the paranoid bias drive it would only be backstopped
 	// by the $10 ceiling. Refuse to boot (consistent with the other fail-fast blocks).
 	if cfg.WebEnabled && cfg.WebParanoid && cfg.WebProvider == webProviderGrokWebSearch {
 		problems = append(problems, "WEB_PARANOID requires WEB_PROVIDER=gemini_grounding (grok_web_search has no daily cap and is far costlier)")
 	}
 	// §7 SG5: a non-positive grounding cap silently disables grounding (IncrGroundingIfUnder
 	// denies everything), so every query would degrade — refuse it for gemini_grounding.
 	if cfg.WebEnabled && cfg.WebProvider == webProviderGeminiGrounding && cfg.WebGroundingDailyCap <= 0 {
 		problems = append(problems, "WEB_GROUNDING_DAILY_CAP must be > 0 for gemini_grounding (a non-positive cap silently disables grounding)")
 	}
 	if cfg.ReasoningEnabled && cfg.ReasoningModel == "" {
 		problems = append(problems, "REASONING_MODEL is required when REASONING_ENABLED is set")
 	}
@ -524,9 +554,10 @@ func (c *Config) Summary() string {
 				}
 				return strings.Join(bodyUsers, ",")
 			}()),
-		fmt.Sprintf("  CASCADE: router=%t classifier=%t trivial=%t web=%t(%s, cap=%d) reason=%t(%s)",
+		fmt.Sprintf("  CASCADE: router=%t classifier=%t trivial=%t web=%t(%s, paranoid=%t, cap=%d, fee=$%g/prompt) reason=%t(%s)",
 			c.RouterEnabled, c.RouterClassifierEnabled, c.TrivialOffloadEnabled,
-			c.WebEnabled, c.WebProvider, c.WebGroundingDailyCap, c.ReasoningEnabled, c.ReasoningEffort),
+			c.WebEnabled, c.WebProvider, c.WebParanoid, c.WebGroundingDailyCap,
 			c.GeminiGroundingPerPrompt, c.ReasoningEnabled, c.ReasoningEffort),
 		"  GEMINI_MODEL       = " + c.GeminiModel,
 		"  GEMINI_API_KEY     = " + redact(c.GeminiAPIKey),
 	}, "\n")
--- a/apps/ai-bot/config_test.go
+++ b/apps/ai-bot/config_test.go
@ -20,6 +20,7 @@ func setBaseEnv(t *testing.T) {
 	for _, k := range []string{
 		"GEMINI_API_KEY", "GEMINI_API_KEY_FILE", "ROUTER_ENABLED", "ROUTER_CLASSIFIER_ENABLED",
 		"TRIVIAL_OFFLOAD_ENABLED", "WEB_ENABLED", "REASONING_ENABLED", "WEB_PROVIDER", "REASONING_MODEL",
 		"WEB_PARANOID", "WEB_GROUNDING_DAILY_CAP", "GEMINI_GROUNDING_PER_PROMPT_USD",
 	} {
 		t.Setenv(k, "")
 	}
@ -96,3 +97,47 @@ func TestConfigWebGeminiGroundingNeedsKey(t *testing.T) {
 		t.Fatalf("want GEMINI_API_KEY error, got %v", err)
 	}
 }
 // §7 SG3: WEB_PARANOID combined with the uncapped grok_web_search provider has to be
 // rejected at load time; the same flag with gemini_grounding (plus a key) is accepted.
 func TestConfigParanoidRequiresGeminiGrounding(t *testing.T) {
 	setBaseEnv(t)
 	// Phase 1: provider left unset, so the default (grok_web_search) applies.
 	for _, kv := range [][2]string{
 		{"WEB_ENABLED", "true"},
 		{"WEB_PARANOID", "true"},
 	} {
 		t.Setenv(kv[0], kv[1])
 	}
 	_, err := LoadConfig()
 	rejected := err != nil && strings.Contains(err.Error(), "WEB_PARANOID")
 	if !rejected {
 		t.Fatalf("want WEB_PARANOID error on grok_web_search, got %v", err)
 	}
 	// Phase 2: switch to the capped provider and supply its key.
 	t.Setenv("WEB_PROVIDER", webProviderGeminiGrounding)
 	t.Setenv("GEMINI_API_KEY", "gk")
 	_, err = LoadConfig()
 	if err != nil {
 		t.Fatalf("paranoid + gemini_grounding should be valid: %v", err)
 	}
 }
 // §7 SG5: with gemini_grounding selected, a non-positive WEB_GROUNDING_DAILY_CAP
 // would silently disable grounding, so the loader must refuse it.
 func TestConfigGeminiGroundingCapMustBePositive(t *testing.T) {
 	setBaseEnv(t)
 	for _, kv := range [][2]string{
 		{"WEB_ENABLED", "true"},
 		{"WEB_PROVIDER", webProviderGeminiGrounding},
 		{"GEMINI_API_KEY", "gk"},
 		{"WEB_GROUNDING_DAILY_CAP", "0"},
 	} {
 		t.Setenv(kv[0], kv[1])
 	}
 	_, err := LoadConfig()
 	rejected := err != nil && strings.Contains(err.Error(), "WEB_GROUNDING_DAILY_CAP")
 	if !rejected {
 		t.Fatalf("want WEB_GROUNDING_DAILY_CAP error, got %v", err)
 	}
 }
 // With nothing overridden, the per-prompt grounding fee defaults to the paid-tier
 // $0.035 (an operator has to opt into 0) and WEB_PARANOID defaults to off.
 func TestConfigGroundingFeeDefault(t *testing.T) {
 	setBaseEnv(t)
 	loaded, err := LoadConfig()
 	if err != nil {
 		t.Fatalf("%v", err)
 	}
 	if fee := loaded.GeminiGroundingPerPrompt; fee != 0.035 {
 		t.Fatalf("GEMINI_GROUNDING_PER_PROMPT_USD default = %v, want 0.035", fee)
 	}
 	if paranoid := loaded.WebParanoid; paranoid {
 		t.Fatal("WEB_PARANOID must default off")
 	}
 }
--- a/apps/ai-bot/context.go
+++ b/apps/ai-bot/context.go
@ -47,6 +47,65 @@ func buildContext(system string, history []bufferedMsg, isDM bool, triggerBody s
 	return truncateToTokens(msgs, maxTokens)
 }
 // routerContextMaxRunes caps each line fed to the classifier/rewrite so a long buffered
 // turn can't blow the router's token budget; ~200 runes is plenty to resolve a follow-up.
 // The limit is counted in runes (not bytes), so truncation never splits a multi-byte
 // character, and it applies to the message text only — not to the speaker label.
 const routerContextMaxRunes = 200
 // routerContext renders the privacy-minimised conversation window that the Layer-1
 // classifier and the follow-up rewrite read. It draws ONLY on the already-minimised
 // msgs — a strict subset of what the final Grok call sees, so it adds no new privacy
 // surface (§6). A leading system message is dropped before the window is chosen.
 //
 //   - DM (isDM=true): the window opens at the second most recent assistant turn and
 //     runs to the end, so a bare follow-up such as "2024 года" carries the earlier
 //     film name into search_query. With fewer than two assistant turns the whole
 //     conversation is used.
 //   - GROUP (isDM=false): only the final message. The per-(room,thread) buffer
 //     interleaves different members' topics (it is keyed by room+thread, not sender),
 //     so resolving a follow-up against earlier turns could ground a confidently-wrong
 //     answer about the WRONG subject.
 //
 // Each message becomes one "BOT: …" (assistant) or "USER: …" (any other role) line;
 // the text is whitespace-trimmed, skipped when empty, and cut to
 // routerContextMaxRunes runes. Lines are newline-separated with trailing newlines
 // removed. Returns "" when there is nothing to send.
 func routerContext(msgs []Message, isDM bool) string {
 	turns := msgs
 	if len(turns) > 0 && turns[0].Role == "system" {
 		turns = turns[1:]
 	}
 	if len(turns) == 0 {
 		return ""
 	}
 	// Group default: just the last message.
 	first := len(turns) - 1
 	if isDM {
 		// Scan backwards for the second assistant turn; if it never shows up the
 		// window covers everything.
 		first = 0
 		bots := 0
 		for i := len(turns) - 1; i >= 0; i-- {
 			if turns[i].Role != "assistant" {
 				continue
 			}
 			bots++
 			if bots == 2 {
 				first = i
 				break
 			}
 		}
 	}
 	lines := make([]string, 0, len(turns)-first)
 	for _, turn := range turns[first:] {
 		body := strings.TrimSpace(turn.Content)
 		if body == "" {
 			continue
 		}
 		if runes := []rune(body); len(runes) > routerContextMaxRunes {
 			body = string(runes[:routerContextMaxRunes])
 		}
 		prefix := "USER: "
 		if turn.Role == "assistant" {
 			prefix = "BOT: "
 		}
 		lines = append(lines, prefix+body)
 	}
 	// The rune cut can leave a newline at the very end of the last line; strip it.
 	return strings.TrimRight(strings.Join(lines, "\n"), "\n")
 }
 // estimateTokens is a cheap upper-ish heuristic (~4 chars/token + per-message
 // overhead). Used only to bound request size, not for billing (billing reads the
 // API's returned usage).
--- a/apps/ai-bot/internal/routedecide/routedecide.go
+++ b/apps/ai-bot/internal/routedecide/routedecide.go
@ -0,0 +1,193 @@
 // Package routedecide is the PURE, importable core of the AI-bot router: the free
 // Layer-0 regex pre-classification and the Layer-0+classifier combine. It holds no
 // I/O, no vendor clients, no Bot/Config — only the decision math — so two callers can
 // share exactly one decision function:
 //
 //   - package main (router.go) parses the live Gemini classifier JSON into a Verdict,
 //     then calls Combine to resolve the route;
 //   - cmd/routereval replays a golden set of recorded Verdicts through the same
 //     ClassifyLayer0 + Combine to measure misroute / false-web / trivial-leak offline.
 //
 // Go forbids importing package main, so this core had to live in its own package for
 // the offline harness to exercise the REAL routing logic instead of a drift-prone copy.
 package routedecide
 import (
 	"regexp"
 	"strings"
 )
 // Route names — the canonical wire/log/request_log tokens. package main aliases these
 // (telemetry.go) so there is a single source of truth for the strings.
 const (
 	// RouteTrivial is chosen for a short greeting/ack or bare arithmetic.
 	RouteTrivial    = "trivial_direct"
 	// RouteGrokDirect is the fallback route when no web or trivial condition holds.
 	RouteGrokDirect = "grok_direct"
 	// RouteWeb is chosen on a freshness hit or, under paranoid mode, a classifier web arm.
 	RouteWeb        = "web_then_grok"
 	// RouteReason is declared for completeness; nothing in this package returns it.
 	// NOTE(review): confirm which caller, if any, emits this route.
 	RouteReason     = "reason_then_grok"
 )
 // Confidence floors the combine uses. These are the values the offline eval (§11)
 // tunes; keeping them here lets cmd/routereval sweep them without touching main.
 //
 //   - WebNeedsWebFloor: a classifier needs_web verdict must clear this to route to web
 //     (paranoid-low — grounding is cheap, a confident wrong fact is not).
 //   - TrivialFloor: the bar a trivial offload must clear (conservative — a false trivial
 //     leaks a real question to the cheap model).
 //
 // Both are compared with >=, so a confidence exactly equal to the floor passes.
 const (
 	// WebNeedsWebFloor is the default for Floors.WebNeedsWeb.
 	WebNeedsWebFloor = 0.55
 	// TrivialFloor is the default for Floors.Trivial.
 	TrivialFloor     = 0.85
 )
 // Floors are the two confidence thresholds Combine applies, parameterised so the offline
 // eval (cmd/routereval) can SWEEP them over a golden set without recompiling. Production
 // uses DefaultFloors (the consts above).
 type Floors struct {
 	// WebNeedsWeb is the minimum Verdict.Confidence (inclusive) for the needs_web arm.
 	WebNeedsWeb float64
 	// Trivial is the minimum Verdict.Confidence (inclusive) for the trivial agreement gate.
 	Trivial     float64
 }
 // DefaultFloors returns the production thresholds: WebNeedsWebFloor for the needs_web
 // arm and TrivialFloor for the trivial agreement gate.
 func DefaultFloors() Floors {
 	var f Floors
 	f.WebNeedsWeb = WebNeedsWebFloor
 	f.Trivial = TrivialFloor
 	return f
 }
 // web_decided_by attribution tokens (request_log.web_decided_by). Stable so analytics
 // can GROUP BY them and tune WebNeedsWebFloor from data.
 const (
 	// WebByNone marks every non-web outcome (trivial and grok_direct).
 	WebByNone       = "none"
 	// WebByFreshness marks a Layer-0 freshness hit (Layer0.WebForce).
 	WebByFreshness  = "freshness"
 	// WebByNeedsWeb marks needs_web && verifiable with confidence at or above the floor.
 	WebByNeedsWeb   = "classifier_needs_web"
 	// WebByObscure marks the classifier's entity_obscure flag.
 	WebByObscure    = "entity_obscure"
 	// WebByTime marks the classifier's time_sensitive flag.
 	WebByTime       = "time_sensitive"
 	// WebByLookupHint marks a Layer-0 lookup hint combined with classifier verifiable.
 	WebByLookupHint = "lookup_hint"
 )
 // Verdict is the classifier's parsed JSON output (§4.1). The json tags match the
 // classifier schema exactly, so both routeLayer1 (live classifier reply) and
 // cmd/routereval (recorded golden verdicts) unmarshal straight into it. Confidence is
 // the model's honest certainty in needs_web; it doubles as the trivial-gate threshold
 // (a clear greeting is high-certainty-no-web, so the gate passes).
 //
 // The zero value carries no signal: combined with a non-freshness, non-trivial Layer0
 // it resolves to grok_direct.
 type Verdict struct {
 	// NeedsWeb only routes to web together with Verifiable and a Confidence at/above the floor.
 	NeedsWeb      bool    `json:"needs_web"`
 	// Verifiable gates both the needs_web arm and the lookup-hint arm.
 	Verifiable    bool    `json:"verifiable"`
 	// EntityObscure routes to web on its own under paranoid mode, whatever the Confidence.
 	EntityObscure bool    `json:"entity_obscure"`
 	// TimeSensitive routes to web on its own under paranoid mode, whatever the Confidence.
 	TimeSensitive bool    `json:"time_sensitive"`
 	// Trivial is one half of the trivial agreement gate (the other is Layer0.Trivial).
 	Trivial       bool    `json:"trivial"`
 	// SearchQuery is carried through for the caller; the combine in this package never reads it.
 	SearchQuery   string  `json:"search_query"`
 	// Confidence is compared against Floors.WebNeedsWeb and Floors.Trivial.
 	Confidence    float64 `json:"confidence"`
 }
 // Layer0 is the free-regex pre-classification result. Route is the verdict when the
 // classifier is OFF; WebForce/Trivial/LookupHint feed the Combine when it is ON.
 //
 // ClassifyLayer0 sets at most one of WebForce and Trivial (freshness is checked first),
 // while LookupHint is computed independently and may accompany any Route.
 type Layer0 struct {
 	Route      string // RouteWeb (freshness) | RouteTrivial | RouteGrokDirect
 	WebForce   bool   // freshnessRe hit — a HARD web signal (survives the classifier being down)
 	Trivial    bool   // a trivial candidate (greeting/ack/bare arithmetic)
 	LookupHint bool   // lookupIntentRe hit — a SOFT hint only (never sets the route)
 	Freshness  string // "recent" when WebForce, else ""
 }
 // Heuristic patterns. Kept deliberately tight. Freshness words route to web (a false
 // web-route only costs a fetch and degrades cleanly). Trivial fires only on short,
 // unmistakable greetings/acks or bare arithmetic.
 //
 // All patterns are compiled once at package scope and are matched against the
 // lowercased, trimmed message body produced by ClassifyLayer0.
 var (
 	greetingRe   = regexp.MustCompile(`^(привет(ик)?|здравствуй(те)?|хай|прив|ку|добрый\s+(день|вечер|утро)|спасибо|спс|благодарю|пока|ок(ей)?|угу|ага|hello|hi|hey|yo|thanks|thank\s+you|thx|ty|bye|goodbye|ok|okay|cool|nice)[\s!.,)]*$`)
 	arithmeticRe = regexp.MustCompile(`^[\s(]*\d+(\s*[-+*/×÷]\s*\d+)+[\s)=?]*$`)
 	// Russian tokens are deliberately STEM matches (новост→новости/новостей, погод→погода…)
 	// so they stay un-anchored. English standalone tokens are \b-anchored so they fire on
 	// whole words only — not inside scoreboard / concurrent / weathering / newsletter (a
 	// pre-existing false-web source; \b removes that pointless grounding spend). RE2's \b is
 	// ASCII-word-based, so it is used only around the ASCII tokens, never the Cyrillic stems.
 	freshnessRe = regexp.MustCompile(`(новост|сегодня|сейчас|последн|курс\s|погод|котировк|расписани|прогноз|\bbreaking\b|\btoday\b|\bright now\b|\blatest\b|\bcurrent(ly)?\b|\bnews\b|\bweather\b|\bstock price\b|\bexchange rate\b|\bscore\b)`)
 	// lookupIntentReRU / lookupIntentReEN — SOFT HINT ONLY (§5): they raise the
 	// classifier's needs_web prior via the lookupHint && verifiable arm; they must NEVER
 	// set the route. Anchored on interrogative + lookup-verb so they fire on lookup
 	// INTENT, not entity presence. Deliberately leaky (false negatives are caught by the
 	// classifier, the real safety net). Do NOT add a capitalised-word or guillemet branch —
 	// those false-positive on greetings/idioms ("Привет, Москва!", "«Война и мир» — топ",
 	// "ну ты прям Эйнштейн"). The leading [\s«"„(] class is only an OPTIONAL left
 	// boundary, never a trigger.
 	//
 	// Named in MixedCaps (no underscores) per Go naming convention.
 	lookupIntentReRU = regexp.MustCompile(`(?i)(^|[\s«"„(])(кто\s+(так(ой|ая|ие)|снимал(ся|ась|ись)|играл|написал|основал|изобрёл|изобрел|режисс[её]р|автор)|в\s+как(ом|ой)\s+(год[уе]|фильм[еа]|сериал[еа]|книг[еи]|игр[еы])|когда\s+(вышел|вышла|вышло|выйдет|основан[аы]?|родил(ся|ась)|умер(ла)?|состоял(ся|ась)|был[аои]?\s+выпущен)|в\s+каком\s+году|сколько\s+(лет|стоит\s+бил|серий|сезонов|эпизодов)|чем\s+(закончил|известен|знаменит))`)
 	lookupIntentReEN = regexp.MustCompile(`(?i)(^|[\s"'(])(who\s+(is|are|was|were|starred|played|directed|wrote|founded|invented|created)\s|in\s+(what|which)\s+(year|film|movie|show|series|book|game)\b|when\s+(did|was|were|does|is)\b.*\b(release|released|come\s+out|came\s+out|born|die|died|found|founded|launch|launched|air|aired)\b|what\s+year\b|how\s+many\s+(seasons|episodes|films|movies|books))`)
 )
 // ClassifyLayer0 runs the free heuristic over a message body. The result drives routing
 // only when the classifier is off; when it is on, WebForce/Trivial/LookupHint feed
 // Combine. Empty body → grok_direct (the safe floor).
 //
 // The body is trimmed and lowercased before matching. Precedence is freshness first,
 // then trivial, then grok_direct; LookupHint is computed independently of the route
 // and is reported alongside whichever route wins.
 func ClassifyLayer0(body string) Layer0 {
 	s := strings.ToLower(strings.TrimSpace(body))
 	if s == "" {
 		return Layer0{Route: RouteGrokDirect}
 	}
 	l0 := Layer0{
 		Route:      RouteGrokDirect,
 		LookupHint: lookupIntentReRU.MatchString(s) || lookupIntentReEN.MatchString(s),
 	}
 	switch {
 	case freshnessRe.MatchString(s):
 		// Hard web signal: wins over a trivial match on the same body.
 		l0.Route, l0.WebForce, l0.Freshness = RouteWeb, true, "recent"
 	case IsTrivial(s):
 		l0.Route, l0.Trivial = RouteTrivial, true
 	}
 	return l0
 }
 // IsTrivial reports whether s is a bare arithmetic expression or a short greeting/ack
 // with no sign of a real question. The greeting arm is bounded to four whitespace-
 // separated fields so "thanks, now explain quantum tunnelling" is NOT trivial; the
 // arithmetic arm has no length bound. s must already be lowercased and trimmed (as
 // ClassifyLayer0 does): the greeting pattern is lowercase-anchored, so raw input with
 // capitals will not match.
 func IsTrivial(s string) bool {
 	switch {
 	case arithmeticRe.MatchString(s):
 		return true
 	case len(strings.Fields(s)) > 4:
 		// Too many words to be a plain greeting, whatever it starts with.
 		return false
 	default:
 		return greetingRe.MatchString(s)
 	}
 }
 // Combined is the resolved route plus its web attribution (for request_log).
 type Combined struct {
 	// Route is RouteWeb, RouteTrivial or RouteGrokDirect.
 	Route        string
 	// WebDecidedBy is a WebBy* token naming the web arm that fired; WebByNone otherwise.
 	WebDecidedBy string
 }
 // Combine resolves the Layer-0 decision + the classifier Verdict into the final route.
 // It is the router's brain and it never blindly trusts the model:
 //
 //   - freshnessRe (WebForce) is a HARD web signal, always honoured (it survives the
 //     classifier being down).
 //   - Every OTHER web arm (the classifier's needs_web≥floor AND verifiable,
 //     entity_obscure, time_sensitive, lookupHint && verifiable) is gated by `paranoid`
 //     (WEB_PARANOID). The needs_web arm additionally requires `verifiable`: on a small
 //     flash-lite classifier, `needs_web` over-fires on open-ended advice/explanations
 //     (observed live: "посоветуй фильм", "объясни goroutines" → needs_web=true,
 //     verifiable=false → a false-web). `verifiable` ("a checkable fact about a NAMED
 //     entity") is the reliable discriminator; recency still routes via time_sensitive/
 //     freshness and obscurity via entity_obscure, so no genuine grounding is lost.
 //     With paranoid off, web routing equals today's freshness-only behavior — so
 //     enabling the classifier is web-routing-neutral and WEB_PARANOID is the single
 //     switch that activates epistemic grounding (clean canary; cost increase behind it).
 //   - trivial is agreement-gated: a Layer-0 trivial candidate AND classifier.trivial AND
 //     confidence ≥ TrivialFloor. A lone signal stays on grok_direct (no voice leak).
 //   - everything else falls to grok_direct (the safe floor: opinion/chat/advice/code).
 //
 // The switch ORDER determines web_decided_by attribution; the boolean result is the OR.
 func Combine(l0 Layer0, v Verdict, paranoid bool) Combined {
 	// Production always resolves against the default threshold set; only the offline
 	// sweep passes custom floors, and it calls CombineWithFloors directly.
 	floors := DefaultFloors()
 	return CombineWithFloors(l0, v, paranoid, floors)
 }
 // CombineWithFloors is Combine with caller-supplied thresholds (the offline-eval sweep
 // entry). Arms are tried in a fixed order and the first match wins, which is what
 // determines the WebDecidedBy attribution: freshness, then (paranoid only) needs_web,
 // entity_obscure, time_sensitive, lookup_hint; then the trivial agreement gate; and
 // finally grok_direct.
 func CombineWithFloors(l0 Layer0, v Verdict, paranoid bool, f Floors) Combined {
 	web := func(by string) Combined {
 		return Combined{Route: RouteWeb, WebDecidedBy: by}
 	}
 	// The freshness hit is the only web arm that ignores the paranoid switch.
 	if l0.WebForce {
 		return web(WebByFreshness)
 	}
 	if paranoid {
 		if v.NeedsWeb && v.Verifiable && v.Confidence >= f.WebNeedsWeb {
 			return web(WebByNeedsWeb)
 		}
 		if v.EntityObscure {
 			return web(WebByObscure)
 		}
 		if v.TimeSensitive {
 			return web(WebByTime)
 		}
 		if l0.LookupHint && v.Verifiable {
 			return web(WebByLookupHint)
 		}
 	}
 	// No web arm fired: trivial needs both layers to agree and a confident classifier.
 	route := RouteGrokDirect
 	if l0.Trivial && v.Trivial && v.Confidence >= f.Trivial {
 		route = RouteTrivial
 	}
 	return Combined{Route: route, WebDecidedBy: WebByNone}
 }
--- a/apps/ai-bot/internal/routedecide/routedecide_test.go
+++ b/apps/ai-bot/internal/routedecide/routedecide_test.go
@ -0,0 +1,230 @@
 package routedecide
 import "testing"
 // TestClassifyLayer0 is the free-heuristic golden set: freshness → web (WebForce),
 // short greetings/acks/bare-arithmetic → trivial candidate, everything else →
 // grok_direct, with substantive messages never trivial.
 func TestClassifyLayer0(t *testing.T) {
 	type golden struct {
 		body         string
 		wantRoute    string
 		wantWebForce bool
 		wantTrivial  bool
 	}
 	table := []golden{
 		// Greetings, acks and bare arithmetic are trivial candidates.
 		{"привет", RouteTrivial, false, true},
 		{"спасибо", RouteTrivial, false, true},
 		{"2+2", RouteTrivial, false, true},
 		{"12 / 4 - 1", RouteTrivial, false, true},
 		{"hello", RouteTrivial, false, true},
 		// Freshness words force the web route.
 		{"какие новости сегодня?", RouteWeb, true, false},
 		{"курс доллара сегодня", RouteWeb, true, false},
 		{"what's the weather today", RouteWeb, true, false},
 		// Substantive messages (and the empty body) fall to the safe floor.
 		{"посоветуй фильм на вечер", RouteGrokDirect, false, false},
 		{"explain how TCP works", RouteGrokDirect, false, false},
 		{"спасибо, а теперь подробно объясни квантовую запутанность", RouteGrokDirect, false, false},
 		{"", RouteGrokDirect, false, false},
 	}
 	for i := range table {
 		tc := table[i]
 		got := ClassifyLayer0(tc.body)
 		matches := got.Route == tc.wantRoute &&
 			got.WebForce == tc.wantWebForce &&
 			got.Trivial == tc.wantTrivial
 		if matches {
 			continue
 		}
 		t.Errorf("ClassifyLayer0(%q) = {route:%q webForce:%v trivial:%v}, want {%q %v %v}",
 			tc.body, got.Route, got.WebForce, got.Trivial, tc.wantRoute, tc.wantWebForce, tc.wantTrivial)
 	}
 }
 // TestFreshnessWordBoundaries guards the §7-#7 \b tightening: English freshness tokens
 // fire on whole words only — never inside scoreboard / concurrent / weathering — while
 // genuine freshness phrases still force web, and Russian stems stay stem-matched.
 func TestFreshnessWordBoundaries(t *testing.T) {
 	// One table, positives first so failures are reported in the same order as before.
 	phrases := []struct {
 		body      string
 		wantForce bool
 	}{
 		{"what's the weather today", true},
 		{"latest news on AI", true},
 		{"current bitcoin price", true},
 		{"какие новости сегодня", true}, // RU stems unchanged
 		{"курс доллара сегодня", true},
 		{"the scoreboard shows 3:1", false},             // score inside scoreboard
 		{"concurrent programming in Go", false},         // current inside concurrent
 		{"weathering the storm, metaphorically", false}, // weather inside weathering
 		{"subscribe to my newsletter please", false},    // news inside newsletter
 	}
 	for _, p := range phrases {
 		forced := ClassifyLayer0(p.body).WebForce
 		switch {
 		case p.wantForce && !forced:
 			t.Errorf("expected WebForce on freshness phrase: %q", p.body)
 		case !p.wantForce && forced:
 			t.Errorf("freshness false-positive (substring match) on: %q", p.body)
 		}
 	}
 }
 // TestLookupHintFalsePositiveCorpus is the §5 guarantee: the soft lookup-intent regex
 // must NOT fire on greetings/vocatives/idioms/non-lookup interrogatives — it is anchored
 // on interrogative + lookup-verb, never on a capitalised word or a guillemet. A false
 // LookupHint can only ever bias the classifier (and only when WEB_PARANOID + verifiable),
 // but we still hold the regex itself to near-zero false positives.
 func TestLookupHintFalsePositiveCorpus(t *testing.T) {
 	hinted := func(body string) bool { return ClassifyLayer0(body).LookupHint }
 	// Traps: none of these carries an interrogative followed by a lookup verb.
 	traps := []string{
 		"Привет, Москва!",     // vocative, no interrogative
 		"«Война и мир» — топ", // guillemets are not a trigger
 		"ну ты прям Эйнштейн", // proper noun, no «кто такой»
 		"кто это сделал?",     // «кто» not followed by a lookup-verb
 		"когда ты придёшь?",   // «когда» needs a release/birth verb
 		"спасибо большое",     // ack
 		"расскажи что-нибудь", // imperative, no lookup interrogative
 		"I love this movie",   // English, no interrogative
 		"who cares",           // «who» not followed by is/was/starred/…
 	}
 	for i := range traps {
 		if hinted(traps[i]) {
 			t.Errorf("lookupHint fired on a false-positive trap: %q", traps[i])
 		}
 	}
 	// Genuine lookup intent must still be detected, or the hint is useless.
 	genuine := []string{
 		"кто снимался в фильме дом у дороги",
 		"кто написал войну и мир",
 		"в каком году вышел фильм матрица",
 		"who directed Inception",
 		"in what year was the Matrix released",
 		"how many seasons of breaking bad",
 	}
 	for i := range genuine {
 		if hinted(genuine[i]) {
 			continue
 		}
 		t.Errorf("lookupHint should fire on genuine lookup intent: %q", genuine[i])
 	}
 }
 // TestCombineFreshnessAlwaysWeb: a freshnessRe hit (WebForce) routes to web regardless of
 // WEB_PARANOID and regardless of the classifier verdict — the deterministic signal that
 // survives the classifier being down (§4.4).
 func TestCombineFreshnessAlwaysWeb(t *testing.T) {
 	fresh := Layer0{Route: RouteWeb, WebForce: true, Freshness: "recent"}
 	// The classifier actively disagrees: no needs_web and a very low confidence.
 	dissent := Verdict{NeedsWeb: false, Confidence: 0.1}
 	modes := [...]bool{true, false}
 	for i := range modes {
 		route := Combine(fresh, dissent, modes[i]).Route
 		if route == RouteWeb {
 			continue
 		}
 		t.Errorf("freshness with paranoid=%v = %q, want web", modes[i], route)
 	}
 }
 // TestCombineParanoidGating is the Design-X invariant (§15): with WEB_PARANOID OFF, only
 // freshness routes to web — the classifier's needs_web/entity/time/lookup signals are
 // recorded but do NOT change the route. With it ON, those arms activate.
 func TestCombineParanoidGating(t *testing.T) {
 	// LookupHint is set so the lookup_hint arm is reachable; WebForce stays false so
 	// freshness can never be the reason a case routes to web.
 	base := Layer0{Route: RouteGrokDirect, LookupHint: true} // no freshness
 	verdicts := []Verdict{
 		{NeedsWeb: true, Verifiable: true, Confidence: 0.9}, // classifier_needs_web (needs verifiable)
 		{EntityObscure: true, Confidence: 0.4},              // entity_obscure
 		{TimeSensitive: true, Confidence: 0.4},              // time_sensitive
 		{Verifiable: true, Confidence: 0.4},                 // lookup_hint && verifiable
 	}
 	for idx := range verdicts {
 		off := Combine(base, verdicts[idx], false)
 		if off.Route != RouteGrokDirect {
 			t.Errorf("arm %d with paranoid OFF = %q, want grok_direct (web is freshness-only)", idx, off.Route)
 		}
 		on := Combine(base, verdicts[idx], true)
 		if on.Route != RouteWeb {
 			t.Errorf("arm %d with paranoid ON = %q, want web", idx, on.Route)
 		}
 	}
 }
 // TestCombineWebFloor: the needs_web arm only fires at/above WebNeedsWebFloor (paranoid).
 func TestCombineWebFloor(t *testing.T) {
 	noSignals := Layer0{Route: RouteGrokDirect}
 	// Same verifiable needs_web verdict each time; only the confidence moves.
 	verdictAt := func(conf float64) Verdict {
 		return Verdict{NeedsWeb: true, Verifiable: true, Confidence: conf}
 	}
 	if route := Combine(noSignals, verdictAt(WebNeedsWebFloor-0.01), true).Route; route != RouteGrokDirect {
 		t.Errorf("needs_web below floor = %q, want grok_direct", route)
 	}
 	// The comparison is inclusive: exactly at the floor must route to web.
 	if route := Combine(noSignals, verdictAt(WebNeedsWebFloor), true).Route; route != RouteWeb {
 		t.Errorf("needs_web at floor = %q, want web", route)
 	}
 }
 // TestCombineNeedsWebRequiresVerifiable is the false-web fix (observed live): the needs_web
 // arm fires ONLY when the classifier also flagged a checkable named-entity fact
 // (verifiable). A high-confidence needs_web on a non-verifiable query — an opinion or
 // explanation the small flash-lite over-eagerly marked needs_web=true ("посоветуй фильм",
 // "объясни goroutines") — stays on grok_direct. Recency (time_sensitive/freshness) and
 // obscurity (entity_obscure) keep their own arms, so no genuine grounding is lost.
 func TestCombineNeedsWebRequiresVerifiable(t *testing.T) {
 	plain := Layer0{Route: RouteGrokDirect}
 	// Maximum confidence is still not enough without verifiable.
 	unverifiable := Verdict{NeedsWeb: true, Verifiable: false, Confidence: 1.0}
 	if route := Combine(plain, unverifiable, true).Route; route != RouteGrokDirect {
 		t.Errorf("needs_web && !verifiable = %q, want grok_direct (false-web fix)", route)
 	}
 	verifiable := Verdict{NeedsWeb: true, Verifiable: true, Confidence: 0.6}
 	if route := Combine(plain, verifiable, true).Route; route != RouteWeb {
 		t.Errorf("needs_web && verifiable = %q, want web", route)
 	}
 	// A non-verifiable needs_web that is ALSO entity_obscure still grounds (obscure arm).
 	obscure := Verdict{NeedsWeb: true, Verifiable: false, EntityObscure: true, Confidence: 0.1}
 	if route := Combine(plain, obscure, true).Route; route != RouteWeb {
 		t.Errorf("entity_obscure must still route web regardless of verifiable, got %q", route)
 	}
 }
 // TestCombineTrivialAgreementGate: trivial requires BOTH the Layer-0 candidate AND
 // classifier.trivial AND confidence ≥ TrivialFloor. A lone signal stays on grok_direct.
 func TestCombineTrivialAgreementGate(t *testing.T) {
 	candidate := Layer0{Route: RouteTrivial, Trivial: true}
 	routeFor := func(l0 Layer0, trivial bool, conf float64) string {
 		return Combine(l0, Verdict{Trivial: trivial, Confidence: conf}, true).Route
 	}
 	if route := routeFor(candidate, true, 0.95); route != RouteTrivial {
 		t.Errorf("agreed high-confidence trivial = %q, want trivial", route)
 	}
 	if route := routeFor(candidate, true, 0.5); route != RouteGrokDirect {
 		t.Errorf("low-confidence trivial = %q, want grok_direct (no voice leak)", route)
 	}
 	if route := routeFor(candidate, false, 0.95); route != RouteGrokDirect {
 		t.Errorf("classifier disagrees on trivial = %q, want grok_direct", route)
 	}
 	// Never trust classifier.trivial alone: without the Layer-0 candidate it stays grok.
 	notCandidate := Layer0{Route: RouteGrokDirect}
 	if routeFor(notCandidate, true, 0.99) == RouteTrivial {
 		t.Errorf("classifier.trivial alone routed to trivial; must require the Layer-0 candidate")
 	}
 }
 // TestCombineRoadHouse is the regression: the hallucinated-cast bug. With WEB_PARANOID on
 // and the classifier flagging the (obscure, verifiable) entity, both the first turn and
 // the resolved follow-up route to web; with paranoid off they fall to grok_direct (the
 // canary-neutral baseline).
 func TestCombineRoadHouse(t *testing.T) {
 	first := ClassifyLayer0("кто снимался в фильме дом у дороги")
 	followup := ClassifyLayer0("2024 года") // bare; the classifier resolves via context
 	v := Verdict{NeedsWeb: true, Verifiable: true, EntityObscure: true, Confidence: 0.7}
 	for _, l0 := range []Layer0{first, followup} {
 		if got := Combine(l0, v, true).Route; got != RouteWeb {
 			t.Errorf("road house with paranoid ON = %q, want web (the hallucination fix)", got)
 		}
 		if got := Combine(l0, v, false).Route; got != RouteGrokDirect {
 			t.Errorf("road house with paranoid OFF = %q, want grok_direct (baseline)", got)
 		}
 	}
 }
 // TestWebDecidedByAttribution: the switch order attributes the right arm (for tuning 0.55).
 func TestWebDecidedByAttribution(t *testing.T) {
 	type attribution struct {
 		l0   Layer0
 		v    Verdict
 		want string
 	}
 	// One row per arm, each crafted so exactly that arm is the first to match.
 	table := []attribution{
 		{Layer0{WebForce: true}, Verdict{}, WebByFreshness},
 		{Layer0{}, Verdict{NeedsWeb: true, Verifiable: true, Confidence: 0.9}, WebByNeedsWeb},
 		{Layer0{}, Verdict{EntityObscure: true, Confidence: 0.1}, WebByObscure},
 		{Layer0{}, Verdict{TimeSensitive: true, Confidence: 0.1}, WebByTime},
 		{Layer0{LookupHint: true}, Verdict{Verifiable: true, Confidence: 0.1}, WebByLookupHint},
 		{Layer0{Route: RouteGrokDirect}, Verdict{Confidence: 0.1}, WebByNone},
 	}
 	for i := range table {
 		row := table[i]
 		decidedBy := Combine(row.l0, row.v, true).WebDecidedBy
 		if decidedBy == row.want {
 			continue
 		}
 		t.Errorf("web_decided_by(%+v,%+v) = %q, want %q", row.l0, row.v, decidedBy, row.want)
 	}
 }
--- a/apps/ai-bot/pricing.go
+++ b/apps/ai-bot/pricing.go
@ -20,15 +20,21 @@ type ModelPrice struct {
 // and so a half-finished cascade can book only what it actually spent (§8.1).
 type CostBreakdown struct {
 	Token     float64
-	Grounding float64
+	Grounding float64 // Gemini grounded-prompt TOKEN cost
 	WebTool   float64
 	Router    float64
 	// GroundingFee is the per-grounded-prompt FEE (the $35/1k overage on a paid Gemini
 	// tier, GEMINI_GROUNDING_PER_PROMPT_USD) — kept separate from Grounding (the token
 	// cost) for clean analytics. Booked the moment the grounded prompt is admitted, even
 	// on the error return (§7 SG1). Settle folds it into the grounding_usd spend column,
 	// so the $10 ceiling finally sees it without a spend-table migration.
 	GroundingFee float64
 }
 // Total is the grand total across all components (the number the wallet ceiling and
 // request_log.total_usd care about). Computed, never stored, so it can't drift.
 func (c CostBreakdown) Total() float64 {
-	return c.Token + c.Grounding + c.WebTool + c.Router
+	return c.Token + c.Grounding + c.WebTool + c.Router + c.GroundingFee
 }
 // priceFor returns the configured price for a model. An unknown model falls back to
--- a/apps/ai-bot/provider_gemini.go
+++ b/apps/ai-bot/provider_gemini.go
@ -19,10 +19,11 @@ import (
 //     cheap trivial route and the Layer-1 router classifier. Same wire format as Grok,
 //     so it reuses the shared transport (httpllm.go).
 //   - groundedSearch: a SEPARATE call against the NATIVE v1beta generateContent endpoint
-//     with the google_search tool. Grounding does NOT work on the OpenAI-compat layer
+//     with the google_search tool. Grounding does NOT work on the OpenAI-compat layer —
-//     (it is silently ignored there, and only on Gemini 3+) — verified against Google's
+//     it is silently ignored THERE (F-EXT-3, an endpoint limitation, NOT a model-version
-//     docs (F-EXT-3) — so the web layer that wants Gemini grounding must use this native
+//     one: the google_search tool is supported by current models including
-//     path and VERIFY citations came back, else degrade.
+//     gemini-2.5-flash-lite per ai.google.dev). So the web layer that wants Gemini
 //     grounding must use this native path and VERIFY citations came back, else degrade.
 type geminiClient struct {
 	http       *openAIClient
 	nativeBase string // …/v1beta — derived from the OpenAI-compat base by dropping /openai
@ -94,8 +95,8 @@ type geminiPart struct {
 	Text string `json:"text"`
 }
 type geminiTool struct {
-	// google_search is the current grounding tool (Gemini 3 / current models). The
+	// google_search is the current grounding tool (all current models, incl. the 2.5
-	// empty object enables it.
+	// family; legacy models used google_search_retrieval). The empty object enables it.
 	GoogleSearch struct{} `json:"google_search"`
 }
 type geminiNativeResponse struct {
--- a/apps/ai-bot/router.go
+++ b/apps/ai-bot/router.go
@ -3,117 +3,141 @@ package main
 import (
 	"context"
 	"encoding/json"
 	"regexp"
 	"strings"
 	"time"
 	rd "vojo.chat/ai-bot/internal/routedecide"
 )
 // router.go classifies a message into a route. It runs INSIDE respond() — after the
 // mention/media/foreign/single-flight gates (F-FUNC-7) — so a paid Layer-1 classifier
 // is never spent on a message today's bot drops for free.
 //
-// Two layers, both conservative (doubt → grok_direct, the safe floor that keeps
+// Two layers; the decision MATH lives in the pure internal/routedecide package so the
-// substantive questions on Grok, §8.6):
+// offline eval (cmd/routereval) replays the SAME function instead of a copy:
 //   - Layer-0: free regex heuristics (RU+EN). Always runs when ROUTER_ENABLED.
-//   - Layer-1: a cheap Gemini JSON classifier, consulted ONLY on Layer-0 grok_direct
+//   - Layer-1: a cheap Gemini JSON classifier (ROUTER_CLASSIFIER_ENABLED). It now runs
-//     when ROUTER_CLASSIFIER_ENABLED. Any failure falls back to the Layer-0 verdict.
+//     on EVERY message (greetings + freshness hits included) so trivial can be
 //     agreement-confirmed and follow-ups get a context-resolved search_query. Any
 //     failure (incl. the 4s sub-deadline) falls back to the Layer-0 verdict — never an
 //     ungrounded confident answer, never a degrade-to-web (the classifier is Gemini, so
 //     a Gemini outage means the grounding fetch is down too, §4.4).
-// RouterDecision is the route plus the signals behind it (logged for threshold
+// RouterDecision is the route plus the signals behind it (logged + persisted for
-// calibration). Only Route/Source/Confidence/NeedsWeb drive behaviour today; the rest
+// threshold calibration and misroute attribution, §8). Route/Source/Confidence drive
-// are recorded for the offline router-replay eval (§9).
+// behaviour; the epistemic signals + SearchQuery feed the web route and the analytics.
 type RouterDecision struct {
 	Route          string
 	Source         string // heuristic | classifier | default | forced | degraded
 	Confidence     float64
 	NeedsWeb       bool
-	Freshness      string
+	Freshness      string // "recent" on a freshnessRe hit (read by factualMiss + logged)
-	ReasoningLevel string
+	ReasoningLevel string // "high" on the forced reason route (logged)
-	Domain         string
+
-	Difficulty     string
+	// Classifier signals (§4) — populated only when Layer-1 ran. SearchQuery is the
 	// self-contained, follow-up-resolved web query (carried to genWebThenGrok in DMs).
 	SearchQuery   string
 	EntityObscure bool
 	TimeSensitive bool
 	Verifiable    bool
 	TrivialScore  bool   // the classifier's raw "trivial" verdict
 	LookupHint    bool   // Layer-0 soft hint (never sets the route on its own, §5)
 	WebDecidedBy  string // which arm chose web — routedecide.WebBy* (request_log)
 }
-// Heuristic patterns. Kept deliberately tight: a false "trivial" leaks a real question
+// routerStageTimeout bounds the classifier call independently of the overall budget
-// to the cheap model, so trivial fires only on short, unmistakable greetings/acks or
+// (mirrors webStageTimeout, §4.4). It is derived from the parent genCtx so a budget
-// bare arithmetic. Freshness words route to web (a false web-route only costs a fetch
+// cancel still propagates; its expiry is treated exactly like a classifier error → the
-// and degrades cleanly — never a wrong answer).
+// Layer-0 verdict, never a terminal error.
-var (
+const routerStageTimeout = 4 * time.Second
 	greetingRe   = regexp.MustCompile(`^(привет(ик)?|здравствуй(те)?|хай|прив|ку|добрый\s+(день|вечер|утро)|спасибо|спс|благодарю|пока|ок(ей)?|угу|ага|hello|hi|hey|yo|thanks|thank\s+you|thx|ty|bye|goodbye|ok|okay|cool|nice)[\s!.,)]*$`)
 	arithmeticRe = regexp.MustCompile(`^[\s(]*\d+(\s*[-+*/×÷]\s*\d+)+[\s)=?]*$`)
 	freshnessRe  = regexp.MustCompile(`(новост|сегодня|сейчас|последн|курс\s|погод|котировк|расписани|прогноз|breaking|today|right now|latest|current(ly)?|news|weather|stock price|exchange rate|score)`)
 )
-// routeLayer0 is the free heuristic. Confidence is a rough self-estimate used only for
+// classifierPrompt asks Gemini an EPISTEMIC-RISK question (not a topic label) and
-// logging/threshold tuning, not control flow.
+// resolves follow-ups from the short conversation that is appended after it (rcx). Kept
 // terse to bound tokens; extractJSON tolerates code fences.
 // The prompt deliberately ends on an open "Conversation:" line: routeLayer1 sends
 // classifierPrompt + rcx as a single user message, so the conversation window lands
 // directly after it. The reply is unmarshalled into rd.Verdict, so the keys in the
 // Schema line presumably have to match that struct's JSON tags — confirm against
 // internal/routedecide before renaming any of them.
 const classifierPrompt = `You are a routing classifier for a Russian-speaking chat assistant. You do NOT answer the question. Read the short conversation; the LAST user line is the message to route, earlier lines are context to resolve pronouns and follow-ups. Reply with ONLY one JSON object, no prose.
 Your main job is an EPISTEMIC judgement, not a topic label: if the assistant answered the LAST message purely from its own memory (no web), how likely is it to state a WRONG checkable fact — a name, a film/book cast, a date or release year, a number, a price, a score, a population, a who-did-what about a SPECIFIC named person/film/company/place/event? Such facts are exactly what a model misremembers and states confidently.
 Decide:
 - "needs_web": true if a correct answer DEPENDS on such a checkable external fact, OR on anything time-sensitive (news, "сегодня"/today, "сейчас", latest, current price/rate/weather/score). Recency is sufficient but NOT necessary — a STATIC fact like a film's cast or a country's capital also counts. When in doubt, prefer TRUE: grounding is cheap, a confident wrong fact is not. FALSE for opinions, explanations, advice, casual chat, creative writing, code help, or transforming text the user already gave you.
 - "verifiable": true if the message is specifically a checkable fact about a NAMED entity (who acted in <film>, who is CEO of <company>, what year <event>, population of <place>) — even if not about "today". A bare follow-up like "2024 года" inherits the entity from the previous turn.
 - "entity_obscure": true if the salient entity is plausibly long-tail / not a household name (a minor film, a non-famous person, a niche product) — these are where memory fails hardest.
 - "time_sensitive": true if the answer can change over time (news, prices, weather, standings, "current"/"latest"/"now").
 - "trivial": true ONLY for a bare greeting, acknowledgement, or tiny arithmetic with no real question.
 - "search_query": a SELF-CONTAINED web search query for this message, with follow-ups resolved from context (a bare "2024 года" after discussing a film becomes "<film name> 2024 фильм актёрский состав"). Empty string ONLY if both needs_web and verifiable are false.
 - "confidence": 0.0-1.0, your honest certainty in needs_web.
 Schema: {"needs_web":bool,"verifiable":bool,"entity_obscure":bool,"time_sensitive":bool,"trivial":bool,"search_query":"<query or empty>","confidence":0.0-1.0}
 Conversation:
 `
 // routeLayer0 is the free heuristic verdict (RouterDecision shape), built from the pure
 // core. Used directly when the classifier is off, and exported here for the heuristic
 // golden test. Confidence is a rough self-estimate, logging-only (not control flow).
 func routeLayer0(body string) RouterDecision {
-	s := strings.ToLower(strings.TrimSpace(body))
+	return layer0Decision(rd.ClassifyLayer0(body))
 	if s == "" {
 		return RouterDecision{Route: routeGrokDirect, Source: "heuristic", Confidence: 0.5}
 	}
 	if freshnessRe.MatchString(s) {
 		return RouterDecision{Route: routeWebThenGrok, Source: "heuristic", Confidence: 0.7, NeedsWeb: true, Freshness: "recent"}
 	}
 	if isTrivial(s) {
 		return RouterDecision{Route: routeTrivial, Source: "heuristic", Confidence: 0.85, Difficulty: "trivial"}
 	}
 	return RouterDecision{Route: routeGrokDirect, Source: "heuristic", Confidence: 0.6}
 }
-// isTrivial: a short greeting/ack or a bare arithmetic expression, with no sign of a
+// layer0Decision maps the pure routedecide.Layer0 onto a RouterDecision, attaching the
-// real question. Length-bounded so "thanks, now explain quantum tunnelling" is NOT
+// logging-only confidence self-estimates the old heuristic used.
-// trivial.
+func layer0Decision(l0 rd.Layer0) RouterDecision {
-func isTrivial(s string) bool {
+	d := RouterDecision{Route: l0.Route, Source: "heuristic", LookupHint: l0.LookupHint, Freshness: l0.Freshness}
-	if arithmeticRe.MatchString(s) {
+	switch l0.Route {
-		return true
+	case routeWebThenGrok:
 		d.Confidence, d.NeedsWeb = 0.7, true
 	case routeTrivial:
 		d.Confidence = 0.85
 	default:
 		d.Confidence = 0.6
 	}
-	if len(strings.Fields(s)) <= 4 && greetingRe.MatchString(s) {
+	return d
 		return true
 	}
 	return false
 }
-// classify produces the final RouterDecision for a request. The manual reasoning
+// isTrivial reports a short greeting/ack or bare arithmetic (the Layer-0 regex). Kept
-// trigger is honoured independently of the heuristic router (it's a deliberate user
+// as a thin wrapper over the pure core for in-package callers/tests.
-// signal). Layer-1's cost, when it runs, is accumulated into cost.Router.
+func isTrivial(s string) bool { return rd.IsTrivial(strings.ToLower(strings.TrimSpace(s))) }
-func (b *Bot) classify(ctx context.Context, body string, cost *CostBreakdown) RouterDecision {
+
 // classify produces the final RouterDecision. The manual reasoning trigger is honoured
 // independently of the heuristic router (a deliberate user signal). rcx is the
 // privacy-minimised conversation window (DM-resolved; bare trigger in groups) appended
 // to the classifier prompt. Layer-1's cost, when it runs, accumulates into cost.Router.
 func (b *Bot) classify(ctx context.Context, body, rcx string, cost *CostBreakdown) RouterDecision {
 	if b.cfg.ReasoningEnabled && containsTrigger(body, b.cfg.ReasoningTrigger) {
 		return RouterDecision{Route: routeReason, Source: "forced", Confidence: 1, ReasoningLevel: "high"}
 	}
 	if !b.cfg.RouterEnabled {
 		return RouterDecision{Route: routeGrokDirect, Source: "default"}
 	}
-	d := routeLayer0(body)
+	l0 := rd.ClassifyLayer0(body)
-	// Layer-1 only refines the uncertain grok_direct verdict, and only if enabled and
+	d := layer0Decision(l0)
-	// the Gemini client exists. Anything else stands on the heuristic.
+	// Drop the old "only on grok_direct" gate: the classifier now runs on every message
-	if d.Route != routeGrokDirect || !b.cfg.RouterClassifierEnabled || b.gemini == nil {
+	// (when enabled) so it can raise a quiet factual question to web AND agreement-confirm
 	// a trivial. With it disabled, the Layer-0 verdict stands (today's behaviour).
 	if !b.cfg.RouterClassifierEnabled || b.gemini == nil {
 		return d
 	}
-	refined, err := b.routeLayer1(ctx, body, cost)
+	// 4s router sub-deadline derived from genCtx (a budget cancel still propagates).
 	rctx, cancel := context.WithTimeout(ctx, routerStageTimeout)
 	defer cancel()
 	refined, err := b.routeLayer1(rctx, rcx, l0, cost)
 	if err != nil {
 		// Classifier error / timeout / garbage → the Layer-0 verdict, exactly as today.
 		// Only the deterministic freshnessRe (carried in d) survives a classifier outage.
 		b.log.WarnContext(ctx, "layer-1 classifier failed; using heuristic", "err", err)
-		return d // degrade to the heuristic verdict
+		return d
 	}
 	return refined
 }
-// classifierConfidenceFloor is the bar a Layer-1 escalation OFF the safe floor
+// routeLayer1 runs the Gemini classifier, parses its JSON into a routedecide.Verdict,
-// (trivial/web/reason) must clear. Below it, the verdict is treated as doubt and the
+// and resolves the route via the shared routedecide.Combine (WebParanoid-gated). A
-// request stays on grok_direct — the owner's "substantive stays on Grok" rule (§8.6).
+// non-JSON or transport error is returned so classify() degrades to the heuristic — the
-// A low-confidence "trivial" is exactly the false-trivial voice leak we must not take.
+// cheap model never silently mis-routes by returning garbage.
-const classifierConfidenceFloor = 0.8
+func (b *Bot) routeLayer1(ctx context.Context, rcx string, l0 rd.Layer0, cost *CostBreakdown) (RouterDecision, error) {
 // classifierPrompt asks Gemini for a strict JSON verdict. Kept terse to bound tokens.
 const classifierPrompt = `You are a router. Classify the user message into exactly one route and reply with ONLY a JSON object, no prose.
 Routes: "trivial" (greeting/ack/tiny arithmetic), "web" (needs fresh/current facts: news, prices, weather, "today"), "normal" (everything else).
 Schema: {"route":"trivial|web|normal","confidence":0.0-1.0,"needs_web":true|false}
 Message: `
 // routeLayer1 runs the Gemini classifier and parses its JSON. A non-JSON or unknown
 // answer is an error so classify() degrades to the heuristic — the cheap model never
 // gets to silently mis-route by returning garbage.
 func (b *Bot) routeLayer1(ctx context.Context, body string, cost *CostBreakdown) (RouterDecision, error) {
 	resp, err := b.gemini.Complete(ctx, LLMRequest{
 		Model:       b.cfg.GeminiModel,
-		Messages:    []Message{{Role: "user", Content: classifierPrompt + body}},
+		Messages:    []Message{{Role: "user", Content: classifierPrompt + rcx}},
-		MaxTokens:   60,
+		MaxTokens:   80, // was 60; the schema grew
 		Temperature: 0,
 	})
 	if err != nil {
@ -121,41 +145,36 @@ func (b *Bot) routeLayer1(ctx context.Context, body string, cost *CostBreakdown)
 	}
 	cost.Router += computeUSD(b.cfg.GeminiModel, resp.Usage, b.cfg)
-	var parsed struct {
+	// The classifier schema IS routedecide.Verdict (tagged), so unmarshal straight into it.
-		Route      string  `json:"route"`
+	var v rd.Verdict
-		Confidence float64 `json:"confidence"`
+	if err := json.Unmarshal([]byte(extractJSON(resp.Text)), &v); err != nil {
 		NeedsWeb   bool    `json:"needs_web"`
 	}
 	if err := json.Unmarshal([]byte(extractJSON(resp.Text)), &parsed); err != nil {
 		return RouterDecision{}, err
 	}
-	route := normalizeRoute(parsed.Route)
+	v.SearchQuery = strings.TrimSpace(v.SearchQuery)
-	// Safe floor: a low-confidence escalation off grok_direct is doubt — keep it on
+	combined := rd.Combine(l0, v, b.cfg.WebParanoid)
 	// Grok rather than leak a possibly-substantive question to the cheap model.
 	if route != routeGrokDirect && parsed.Confidence < classifierConfidenceFloor {
 		return RouterDecision{Route: routeGrokDirect, Source: "classifier", Confidence: parsed.Confidence}, nil
 	}
 	return RouterDecision{
 		Route:      route,
 		Source:     "classifier",
 		Confidence: parsed.Confidence,
 		NeedsWeb:   parsed.NeedsWeb || route == routeWebThenGrok,
 	}, nil
 }
-// normalizeRoute maps a classifier label to a route constant, defaulting unknown
+	d := RouterDecision{
-// labels to grok_direct — the safe floor, so a confused classifier never escalates.
+		Route:         combined.Route,
-func normalizeRoute(label string) string {
+		Source:        "classifier",
-	switch strings.ToLower(strings.TrimSpace(label)) {
+		Confidence:    v.Confidence,
-	case "trivial", "trivial_direct":
+		NeedsWeb:      v.NeedsWeb,
-		return routeTrivial
+		Verifiable:    v.Verifiable,
-	case "web", "web_then_grok":
+		EntityObscure: v.EntityObscure,
-		return routeWebThenGrok
+		TimeSensitive: v.TimeSensitive,
-	case "reason", "reason_then_grok":
+		TrivialScore:  v.Trivial,
-		return routeReason
+		SearchQuery:   v.SearchQuery,
-	default:
+		LookupHint:    l0.LookupHint,
-		return routeGrokDirect
+		Freshness:     l0.Freshness,
 		WebDecidedBy:  combined.WebDecidedBy,
 	}
 	// INFO so prod (which runs at INFO) captures the signal mix without LOG_LEVEL=debug.
 	// Content-free: no body, no search_query (those are gated DEBUG/telemetry paths).
 	b.log.InfoContext(ctx, "classifier verdict",
 		"route", d.Route, "web_decided_by", d.WebDecidedBy, "needs_web", d.NeedsWeb,
 		"verifiable", d.Verifiable, "entity_obscure", d.EntityObscure,
 		"time_sensitive", d.TimeSensitive, "trivial", d.TrivialScore,
 		"confidence", d.Confidence, "lookup_hint", d.LookupHint, "paranoid", b.cfg.WebParanoid)
 	return d, nil
 }
 // extractJSON pulls the first {...} object out of a model reply, tolerating prose or
--- a/apps/ai-bot/router_test.go
+++ b/apps/ai-bot/router_test.go
@ -47,18 +47,6 @@ func TestRouteLayer0(t *testing.T) {
 	}
 }
 func TestNormalizeRoute(t *testing.T) {
 	cases := map[string]string{
 		"trivial": routeTrivial, "web": routeWebThenGrok, "reason": routeReason,
 		"normal": routeGrokDirect, "garbage": routeGrokDirect, "": routeGrokDirect,
 	}
 	for in, want := range cases {
 		if got := normalizeRoute(in); got != want {
 			t.Errorf("normalizeRoute(%q) = %q, want %q", in, got, want)
 		}
 	}
 }
 func TestExtractJSON(t *testing.T) {
 	if got := extractJSON("prefix {\"route\":\"web\"} suffix"); got != `{"route":"web"}` {
 		t.Errorf("extractJSON = %q", got)
--- a/apps/ai-bot/store.go
+++ b/apps/ai-bot/store.go
@ -166,6 +166,27 @@ var migrations = []string{
 		date TEXT PRIMARY KEY,
 		n    INTEGER NOT NULL DEFAULT 0
 	);`,
 	// v5 (router redesign §8): the classifier signals + web outcome the offline eval needs
 	// to MEASURE misroute / false-web / lie-rate / true-cost / rewrite-quality — none of
 	// which is derivable from the v3 columns. Append-only (never edit an earlier migration).
 	// Booleans/counts are metadata, always recorded when telemetry is on; search_query +
 	// answer_text are content, written ONLY when TELEMETRY_STORE_TEXT (NULL otherwise).
 	// classifier_confidence is NOT a new column — filter router_confidence on
 	// router_source='classifier'. grounding_fee_usd is the §7 booked per-prompt fee (it is
 	// ALSO folded into grounding_usd for the ceiling; this column is the analytics split).
 	`ALTER TABLE request_log ADD COLUMN IF NOT EXISTS needs_web         BOOL DEFAULT false;
 	 ALTER TABLE request_log ADD COLUMN IF NOT EXISTS entity_obscure    BOOL DEFAULT false;
 	 ALTER TABLE request_log ADD COLUMN IF NOT EXISTS time_sensitive    BOOL DEFAULT false;
 	 ALTER TABLE request_log ADD COLUMN IF NOT EXISTS verifiable        BOOL DEFAULT false;
 	 ALTER TABLE request_log ADD COLUMN IF NOT EXISTS trivial_score     BOOL DEFAULT false;
 	 ALTER TABLE request_log ADD COLUMN IF NOT EXISTS web_decided_by    TEXT DEFAULT '';
 	 ALTER TABLE request_log ADD COLUMN IF NOT EXISTS grounding_fee_usd DOUBLE PRECISION DEFAULT 0;
 	 ALTER TABLE request_log ADD COLUMN IF NOT EXISTS rewrite_used      BOOL DEFAULT false;
 	 ALTER TABLE request_log ADD COLUMN IF NOT EXISTS web_grounded      BOOL DEFAULT false;
 	 ALTER TABLE request_log ADD COLUMN IF NOT EXISTS citation_count    INT DEFAULT 0;
 	 ALTER TABLE request_log ADD COLUMN IF NOT EXISTS search_query      TEXT;
 	 ALTER TABLE request_log ADD COLUMN IF NOT EXISTS answer_text       TEXT;`,
 }
 // migrate runs all pending migrations on a single connection under a session
@ -412,9 +433,15 @@ func (s *Store) ReleaseReservation(mxid string, estimate float64) error {
 // grounding it actually spent, releases the rest of the reservation, and refunds the
 // request slot separately. GREATEST(0, …) keeps reserved_usd from underflowing.
 // Atomic and commutative per row, so concurrent settles for one user sum correctly.
 //
 // The per-grounded-prompt FEE (cost.GroundingFee, §7 SG1) is folded into the committed
 // grounding_usd column here — so it flows through committedUSDExpr and the $10 ceiling
 // finally sees it WITHOUT a spend-table migration. request_log keeps the fee separately
 // in grounding_fee_usd for the analytics split.
 func (s *Store) Settle(mxid string, estimate float64, cost CostBreakdown) error {
 	ctx, cancel := opContext()
 	defer cancel()
 	grounding := cost.Grounding + cost.GroundingFee
 	_, err := s.pool.Exec(ctx,
 		`INSERT INTO spend (date, mxid, requests, usd, router_usd, grounding_usd, webtool_usd, reserved_usd)
 		 VALUES ($1, $2, 0, $3, $4, $5, $6, 0)
@ -424,7 +451,7 @@ func (s *Store) Settle(mxid string, estimate float64, cost CostBreakdown) error
 		   grounding_usd = spend.grounding_usd + excluded.grounding_usd,
 		   webtool_usd   = spend.webtool_usd   + excluded.webtool_usd,
 		   reserved_usd  = GREATEST(0, spend.reserved_usd - $7)`,
-		todayUTC(), mxid, cost.Token, cost.Router, cost.Grounding, cost.WebTool, estimate)
+		todayUTC(), mxid, cost.Token, cost.Router, grounding, cost.WebTool, estimate)
 	return err
 }
@ -444,32 +471,42 @@ func (s *Store) InsertRequestLog(rl RequestLog) error {
 	if err != nil {
 		return err
 	}
-	// query_text is NULL unless text capture is on (the struct carries "" otherwise),
+	// Content columns are NULL unless text capture is on (the struct carries "" otherwise),
-	// so the analytics table never holds message content by default.
+	// so the analytics table never holds message/model content by default.
-	var queryText any
+	nullIfEmpty := func(s string) any {
-	if rl.QueryText != "" {
+		if s == "" {
-		queryText = rl.QueryText
+			return nil
 		}
 		return s
 	}
-
+	// request_log.grounding_usd is the TOKEN cost only; the per-prompt FEE is split into its
 	// own grounding_fee_usd column (the spend ledger folds them — see Settle). total_usd is
 	// the full Total() including the fee, so the two grounding columns + total stay coherent.
 	_, err = s.pool.Exec(ctx, `
 		INSERT INTO request_log (
 			id, room_id, sender, route, router_source, router_confidence, models,
 			prompt_tokens, cached_tokens, completion_tokens,
 			token_usd, grounding_usd, router_usd, webtool_usd, total_usd,
 			latency_ms, stage_ms, escalated, fallback_fired, cache_hit, ceiling_hit,
-			per_user_cap_hit, prompt_version, provider_request_id, degraded, err, ok, query_text
+			per_user_cap_hit, prompt_version, provider_request_id, degraded, err, ok, query_text,
 			needs_web, entity_obscure, time_sensitive, verifiable, trivial_score, web_decided_by,
 			grounding_fee_usd, rewrite_used, web_grounded, citation_count, search_query, answer_text
 		) VALUES (
 			$1, $2, $3, $4, $5, $6, $7,
 			$8, $9, $10,
 			$11, $12, $13, $14, $15,
 			$16, $17, $18, $19, $20, $21,
-			$22, $23, $24, $25, $26, $27, $28
+			$22, $23, $24, $25, $26, $27, $28,
 			$29, $30, $31, $32, $33, $34,
 			$35, $36, $37, $38, $39, $40
 		) ON CONFLICT (id) DO NOTHING`,
 		rl.ID, rl.RoomID, rl.Sender, rl.Route, rl.RouterSource, rl.RouterConfidence, models,
 		rl.PromptTokens, rl.CachedTokens, rl.CompletionTokens,
 		rl.Cost.Token, rl.Cost.Grounding, rl.Cost.Router, rl.Cost.WebTool, rl.Cost.Total(),
 		rl.LatencyMS, stages, rl.Escalated, rl.FallbackFired, rl.CacheHit, rl.CeilingHit,
-		rl.PerUserCapHit, rl.PromptVersion, rl.ProviderRequestID, rl.Degraded, rl.Err, rl.OK, queryText)
+		rl.PerUserCapHit, rl.PromptVersion, rl.ProviderRequestID, rl.Degraded, rl.Err, rl.OK, nullIfEmpty(rl.QueryText),
 		rl.NeedsWeb, rl.EntityObscure, rl.TimeSensitive, rl.Verifiable, rl.TrivialScore, rl.WebDecidedBy,
 		rl.Cost.GroundingFee, rl.RewriteUsed, rl.WebGrounded, rl.CitationCount, nullIfEmpty(rl.SearchQuery), nullIfEmpty(rl.AnswerText))
 	return err
 }
@ -509,6 +546,20 @@ func (s *Store) IncrGroundingIfUnder(cap int) (bool, error) {
 	return true, nil
 }
 // DecrGrounding hands one grounding slot back to today's quota counter (§7 SG4). It is
 // meant for an admitted prompt that ended up without a usable grounded digest (no
 // citations, or a failed fetch), so over-routing and failed fetches do not eat the
 // day's grounded-answer budget.
 //
 // The refund is a single UPDATE on grounding_count keyed by todayUTC() (the caller
 // passes no date). GREATEST(0, …) clamps the counter, so repeated refunds cannot push n
 // below zero. Only the quota counter is written here; no spend/ledger column is touched,
 // so any fee booked for the prompt is unaffected by this call.
 // NOTE(review): the date is resolved at refund time, so a slot admitted just before the
 // UTC day boundary would presumably be refunded against the next day's row — confirm
 // that is acceptable.
 func (s *Store) DecrGrounding() error {
 	const refundSlotSQL = `UPDATE grounding_count SET n = GREATEST(0, n - 1) WHERE date = $1`
 	ctx, cancel := opContext()
 	defer cancel()
 	if _, err := s.pool.Exec(ctx, refundSlotSQL, todayUTC()); err != nil {
 		return err
 	}
 	return nil
 }
 // HasWarnedEncrypted / SetWarnedEncrypted persist the one-shot "reacted 🔒 to this
 // room because I can't read encryption" flag so a restart doesn't re-react on every
 // message (F5). The bot never reacts to its own events: m.reaction is not an
--- a/apps/ai-bot/store_test.go
+++ b/apps/ai-bot/store_test.go
@ -443,6 +443,114 @@ func TestStoreGroundingCapConcurrent(t *testing.T) {
 	}
 }
 // TestStoreDecrGrounding covers the §7 SG4 cap refund: a refunded slot frees exactly one
 // admission, and an over-refund clamps to 0 (never negative → no phantom headroom).
 //
 // Every IncrGroundingIfUnder error is checked explicitly: a store failure returns
 // ok=false, which would otherwise be indistinguishable from a legitimate "denied" and
 // let the at-cap assertion pass for the wrong reason.
 func TestStoreDecrGrounding(t *testing.T) {
 	st := openTestStore(t)
 	defer st.Close()
 	// Named dailyCap rather than cap so the builtin cap() is not shadowed.
 	const dailyCap = 3
 	// Fill the day's quota up to the cap.
 	for i := 0; i < dailyCap; i++ {
 		if ok, err := st.IncrGroundingIfUnder(dailyCap); err != nil || !ok {
 			t.Fatalf("incr %d: (%v,%v)", i, ok, err)
 		}
 	}
 	// One more must be denied — and denied by the cap, not by a store error.
 	ok, err := st.IncrGroundingIfUnder(dailyCap)
 	if err != nil {
 		t.Fatalf("at-cap incr: %v", err)
 	}
 	if ok {
 		t.Fatal("at cap, should be denied")
 	}
 	// Refund one → one more admitted.
 	if err := st.DecrGrounding(); err != nil {
 		t.Fatalf("decr: %v", err)
 	}
 	if ok, err := st.IncrGroundingIfUnder(dailyCap); err != nil || !ok {
 		t.Fatalf("post-refund incr: (%v,%v), want admitted", ok, err)
 	}
 	// Over-refund must clamp at 0, not go negative.
 	for i := 0; i < 10; i++ {
 		if err := st.DecrGrounding(); err != nil {
 			t.Fatalf("over-refund decr: %v", err)
 		}
 	}
 	// From a clamped 0, exactly dailyCap of the next dailyCap+2 attempts may be admitted;
 	// a negative counter would show up here as extra admissions.
 	admitted := 0
 	for i := 0; i < dailyCap+2; i++ {
 		ok, err := st.IncrGroundingIfUnder(dailyCap)
 		if err != nil {
 			t.Fatalf("post-clamp incr %d: %v", i, err)
 		}
 		if ok {
 			admitted++
 		}
 	}
 	if admitted != dailyCap {
 		t.Fatalf("after clamp, admitted %d, want %d (counter must have clamped to 0)", admitted, dailyCap)
 	}
 }
 // TestStoreSettleBooksGroundingFee pins the §7 SG1 booking rule: the per-grounded-prompt
 // FEE has to show up in committed spend (Settle folds it into grounding_usd), otherwise
 // the $10 ceiling would never see it. One settle with a token cost and a fee must yield
 // a committed total equal to their sum.
 func TestStoreSettleBooksGroundingFee(t *testing.T) {
 	st := openTestStore(t)
 	defer st.Close()
 	// Grounding token cost plus the flat per-prompt fee; together 0.0351.
 	cost := CostBreakdown{Grounding: 0.0001, GroundingFee: 0.035}
 	err := st.Settle("@u:vojo.chat", 0, cost)
 	if err != nil {
 		t.Fatalf("settle: %v", err)
 	}
 	committed, err := st.SpentTodayUSD()
 	if err != nil {
 		t.Fatalf("spent: %v", err)
 	}
 	// Compare with a small absolute tolerance rather than float equality.
 	delta := committed - 0.0351
 	if delta > 1e-9 || delta < -1e-9 {
 		t.Fatalf("committed = %v, want 0.0351 (grounding token + per-prompt fee)", committed)
 	}
 }
 // TestStoreRequestLogClassifierColumns covers the §8 columns: every classifier signal
 // boolean, the fee split and the grounded outcome must round-trip through
 // InsertRequestLog, and total_usd must include the fee.
 //
 // All five signal booleans are read back — including the two the fixture leaves false
 // (time_sensitive, trivial_score) — so a positional mix-up between neighbouring
 // placeholders in the long INSERT argument list is caught instead of silently passing.
 func TestStoreRequestLogClassifierColumns(t *testing.T) {
 	st := openTestStore(t)
 	defer st.Close()
 	rl := RequestLog{
 		ID: "$ev-rl-sig", Route: routeWebThenGrok, RouterSource: "classifier",
 		Models:        map[string]string{"final": "grok-x"},
 		Cost:          CostBreakdown{Token: 0.002, Grounding: 0.00007, GroundingFee: 0.035},
 		NeedsWeb:      true,
 		EntityObscure: true,
 		Verifiable:    true,
 		WebDecidedBy:  "entity_obscure",
 		RewriteUsed:   true,
 		WebGrounded:   true,
 		CitationCount: 3,
 		SearchQuery:   "the resolved query",
 		AnswerText:    "the answer",
 		OK:            true,
 	}
 	if err := st.InsertRequestLog(rl); err != nil {
 		t.Fatalf("insert: %v", err)
 	}
 	ctx, cancel := opContext()
 	defer cancel()
 	var (
 		needsWeb, entityObscure, timeSensitive, verifiable, trivialScore bool
 		webGrounded, rewriteUsed                                         bool
 		webDecidedBy                                                     string
 		fee, total                                                       float64
 		cites                                                            int
 		sq, ans                                                          *string // nullable content columns
 	)
 	if err := st.pool.QueryRow(ctx, `SELECT needs_web, entity_obscure, time_sensitive, verifiable, trivial_score,
 		web_decided_by, grounding_fee_usd, rewrite_used, web_grounded, citation_count,
 		search_query, answer_text, total_usd
 		FROM request_log WHERE id=$1`, rl.ID).Scan(&needsWeb, &entityObscure, &timeSensitive, &verifiable, &trivialScore,
 		&webDecidedBy, &fee, &rewriteUsed, &webGrounded, &cites, &sq, &ans, &total); err != nil {
 		t.Fatalf("read: %v", err)
 	}
 	if !needsWeb || !entityObscure || webDecidedBy != "entity_obscure" || !rewriteUsed || !webGrounded || cites != 3 {
 		t.Fatalf("signal columns wrong: needsWeb=%v obscure=%v decidedBy=%q rewrite=%v grounded=%v cites=%d",
 			needsWeb, entityObscure, webDecidedBy, rewriteUsed, webGrounded, cites)
 	}
 	// The fixture sets Verifiable and leaves TimeSensitive/TrivialScore false, so each
 	// of these columns must come back exactly as written.
 	if timeSensitive != rl.TimeSensitive || verifiable != rl.Verifiable || trivialScore != rl.TrivialScore {
 		t.Fatalf("signal booleans misplaced: timeSensitive=%v verifiable=%v trivial=%v, want %v/%v/%v",
 			timeSensitive, verifiable, trivialScore, rl.TimeSensitive, rl.Verifiable, rl.TrivialScore)
 	}
 	if d := fee - 0.035; d > 1e-9 || d < -1e-9 {
 		t.Fatalf("grounding_fee_usd = %v, want 0.035", fee)
 	}
 	if d := total - rl.Cost.Total(); d > 1e-9 || d < -1e-9 {
 		t.Fatalf("total_usd = %v, want %v (incl. fee)", total, rl.Cost.Total())
 	}
 	// InsertRequestLog itself does not strip content; a non-empty string is stored as-is.
 	if sq == nil || *sq != "the resolved query" || ans == nil || *ans != "the answer" {
 		t.Fatalf("InsertRequestLog should store content as given: sq=%v ans=%v", sq, ans)
 	}
 }
 func TestStoreWarnedEncrypted(t *testing.T) {
 	st := openTestStore(t)
 	const room = "!enc:vojo.chat"
--- a/apps/ai-bot/telemetry.go
+++ b/apps/ai-bot/telemetry.go
@ -3,6 +3,8 @@ package main
 import (
 	"context"
 	"time"
 	rd "vojo.chat/ai-bot/internal/routedecide"
 )
 // telemetry.go is the request_log analytics path: it captures route, cost, latency
@ -17,10 +19,10 @@ import (
 // limiter denial).
 const (
 	routeNone        = "none"
-	routeGrokDirect  = "grok_direct"
+	routeGrokDirect  = rd.RouteGrokDirect
-	routeTrivial     = "trivial_direct"
+	routeTrivial     = rd.RouteTrivial
-	routeWebThenGrok = "web_then_grok"
+	routeWebThenGrok = rd.RouteWeb
-	routeReason      = "reason_then_grok"
+	routeReason      = rd.RouteReason
 )
 // Degrade/skip reason strings (request_log.degraded). Stable tokens so the analytics
@ -74,6 +76,24 @@ type RequestLog struct {
 	Err               string
 	OK                bool
 	QueryText         string // stored only when TELEMETRY_STORE_TEXT; stripped otherwise
 	// Router/classifier signals + web outcome (§8) — the inputs the offline eval needs to
 	// measure misroute / false-web / lie-rate / true-cost / rewrite-quality. The boolean
 	// signals + WebDecidedBy are metadata (always stored when telemetry is on); SearchQuery
 	// and AnswerText are model-/user-derived content and are stripped unless
 	// TELEMETRY_STORE_TEXT (like QueryText). RouterConfidence above doubles as the
 	// classifier confidence (filter request_log on router_source='classifier').
 	NeedsWeb      bool
 	EntityObscure bool
 	TimeSensitive bool
 	Verifiable    bool
 	TrivialScore  bool
 	WebDecidedBy  string
 	RewriteUsed   bool
 	WebGrounded   bool
 	CitationCount int
 	SearchQuery   string // resolved query sent to Fetch; stored only when TELEMETRY_STORE_TEXT
 	AnswerText    string // the final answer; stored only when TELEMETRY_STORE_TEXT (lie-label input)
 }
 // recordTelemetry persists a row off the answer path. No-op unless TELEMETRY_ENABLED.
@ -85,7 +105,9 @@ func (b *Bot) recordTelemetry(ctx context.Context, rl RequestLog) {
 		return
 	}
 	if !b.cfg.TelemetryStoreText {
-		rl.QueryText = ""
+		// One text-gate governs ALL stored content: the user query, the model-authored
 		// search query, and the answer. Metadata signals (NeedsWeb, WebDecidedBy, …) stay.
 		rl.QueryText, rl.SearchQuery, rl.AnswerText = "", "", ""
 	}
 	b.safego(ctx, "telemetry", func() {
 		if err := b.st.InsertRequestLog(rl); err != nil {
--- a/apps/ai-bot/telemetry_test.go
+++ b/apps/ai-bot/telemetry_test.go
@ -55,6 +55,42 @@ func TestRecordSkipWritesRow(t *testing.T) {
 	}
 }
 // TestTelemetryStripsTextWhenStoreTextOff pins the content gate in recordTelemetry: with
 // TELEMETRY_ENABLED on and TELEMETRY_STORE_TEXT off, the three content fields (user
 // query, model-authored search query, answer) must land as NULL, while the metadata
 // signals on the same row are still written.
 func TestTelemetryStripsTextWhenStoreTextOff(t *testing.T) {
 	st := openTestStore(t)
 	defer st.Close()
 	cfg := &Config{TelemetryEnabled: true, TelemetryStoreText: false}
 	bot := newTestBot(st, cfg)
 	row := RequestLog{
 		ID: "$strip-1", Route: routeWebThenGrok, RouterSource: "classifier",
 		QueryText: "secret query", SearchQuery: "secret search", AnswerText: "secret answer",
 		NeedsWeb: true, WebDecidedBy: "classifier_needs_web", OK: true,
 	}
 	bot.recordTelemetry(context.Background(), row)
 	// The row is not guaranteed to exist when recordTelemetry returns, so poll for it
 	// for up to two seconds before reading it back.
 	deadline := time.Now().Add(2 * time.Second)
 	for {
 		if requestLogCount(t, st) != 0 || !time.Now().Before(deadline) {
 			break
 		}
 		time.Sleep(20 * time.Millisecond)
 	}
 	ctx, cancel := opContext()
 	defer cancel()
 	// Pointer targets so a SQL NULL is distinguishable from an empty string.
 	var (
 		queryText, searchQuery, answerText, decidedBy *string
 		needsWeb                                      bool
 	)
 	err := st.pool.QueryRow(ctx,
 		`SELECT query_text, search_query, answer_text, web_decided_by, needs_web FROM request_log WHERE id=$1`,
 		"$strip-1").Scan(&queryText, &searchQuery, &answerText, &decidedBy, &needsWeb)
 	if err != nil {
 		t.Fatalf("read: %v", err)
 	}
 	if queryText != nil || searchQuery != nil || answerText != nil {
 		t.Fatalf("text columns must be NULL when store-text off: qt=%v sq=%v ans=%v", queryText, searchQuery, answerText)
 	}
 	// Metadata is still recorded (it is not content).
 	if !needsWeb || decidedBy == nil || *decidedBy != "classifier_needs_web" {
 		t.Fatalf("metadata signals must survive: needsWeb=%v decidedBy=%v", needsWeb, decidedBy)
 	}
 }
 // TestTelemetryDisabledWritesNothing proves the default (TELEMETRY_ENABLED off) adds
 // no write path — strict "cascade-off == today".
 func TestTelemetryDisabledWritesNothing(t *testing.T) {
--- a/apps/ai-bot/web.go
+++ b/apps/ai-bot/web.go
@ -20,9 +20,12 @@ import (
 //     API (/v1/responses). NB the older chat/completions Live Search `search_parameters`
 //     mechanism was RETIRED by xAI (now 410 Gone), and the web_search tool is not on
 //     chat/completions — hence the Responses endpoint. Billed $5/1k tool calls + tokens.
-//   - gemini_grounding: Gemini native v1beta google_search. Cheaper, but Gemini-3 only
+//   - gemini_grounding: Gemini native v1beta google_search. Cheaper. Works on current
-//     and silently ungrounds otherwise (F-EXT-3) — so it runs behind a citations
+//     models INCLUDING gemini-2.5-flash-lite (verified against ai.google.dev — the 2.5
-//     verify-gate and degrades if it fails.
+//     family supports google_search; only legacy models use google_search_retrieval).
 //     The F-EXT-3 "silently ungrounds" caveat is about the OpenAI-compat endpoint, NOT
 //     the model version — so this provider uses the NATIVE v1beta path and runs behind a
 //     citations verify-gate, degrading if no citations come back.
 //
 // The web call is bounded by a per-stage timeout (and gemini_grounding additionally by a
 // durable daily cap), and either provider failing degrades the request to grok_direct
@ -196,12 +199,13 @@ func (p *grokWebSearch) Fetch(ctx context.Context, query string) (WebContext, er
 	return wc, nil
 }
-// --- gemini_grounding (Gemini-3 native only) --------------------------------------
+// --- gemini_grounding (native v1beta google_search; current models incl. 2.5) ------
 // geminiGrounding holds what the gemini_grounding Fetch path uses:
 //   - gem: the Gemini client that performs the grounded search;
 //   - st: the store behind the daily grounding cap counter (incremented on admission,
 //     decremented when a slot is refunded);
 //   - cfg: supplies the daily cap, the Gemini model used for costing, and the
 //     per-grounded-prompt fee;
 //   - logger: optional — Fetch checks it for nil before logging.
 type geminiGrounding struct {
-	gem *geminiClient
+	gem    *geminiClient
-	st  *Store
+	st     *Store
-	cfg *Config
+	cfg    *Config
 	logger *slog.Logger
 }
 func (p *geminiGrounding) Fetch(ctx context.Context, query string) (WebContext, error) {
@ -213,11 +217,24 @@ func (p *geminiGrounding) Fetch(ctx context.Context, query string) (WebContext,
 	if ok, err := p.st.IncrGroundingIfUnder(p.cfg.WebGroundingDailyCap); err != nil {
 		return WebContext{}, err
 	} else if !ok {
-		return WebContext{}, errGroundingCapped
+		return WebContext{}, errGroundingCapped // hit BEFORE billing → no fee, no slot consumed
 	}
 	res, err := p.gem.groundedSearch(ctx, query) // errors (incl. no-citations) → caller degrades
-	cost := CostBreakdown{Grounding: computeUSD(p.cfg.GeminiModel, res.Usage, p.cfg)}
+	// SG1: the prompt is admitted, so treat it as billed — book the token cost AND the
 	// per-grounded-prompt fee, even on the error return. The fee is the money truth the
 	// $10 ceiling must see; it is kept separate from the cap quota below.
 	cost := CostBreakdown{
 		Grounding:    computeUSD(p.cfg.GeminiModel, res.Usage, p.cfg),
 		GroundingFee: p.cfg.GeminiGroundingPerPrompt,
 	}
 	if err != nil {
 		// SG4: the admitted slot produced no usable grounding (no citations, or the call
 		// failed). Refund the cap slot so over-routing / failed fetches don't burn the
 		// day's grounded-answer budget — independent of the fee, which stays booked.
 		// Best-effort: a failed refund only slightly tightens the cap, never money.
 		if derr := p.st.DecrGrounding(); derr != nil && p.logger != nil {
 			p.logger.WarnContext(ctx, "grounding cap refund failed (non-fatal)", "err", derr)
 		}
 		return WebContext{Cost: cost, Usage: res.Usage}, err
 	}
 	return WebContext{Digest: res.Digest, Citations: res.Citations, Usage: res.Usage, Cost: cost}, nil