feat(ai-bot): paranoid epistemic-risk router that grounds checkable facts behind default-off flags, with booked grounding fee and offline eval harness

This commit is contained in:
heaven 2026-06-02 21:08:27 +03:00
parent 6f19feac91
commit 7ae77da2d0
21 changed files with 1818 additions and 213 deletions

View file

@ -1,4 +1,5 @@
.env
state/
ai-bot
/routereval
*.local

View file

@ -162,11 +162,13 @@ offline-eval gate (misroute < 2-3% AND measured saving > the second provider's c
| Env | Default | Meaning |
|---|---|---|
| `ROUTER_ENABLED` | false | Layer-0 heuristic router (else everything → grok_direct) |
| `ROUTER_CLASSIFIER_ENABLED` | false | Layer-1 Gemini classifier on uncertain cases (requires `ROUTER_ENABLED` + Gemini key) |
| `ROUTER_CLASSIFIER_ENABLED` | false | Layer-1 Gemini classifier — runs on **every** message when on (not just uncertain ones): it must agree with the Layer-0 heuristic before a message is routed as trivial and, with `WEB_PARANOID`, escalates checkable-fact lookups to the web route. Budget ~$0.00004/msg, reserved unconditionally. Requires `ROUTER_ENABLED` + Gemini key. |
| `TRIVIAL_OFFLOAD_ENABLED` | false | answer trivial messages with Gemini (requires Gemini key) |
| `WEB_ENABLED` | false | web_then_grok route (Gemini/Grok fetches fresh facts, **Grok stays the voice**) |
| `WEB_PROVIDER` | `grok_web_search` | `grok_web_search` (xAI Agent Tools `web_search` on the Responses API, $5/1k calls, no Gemini key) or `gemini_grounding` (**cheapest**: Gemini does the fetch via native v1beta `google_search`, Grok voices it — ~$0.0013/query, validated on `gemini-2.5-flash-lite`; the F-EXT-3 "Gemini-3 only" caveat is the OpenAI-compat endpoint, native v1beta works on 2.5). Requires `GEMINI_API_KEY`. |
| `WEB_GROUNDING_DAILY_CAP` | 450 | durable per-day cap for `gemini_grounding` before degrading (keep < the 500/day free grounding RPD; guards the per-1k overage) |
| `WEB_PARANOID` | false | **the single switch that activates epistemic grounding.** Beyond freshness words, it unlocks the classifier-driven web arms (needs_web≥0.55, obscure entity, time-sensitive, lookup-hint) — i.e. it routes checkable-fact lookups (a film's cast, a date) to grounding instead of letting Grok answer from memory and hallucinate. With it off, web routing is freshness-only (i.e. unchanged from the current behaviour), so turning on the classifier alone is web-routing-neutral. **Requires `WEB_PROVIDER=gemini_grounding`** (refuses to boot on `grok_web_search`, which has no daily cap). |
| `WEB_GROUNDING_DAILY_CAP` | 450 | durable per-day cap for `gemini_grounding` before degrading. Google gives **1,500 grounded requests/day free** (shared Flash/Flash-Lite, both free & paid tiers; verified ai.google.dev/pricing); keep the cap **under 1,500** so grounding stays free (token-only). Must be > 0 for `gemini_grounding` (a non-positive cap silently disables grounding → refuses to boot). |
| `GEMINI_GROUNDING_PER_PROMPT_USD` | 0.035 | the per-grounded-prompt FEE booked into the ledger so the `DAILY_USD_CEILING` accounts for it. The fee is **$35/1k = $0.035** but ONLY applies **above** the 1,500/day free allowance. So while `WEB_GROUNDING_DAILY_CAP ≤ 1,500` (e.g. the 450 default) grounding never hits the fee → **set `0`** (the bot then books only token cost, which is correct). Set `0.035` only if you raise the cap above 1,500/day, so the ceiling throttles before silently overrunning on requests #1501+. |
| `REASONING_ENABLED` | false | manual "think harder" route on `REASONING_TRIGGER` |
| `REASONING_TRIGGER` | `подумай глубже` | trigger phrase |
| `REASONING_MODEL` | `grok-4.3` | a **reasoning-capable** model (the default `grok-4.20-non-reasoning` rejects `reasoning_effort`) |

View file

@ -111,7 +111,7 @@ func NewBot(ctx context.Context, cfg *Config, logger *slog.Logger) (*Bot, error)
}
if cfg.WebEnabled {
if cfg.WebProvider == webProviderGeminiGrounding {
b.web = &geminiGrounding{gem: gc, st: st, cfg: cfg}
b.web = &geminiGrounding{gem: gc, st: st, cfg: cfg, logger: logger}
} else {
b.web = newGrokWebSearch(cfg, logger)
}
@ -466,7 +466,7 @@ func (b *Bot) respond(ctx context.Context, roomID, threadRoot string, isDM bool,
defer cancel()
msgs := buildContext(b.cfg.SystemPrompt, history, isDM, mc.Body, b.cfg.MaxCtxEvent, maxPromptTokens)
res, err := b.generate(genCtx, mc.Body, msgs, b.convID(roomID, threadRoot))
res, err := b.generate(genCtx, mc.Body, msgs, b.convID(roomID, threadRoot), isDM)
// Record what the routing + generation actually did, whatever the outcome.
rl.Route = res.route
@ -487,6 +487,20 @@ func (b *Bot) respond(ctx context.Context, roomID, threadRoot string, isDM bool,
if res.degraded != "" {
rl.Degraded = res.degraded
}
// Classifier signals + web outcome for the offline eval (§8). Booleans/counts are
// metadata (always recorded when telemetry is on); SearchQuery/AnswerText are content
// (stripped unless TELEMETRY_STORE_TEXT — see recordTelemetry).
rl.NeedsWeb = res.decision.NeedsWeb
rl.EntityObscure = res.decision.EntityObscure
rl.TimeSensitive = res.decision.TimeSensitive
rl.Verifiable = res.decision.Verifiable
rl.TrivialScore = res.decision.TrivialScore
rl.WebDecidedBy = res.decision.WebDecidedBy
rl.RewriteUsed = res.rewriteUsed
rl.WebGrounded = res.webGrounded
rl.CitationCount = res.citationCount
rl.SearchQuery = res.searchQuery
rl.AnswerText = res.text
// The full routing/generation picture for one request, in one line: which route ran,
// whether it was a fallback, the degrade reason (if any), the per-stage timings and
@ -494,7 +508,9 @@ func (b *Bot) respond(ctx context.Context, roomID, threadRoot string, isDM bool,
b.log.DebugContext(ctx, "generation outcome",
"route", res.route, "router_source", res.decision.Source,
"router_confidence", res.decision.Confidence, "fallback", res.fallback,
"degraded", res.degraded, "stage_ms", res.stageMS, "usd", res.cost.Total())
"degraded", res.degraded, "stage_ms", res.stageMS, "usd", res.cost.Total(),
"web_grounded", res.webGrounded, "citation_count", res.citationCount,
"grounding_fee_usd", res.cost.GroundingFee, "rewrite_used", res.rewriteUsed)
if err != nil {
// Terminal: even grok_direct failed. Settle whatever the cascade ACTUALLY spent

View file

@ -30,6 +30,14 @@ type genResult struct {
fallback bool // true if we degraded off the decided route
degraded string // degrade reason for request_log
stageMS map[string]int
// Web-route outcome (for request_log §8): the resolved query actually sent to Fetch,
// whether the context-resolved rewrite was used (vs the bare body), and whether the
// fetch came back grounded with citations (a zero-citation synth is a silent false-web).
searchQuery string
rewriteUsed bool
webGrounded bool
citationCount int
}
// msSince reports the wall-clock time elapsed since t, in whole milliseconds.
func msSince(t time.Time) int {
	elapsed := time.Since(t)
	return int(elapsed.Milliseconds())
}
@ -47,33 +55,49 @@ func (b *Bot) reserveEstimate() float64 {
// fetch can search several times and pull large context; reserve generously.
est += float64(maxWebSearchCalls)*grokWebSearchPerCall + b.estimateUSD(b.cfg.XAIModel)
} else {
est += b.estimateUSD(b.cfg.GeminiModel)
// gemini grounding: the fetch's tokens PLUS the per-grounded-prompt fee (§7
// SG2), so the admission envelope is a true upper bound once the fee is booked.
est += b.estimateUSD(b.cfg.GeminiModel) + b.cfg.GeminiGroundingPerPrompt
}
}
if b.cfg.ReasoningEnabled {
// Higher reasoning effort can burn more output tokens; reserve double.
est = max(est, 2*b.estimateUSD(b.cfg.ReasoningModel))
}
// The always-on Layer-1 classifier leg (§7 Finding 4): a cheap Gemini call on every
// message when the classifier is enabled, so reserved ≥ actual stays true. Added after
// the max() so it is never swallowed by the reasoning branch.
if b.cfg.RouterClassifierEnabled {
est += b.estimateUSD(b.cfg.GeminiModel)
}
return est
}
// generate routes and produces an answer, degrading to grok_direct on any failure.
// It returns a terminal error ONLY if even grok_direct fails; every other route falls
// through to grok_direct rather than erroring.
func (b *Bot) generate(ctx context.Context, body string, msgs []Message, convID string) (genResult, error) {
func (b *Bot) generate(ctx context.Context, body string, msgs []Message, convID string, isDM bool) (genResult, error) {
res := genResult{stageMS: map[string]int{}, finalModel: b.cfg.XAIModel}
// The privacy-minimised conversation window for the classifier + follow-up rewrite.
// DM-resolved (last ≤2 turns); bare trigger in groups (no cross-member subject bleed).
rcx := routerContext(msgs, isDM)
t0 := time.Now()
res.decision = b.classify(ctx, body, &res.cost) // accumulates cost.Router if Layer-1 runs
res.decision = b.classify(ctx, body, rcx, &res.cost) // accumulates cost.Router if Layer-1 runs
res.stageMS["router"] = msSince(t0)
res.route = res.decision.Route
// The router's pre-dispatch verdict (what it chose, why, how sure). On a degrade the
// route that actually runs differs from this — respond logs that final outcome — so
// the two lines together show "router wanted X, we ran Y". DEBUG: routing diagnostics.
// the two lines together show "router wanted X, we ran Y". DEBUG: routing diagnostics,
// content-free (the resolved search_query is NOT logged here — it's a gated path, §8).
b.log.DebugContext(ctx, "route decided",
"route", res.decision.Route, "source", res.decision.Source,
"confidence", res.decision.Confidence, "needs_web", res.decision.NeedsWeb,
"web_decided_by", res.decision.WebDecidedBy, "verifiable", res.decision.Verifiable,
"entity_obscure", res.decision.EntityObscure, "time_sensitive", res.decision.TimeSensitive,
"trivial", res.decision.TrivialScore, "lookup_hint", res.decision.LookupHint,
"reasoning_level", res.decision.ReasoningLevel)
finalMsgs := msgs
@ -89,14 +113,21 @@ func (b *Bot) generate(ctx context.Context, body string, msgs []Message, convID
}
case routeWebThenGrok:
if b.cfg.WebEnabled && b.web != nil {
if err := b.genWebThenGrok(ctx, body, msgs, convID, &res); err == nil {
if err := b.genWebThenGrok(ctx, body, isDM, msgs, convID, &res); err == nil {
return res, nil
} else {
b.log.WarnContext(ctx, "web route failed; degrading to grok_direct", "err", err, "reason", res.degraded)
b.degradeTo(&res, degradeWeb)
// The question wanted fresh facts but we have none — answer from training
// knowledge WITH an honest staleness caveat, not stale-as-current (§8.2.1).
finalMsgs = hedgeMessages(msgs)
// We have no fresh facts. For a RECENCY miss, hedge with an honest staleness
// caveat (§8.2.1). For a STATIC verifiable-fact miss (a film cast, a date),
// the staleness caveat is wrong — a stale caveat on a wrong cast still ships
// the wrong cast — so instruct Grok to ABSTAIN on specific names/dates/numbers
// instead of emitting a confident guess (§4.4).
if res.decision.factualMiss() {
finalMsgs = factualAbstainMessages(msgs)
} else {
finalMsgs = hedgeMessages(msgs)
}
}
}
case routeReason:
@ -209,17 +240,38 @@ const webStageTimeout = 15 * time.Second
// accounts for the spend before the caller degrades to grok_direct (the partial cascade
// case, §8.1). The daily cap and per-stage deadline are applied here, uniformly for both
// providers.
func (b *Bot) genWebThenGrok(ctx context.Context, body string, msgs []Message, convID string, res *genResult) error {
func (b *Bot) genWebThenGrok(ctx context.Context, body string, isDM bool, msgs []Message, convID string, res *genResult) error {
// DM-gated rewrite-with-fallback (§6): use the classifier's self-contained,
// follow-up-resolved query, but ONLY in a DM (a group buffer interleaves members'
// topics) and only when it's present and not over-long; otherwise the bare body — so
// the fetch is never worse than today. Sanitise before egress (it is model-authored
// text going to an external search API): collapse control chars/whitespace, cap length.
q := body
if isDM {
if sq := strings.TrimSpace(res.decision.SearchQuery); sq != "" && len([]rune(sq)) <= 200 {
q, res.rewriteUsed = sq, true
}
}
q = sanitizeSearchQuery(q)
if q == "" {
q, res.rewriteUsed = sanitizeSearchQuery(body), false // never send an empty query
}
res.searchQuery = q
// Per-stage web/grounding deadline, independent of the overall budget.
wctx, cancelW := context.WithTimeout(ctx, webStageTimeout)
tw := time.Now()
wc, ferr := b.web.Fetch(wctx, body)
wc, ferr := b.web.Fetch(wctx, q)
cancelW()
res.stageMS["web"] = msSince(tw)
// Book the fetch's fee + tokens whether or not it produced a usable digest — the call
// was billed (the daily cap, if any, is enforced inside the provider).
// was billed (the daily cap, if any, is enforced inside the provider). GroundingFee is
// the per-grounded-prompt overage (§7 SG1), booked even on the error return.
res.cost.Grounding += wc.Cost.Grounding
res.cost.GroundingFee += wc.Cost.GroundingFee
res.cost.WebTool += wc.Cost.WebTool
res.citationCount = len(wc.Citations)
res.webGrounded = len(wc.Citations) > 0
webUsage := wc.Usage
if ferr != nil {
if errors.Is(ferr, errGroundingCapped) {
@ -227,6 +279,12 @@ func (b *Bot) genWebThenGrok(ctx context.Context, body string, msgs []Message, c
}
return ferr // web fee already booked; caller degrades to grok_direct (with hedge)
}
// A non-empty digest with NO citations is a silent false-web (the answer is synthesised
// from an ungrounded fetch). gemini_grounding errors out before here; grok_web_search
// can reach this — surface it at WARN so it's visible at the default level (§8).
if len(wc.Citations) == 0 {
b.log.WarnContext(ctx, "web no-citation synth (ungrounded digest)", "provider", b.cfg.WebProvider)
}
tf := time.Now()
resp, err := b.llm.Complete(ctx, LLMRequest{
@ -256,23 +314,70 @@ func (b *Bot) genWebThenGrok(ctx context.Context, body string, msgs []Message, c
return nil
}
// webSynthMessages inserts the fresh web digest (and its sources) as a system note just
// after the system prompt, so Grok answers in voice using current facts.
// webSynthMessages inserts the fresh web digest as a system note just after the system
// prompt, so Grok answers in voice using current facts. It deliberately does NOT pass the
// raw citation URLs into the prompt, nor ask Grok to "cite sources": gemini grounding
// returns opaque vertexaisearch.../grounding-api-redirect/... redirect links (not publisher
// URLs), and instructing Grok to cite made it paste those ugly redirects verbatim into the
// reply and mis-attribute them ("ссылок из твоего сообщения"). The grounding already
// happened (citation_count is recorded for telemetry); the user wants the answer, not
// Google's internal redirect links. Real source attribution (resolving redirects to
// domains) is a separate, deferred feature.
func webSynthMessages(base []Message, wc WebContext) []Message {
facts := "Свежие данные из веба (используй их в ответе и сошлись на источники):\n" + wc.Digest
if len(wc.Citations) > 0 {
facts += "\nИсточники: " + strings.Join(wc.Citations, ", ")
}
facts := "Свежие данные из веба — ответь на их основе, кратко и по делу, без URL и ссылок:\n" + wc.Digest
return insertSystemNote(base, facts)
}
// hedgeMessages adds an honest staleness caveat for a web→grok_direct degrade: the user
// wanted fresh facts but we couldn't fetch them, so the model must flag that its answer
// is from training knowledge and may be out of date.
// hedgeMessages builds the prompt for a web→grok_direct degrade on a RECENCY query.
// The user wanted fresh facts but none could be fetched, so the returned window is base
// plus one extra system note (added via insertSystemNote) telling the model to answer
// from training-time knowledge and to warn honestly that the data may be out of date.
func hedgeMessages(base []Message) []Message {
	const stalenessNote = "Нет доступа к свежим источникам прямо сейчас — отвечай по знаниям на момент обучения и честно предупреди, что данные могут быть устаревшими."
	return insertSystemNote(base, stalenessNote)
}
// factualAbstainMessages builds the prompt for a web degrade on a STATIC verifiable-fact
// question (§4.4). A staleness caveat would be wrong here: the fact is not stale, it is
// checkable, and the model may simply not know it. The returned window is base plus one
// extra system note (added via insertSystemNote) telling the model to admit uncertainty
// about specific names, dates, years, numbers or cast lists instead of presenting a
// guess as fact — the hallucinated-film-cast failure this redesign exists to stop.
func factualAbstainMessages(base []Message) []Message {
	const abstainNote = "Не удалось проверить факты через веб. Если ответ зависит от конкретных имён, дат, годов, чисел или состава — честно скажи, что не уверен в точной фактуре и можешь ошибаться; НЕ выдавай догадку за факт."
	return insertSystemNote(base, abstainNote)
}
// factualMiss selects the hedge for a web degrade: true means the abstain hedge (a
// static, checkable-fact question), false means the staleness hedge (a recency
// question) or no factual signal at all.
//
// Recency always wins: a non-empty Freshness or a TimeSensitive verdict returns false
// regardless of the other fields. Otherwise any of Verifiable, EntityObscure or NeedsWeb
// returns true — including a NeedsWeb-only verdict, so an off-spec classifier answer
// still abstains rather than emitting a confident guess.
func (d RouterDecision) factualMiss() bool {
	switch {
	case d.Freshness != "", d.TimeSensitive:
		// Recency signal: the staleness caveat is the right hedge.
		return false
	case d.Verifiable, d.EntityObscure, d.NeedsWeb:
		return true
	}
	return false
}
// sanitizeSearchQuery prepares a (possibly model-authored) query for egress to an
// external search API. It never trusts the model to have produced clean, bounded text:
//
//   - whitespace-like control characters (tab, LF, VT, FF, CR, NEL) become a space, so
//     the words on either side stay separate instead of being glued together;
//   - every other C0/C1 control character, and DEL, is dropped;
//   - runs of whitespace collapse to a single space and both ends are trimmed;
//   - the result is capped at 200 runes (runes, not bytes, so multi-byte text such as
//     Cyrillic is never cut mid-character).
//
// An empty, all-whitespace or all-control input yields "".
func sanitizeSearchQuery(q string) string {
	const maxRunes = 200

	q = strings.Map(func(r rune) rune {
		switch {
		case r == '\t', r == '\n', r == '\v', r == '\f', r == '\r', r == 0x85:
			// Whitespace-like controls separate words; keep the separation.
			return ' '
		case r < 0x20, r >= 0x7f && r <= 0x9f:
			// Remaining C0 controls, DEL and C1 controls carry no query meaning.
			return -1
		}
		return r
	}, q)

	// Fields splits on any Unicode whitespace, so this also trims both ends.
	q = strings.Join(strings.Fields(q), " ")

	if runes := []rune(q); len(runes) > maxRunes {
		// The cut may land right after a separator; trim so the result never ends in one.
		q = strings.TrimSpace(string(runes[:maxRunes]))
	}
	return q
}
// insertSystemNote inserts an extra system message right after the system prompt
// (base[0] from buildContext), preserving the rest of the window.
func insertSystemNote(base []Message, content string) []Message {

View file

@ -5,6 +5,7 @@ import (
"errors"
"io"
"log/slog"
"strings"
"testing"
)
@ -29,13 +30,15 @@ func (f *fakeLLM) Complete(_ context.Context, req LLMRequest) (*LLMResponse, err
}
type fakeWeb struct {
wc WebContext
err error
calls int
wc WebContext
err error
calls int
lastQuery string
}
func (f *fakeWeb) Fetch(_ context.Context, _ string) (WebContext, error) {
func (f *fakeWeb) Fetch(_ context.Context, q string) (WebContext, error) {
f.calls++
f.lastQuery = q
if f.err != nil {
return WebContext{}, f.err
}
@ -71,7 +74,7 @@ func TestGenerateAllFlagsOffIsGrokDirect(t *testing.T) {
cfg := cascadeCfg()
b := &Bot{cfg: &cfg, llm: grok, gemini: gem, log: discardLog()}
res, err := b.generate(context.Background(), "привет", msgs("привет"), "")
res, err := b.generate(context.Background(), "привет", msgs("привет"), "", true)
if err != nil {
t.Fatalf("generate: %v", err)
}
@ -93,7 +96,7 @@ func TestGenerateTrivialOffload(t *testing.T) {
cfg.RouterEnabled, cfg.TrivialOffloadEnabled = true, true
b := &Bot{cfg: &cfg, llm: grok, gemini: gem, log: discardLog()}
res, err := b.generate(context.Background(), "привет", msgs("привет"), "")
res, err := b.generate(context.Background(), "привет", msgs("привет"), "", true)
if err != nil {
t.Fatalf("generate: %v", err)
}
@ -114,7 +117,7 @@ func TestGenerateTrivialDegradesToGrok(t *testing.T) {
cfg.RouterEnabled, cfg.TrivialOffloadEnabled = true, true
b := &Bot{cfg: &cfg, llm: grok, gemini: gem, log: discardLog()}
res, err := b.generate(context.Background(), "привет", msgs("привет"), "")
res, err := b.generate(context.Background(), "привет", msgs("привет"), "", true)
if err != nil {
t.Fatalf("generate: %v", err)
}
@ -129,6 +132,8 @@ func TestGenerateTrivialDegradesToGrok(t *testing.T) {
}
}
// TestGenerateWebThenGrok: a freshness query (classifier off → Layer-0 web) fetches then
// has Grok synthesise, booking both calls' tokens + the web fee.
func TestGenerateWebThenGrok(t *testing.T) {
grok := &fakeLLM{text: "synthesised", usage: Usage{PromptTokens: 100, CompletionTokens: 50}}
web := &fakeWeb{wc: WebContext{Digest: "fresh facts", Citations: []string{"http://src"}, Cost: CostBreakdown{WebTool: 0.1}}}
@ -136,7 +141,7 @@ func TestGenerateWebThenGrok(t *testing.T) {
cfg.RouterEnabled, cfg.WebEnabled = true, true
b := &Bot{cfg: &cfg, llm: grok, web: web, log: discardLog()}
res, err := b.generate(context.Background(), "какие новости сегодня", msgs("какие новости сегодня"), "")
res, err := b.generate(context.Background(), "какие новости сегодня", msgs("какие новости сегодня"), "", true)
if err != nil {
t.Fatalf("generate: %v", err)
}
@ -146,13 +151,16 @@ func TestGenerateWebThenGrok(t *testing.T) {
if res.cost.WebTool != 0.1 || res.cost.Token <= 0 {
t.Fatalf("cost = %+v, want WebTool 0.1 + Token>0", res.cost)
}
if !res.webGrounded || res.citationCount != 1 {
t.Fatalf("webGrounded=%v citations=%d, want true/1", res.webGrounded, res.citationCount)
}
if web.calls != 1 || grok.calls != 1 {
t.Fatalf("calls web=%d grok=%d, want 1/1", web.calls, grok.calls)
}
}
// TestGenerateWebDegradesToGrok: a web fetch failure (provider down or cap hit) degrades
// to grok_direct and books no web cost.
// TestGenerateWebDegradesToGrok: a web fetch failure (cap hit) degrades to grok_direct,
// books no web cost, and — being a RECENCY query — uses the staleness hedge, not abstain.
func TestGenerateWebDegradesToGrok(t *testing.T) {
grok := &fakeLLM{text: "grok fallback"}
web := &fakeWeb{err: errGroundingCapped}
@ -160,7 +168,7 @@ func TestGenerateWebDegradesToGrok(t *testing.T) {
cfg.RouterEnabled, cfg.WebEnabled = true, true
b := &Bot{cfg: &cfg, llm: grok, web: web, log: discardLog()}
res, err := b.generate(context.Background(), "новости сегодня", msgs("новости сегодня"), "")
res, err := b.generate(context.Background(), "новости сегодня", msgs("новости сегодня"), "", true)
if err != nil {
t.Fatalf("generate: %v", err)
}
@ -173,6 +181,10 @@ func TestGenerateWebDegradesToGrok(t *testing.T) {
if res.cost.WebTool != 0 || res.cost.Grounding != 0 {
t.Fatalf("web cost = %+v, want 0 (fetch failed before billing)", res.cost)
}
// Recency miss → staleness hedge ("устаревшими"), not the factual-abstain hedge.
if !hedgeContains(grok.lastReq.Messages, "устаревш") {
t.Fatalf("freshness degrade should use the staleness hedge; messages = %+v", grok.lastReq.Messages)
}
}
// TestGenerateReasoningForced: the manual trigger routes to the reasoning model with
@ -183,7 +195,7 @@ func TestGenerateReasoningForced(t *testing.T) {
cfg.ReasoningEnabled = true // ROUTER_ENABLED deliberately left off
b := &Bot{cfg: &cfg, llm: grok, log: discardLog()}
res, err := b.generate(context.Background(), "подумай глубже про сознание", msgs("подумай глубже про сознание"), "")
res, err := b.generate(context.Background(), "подумай глубже про сознание", msgs("подумай глубже про сознание"), "", true)
if err != nil {
t.Fatalf("generate: %v", err)
}
@ -195,29 +207,240 @@ func TestGenerateReasoningForced(t *testing.T) {
}
}
// TestClassifierConfidenceFloor: a Layer-1 classifier label that escalates off the safe
// floor (trivial/web) must clear the confidence floor, else the request stays on
// grok_direct — the false-trivial voice-leak guard (§8.6).
func TestClassifierConfidenceFloor(t *testing.T) {
// TestClassifyTrivialAgreementGate: a trivial route requires the Layer-0 candidate AND
// classifier.trivial AND confidence ≥ trivialFloor. A low-confidence "trivial" or a
// classifier that disagrees stays on grok_direct (no voice leak).
func TestClassifyTrivialAgreementGate(t *testing.T) {
cfg := cascadeCfg()
cfg.RouterEnabled, cfg.RouterClassifierEnabled = true, true
gem := &fakeLLM{}
b := &Bot{cfg: &cfg, gemini: gem, log: discardLog()}
var cost CostBreakdown
const substantive = "напиши подробное эссе про историю римской империи" // Layer-0 → grok_direct
gem.text = `{"route":"trivial","confidence":0.2}` // low-confidence escalation
if d := b.classify(context.Background(), substantive, &cost); d.Route != routeGrokDirect {
t.Fatalf("low-confidence trivial must stay grok_direct (safe floor), got %q", d.Route)
gem.text = `{"trivial":true,"needs_web":false,"confidence":0.95}`
if d := b.classify(context.Background(), "привет", "USER: привет", &cost); d.Route != routeTrivial {
t.Fatalf("agreed high-confidence trivial = %q, want trivial", d.Route)
}
gem.text = `{"route":"trivial","confidence":0.95}` // confident escalation is honoured
if d := b.classify(context.Background(), substantive, &cost); d.Route != routeTrivial {
t.Fatalf("high-confidence trivial should route trivial, got %q", d.Route)
gem.text = `{"trivial":true,"needs_web":false,"confidence":0.5}`
if d := b.classify(context.Background(), "привет", "USER: привет", &cost); d.Route != routeGrokDirect {
t.Fatalf("low-confidence trivial = %q, want grok_direct (no leak)", d.Route)
}
// A classifier error degrades to the Layer-0 verdict (grok_direct), never silence.
gem.text, gem.err = "", errors.New("gemini down")
if d := b.classify(context.Background(), substantive, &cost); d.Route != routeGrokDirect {
t.Fatalf("classifier failure must fall back to heuristic grok_direct, got %q", d.Route)
// A non-trivial body can never be trivial even if the classifier claims so.
gem.text = `{"trivial":true,"needs_web":false,"confidence":0.99}`
const substantive = "напиши подробное эссе про историю римской империи"
if d := b.classify(context.Background(), substantive, "USER: …", &cost); d.Route != routeGrokDirect {
t.Fatalf("classifier.trivial on a substantive body = %q, want grok_direct", d.Route)
}
}
// TestClassifyClassifierErrorFallsBackToLayer0 checks that a failing classifier (a
// transport error, or a reply that is not JSON) leaves the deterministic Layer-0 verdict
// in force: grok_direct for a substantive body, web for a freshness body. It must never
// produce an ungrounded confident answer, and never degrade *to* web.
func TestClassifyClassifierErrorFallsBackToLayer0(t *testing.T) {
	const (
		essayBody     = "напиши эссе про рим"
		freshnessBody = "новости сегодня"
		routerCtx     = "USER: …"
	)

	cfg := cascadeCfg()
	cfg.RouterEnabled = true
	cfg.RouterClassifierEnabled = true
	cfg.WebParanoid = true

	classifier := &fakeLLM{}
	bot := &Bot{cfg: &cfg, gemini: classifier, log: discardLog()}
	ctx := context.Background()
	var spent CostBreakdown

	// Transport error → Layer-0.
	classifier.err = errors.New("gemini down")
	if got := bot.classify(ctx, essayBody, routerCtx, &spent).Route; got != routeGrokDirect {
		t.Fatalf("classifier error on substantive body = %q, want grok_direct (Layer-0)", got)
	}
	if got := bot.classify(ctx, freshnessBody, routerCtx, &spent).Route; got != routeWebThenGrok {
		t.Fatalf("classifier error on freshness body = %q, want web (deterministic Layer-0 survives)", got)
	}

	// Garbage JSON (no transport error) → also Layer-0.
	classifier.err, classifier.text = nil, "not json at all"
	if got := bot.classify(ctx, essayBody, routerCtx, &spent).Route; got != routeGrokDirect {
		t.Fatalf("garbage classifier JSON = %q, want grok_direct (Layer-0)", got)
	}
}
// TestGenerateRoadHouseWebParanoidDM is the headline regression: an obscure-entity factual
// lookup in a DM, with the classifier + WEB_PARANOID on, routes to web AND the fetch uses
// the classifier's context-resolved search_query (the follow-up rewrite). With paranoid
// off it correctly stays grok_direct (the canary-neutral baseline).
func TestGenerateRoadHouseWebParanoidDM(t *testing.T) {
const verdict = `{"needs_web":true,"verifiable":true,"entity_obscure":true,"time_sensitive":false,"trivial":false,"search_query":"Дом у дороги 2024 фильм актёрский состав","confidence":0.7}`
mk := func(paranoid bool) (*fakeLLM, *fakeWeb, genResult) {
grok := &fakeLLM{text: "voiced", usage: Usage{PromptTokens: 10, CompletionTokens: 5}}
gem := &fakeLLM{text: verdict}
web := &fakeWeb{wc: WebContext{Digest: "cast: Patrick Swayze…", Citations: []string{"http://imdb"}}}
cfg := cascadeCfg()
cfg.RouterEnabled, cfg.RouterClassifierEnabled, cfg.WebEnabled, cfg.WebParanoid = true, true, true, paranoid
b := &Bot{cfg: &cfg, llm: grok, gemini: gem, web: web, log: discardLog()}
res, err := b.generate(context.Background(), "2024 года", []Message{
{Role: "system", Content: "SYS"},
{Role: "user", Content: "кто снимался в фильме дом у дороги"},
{Role: "assistant", Content: "В фильме 1989 года…"},
{Role: "user", Content: "2024 года"},
}, "", true)
if err != nil {
t.Fatalf("generate: %v", err)
}
return grok, web, res
}
_, web, res := mk(true)
if res.route != routeWebThenGrok {
t.Fatalf("paranoid DM road-house = %q, want web_then_grok (the fix)", res.route)
}
if !res.rewriteUsed || web.lastQuery != "Дом у дороги 2024 фильм актёрский состав" {
t.Fatalf("fetch should use the rewritten query: rewriteUsed=%v lastQuery=%q", res.rewriteUsed, web.lastQuery)
}
_, _, resOff := mk(false)
if resOff.route != routeGrokDirect {
t.Fatalf("paranoid OFF road-house = %q, want grok_direct (baseline)", resOff.route)
}
}
// TestGenerateFollowupGroupUsesBareBody: in a GROUP the context-resolved rewrite is
// suppressed — the fetch uses the bare (sanitised) body, never the classifier's
// search_query, so a member's follow-up can't ground the wrong prior subject.
func TestGenerateFollowupGroupUsesBareBody(t *testing.T) {
const verdict = `{"needs_web":true,"verifiable":true,"entity_obscure":true,"search_query":"какой-то чужой фильм 2024","confidence":0.7}`
grok := &fakeLLM{text: "voiced"}
gem := &fakeLLM{text: verdict}
web := &fakeWeb{wc: WebContext{Digest: "d", Citations: []string{"http://s"}}}
cfg := cascadeCfg()
cfg.RouterEnabled, cfg.RouterClassifierEnabled, cfg.WebEnabled, cfg.WebParanoid = true, true, true, true
b := &Bot{cfg: &cfg, llm: grok, gemini: gem, web: web, log: discardLog()}
res, err := b.generate(context.Background(), "2024 года", msgs("2024 года"), "", false /* group */)
if err != nil {
t.Fatalf("generate: %v", err)
}
if res.route != routeWebThenGrok {
t.Fatalf("group route = %q, want web_then_grok", res.route)
}
if res.rewriteUsed || web.lastQuery != "2024 года" {
t.Fatalf("group must use the bare body, not the rewrite: rewriteUsed=%v lastQuery=%q", res.rewriteUsed, web.lastQuery)
}
}
// TestGenerateWebEmptySearchQueryFallsBackToBody: the rewrite-with-fallback contract's
// empty arm (§6/§12). A DM web route whose classifier returned an empty search_query must
// fetch the bare (sanitised) body and report rewriteUsed=false — never an empty query.
func TestGenerateWebEmptySearchQueryFallsBackToBody(t *testing.T) {
// verifiable:true so it genuinely routes web (the needs_web arm requires verifiable);
// search_query empty is the point — the fetch must fall back to the bare body.
const verdict = `{"needs_web":true,"verifiable":true,"entity_obscure":false,"search_query":"","confidence":0.7}`
grok := &fakeLLM{text: "voiced"}
gem := &fakeLLM{text: verdict}
web := &fakeWeb{wc: WebContext{Digest: "d", Citations: []string{"http://s"}}}
cfg := cascadeCfg()
cfg.RouterEnabled, cfg.RouterClassifierEnabled, cfg.WebEnabled, cfg.WebParanoid = true, true, true, true
b := &Bot{cfg: &cfg, llm: grok, gemini: gem, web: web, log: discardLog()}
const body = "в каком году основан Рим"
res, err := b.generate(context.Background(), body, msgs(body), "", true /* DM */)
if err != nil {
t.Fatalf("generate: %v", err)
}
if res.route != routeWebThenGrok {
t.Fatalf("route = %q, want web_then_grok", res.route)
}
if res.rewriteUsed || web.lastQuery != body {
t.Fatalf("empty search_query must fall back to the bare body: rewriteUsed=%v lastQuery=%q", res.rewriteUsed, web.lastQuery)
}
}
// TestGenerateFreshnessTrapDesignedWeb: a freshness lexeme in a rumination
// ("сегодня…") still hard-routes to web (the accepted, designed cheap false-web, §14.1).
func TestGenerateFreshnessTrapDesignedWeb(t *testing.T) {
grok := &fakeLLM{text: "x"}
web := &fakeWeb{wc: WebContext{Digest: "d", Citations: []string{"http://s"}}}
cfg := cascadeCfg()
cfg.RouterEnabled, cfg.WebEnabled = true, true // classifier off — freshness alone routes
b := &Bot{cfg: &cfg, llm: grok, web: web, log: discardLog()}
res, err := b.generate(context.Background(), "сегодня я думаю о смысле жизни", msgs("сегодня я думаю о смысле жизни"), "", true)
if err != nil {
t.Fatalf("generate: %v", err)
}
if res.route != routeWebThenGrok {
t.Fatalf("freshness rumination = %q, want web_then_grok (designed)", res.route)
}
}
// TestGenerateWebDegradeFactualAbstain covers a web fetch that fails on a STATIC
// verifiable-fact query: the degrade to grok_direct must carry the factual-abstain hedge
// (not the staleness caveat), so Grok abstains on names/dates instead of shipping a
// confident guess.
func TestGenerateWebDegradeFactualAbstain(t *testing.T) {
	const (
		body    = "кто снимался в фильме дом у дороги"
		verdict = `{"needs_web":true,"verifiable":true,"entity_obscure":true,"time_sensitive":false,"search_query":"q","confidence":0.7}`
	)

	grok := &fakeLLM{text: "honest answer"}
	classifier := &fakeLLM{text: verdict}
	brokenWeb := &fakeWeb{err: errors.New("fetch boom")}

	cfg := cascadeCfg()
	cfg.RouterEnabled = true
	cfg.RouterClassifierEnabled = true
	cfg.WebEnabled = true
	cfg.WebParanoid = true

	bot := &Bot{cfg: &cfg, llm: grok, gemini: classifier, web: brokenWeb, log: discardLog()}
	out, err := bot.generate(context.Background(), body, msgs(body), "", true)
	if err != nil {
		t.Fatalf("generate: %v", err)
	}
	if !(out.route == routeGrokDirect && out.fallback) {
		t.Fatalf("res route=%q fallback=%v, want grok_direct fallback", out.route, out.fallback)
	}

	// Inspect what actually reached Grok: the abstain hedge in, the staleness hedge out.
	sent := grok.lastReq.Messages
	if !hedgeContains(sent, "Не удалось проверить") {
		t.Fatalf("factual miss should use the abstain hedge; messages = %+v", grok.lastReq.Messages)
	}
	if hedgeContains(sent, "устаревш") {
		t.Fatalf("factual miss must NOT use the staleness hedge")
	}
}
// TestFactualMissHedge is the table for the web-degrade hedge selection. A recency
// signal (Freshness or time_sensitive) selects the staleness hedge (factualMiss=false);
// a static checkable-fact signal (verifiable / entity_obscure / a non-recency needs_web)
// selects the abstain hedge (factualMiss=true).
func TestFactualMissHedge(t *testing.T) {
	table := []struct {
		decision RouterDecision
		abstain  bool // true => abstain hedge
	}{
		{decision: RouterDecision{Freshness: "recent"}, abstain: false},
		{decision: RouterDecision{TimeSensitive: true}, abstain: false},
		{decision: RouterDecision{Verifiable: true}, abstain: true},
		{decision: RouterDecision{EntityObscure: true}, abstain: true},
		// Off-spec needs_web-only verdict → abstain (Q3).
		{decision: RouterDecision{NeedsWeb: true}, abstain: true},
		// Recency still wins over needs_web.
		{decision: RouterDecision{NeedsWeb: true, TimeSensitive: true}, abstain: false},
		{decision: RouterDecision{}, abstain: false},
	}
	for i := range table {
		row := table[i]
		got := row.decision.factualMiss()
		if got == row.abstain {
			continue
		}
		t.Errorf("factualMiss(%+v) = %v, want %v", row.decision, got, row.abstain)
	}
}
// TestReserveEstimate checks the reservation envelope. With every flag off it equals
// grok_direct's estimate; with gemini grounding plus the classifier on it additionally
// covers the fetch leg, the per-prompt grounding fee AND the always-on classifier leg (§7).
func TestReserveEstimate(t *testing.T) {
	// Flags off: nothing but the grok_direct estimate is reserved.
	plainCfg := cascadeCfg()
	plain := &Bot{cfg: &plainCfg, log: discardLog()}
	base := plain.estimateUSD("grok-x")
	if got := plain.reserveEstimate(); !approxEq(got, base) {
		t.Fatalf("flags-off reserve = %v, want grok_direct estimate %v", got, base)
	}

	// Web (gemini grounding) + classifier on, with the paid-tier fee configured.
	paidCfg := cascadeCfg()
	paidCfg.WebEnabled = true
	paidCfg.WebProvider = webProviderGeminiGrounding
	paidCfg.RouterEnabled = true
	paidCfg.RouterClassifierEnabled = true
	paidCfg.GeminiGroundingPerPrompt = 0.035
	paid := &Bot{cfg: &paidCfg, log: discardLog()}
	want := paid.estimateUSD("grok-x") + paid.estimateUSD("gemini-x") + 0.035 + paid.estimateUSD("gemini-x")
	if got := paid.reserveEstimate(); !approxEq(got, want) {
		t.Fatalf("web+classifier reserve = %v, want %v (XAI + gemini fetch + $0.035 fee + classifier leg)", got, want)
	}

	// The fee must actually move the envelope (regression guard for an unbooked fee):
	// the same config with the fee zeroed has to reserve ~0.035 less.
	freeCfg := paidCfg
	freeCfg.GeminiGroundingPerPrompt = 0
	free := &Bot{cfg: &freeCfg, log: discardLog()}
	if delta := paid.reserveEstimate() - free.reserveEstimate(); delta < 0.0349 {
		t.Fatalf("the grounding fee must raise the reservation by ~0.035")
	}
}
@ -225,36 +448,33 @@ func TestClassifierConfidenceFloor(t *testing.T) {
// be kept fast with "none"), empty means not sent (compat with grok-4.20-non-reasoning),
// and the reason route always overrides to "high" regardless.
func TestGrokReasoningEffort(t *testing.T) {
// Configured effort reaches grok_direct.
grok := &fakeLLM{text: "ok"}
cfg := cascadeCfg()
cfg.GrokReasoningEffort = "none"
b := &Bot{cfg: &cfg, llm: grok, log: discardLog()}
if _, err := b.generate(context.Background(), "hello", msgs("hello"), ""); err != nil {
if _, err := b.generate(context.Background(), "hello", msgs("hello"), "", true); err != nil {
t.Fatal(err)
}
if grok.lastReq.ReasoningEffort != "none" {
t.Fatalf("grok_direct effort = %q, want none", grok.lastReq.ReasoningEffort)
}
// Empty default → not sent (so grok-4.20-non-reasoning keeps working).
grokDef := &fakeLLM{text: "ok"}
cfgDef := cascadeCfg() // GrokReasoningEffort == ""
bDef := &Bot{cfg: &cfgDef, llm: grokDef, log: discardLog()}
if _, err := bDef.generate(context.Background(), "hello", msgs("hello"), ""); err != nil {
if _, err := bDef.generate(context.Background(), "hello", msgs("hello"), "", true); err != nil {
t.Fatal(err)
}
if grokDef.lastReq.ReasoningEffort != "" {
t.Fatalf("default effort = %q, want empty (not sent)", grokDef.lastReq.ReasoningEffort)
}
// The reason route ignores GROK_REASONING_EFFORT and always uses "high".
grokR := &fakeLLM{text: "deep"}
cfgR := cascadeCfg()
cfgR.GrokReasoningEffort = "none"
cfgR.ReasoningEnabled = true
bR := &Bot{cfg: &cfgR, llm: grokR, log: discardLog()}
if _, err := bR.generate(context.Background(), "подумай глубже про X", msgs("подумай глубже про X"), ""); err != nil {
if _, err := bR.generate(context.Background(), "подумай глубже про X", msgs("подумай глубже про X"), "", true); err != nil {
t.Fatal(err)
}
if grokR.lastReq.ReasoningEffort != "high" {
@ -269,7 +489,47 @@ func TestGenerateTerminalErrorPropagates(t *testing.T) {
cfg := cascadeCfg()
b := &Bot{cfg: &cfg, llm: grok, log: discardLog()}
if _, err := b.generate(context.Background(), "hello", msgs("hello"), ""); err == nil {
if _, err := b.generate(context.Background(), "hello", msgs("hello"), "", true); err == nil {
t.Fatal("want terminal error when grok_direct fails, got nil")
}
}
// TestWebSynthMessagesNoRawURLs guards the source-leak fix. The grounded digest has to
// be injected into the synth prompt, but the raw gemini-grounding redirect URLs must NOT
// be (Grok was pasting vertexaisearch.../grounding-api-redirect/... links into the reply).
func TestWebSynthMessagesNoRawURLs(t *testing.T) {
	grounded := WebContext{
		Digest:    "Титаник вышел в 1997, режиссёр Джеймс Кэмерон.",
		Citations: []string{"https://vertexaisearch.cloud.google.com/grounding-api-redirect/AUZIYQabc123"},
	}
	prompt := webSynthMessages(msgs("в каком году титаник"), grounded)

	// Pick the system note carrying the web digest (the last one wins if several match).
	note := ""
	for i := range prompt {
		if prompt[i].Role != "system" {
			continue
		}
		if strings.Contains(prompt[i].Content, "Свежие данные") {
			note = prompt[i].Content
		}
	}
	if note == "" {
		t.Fatal("web synth note missing")
	}
	if !strings.Contains(note, "Титаник вышел в 1997") {
		t.Fatalf("digest not injected: %q", note)
	}
	for _, forbidden := range []string{"vertexaisearch", "grounding-api-redirect", "http"} {
		if strings.Contains(note, forbidden) {
			t.Fatalf("raw citation URL leaked into the synth prompt: %q", note)
		}
	}
}
// hedgeContains reports whether the Content of any message in ms contains sub.
func hedgeContains(ms []Message, sub string) bool {
	for i := range ms {
		if !strings.Contains(ms[i].Content, sub) {
			continue
		}
		return true
	}
	return false
}
// approxEq reports whether a and b differ by strictly less than 1e-9 in absolute value.
// A NaN difference (e.g. Inf-Inf) compares false.
func approxEq(a, b float64) bool {
	const eps = 1e-9
	diff := a - b
	if diff < 0 {
		diff = -diff
	}
	return diff < eps
}

View file

@ -0,0 +1,227 @@
[
{
"name": "road house first turn (obscure cast)",
"message": "кто снимался в фильме дом у дороги",
"verdict": {
"needs_web": true,
"verifiable": true,
"entity_obscure": true,
"time_sensitive": false,
"trivial": false,
"search_query": "Дом у дороги фильм актёрский состав",
"confidence": 0.7
},
"expected_route": "web_then_grok",
"factual": true
},
{
"name": "road house follow-up (DM, resolved)",
"message": "2024 года",
"verdict": {
"needs_web": true,
"verifiable": true,
"entity_obscure": true,
"time_sensitive": false,
"trivial": false,
"search_query": "Дом у дороги 2024 фильм актёрский состав",
"confidence": 0.65
},
"expected_route": "web_then_grok",
"factual": true
},
{
"name": "weather (freshness lexeme, forced web)",
"message": "погода сегодня в Москве",
"verdict": {
"needs_web": true,
"verifiable": false,
"entity_obscure": false,
"time_sensitive": true,
"trivial": false,
"search_query": "погода сегодня Москва",
"confidence": 0.95
},
"expected_route": "web_then_grok",
"factual": false
},
{
"name": "freshness rumination (accepted designed false-web, §14.1)",
"message": "сегодня я думаю о смысле жизни",
"verdict": {
"needs_web": false,
"verifiable": false,
"entity_obscure": false,
"time_sensitive": false,
"trivial": false,
"search_query": "",
"confidence": 0.2
},
"expected_route": "web_then_grok",
"factual": false
},
{
"name": "obscure entity founder (no freshness word)",
"message": "кто основал компанию Acme Widgets",
"verdict": {
"needs_web": true,
"verifiable": true,
"entity_obscure": true,
"time_sensitive": false,
"trivial": false,
"search_query": "Acme Widgets основатель компании",
"confidence": 0.6
},
"expected_route": "web_then_grok",
"factual": true
},
{
"name": "static famous fact (author lookup)",
"message": "кто написал войну и мир",
"verdict": {
"needs_web": true,
"verifiable": true,
"entity_obscure": false,
"time_sensitive": false,
"trivial": false,
"search_query": "Война и мир автор",
"confidence": 0.62
},
"expected_route": "web_then_grok",
"factual": true
},
{
"name": "current CEO (time-sensitive, sub-floor needs_web)",
"message": "кто возглавляет Tesla",
"verdict": {
"needs_web": true,
"verifiable": true,
"entity_obscure": false,
"time_sensitive": true,
"trivial": false,
"search_query": "Tesla CEO",
"confidence": 0.5
},
"expected_route": "web_then_grok",
"factual": false
},
{
"name": "greeting (trivial, high confidence)",
"message": "привет",
"verdict": {
"needs_web": false,
"verifiable": false,
"entity_obscure": false,
"time_sensitive": false,
"trivial": true,
"search_query": "",
"confidence": 0.95
},
"expected_route": "trivial_direct",
"factual": false
},
{
"name": "ack low-confidence trivial (no voice leak → grok)",
"message": "спасибо",
"verdict": {
"needs_web": false,
"verifiable": false,
"entity_obscure": false,
"time_sensitive": false,
"trivial": true,
"search_query": "",
"confidence": 0.5
},
"expected_route": "grok_direct",
"factual": false
},
{
"name": "opinion / recommendation (safe floor)",
"message": "посоветуй фильм на вечер",
"verdict": {
"needs_web": false,
"verifiable": false,
"entity_obscure": false,
"time_sensitive": false,
"trivial": false,
"search_query": "",
"confidence": 0.82
},
"expected_route": "grok_direct",
"factual": false
},
{
"name": "code help (safe floor)",
"message": "напиши функцию сортировки на python",
"verdict": {
"needs_web": false,
"verifiable": false,
"entity_obscure": false,
"time_sensitive": false,
"trivial": false,
"search_query": "",
"confidence": 0.9
},
"expected_route": "grok_direct",
"factual": false
},
{
"name": "vague needs_web below floor (stays grok)",
"message": "что ты думаешь о криптовалютах",
"verdict": {
"needs_web": true,
"verifiable": false,
"entity_obscure": false,
"time_sensitive": false,
"trivial": false,
"search_query": "",
"confidence": 0.4
},
"expected_route": "grok_direct",
"factual": false
},
{
"name": "explanation over-flagged needs_web but NOT verifiable (false-web fix)",
"message": "объясни как работают горутины в Go",
"verdict": {
"needs_web": true,
"verifiable": false,
"entity_obscure": false,
"time_sensitive": false,
"trivial": false,
"search_query": "",
"confidence": 0.9
},
"expected_route": "grok_direct",
"factual": false
},
{
"name": "ack-prefixed long real question (not trivial, safe floor)",
"message": "спасибо, а теперь подробно объясни квантовую запутанность",
"verdict": {
"needs_web": false,
"verifiable": false,
"entity_obscure": false,
"time_sensitive": false,
"trivial": false,
"search_query": "",
"confidence": 0.85
},
"expected_route": "grok_direct",
"factual": false
},
{
"name": "bare follow-up in a GROUP (no resolvable subject → grok)",
"message": "2024 года",
"verdict": {
"needs_web": false,
"verifiable": false,
"entity_obscure": false,
"time_sensitive": false,
"trivial": false,
"search_query": "",
"confidence": 0.3
},
"expected_route": "grok_direct",
"factual": false
}
]

View file

@ -0,0 +1,188 @@
// Command routereval is the OFFLINE router-replay harness for the §11 P1 gate. It reads
// a golden set of (message, recorded classifier verdict, expected route, factual flag),
// replays each item through the REAL decision functions (routedecide.ClassifyLayer0 +
// CombineWithFloors — the same code package main uses, never a copy), and reports the
// confusion matrix + the four P1 metrics: false-grok-on-factual (the lie metric),
// false-web, trivial-leak, misroute. It is fully deterministic and needs no network: it
// measures the ROUTING LAYER given a verdict, so you can sweep WEB_PARANOID and the
// floors instantly. (Classifier accuracy itself is a separate LIVE check — §11 P2.)
//
// The lie label on the web path uses the citation-presence proxy by convention: a golden
// item's `factual:true` + `expected_route:web_then_grok` marks "this MUST ground"; an
// LLM-judge over query+answer is the higher-fidelity option to wire later (§14.6/§15).
//
// Usage:
//
// go run ./cmd/routereval -golden cmd/routereval/golden_sample.json
// go run ./cmd/routereval -golden set.json -web-floor 0.7 # sweep the needs_web floor
//
// NOTE: golden_sample.json is labelled for the PRODUCTION config (paranoid ON) — its
// expected_route values assume the epistemic web arms are active. Running -paranoid=false
// against it is a what-if sweep that WILL report NO-GO (the entity facts fall to grok by
// design); it is NOT a passing baseline. To evaluate the paranoid-off behaviour, label a
// separate set whose expected_route reflects freshness-only web routing.
package main
import (
"encoding/json"
"flag"
"fmt"
"os"
rd "vojo.chat/ai-bot/internal/routedecide"
)
// goldenItem is one labelled row. Message drives the free Layer-0; Verdict is the
// recorded classifier output; ExpectedRoute + Factual are the ground-truth labels.
// The golden-set file is a JSON array of these objects, decoded with json.Unmarshal.
type goldenItem struct {
// Name is the human-readable label printed in the per-item report; any item whose
// name contains "road house" (case-insensitively) is tracked as part of the Road
// House regression pair.
Name string `json:"name"`
// Message is the raw message body passed to rd.ClassifyLayer0.
Message string `json:"message"`
// Verdict is passed unchanged to rd.CombineWithFloors alongside the Layer-0 result.
Verdict rd.Verdict `json:"verdict"`
// ExpectedRoute is compared for equality with the resolved route; it is also what
// selects the denominators of the false-web and trivial-leak metrics.
ExpectedRoute string `json:"expected_route"`
// Factual items whose ExpectedRoute is the web route form the denominator of the
// false-grok-on-factual metric.
Factual bool `json:"factual"` // a checkable-fact query that MUST ground
}
// main replays a golden set through the routing layer and prints the P1 gate report.
//
// Exit codes: 2 when the golden set cannot be read, parsed, or is empty; 1 when any
// gate fails (NO-GO); 0 on GO.
func main() {
	var (
		goldenPath   = flag.String("golden", "cmd/routereval/golden_sample.json", "path to the golden-set JSON")
		paranoid     = flag.Bool("paranoid", true, "apply the WEB_PARANOID classifier-driven web arms")
		webFloor     = flag.Float64("web-floor", rd.WebNeedsWebFloor, "needs_web confidence floor to sweep")
		trivialFloor = flag.Float64("trivial-floor", rd.TrivialFloor, "trivial confidence floor")
		verbose      = flag.Bool("v", false, "print every item, not just the mismatches")
	)
	flag.Parse()

	data, err := os.ReadFile(*goldenPath)
	if err != nil {
		fmt.Fprintf(os.Stderr, "read golden set: %v\n", err)
		os.Exit(2)
	}
	var golden []goldenItem
	if err := json.Unmarshal(data, &golden); err != nil {
		fmt.Fprintf(os.Stderr, "parse golden set: %v\n", err)
		os.Exit(2)
	}
	total := len(golden)
	if total == 0 {
		fmt.Fprintln(os.Stderr, "golden set is empty")
		os.Exit(2)
	}

	thresholds := rd.Floors{WebNeedsWeb: *webFloor, Trivial: *trivialFloor}
	fmt.Printf("routereval: %d items | paranoid=%v web-floor=%.2f trivial-floor=%.2f\n\n",
		total, *paranoid, *webFloor, *trivialFloor)

	// Tallies; each metric is a numerator over its own denominator.
	var (
		hits                            int
		factualWeb, factualWebMissed    int // false-grok-on-factual: denominator, numerator
		nonWebExpected, falseWeb        int
		nonTrivialExpected, trivialLeak int
	)
	roadHouseSeen, roadHousePass := false, true

	for _, item := range golden {
		layer0 := rd.ClassifyLayer0(item.Message)
		route := rd.CombineWithFloors(layer0, item.Verdict, *paranoid, thresholds).Route
		matched := route == item.ExpectedRoute
		if matched {
			hits++
		}

		expectWeb := item.ExpectedRoute == rd.RouteWeb
		if expectWeb && item.Factual {
			factualWeb++
			if route == rd.RouteGrokDirect {
				// A confident-lie risk: a checkable fact answered from memory.
				factualWebMissed++
			}
		}
		if !expectWeb {
			nonWebExpected++
			if route == rd.RouteWeb {
				falseWeb++
			}
		}
		if item.ExpectedRoute != rd.RouteTrivial {
			nonTrivialExpected++
			if route == rd.RouteTrivial {
				trivialLeak++
			}
		}

		// The Road House regression pair must pass (its name carries "road house").
		if contains(item.Name, "road house") {
			roadHouseSeen = true
			roadHousePass = roadHousePass && matched
		}

		if matched && !*verbose {
			continue
		}
		status := "ok "
		if !matched {
			status = "MISS"
		}
		fmt.Printf(" [%s] %-40s want=%-16s got=%-16s\n", status, trunc(item.Name, 40), item.ExpectedRoute, route)
	}

	// ratio is num/den, with an empty denominator counted as 0 (vacuously passing).
	ratio := func(num, den int) float64 {
		if den == 0 {
			return 0
		}
		return float64(num) / float64(den)
	}
	misroute := 1 - ratio(hits, total)
	lie := ratio(factualWebMissed, factualWeb)
	fw := ratio(falseWeb, nonWebExpected)
	leak := ratio(trivialLeak, nonTrivialExpected)

	// Evaluate every gate once; the report and the exit status share these results.
	lieOK := lie < 0.05
	fwOK := fw <= 0.15
	leakOK := leak == 0
	misrouteOK := misroute < 0.03
	roadHouseOK := !roadHouseSeen || roadHousePass

	fmt.Printf("\n— metrics (§11 P1 gates) —\n")
	fmt.Printf(" false-grok-on-FACTUAL : %5.1f%% (%d/%d) gate < 5%% %s\n", lie*100, factualWebMissed, factualWeb, pass(lieOK))
	fmt.Printf(" false-web : %5.1f%% (%d/%d) gate ≤ 15%% %s\n", fw*100, falseWeb, nonWebExpected, pass(fwOK))
	fmt.Printf(" trivial-leak : %5.1f%% (%d/%d) gate ~ 0%% %s\n", leak*100, trivialLeak, nonTrivialExpected, pass(leakOK))
	fmt.Printf(" misroute : %5.1f%% (%d/%d) gate < 3%% %s\n", misroute*100, total-hits, total, pass(misrouteOK))
	if roadHouseSeen {
		fmt.Printf(" road-house pair : %s\n", pass(roadHousePass))
	}

	// Exit non-zero if any gate fails, so the harness is CI/owner-runnable as a go/no-go.
	if !(lieOK && fwOK && leakOK && misrouteOK && roadHouseOK) {
		fmt.Println("\nRESULT: NO-GO (a P1 gate failed)")
		os.Exit(1)
	}
	fmt.Println("\nRESULT: GO")
}
// pass renders a gate outcome for the report: "PASS" when ok is true, "FAIL" otherwise.
func pass(ok bool) string {
	if !ok {
		return "FAIL"
	}
	return "PASS"
}
// contains reports whether sub occurs in s according to indexFold's case-insensitive
// search. An empty sub always matches.
func contains(s, sub string) bool {
	if sub == "" {
		return true
	}
	return indexFold(s, sub) >= 0
}
// indexFold returns the byte offset of the first occurrence of sub in s, comparing
// both after toLower, or -1 when there is none. An empty sub matches at offset 0.
// It is hand-rolled so this small tool does not import strings only for ToLower+Index.
func indexFold(s, sub string) int {
	hay, needle := toLower(s), toLower(sub)
	width := len(needle)
	for at, last := 0, len(hay)-width; at <= last; at++ {
		if hay[at:at+width] == needle {
			return at
		}
	}
	return -1
}
// toLower lowercases the ASCII letters A-Z in s and leaves every other byte untouched,
// so non-ASCII text (e.g. Cyrillic) passes through unchanged and the byte length of the
// result always equals len(s).
func toLower(s string) string {
	out := make([]byte, len(s))
	for i := 0; i < len(s); i++ {
		ch := s[i]
		if ch >= 'A' && ch <= 'Z' {
			ch += 'a' - 'A'
		}
		out[i] = ch
	}
	return string(out)
}
// trunc shortens s to at most n runes for fixed-width report columns. A string that
// already fits is returned unchanged; a longer one is cut to its first n-1 runes plus a
// trailing "…". Counting runes (not bytes) keeps multi-byte text such as Cyrillic intact.
//
// A non-positive n yields "": without this guard the r[:n-1] slice expression would
// panic with a negative bound for any non-empty s.
func trunc(s string, n int) string {
	if n <= 0 {
		return ""
	}
	r := []rune(s)
	if len(r) <= n {
		return s
	}
	return string(r[:n-1]) + "…"
}

View file

@ -109,15 +109,28 @@ type Config struct {
// still goes to Grok.
TrivialOffloadEnabled bool
// WebEnabled turns on the web_then_grok route. WebProvider selects the source:
// grok_web_search (default, works on chat/completions via Live Search) or
// gemini_grounding (Gemini-3 native only — see F-EXT-3).
// grok_web_search (default, the xAI web_search tool on the Responses API) or
// gemini_grounding (native v1beta google_search — current models incl. 2.5; the
// F-EXT-3 caveat is OpenAI-compat-only, not a model-version limit).
WebEnabled bool
WebProvider string
// WebParanoid biases the router toward grounding: beyond freshnessRe, it unlocks the
// classifier-driven web arms (needs_web≥0.55, entity_obscure, time_sensitive,
// lookupHint && verifiable). Off (default) → web routing is freshness-only (today's
// behaviour), so enabling the classifier is web-routing-neutral and this is the single
// switch that activates epistemic grounding (§3/§15). Requires gemini_grounding.
WebParanoid bool
// WebGroundingDailyCap caps grounded prompts/day (durable counter) before falling
// back, guarding the $/1k grounding overage. WebGroundingTier records the Gemini
// plan the cap reflects.
// back, guarding the $/1k grounding overage.
WebGroundingDailyCap int
WebGroundingTier string
// WebGroundingTier is a documentation-only label of which Gemini plan the operator is
// on; it is NOT read by any logic. The money knob is GeminiGroundingPerPrompt
// (GEMINI_GROUNDING_PER_PROMPT_USD) — that is what the ledger/ceiling actually use.
WebGroundingTier string
// GeminiGroundingPerPrompt is the per-grounded-prompt FEE booked into the ledger so the
// daily ceiling sees it (§7 SG1). Default 0.035 (the paid-tier $35/1k overage); set 0
// ONLY when genuinely on the free grounded-prompt tier. Booked even on the error return.
GeminiGroundingPerPrompt float64
// Reasoning route: a manual "think harder" trigger. ReasoningModel must be a
// reasoning-capable model (the default grok-4.20-non-reasoning is NOT — see the
// docs.x.ai finding); set REASONING_MODEL to e.g. grok-4.3 to use it.
@ -378,6 +391,7 @@ func LoadConfig() (*Config, error) {
{"ROUTER_CLASSIFIER_ENABLED", &cfg.RouterClassifierEnabled},
{"TRIVIAL_OFFLOAD_ENABLED", &cfg.TrivialOffloadEnabled},
{"WEB_ENABLED", &cfg.WebEnabled},
{"WEB_PARANOID", &cfg.WebParanoid},
{"REASONING_ENABLED", &cfg.ReasoningEnabled},
} {
if *f.dest, err = getenvBool(f.key, false); err != nil {
@ -387,6 +401,11 @@ func LoadConfig() (*Config, error) {
if cfg.WebGroundingDailyCap, err = getenvInt("WEB_GROUNDING_DAILY_CAP", 450); err != nil {
problems = append(problems, err.Error())
}
// The per-grounded-prompt fee booked into the ledger (§7 SG1). Default 0.035 (paid
// tier). An operator on the free tier sets 0 deliberately.
if cfg.GeminiGroundingPerPrompt, err = getenvFloat("GEMINI_GROUNDING_PER_PROMPT_USD", 0.035); err != nil {
problems = append(problems, err.Error())
}
if cfg.CanaryPercent, err = getenvInt("CANARY_PERCENT", 0); err != nil {
problems = append(problems, err.Error())
}
@ -428,6 +447,17 @@ func LoadConfig() (*Config, error) {
problems = append(problems, fmt.Sprintf("WEB_PROVIDER must be %q or %q, got %q",
webProviderGrokWebSearch, webProviderGeminiGrounding, cfg.WebProvider))
}
// §7 SG3: paranoid web requires gemini_grounding. grok_web_search has no daily cap and
// costs 1018× per request — letting the paranoid bias drive it would only be backstopped
// by the $10 ceiling. Refuse to boot (consistent with the other fail-fast blocks).
if cfg.WebEnabled && cfg.WebParanoid && cfg.WebProvider == webProviderGrokWebSearch {
problems = append(problems, "WEB_PARANOID requires WEB_PROVIDER=gemini_grounding (grok_web_search has no daily cap and is far costlier)")
}
// §7 SG5: a non-positive grounding cap silently disables grounding (IncrGroundingIfUnder
// denies everything), so every query would degrade — refuse it for gemini_grounding.
if cfg.WebEnabled && cfg.WebProvider == webProviderGeminiGrounding && cfg.WebGroundingDailyCap <= 0 {
problems = append(problems, "WEB_GROUNDING_DAILY_CAP must be > 0 for gemini_grounding (a non-positive cap silently disables grounding)")
}
if cfg.ReasoningEnabled && cfg.ReasoningModel == "" {
problems = append(problems, "REASONING_MODEL is required when REASONING_ENABLED is set")
}
@ -524,9 +554,10 @@ func (c *Config) Summary() string {
}
return strings.Join(bodyUsers, ",")
}()),
fmt.Sprintf(" CASCADE: router=%t classifier=%t trivial=%t web=%t(%s, cap=%d) reason=%t(%s)",
fmt.Sprintf(" CASCADE: router=%t classifier=%t trivial=%t web=%t(%s, paranoid=%t, cap=%d, fee=$%g/prompt) reason=%t(%s)",
c.RouterEnabled, c.RouterClassifierEnabled, c.TrivialOffloadEnabled,
c.WebEnabled, c.WebProvider, c.WebGroundingDailyCap, c.ReasoningEnabled, c.ReasoningEffort),
c.WebEnabled, c.WebProvider, c.WebParanoid, c.WebGroundingDailyCap,
c.GeminiGroundingPerPrompt, c.ReasoningEnabled, c.ReasoningEffort),
" GEMINI_MODEL = " + c.GeminiModel,
" GEMINI_API_KEY = " + redact(c.GeminiAPIKey),
}, "\n")

View file

@ -20,6 +20,7 @@ func setBaseEnv(t *testing.T) {
for _, k := range []string{
"GEMINI_API_KEY", "GEMINI_API_KEY_FILE", "ROUTER_ENABLED", "ROUTER_CLASSIFIER_ENABLED",
"TRIVIAL_OFFLOAD_ENABLED", "WEB_ENABLED", "REASONING_ENABLED", "WEB_PROVIDER", "REASONING_MODEL",
"WEB_PARANOID", "WEB_GROUNDING_DAILY_CAP", "GEMINI_GROUNDING_PER_PROMPT_USD",
} {
t.Setenv(k, "")
}
@ -96,3 +97,47 @@ func TestConfigWebGeminiGroundingNeedsKey(t *testing.T) {
t.Fatalf("want GEMINI_API_KEY error, got %v", err)
}
}
// TestConfigParanoidRequiresGeminiGrounding covers §7 SG3: paranoid web on the uncapped
// grok_web_search provider must refuse to boot, while the same flags with
// gemini_grounding (plus a Gemini key) load cleanly.
func TestConfigParanoidRequiresGeminiGrounding(t *testing.T) {
	setBaseEnv(t)
	t.Setenv("WEB_ENABLED", "true")
	t.Setenv("WEB_PARANOID", "true") // WEB_PROVIDER unset → default grok_web_search

	_, err := LoadConfig()
	if err == nil || !strings.Contains(err.Error(), "WEB_PARANOID") {
		t.Fatalf("want WEB_PARANOID error on grok_web_search, got %v", err)
	}

	// Switch to the capped provider and supply its key: the config becomes valid.
	t.Setenv("WEB_PROVIDER", webProviderGeminiGrounding)
	t.Setenv("GEMINI_API_KEY", "gk")
	_, err = LoadConfig()
	if err != nil {
		t.Fatalf("paranoid + gemini_grounding should be valid: %v", err)
	}
}
// TestConfigGeminiGroundingCapMustBePositive covers §7 SG5: a non-positive grounding
// cap would silently disable grounding, so LoadConfig must reject it for
// gemini_grounding.
func TestConfigGeminiGroundingCapMustBePositive(t *testing.T) {
	setBaseEnv(t)
	env := [][2]string{
		{"WEB_ENABLED", "true"},
		{"WEB_PROVIDER", webProviderGeminiGrounding},
		{"GEMINI_API_KEY", "gk"},
		{"WEB_GROUNDING_DAILY_CAP", "0"},
	}
	for _, kv := range env {
		t.Setenv(kv[0], kv[1])
	}

	_, err := LoadConfig()
	if err != nil && strings.Contains(err.Error(), "WEB_GROUNDING_DAILY_CAP") {
		return
	}
	t.Fatalf("want WEB_GROUNDING_DAILY_CAP error, got %v", err)
}
// TestConfigGroundingFeeDefault pins two defaults: the per-prompt grounding fee is the
// paid-tier $0.035 (an operator must explicitly opt down to 0), and WEB_PARANOID is off.
func TestConfigGroundingFeeDefault(t *testing.T) {
	setBaseEnv(t)
	loaded, loadErr := LoadConfig()
	if loadErr != nil {
		t.Fatalf("%v", loadErr)
	}
	if fee := loaded.GeminiGroundingPerPrompt; fee != 0.035 {
		t.Fatalf("GEMINI_GROUNDING_PER_PROMPT_USD default = %v, want 0.035", fee)
	}
	if loaded.WebParanoid {
		t.Fatal("WEB_PARANOID must default off")
	}
}

View file

@ -47,6 +47,65 @@ func buildContext(system string, history []bufferedMsg, isDM bool, triggerBody s
return truncateToTokens(msgs, maxTokens)
}
// routerContextMaxRunes bounds every line handed to the classifier/rewrite so that one
// long buffered turn cannot exhaust the router's token budget; about 200 runes is enough
// to resolve a follow-up.
const routerContextMaxRunes = 200

// routerContext renders the privacy-minimised conversation window read by the Layer-1
// classifier and the follow-up rewrite. It draws ONLY from the already-minimised msgs (a
// strict subset of what the final Grok call sees — no new privacy surface, §6). A leading
// system message is dropped first.
//
//   - DM: the window opens at the second most recent assistant turn and runs to the
//     end, so a bare follow-up like "2024 года" carries the prior film name into
//     search_query. With fewer than two assistant turns the whole conversation is used.
//   - GROUP: ONLY the final line. The per-(room,thread) buffer interleaves different
//     members' topics (it is keyed by room+thread, not sender), so resolving a follow-up
//     against prior turns could ground a confidently-wrong answer about the WRONG subject.
//
// Each turn becomes one "BOT: …" (assistant) or "USER: …" (any other role) line, joined
// by newlines. Content is whitespace-trimmed, blank turns are skipped, and the text is
// cut to routerContextMaxRunes runes. Returns "" when there is nothing to send.
func routerContext(msgs []Message, isDM bool) string {
	turns := msgs
	if len(turns) > 0 && turns[0].Role == "system" {
		turns = turns[1:]
	}
	if len(turns) == 0 {
		return ""
	}

	first := len(turns) - 1 // group default: only the final line
	if isDM {
		// Scan backwards for the second most recent assistant turn; if there is no such
		// turn, the window starts at the beginning of the conversation.
		first = 0
		remaining := 2
		for idx := len(turns) - 1; idx >= 0; idx-- {
			if turns[idx].Role != "assistant" {
				continue
			}
			if remaining--; remaining == 0 {
				first = idx
				break
			}
		}
	}

	lines := make([]string, 0, len(turns)-first)
	for _, turn := range turns[first:] {
		body := strings.TrimSpace(turn.Content)
		if body == "" {
			continue
		}
		if runes := []rune(body); len(runes) > routerContextMaxRunes {
			body = string(runes[:routerContextMaxRunes])
		}
		speaker := "USER"
		if turn.Role == "assistant" {
			speaker = "BOT"
		}
		lines = append(lines, speaker+": "+body)
	}
	// TrimRight also removes a newline that the rune cut may have left at the very end.
	return strings.TrimRight(strings.Join(lines, "\n"), "\n")
}
// estimateTokens is a cheap upper-ish heuristic (~4 chars/token + per-message
// overhead). Used only to bound request size, not for billing (billing reads the
// API's returned usage).

View file

@ -0,0 +1,193 @@
// Package routedecide is the PURE, importable core of the AI-bot router: the free
// Layer-0 regex pre-classification and the Layer-0+classifier combine. It holds no
// I/O, no vendor clients, no Bot/Config — only the decision math — so two callers can
// share exactly one decision function:
//
// - package main (router.go) parses the live Gemini classifier JSON into a Verdict,
// then calls Combine to resolve the route;
// - cmd/routereval replays a golden set of recorded Verdicts through the same
// ClassifyLayer0 + Combine to measure misroute / false-web / trivial-leak offline.
//
// Go forbids importing package main, so this core had to live in its own package for
// the offline harness to exercise the REAL routing logic instead of a drift-prone copy.
package routedecide
import (
"regexp"
"strings"
)
// Route names — the canonical wire/log/request_log tokens. package main aliases these
// (telemetry.go) so there is a single source of truth for the strings.
const (
// RouteTrivial is one of the three routes Layer0.Route can carry (trivial candidates).
RouteTrivial = "trivial_direct"
// RouteGrokDirect is the safe floor; ClassifyLayer0 returns it for an empty body.
RouteGrokDirect = "grok_direct"
// RouteWeb is what ClassifyLayer0 returns on a freshnessRe hit.
RouteWeb = "web_then_grok"
// RouteReason is never listed as a Layer0.Route value in this package.
// NOTE(review): presumably resolved by the caller's manual reasoning trigger — confirm.
RouteReason = "reason_then_grok"
)
// Confidence floors the combine uses. These are the values the offline eval (§11)
// tunes; keeping them here lets cmd/routereval sweep them without touching main.
//
// - WebNeedsWebFloor: a classifier needs_web verdict must clear this to route to web
// (paranoid-low — grounding is cheap, a confident wrong fact is not).
// - TrivialFloor: the bar a trivial offload must clear (conservative — a false trivial
// leaks a real question to the cheap model).
//
// Both are compared against Verdict.Confidence. They are also what DefaultFloors returns
// and what cmd/routereval uses as the defaults of its -web-floor / -trivial-floor flags.
const (
WebNeedsWebFloor = 0.55
TrivialFloor = 0.85
)
// Floors are the two confidence thresholds Combine applies, parameterised so the offline
// eval (cmd/routereval) can SWEEP them over a golden set without recompiling. Production
// uses DefaultFloors (the consts above).
type Floors struct {
// WebNeedsWeb is the needs_web confidence floor (WebNeedsWebFloor in production).
WebNeedsWeb float64
// Trivial is the trivial-offload confidence floor (TrivialFloor in production).
Trivial float64
}
// DefaultFloors returns the production thresholds: WebNeedsWebFloor for the needs_web
// arm and TrivialFloor for the trivial gate.
func DefaultFloors() Floors {
	var production Floors
	production.WebNeedsWeb = WebNeedsWebFloor
	production.Trivial = TrivialFloor
	return production
}
// web_decided_by attribution tokens (request_log.web_decided_by). Stable so analytics
// can GROUP BY them and tune WebNeedsWebFloor from data.
//
// NOTE(review): the per-token meanings below are read off the token names and the field
// names of Layer0/Verdict; the code that assigns them is not in this excerpt — confirm.
const (
// WebByNone: presumably no web signal decided the route.
WebByNone = "none"
// WebByFreshness: presumably the Layer-0 freshness hit (Layer0.WebForce).
WebByFreshness = "freshness"
// WebByNeedsWeb: presumably the classifier needs_web verdict clearing its floor.
WebByNeedsWeb = "classifier_needs_web"
// WebByObscure: presumably Verdict.EntityObscure.
WebByObscure = "entity_obscure"
// WebByTime: presumably Verdict.TimeSensitive.
WebByTime = "time_sensitive"
// WebByLookupHint: presumably the soft Layer0.LookupHint arm.
WebByLookupHint = "lookup_hint"
)
// Verdict is the classifier's parsed JSON output (§4.1). The json tags match the
// classifier schema exactly, so both routeLayer1 (live classifier reply) and
// cmd/routereval (recorded golden verdicts) unmarshal straight into it. Confidence is
// the model's honest certainty in needs_web; it doubles as the trivial-gate threshold
// (a clear greeting is high-certainty-no-web, so the gate passes).
//
// A field absent from the JSON keeps its Go zero value (false / "" / 0), as with any
// encoding/json decode.
type Verdict struct {
// NeedsWeb is the classifier's "this needs a web lookup" flag.
NeedsWeb bool `json:"needs_web"`
// Verifiable marks a checkable fact.
Verifiable bool `json:"verifiable"`
// EntityObscure marks an obscure entity.
EntityObscure bool `json:"entity_obscure"`
// TimeSensitive marks an answer that depends on recency.
TimeSensitive bool `json:"time_sensitive"`
// Trivial marks a trivial message.
Trivial bool `json:"trivial"`
// SearchQuery is the classifier's suggested search query; it may be empty.
SearchQuery string `json:"search_query"`
// Confidence is compared against the Floors thresholds.
Confidence float64 `json:"confidence"`
}
// Layer0 is the free-regex pre-classification result. Route is the verdict when the
// classifier is OFF; WebForce/Trivial/LookupHint feed the Combine when it is ON.
//
// ClassifyLayer0 returns the zero value with Route=RouteGrokDirect for an empty body.
// On a freshness hit it sets Route=RouteWeb, WebForce=true and Freshness="recent", and
// still reports LookupHint (the lookup-intent regexes are evaluated before the
// freshness check).
type Layer0 struct {
Route string // RouteWeb (freshness) | RouteTrivial | RouteGrokDirect
WebForce bool // freshnessRe hit — a HARD web signal (survives the classifier being down)
Trivial bool // a trivial candidate (greeting/ack/bare arithmetic)
LookupHint bool // lookupIntentRe hit — a SOFT hint only (never sets the route)
Freshness string // "recent" when WebForce, else ""
}
// Heuristic patterns. Kept deliberately tight. Freshness words route to web (a false
// web-route only costs a fetch and degrades cleanly). Trivial fires only on short,
// unmistakable greetings/acks or bare arithmetic.
//
// ClassifyLayer0 matches the freshness and lookup patterns against the trimmed,
// LOWERCASED body, which is why greetingRe and freshnessRe use lowercase literals with
// no (?i) flag. All patterns are compiled once at package init via regexp.MustCompile.
var (
// greetingRe is anchored at both ends (^…$): the WHOLE message must be one
// greeting/ack token, optionally followed by whitespace or the punctuation !.,)
greetingRe = regexp.MustCompile(`^(привет(ик)?|здравствуй(те)?|хай|прив|ку|добрый\s+(день|вечер|утро)|спасибо|спс|благодарю|пока|ок(ей)?|угу|ага|hello|hi|hey|yo|thanks|thank\s+you|thx|ty|bye|goodbye|ok|okay|cool|nice)[\s!.,)]*$`)
// arithmeticRe is also fully anchored: integers joined by at least one operator
// (- + * / × ÷), with optional leading "(" / whitespace and trailing ")", "=", "?".
arithmeticRe = regexp.MustCompile(`^[\s(]*\d+(\s*[-+*/×÷]\s*\d+)+[\s)=?]*$`)
// Russian tokens are deliberately STEM matches (новост→новости/новостей, погод→погода…)
// so they stay un-anchored. English standalone tokens are \b-anchored so they fire on
// whole words only — not inside scoreboard / concurrent / weathering / newsletter (a
// pre-existing false-web source; \b removes that pointless grounding spend). RE2's \b is
// ASCII-word-based, so it is used only around the ASCII tokens, never the Cyrillic stems.
// Note that the `курс\s` alternative requires a whitespace character after the stem.
freshnessRe = regexp.MustCompile(`(новост|сегодня|сейчас|последн|курс\s|погод|котировк|расписани|прогноз|\bbreaking\b|\btoday\b|\bright now\b|\blatest\b|\bcurrent(ly)?\b|\bnews\b|\bweather\b|\bstock price\b|\bexchange rate\b|\bscore\b)`)
// lookupIntentRe — SOFT HINT ONLY (§5): raises the classifier's needs_web prior via
// the lookupHint && verifiable arm; must NEVER set the route. Anchored on
// interrogative + lookup-verb so it fires on lookup INTENT, not entity presence.
// Deliberately leaky (false negatives are caught by the classifier, the real safety
// net). Do NOT add a capitalised-word or guillemet branch — those false-positive on
// greetings/idioms ("Привет, Москва!", "«Война и мир» — топ", "ну ты прям Эйнштейн").
// The leading [\s«"„(] class is only an OPTIONAL left boundary, never a trigger.
// The hint is the OR of the RU and EN patterns; both carry (?i).
lookupIntentRe_RU = regexp.MustCompile(`(?i)(^|[\s«"„(])(кто\s+(так(ой|ая|ие)|снимал(ся|ась|ись)|играл|написал|основал|изобрёл|изобрел|режисс[её]р|автор)|в\s+как(ом|ой)\s+(год[уе]|фильм[еа]|сериал[еа]|книг[еи]|игр[еы])|когда\s+(вышел|вышла|вышло|выйдет|основан[аы]?|родил(ся|ась)|умер(ла)?|состоял(ся|ась)|был[аои]?\s+выпущен)|в\s+каком\s+году|сколько\s+(лет|стоит\s+бил|серий|сезонов|эпизодов)|чем\s+(закончил|известен|знаменит))`)
lookupIntentRe_EN = regexp.MustCompile(`(?i)(^|[\s"'(])(who\s+(is|are|was|were|starred|played|directed|wrote|founded|invented|created)\s|in\s+(what|which)\s+(year|film|movie|show|series|book|game)\b|when\s+(did|was|were|does|is)\b.*\b(release|released|come\s+out|came\s+out|born|die|died|found|founded|launch|launched|air|aired)\b|what\s+year\b|how\s+many\s+(seasons|episodes|films|movies|books))`)
)
// ClassifyLayer0 applies the free regex heuristics to a raw message body and reports
// the Layer-0 result. The body is trimmed and lowercased before any pattern runs.
//
// Precedence: a blank body yields grok_direct with no flags (the safe floor); a
// freshness hit yields web with WebForce and Freshness "recent"; otherwise a trivial
// greeting/ack/arithmetic yields the trivial candidate; anything else is grok_direct.
// LookupHint is evaluated once, independently of the route, and attached to every
// non-blank result. The returned Route is the final verdict only while the classifier
// is off; with it on, the flags are what Combine consumes.
func ClassifyLayer0(body string) Layer0 {
	norm := strings.TrimSpace(body)
	if norm == "" {
		return Layer0{Route: RouteGrokDirect}
	}
	norm = strings.ToLower(norm)

	res := Layer0{
		Route:      RouteGrokDirect,
		LookupHint: lookupIntentRe_RU.MatchString(norm) || lookupIntentRe_EN.MatchString(norm),
	}
	switch {
	case freshnessRe.MatchString(norm):
		// Freshness wins over trivial: it is the hard web signal.
		res.Route, res.WebForce, res.Freshness = RouteWeb, true, "recent"
	case IsTrivial(norm):
		res.Route, res.Trivial = RouteTrivial, true
	}
	return res
}
// IsTrivial: a short greeting/ack or a bare arithmetic expression, with no sign of a
// real question. Length-bounded so "thanks, now explain quantum tunnelling" is NOT
// trivial. Expects an already-lowercased/trimmed string from ClassifyLayer0; callers
// passing raw input should lower/trim first (the greeting regex is lowercase-anchored).
func IsTrivial(s string) bool {
if arithmeticRe.MatchString(s) {
return true
}
if len(strings.Fields(s)) <= 4 && greetingRe.MatchString(s) {
return true
}
return false
}
// Combined is the resolved route plus its web attribution (for request_log). It is the
// value returned by Combine / CombineWithFloors.
type Combined struct {
// Route is the final route: RouteWeb, RouteTrivial or RouteGrokDirect.
Route string
// WebDecidedBy names the arm that chose web (one of the WebBy* values); it is
// WebByNone whenever Route is not RouteWeb.
WebDecidedBy string
}
// Combine merges the Layer-0 result with the classifier Verdict into the final route,
// using the default thresholds from DefaultFloors. It is the router's brain and never
// trusts the model blindly:
//
//   - A freshnessRe hit (l0.WebForce) is a HARD web signal. It is always honoured,
//     whatever the verdict says, so it survives the classifier being down.
//   - Every OTHER web arm is gated by `paranoid` (WEB_PARANOID): needs_web at or above
//     the floor AND verifiable, entity_obscure, time_sensitive, and
//     lookupHint && verifiable. The needs_web arm also demands `verifiable` because a
//     small flash-lite classifier over-fires needs_web on open-ended advice and
//     explanations (seen live: "посоветуй фильм", "объясни goroutines" came back
//     needs_web=true, verifiable=false). `verifiable` — a checkable fact about a NAMED
//     entity — is the reliable discriminator; recency still routes through
//     time_sensitive/freshness and obscurity through entity_obscure, so no genuine
//     grounding is lost.
//   - With paranoid off, web routing is freshness-only, exactly as without the
//     classifier. Enabling the classifier is therefore web-routing-neutral, and
//     WEB_PARANOID is the single switch that turns epistemic grounding (and its cost) on.
//   - Trivial is agreement-gated: it needs the Layer-0 trivial candidate AND
//     classifier.trivial AND confidence at or above the trivial floor. A lone signal
//     stays on grok_direct (no voice leak).
//   - Everything else lands on grok_direct, the safe floor for opinion/chat/advice/code.
//
// The order in which the web arms are tried fixes the web_decided_by attribution; the
// web/not-web outcome itself is simply the OR of the arms.
func Combine(l0 Layer0, v Verdict, paranoid bool) Combined {
	defaults := DefaultFloors()
	return CombineWithFloors(l0, v, paranoid, defaults)
}
// CombineWithFloors is Combine with caller-supplied thresholds; it is the entry point
// the offline-eval sweep uses to try different floors.
//
// Arms are tried in a fixed order, and the first one that fires decides both the route
// and the WebDecidedBy attribution:
//
//  1. l0.WebForce — always web (WebByFreshness), regardless of paranoid or the verdict.
//  2. With paranoid on only: needs_web && verifiable && confidence >= f.WebNeedsWeb
//     (WebByNeedsWeb), then entity_obscure (WebByObscure), then time_sensitive
//     (WebByTime), then lookupHint && verifiable (WebByLookupHint).
//  3. Trivial, when Layer-0 and the classifier agree and confidence >= f.Trivial.
//  4. Otherwise grok_direct.
//
// Non-web results always carry WebByNone.
func CombineWithFloors(l0 Layer0, v Verdict, paranoid bool, f Floors) Combined {
	// The hard signal is checked before anything the classifier said.
	if l0.WebForce {
		return Combined{Route: RouteWeb, WebDecidedBy: WebByFreshness}
	}

	// Classifier-driven web arms exist only behind WEB_PARANOID. Keep this order:
	// it determines which arm gets the attribution when several would fire.
	if paranoid {
		if v.NeedsWeb && v.Verifiable && v.Confidence >= f.WebNeedsWeb {
			return Combined{Route: RouteWeb, WebDecidedBy: WebByNeedsWeb}
		}
		if v.EntityObscure {
			return Combined{Route: RouteWeb, WebDecidedBy: WebByObscure}
		}
		if v.TimeSensitive {
			return Combined{Route: RouteWeb, WebDecidedBy: WebByTime}
		}
		if l0.LookupHint && v.Verifiable {
			return Combined{Route: RouteWeb, WebDecidedBy: WebByLookupHint}
		}
	}

	// Trivial needs both layers to agree with enough confidence; a lone signal falls
	// through to the safe floor.
	agreedTrivial := l0.Trivial && v.Trivial && v.Confidence >= f.Trivial
	if !agreedTrivial {
		return Combined{Route: RouteGrokDirect, WebDecidedBy: WebByNone}
	}
	return Combined{Route: RouteTrivial, WebDecidedBy: WebByNone}
}

View file

@ -0,0 +1,230 @@
package routedecide
import "testing"
// TestClassifyLayer0 pins the free-heuristic golden set: freshness phrases go to web
// with WebForce set, short greetings/acks and bare arithmetic become trivial
// candidates, and everything else (including a greeting followed by a real question,
// and the empty body) stays on grok_direct without the trivial flag.
func TestClassifyLayer0(t *testing.T) {
	type golden struct {
		body     string
		route    string
		webForce bool
		trivial  bool
	}
	table := []golden{
		{body: "привет", route: RouteTrivial, trivial: true},
		{body: "спасибо", route: RouteTrivial, trivial: true},
		{body: "2+2", route: RouteTrivial, trivial: true},
		{body: "12 / 4 - 1", route: RouteTrivial, trivial: true},
		{body: "hello", route: RouteTrivial, trivial: true},
		{body: "какие новости сегодня?", route: RouteWeb, webForce: true},
		{body: "курс доллара сегодня", route: RouteWeb, webForce: true},
		{body: "what's the weather today", route: RouteWeb, webForce: true},
		{body: "посоветуй фильм на вечер", route: RouteGrokDirect},
		{body: "explain how TCP works", route: RouteGrokDirect},
		{body: "спасибо, а теперь подробно объясни квантовую запутанность", route: RouteGrokDirect},
		{body: "", route: RouteGrokDirect},
	}
	for i := range table {
		want := table[i]
		got := ClassifyLayer0(want.body)
		if got.Route == want.route && got.WebForce == want.webForce && got.Trivial == want.trivial {
			continue
		}
		t.Errorf("ClassifyLayer0(%q) = {route:%q webForce:%v trivial:%v}, want {%q %v %v}",
			want.body, got.Route, got.WebForce, got.Trivial, want.route, want.webForce, want.trivial)
	}
}
// TestFreshnessWordBoundaries guards the §7-#7 \b tightening. English freshness tokens
// must match whole words only (not the "score" in scoreboard, the "current" in
// concurrent, the "weather" in weathering or the "news" in newsletter), genuine
// freshness phrases must still force web, and the Russian stems keep stem-matching.
func TestFreshnessWordBoundaries(t *testing.T) {
	probes := []struct {
		phrase    string
		wantForce bool
	}{
		{phrase: "what's the weather today", wantForce: true},
		{phrase: "latest news on AI", wantForce: true},
		{phrase: "current bitcoin price", wantForce: true},
		{phrase: "какие новости сегодня", wantForce: true}, // RU stems unchanged
		{phrase: "курс доллара сегодня", wantForce: true},

		{phrase: "the scoreboard shows 3:1"},             // score inside scoreboard
		{phrase: "concurrent programming in Go"},         // current inside concurrent
		{phrase: "weathering the storm, metaphorically"}, // weather inside weathering
		{phrase: "subscribe to my newsletter please"},    // news inside newsletter
	}
	for _, p := range probes {
		forced := ClassifyLayer0(p.phrase).WebForce
		switch {
		case p.wantForce && !forced:
			t.Errorf("expected WebForce on freshness phrase: %q", p.phrase)
		case !p.wantForce && forced:
			t.Errorf("freshness false-positive (substring match) on: %q", p.phrase)
		}
	}
}
// TestLookupHintFalsePositiveCorpus enforces the §5 guarantee on the soft
// lookup-intent regex. It must stay silent on greetings, vocatives, idioms and
// interrogatives that carry no lookup verb, because it keys on interrogative +
// lookup-verb and never on a capitalised word or a guillemet. A stray LookupHint could
// only bias routing under WEB_PARANOID together with verifiable, but the regex itself
// is still held to near-zero false positives. The second half checks the hint does
// fire on genuine lookup questions, otherwise it would be useless.
func TestLookupHintFalsePositiveCorpus(t *testing.T) {
	hinted := func(msg string) bool { return ClassifyLayer0(msg).LookupHint }

	traps := []string{
		"Привет, Москва!",     // vocative, no interrogative
		"«Война и мир» — топ", // guillemets are not a trigger
		"ну ты прям Эйнштейн", // proper noun, no «кто такой»
		"кто это сделал?",     // «кто» not followed by a lookup-verb
		"когда ты придёшь?",   // «когда» needs a release/birth verb
		"спасибо большое",     // ack
		"расскажи что-нибудь", // imperative, no lookup interrogative
		"I love this movie",   // English, no interrogative
		"who cares",           // «who» not followed by is/was/starred/…
	}
	for i := range traps {
		if hinted(traps[i]) {
			t.Errorf("lookupHint fired on a false-positive trap: %q", traps[i])
		}
	}

	genuine := []string{
		"кто снимался в фильме дом у дороги",
		"кто написал войну и мир",
		"в каком году вышел фильм матрица",
		"who directed Inception",
		"in what year was the Matrix released",
		"how many seasons of breaking bad",
	}
	for i := range genuine {
		if hinted(genuine[i]) {
			continue
		}
		t.Errorf("lookupHint should fire on genuine lookup intent: %q", genuine[i])
	}
}
// TestCombineFreshnessAlwaysWeb checks that a freshnessRe hit (WebForce) lands on web
// with WEB_PARANOID both on and off, even when the classifier verdict disagrees. This
// is the deterministic signal that survives the classifier being down (§4.4).
func TestCombineFreshnessAlwaysWeb(t *testing.T) {
	forced := Layer0{Route: RouteWeb, WebForce: true, Freshness: "recent"}
	dissent := Verdict{NeedsWeb: false, Confidence: 0.1} // classifier disagrees

	modes := [2]bool{true, false}
	for i := 0; i < len(modes); i++ {
		paranoid := modes[i]
		got := Combine(forced, dissent, paranoid).Route
		if got == RouteWeb {
			continue
		}
		t.Errorf("freshness with paranoid=%v = %q, want web", paranoid, got)
	}
}
// TestCombineParanoidGating asserts the Design-X invariant (§15). With WEB_PARANOID
// off, only freshness may route to web, so each classifier-driven arm (needs_web,
// entity_obscure, time_sensitive, lookup hint) must leave the route on grok_direct.
// With it on, every one of those arms must route to web.
func TestCombineParanoidGating(t *testing.T) {
	// No freshness hit, but the soft lookup hint is present for the last arm.
	hintOnly := Layer0{Route: RouteGrokDirect, LookupHint: true}

	verdicts := []Verdict{
		{NeedsWeb: true, Verifiable: true, Confidence: 0.9}, // classifier_needs_web (needs verifiable)
		{EntityObscure: true, Confidence: 0.4},              // entity_obscure
		{TimeSensitive: true, Confidence: 0.4},              // time_sensitive
		{Verifiable: true, Confidence: 0.4},                 // lookup_hint && verifiable
	}
	for idx := range verdicts {
		verdict := verdicts[idx]

		off := Combine(hintOnly, verdict, false).Route
		if off != RouteGrokDirect {
			t.Errorf("arm %d with paranoid OFF = %q, want grok_direct (web is freshness-only)", idx, off)
		}

		on := Combine(hintOnly, verdict, true).Route
		if on != RouteWeb {
			t.Errorf("arm %d with paranoid ON = %q, want web", idx, on)
		}
	}
}
// TestCombineWebFloor checks the confidence threshold of the needs_web arm under
// paranoid mode: just below WebNeedsWebFloor the route stays grok_direct, exactly at
// the floor it becomes web.
func TestCombineWebFloor(t *testing.T) {
	plain := Layer0{Route: RouteGrokDirect}
	withConfidence := func(conf float64) Verdict {
		return Verdict{NeedsWeb: true, Verifiable: true, Confidence: conf}
	}

	belowRoute := Combine(plain, withConfidence(WebNeedsWebFloor-0.01), true).Route
	if belowRoute != RouteGrokDirect {
		t.Errorf("needs_web below floor = %q, want grok_direct", belowRoute)
	}

	atRoute := Combine(plain, withConfidence(WebNeedsWebFloor), true).Route
	if atRoute != RouteWeb {
		t.Errorf("needs_web at floor = %q, want web", atRoute)
	}
}
// TestCombineNeedsWebRequiresVerifiable covers the false-web fix seen live. The
// needs_web arm may fire only when the classifier also marked the query as a checkable
// named-entity fact (verifiable). A fully confident needs_web on a non-verifiable query
// — an opinion or explanation the small flash-lite eagerly tagged needs_web=true
// ("посоветуй фильм", "объясни goroutines") — has to stay on grok_direct. Recency
// (time_sensitive/freshness) and obscurity (entity_obscure) keep their own arms, so no
// genuine grounding is lost.
func TestCombineNeedsWebRequiresVerifiable(t *testing.T) {
	plain := Layer0{Route: RouteGrokDirect}

	unverifiable := Verdict{NeedsWeb: true, Verifiable: false, Confidence: 1.0}
	if route := Combine(plain, unverifiable, true).Route; route != RouteGrokDirect {
		t.Errorf("needs_web && !verifiable = %q, want grok_direct (false-web fix)", route)
	}

	verifiable := Verdict{NeedsWeb: true, Verifiable: true, Confidence: 0.6}
	if route := Combine(plain, verifiable, true).Route; route != RouteWeb {
		t.Errorf("needs_web && verifiable = %q, want web", route)
	}

	// A non-verifiable needs_web that is ALSO entity_obscure still grounds, through
	// the obscure arm.
	obscure := Verdict{NeedsWeb: true, Verifiable: false, EntityObscure: true, Confidence: 0.1}
	if route := Combine(plain, obscure, true).Route; route != RouteWeb {
		t.Errorf("entity_obscure must still route web regardless of verifiable, got %q", route)
	}
}
// TestCombineTrivialAgreementGate verifies that the trivial route needs all three of:
// the Layer-0 trivial candidate, classifier.trivial, and confidence at or above
// TrivialFloor. Any lone or weak signal must stay on grok_direct.
func TestCombineTrivialAgreementGate(t *testing.T) {
	candidate := Layer0{Route: RouteTrivial, Trivial: true}
	notCandidate := Layer0{Route: RouteGrokDirect}

	agreed := Combine(candidate, Verdict{Trivial: true, Confidence: 0.95}, true).Route
	if agreed != RouteTrivial {
		t.Errorf("agreed high-confidence trivial = %q, want trivial", agreed)
	}

	weak := Combine(candidate, Verdict{Trivial: true, Confidence: 0.5}, true).Route
	if weak != RouteGrokDirect {
		t.Errorf("low-confidence trivial = %q, want grok_direct (no voice leak)", weak)
	}

	disputed := Combine(candidate, Verdict{Trivial: false, Confidence: 0.95}, true).Route
	if disputed != RouteGrokDirect {
		t.Errorf("classifier disagrees on trivial = %q, want grok_direct", disputed)
	}

	// Never trust classifier.trivial alone: without the Layer-0 candidate it stays grok.
	alone := Combine(notCandidate, Verdict{Trivial: true, Confidence: 0.99}, true).Route
	if alone == RouteTrivial {
		t.Errorf("classifier.trivial alone routed to trivial; must require the Layer-0 candidate")
	}
}
// TestCombineRoadHouse is the regression: the hallucinated-cast bug. With WEB_PARANOID on
// and the classifier flagging the (obscure, verifiable) entity, both the first turn and
// the resolved follow-up route to web; with paranoid off they fall to grok_direct (the
// canary-neutral baseline).
func TestCombineRoadHouse(t *testing.T) {
first := ClassifyLayer0("кто снимался в фильме дом у дороги")
followup := ClassifyLayer0("2024 года") // bare; the classifier resolves via context
v := Verdict{NeedsWeb: true, Verifiable: true, EntityObscure: true, Confidence: 0.7}
for _, l0 := range []Layer0{first, followup} {
if got := Combine(l0, v, true).Route; got != RouteWeb {
t.Errorf("road house with paranoid ON = %q, want web (the hallucination fix)", got)
}
if got := Combine(l0, v, false).Route; got != RouteGrokDirect {
t.Errorf("road house with paranoid OFF = %q, want grok_direct (baseline)", got)
}
}
}
// TestWebDecidedByAttribution checks that, under paranoid mode, each web arm is
// credited with its own WebBy* label and a no-signal verdict reports WebByNone. The
// attribution is what the 0.55 floor is tuned against.
func TestWebDecidedByAttribution(t *testing.T) {
	type attribution struct {
		l0   Layer0
		v    Verdict
		want string
	}
	table := []attribution{
		{l0: Layer0{WebForce: true}, v: Verdict{}, want: WebByFreshness},
		{l0: Layer0{}, v: Verdict{NeedsWeb: true, Verifiable: true, Confidence: 0.9}, want: WebByNeedsWeb},
		{l0: Layer0{}, v: Verdict{EntityObscure: true, Confidence: 0.1}, want: WebByObscure},
		{l0: Layer0{}, v: Verdict{TimeSensitive: true, Confidence: 0.1}, want: WebByTime},
		{l0: Layer0{LookupHint: true}, v: Verdict{Verifiable: true, Confidence: 0.1}, want: WebByLookupHint},
		{l0: Layer0{Route: RouteGrokDirect}, v: Verdict{Confidence: 0.1}, want: WebByNone},
	}
	for i := range table {
		row := table[i]
		got := Combine(row.l0, row.v, true).WebDecidedBy
		if got == row.want {
			continue
		}
		t.Errorf("web_decided_by(%+v,%+v) = %q, want %q", row.l0, row.v, got, row.want)
	}
}

View file

@ -20,15 +20,21 @@ type ModelPrice struct {
// and so a half-finished cascade can book only what it actually spent (§8.1).
type CostBreakdown struct {
Token float64
Grounding float64
Grounding float64 // Gemini grounded-prompt TOKEN cost
WebTool float64
Router float64
// GroundingFee is the per-grounded-prompt FEE (the $35/1k overage on a paid Gemini
// tier, GEMINI_GROUNDING_PER_PROMPT_USD) — kept separate from Grounding (the token
// cost) for clean analytics. Booked the moment the grounded prompt is admitted, even
// on the error return (§7 SG1). Settle folds it into the grounding_usd spend column,
// so the $10 ceiling finally sees it without a spend-table migration.
GroundingFee float64
}
// Total is the grand total across all components (the number the wallet ceiling and
// request_log.total_usd care about). Computed, never stored, so it can't drift.
func (c CostBreakdown) Total() float64 {
return c.Token + c.Grounding + c.WebTool + c.Router
return c.Token + c.Grounding + c.WebTool + c.Router + c.GroundingFee
}
// priceFor returns the configured price for a model. An unknown model falls back to

View file

@ -19,10 +19,11 @@ import (
// cheap trivial route and the Layer-1 router classifier. Same wire format as Grok,
// so it reuses the shared transport (httpllm.go).
// - groundedSearch: a SEPARATE call against the NATIVE v1beta generateContent endpoint
// with the google_search tool. Grounding does NOT work on the OpenAI-compat layer
// (it is silently ignored there, and only on Gemini 3+) — verified against Google's
// docs (F-EXT-3) — so the web layer that wants Gemini grounding must use this native
// path and VERIFY citations came back, else degrade.
// with the google_search tool. Grounding does NOT work on the OpenAI-compat layer —
// it is silently ignored THERE (F-EXT-3, an endpoint limitation, NOT a model-version
// one: the google_search tool is supported by current models including
// gemini-2.5-flash-lite per ai.google.dev). So the web layer that wants Gemini
// grounding must use this native path and VERIFY citations came back, else degrade.
type geminiClient struct {
http *openAIClient
nativeBase string // …/v1beta — derived from the OpenAI-compat base by dropping /openai
@ -94,8 +95,8 @@ type geminiPart struct {
Text string `json:"text"`
}
type geminiTool struct {
// google_search is the current grounding tool (Gemini 3 / current models). The
// empty object enables it.
// google_search is the current grounding tool (all current models, incl. the 2.5
// family; legacy models used google_search_retrieval). The empty object enables it.
GoogleSearch struct{} `json:"google_search"`
}
type geminiNativeResponse struct {

View file

@ -3,117 +3,141 @@ package main
import (
"context"
"encoding/json"
"regexp"
"strings"
"time"
rd "vojo.chat/ai-bot/internal/routedecide"
)
// router.go classifies a message into a route. It runs INSIDE respond() — after the
// mention/media/foreign/single-flight gates (F-FUNC-7) — so a paid Layer-1 classifier
// is never spent on a message today's bot drops for free.
//
// Two layers, both conservative (doubt → grok_direct, the safe floor that keeps
// substantive questions on Grok, §8.6):
// Two layers; the decision MATH lives in the pure internal/routedecide package so the
// offline eval (cmd/routereval) replays the SAME function instead of a copy:
// - Layer-0: free regex heuristics (RU+EN). Always runs when ROUTER_ENABLED.
// - Layer-1: a cheap Gemini JSON classifier, consulted ONLY on Layer-0 grok_direct
// when ROUTER_CLASSIFIER_ENABLED. Any failure falls back to the Layer-0 verdict.
// - Layer-1: a cheap Gemini JSON classifier (ROUTER_CLASSIFIER_ENABLED). It now runs
// on EVERY message (greetings + freshness hits included) so trivial can be
// agreement-confirmed and follow-ups get a context-resolved search_query. Any
// failure (incl. the 4s sub-deadline) falls back to the Layer-0 verdict — never an
// ungrounded confident answer, never a degrade-to-web (the classifier is Gemini, so
// a Gemini outage means the grounding fetch is down too, §4.4).
// RouterDecision is the route plus the signals behind it (logged for threshold
// calibration). Only Route/Source/Confidence/NeedsWeb drive behaviour today; the rest
// are recorded for the offline router-replay eval (§9).
// RouterDecision is the route plus the signals behind it (logged + persisted for
// threshold calibration and misroute attribution, §8). Route/Source/Confidence drive
// behaviour; the epistemic signals + SearchQuery feed the web route and the analytics.
type RouterDecision struct {
Route string
Source string // heuristic | classifier | default | forced | degraded
Confidence float64
NeedsWeb bool
Freshness string
ReasoningLevel string
Domain string
Difficulty string
Freshness string // "recent" on a freshnessRe hit (read by factualMiss + logged)
ReasoningLevel string // "high" on the forced reason route (logged)
// Classifier signals (§4) — populated only when Layer-1 ran. SearchQuery is the
// self-contained, follow-up-resolved web query (carried to genWebThenGrok in DMs).
SearchQuery string
EntityObscure bool
TimeSensitive bool
Verifiable bool
TrivialScore bool // the classifier's raw "trivial" verdict
LookupHint bool // Layer-0 soft hint (never sets the route on its own, §5)
WebDecidedBy string // which arm chose web — routedecide.WebBy* (request_log)
}
// Heuristic patterns. Kept deliberately tight: a false "trivial" leaks a real question
// to the cheap model, so trivial fires only on short, unmistakable greetings/acks or
// bare arithmetic. Freshness words route to web (a false web-route only costs a fetch
// and degrades cleanly — never a wrong answer).
var (
greetingRe = regexp.MustCompile(`^(привет(ик)?|здравствуй(те)?|хай|прив|ку|добрый\s+(день|вечер|утро)|спасибо|спс|благодарю|пока|ок(ей)?|угу|ага|hello|hi|hey|yo|thanks|thank\s+you|thx|ty|bye|goodbye|ok|okay|cool|nice)[\s!.,)]*$`)
arithmeticRe = regexp.MustCompile(`^[\s(]*\d+(\s*[-+*/×÷]\s*\d+)+[\s)=?]*$`)
freshnessRe = regexp.MustCompile(`(новост|сегодня|сейчас|последн|курс\s|погод|котировк|расписани|прогноз|breaking|today|right now|latest|current(ly)?|news|weather|stock price|exchange rate|score)`)
)
// routerStageTimeout bounds the classifier call independently of the overall budget
// (mirrors webStageTimeout, §4.4). It is derived from the parent genCtx so a budget
// cancel still propagates; its expiry is treated exactly like a classifier error → the
// Layer-0 verdict, never a terminal error.
const routerStageTimeout = 4 * time.Second
// routeLayer0 is the free heuristic. Confidence is a rough self-estimate used only for
// logging/threshold tuning, not control flow.
// classifierPrompt asks Gemini an EPISTEMIC-RISK question (not a topic label) and
// resolves follow-ups from the short conversation that is appended after it (rcx). Kept
// terse to bound tokens; extractJSON tolerates code fences.
const classifierPrompt = `You are a routing classifier for a Russian-speaking chat assistant. You do NOT answer the question. Read the short conversation; the LAST user line is the message to route, earlier lines are context to resolve pronouns and follow-ups. Reply with ONLY one JSON object, no prose.
Your main job is an EPISTEMIC judgement, not a topic label: if the assistant answered the LAST message purely from its own memory (no web), how likely is it to state a WRONG checkable fact a name, a film/book cast, a date or release year, a number, a price, a score, a population, a who-did-what about a SPECIFIC named person/film/company/place/event? Such facts are exactly what a model misremembers and states confidently.
Decide:
- "needs_web": true if a correct answer DEPENDS on such a checkable external fact, OR on anything time-sensitive (news, "сегодня"/today, "сейчас", latest, current price/rate/weather/score). Recency is sufficient but NOT necessary a STATIC fact like a film's cast or a country's capital also counts. When in doubt, prefer TRUE: grounding is cheap, a confident wrong fact is not. FALSE for opinions, explanations, advice, casual chat, creative writing, code help, or transforming text the user already gave you.
- "verifiable": true if the message is specifically a checkable fact about a NAMED entity (who acted in <film>, who is CEO of <company>, what year <event>, population of <place>) even if not about "today". A bare follow-up like "2024 года" inherits the entity from the previous turn.
- "entity_obscure": true if the salient entity is plausibly long-tail / not a household name (a minor film, a non-famous person, a niche product) these are where memory fails hardest.
- "time_sensitive": true if the answer can change over time (news, prices, weather, standings, "current"/"latest"/"now").
- "trivial": true ONLY for a bare greeting, acknowledgement, or tiny arithmetic with no real question.
- "search_query": a SELF-CONTAINED web search query for this message, with follow-ups resolved from context (a bare "2024 года" after discussing a film becomes "<film name> 2024 фильм актёрский состав"). Empty string ONLY if both needs_web and verifiable are false.
- "confidence": 0.0-1.0, your honest certainty in needs_web.
Schema: {"needs_web":bool,"verifiable":bool,"entity_obscure":bool,"time_sensitive":bool,"trivial":bool,"search_query":"<query or empty>","confidence":0.0-1.0}
Conversation:
`
// routeLayer0 is the free heuristic verdict (RouterDecision shape), built from the pure
// core. Used directly when the classifier is off, and kept here as an unexported helper
// for the heuristic golden test. Confidence is a rough self-estimate, logging-only (not
// control flow).
func routeLayer0(body string) RouterDecision {
s := strings.ToLower(strings.TrimSpace(body))
if s == "" {
return RouterDecision{Route: routeGrokDirect, Source: "heuristic", Confidence: 0.5}
}
if freshnessRe.MatchString(s) {
return RouterDecision{Route: routeWebThenGrok, Source: "heuristic", Confidence: 0.7, NeedsWeb: true, Freshness: "recent"}
}
if isTrivial(s) {
return RouterDecision{Route: routeTrivial, Source: "heuristic", Confidence: 0.85, Difficulty: "trivial"}
}
return RouterDecision{Route: routeGrokDirect, Source: "heuristic", Confidence: 0.6}
return layer0Decision(rd.ClassifyLayer0(body))
}
// isTrivial: a short greeting/ack or a bare arithmetic expression, with no sign of a
// real question. Length-bounded so "thanks, now explain quantum tunnelling" is NOT
// trivial.
func isTrivial(s string) bool {
if arithmeticRe.MatchString(s) {
return true
// layer0Decision maps the pure routedecide.Layer0 onto a RouterDecision, attaching the
// logging-only confidence self-estimates the old heuristic used.
func layer0Decision(l0 rd.Layer0) RouterDecision {
d := RouterDecision{Route: l0.Route, Source: "heuristic", LookupHint: l0.LookupHint, Freshness: l0.Freshness}
switch l0.Route {
case routeWebThenGrok:
d.Confidence, d.NeedsWeb = 0.7, true
case routeTrivial:
d.Confidence = 0.85
default:
d.Confidence = 0.6
}
if len(strings.Fields(s)) <= 4 && greetingRe.MatchString(s) {
return true
}
return false
return d
}
// classify produces the final RouterDecision for a request. The manual reasoning
// trigger is honoured independently of the heuristic router (it's a deliberate user
// signal). Layer-1's cost, when it runs, is accumulated into cost.Router.
func (b *Bot) classify(ctx context.Context, body string, cost *CostBreakdown) RouterDecision {
// isTrivial reports a short greeting/ack or bare arithmetic (the Layer-0 regex). Kept
// as a thin wrapper over the pure core for in-package callers/tests.
func isTrivial(s string) bool { return rd.IsTrivial(strings.ToLower(strings.TrimSpace(s))) }
// classify produces the final RouterDecision. The manual reasoning trigger is honoured
// independently of the heuristic router (a deliberate user signal). rcx is the
// privacy-minimised conversation window (DM-resolved; bare trigger in groups) appended
// to the classifier prompt. Layer-1's cost, when it runs, accumulates into cost.Router.
func (b *Bot) classify(ctx context.Context, body, rcx string, cost *CostBreakdown) RouterDecision {
if b.cfg.ReasoningEnabled && containsTrigger(body, b.cfg.ReasoningTrigger) {
return RouterDecision{Route: routeReason, Source: "forced", Confidence: 1, ReasoningLevel: "high"}
}
if !b.cfg.RouterEnabled {
return RouterDecision{Route: routeGrokDirect, Source: "default"}
}
d := routeLayer0(body)
// Layer-1 only refines the uncertain grok_direct verdict, and only if enabled and
// the Gemini client exists. Anything else stands on the heuristic.
if d.Route != routeGrokDirect || !b.cfg.RouterClassifierEnabled || b.gemini == nil {
l0 := rd.ClassifyLayer0(body)
d := layer0Decision(l0)
// Drop the old "only on grok_direct" gate: the classifier now runs on every message
// (when enabled) so it can raise a quiet factual question to web AND agreement-confirm
// a trivial. With it disabled, the Layer-0 verdict stands (today's behaviour).
if !b.cfg.RouterClassifierEnabled || b.gemini == nil {
return d
}
refined, err := b.routeLayer1(ctx, body, cost)
// 4s router sub-deadline derived from genCtx (a budget cancel still propagates).
rctx, cancel := context.WithTimeout(ctx, routerStageTimeout)
defer cancel()
refined, err := b.routeLayer1(rctx, rcx, l0, cost)
if err != nil {
// Classifier error / timeout / garbage → the Layer-0 verdict, exactly as today.
// Only the deterministic freshnessRe (carried in d) survives a classifier outage.
b.log.WarnContext(ctx, "layer-1 classifier failed; using heuristic", "err", err)
return d // degrade to the heuristic verdict
return d
}
return refined
}
// classifierConfidenceFloor is the bar a Layer-1 escalation OFF the safe floor
// (trivial/web/reason) must clear. Below it, the verdict is treated as doubt and the
// request stays on grok_direct — the owner's "substantive stays on Grok" rule (§8.6).
// A low-confidence "trivial" is exactly the false-trivial voice leak we must not take.
const classifierConfidenceFloor = 0.8
// classifierPrompt asks Gemini for a strict JSON verdict. Kept terse to bound tokens.
const classifierPrompt = `You are a router. Classify the user message into exactly one route and reply with ONLY a JSON object, no prose.
Routes: "trivial" (greeting/ack/tiny arithmetic), "web" (needs fresh/current facts: news, prices, weather, "today"), "normal" (everything else).
Schema: {"route":"trivial|web|normal","confidence":0.0-1.0,"needs_web":true|false}
Message: `
// routeLayer1 runs the Gemini classifier and parses its JSON. A non-JSON or unknown
// answer is an error so classify() degrades to the heuristic — the cheap model never
// gets to silently mis-route by returning garbage.
func (b *Bot) routeLayer1(ctx context.Context, body string, cost *CostBreakdown) (RouterDecision, error) {
// routeLayer1 runs the Gemini classifier, parses its JSON into a routedecide.Verdict,
// and resolves the route via the shared routedecide.Combine (WebParanoid-gated). A
// non-JSON or transport error is returned so classify() degrades to the heuristic — the
// cheap model never silently mis-routes by returning garbage.
func (b *Bot) routeLayer1(ctx context.Context, rcx string, l0 rd.Layer0, cost *CostBreakdown) (RouterDecision, error) {
resp, err := b.gemini.Complete(ctx, LLMRequest{
Model: b.cfg.GeminiModel,
Messages: []Message{{Role: "user", Content: classifierPrompt + body}},
MaxTokens: 60,
Messages: []Message{{Role: "user", Content: classifierPrompt + rcx}},
MaxTokens: 80, // was 60; the schema grew
Temperature: 0,
})
if err != nil {
@ -121,41 +145,36 @@ func (b *Bot) routeLayer1(ctx context.Context, body string, cost *CostBreakdown)
}
cost.Router += computeUSD(b.cfg.GeminiModel, resp.Usage, b.cfg)
var parsed struct {
Route string `json:"route"`
Confidence float64 `json:"confidence"`
NeedsWeb bool `json:"needs_web"`
}
if err := json.Unmarshal([]byte(extractJSON(resp.Text)), &parsed); err != nil {
// The classifier schema IS routedecide.Verdict (tagged), so unmarshal straight into it.
var v rd.Verdict
if err := json.Unmarshal([]byte(extractJSON(resp.Text)), &v); err != nil {
return RouterDecision{}, err
}
route := normalizeRoute(parsed.Route)
// Safe floor: a low-confidence escalation off grok_direct is doubt — keep it on
// Grok rather than leak a possibly-substantive question to the cheap model.
if route != routeGrokDirect && parsed.Confidence < classifierConfidenceFloor {
return RouterDecision{Route: routeGrokDirect, Source: "classifier", Confidence: parsed.Confidence}, nil
}
return RouterDecision{
Route: route,
Source: "classifier",
Confidence: parsed.Confidence,
NeedsWeb: parsed.NeedsWeb || route == routeWebThenGrok,
}, nil
}
v.SearchQuery = strings.TrimSpace(v.SearchQuery)
combined := rd.Combine(l0, v, b.cfg.WebParanoid)
// normalizeRoute maps a classifier label to a route constant, defaulting unknown
// labels to grok_direct — the safe floor, so a confused classifier never escalates.
func normalizeRoute(label string) string {
switch strings.ToLower(strings.TrimSpace(label)) {
case "trivial", "trivial_direct":
return routeTrivial
case "web", "web_then_grok":
return routeWebThenGrok
case "reason", "reason_then_grok":
return routeReason
default:
return routeGrokDirect
d := RouterDecision{
Route: combined.Route,
Source: "classifier",
Confidence: v.Confidence,
NeedsWeb: v.NeedsWeb,
Verifiable: v.Verifiable,
EntityObscure: v.EntityObscure,
TimeSensitive: v.TimeSensitive,
TrivialScore: v.Trivial,
SearchQuery: v.SearchQuery,
LookupHint: l0.LookupHint,
Freshness: l0.Freshness,
WebDecidedBy: combined.WebDecidedBy,
}
// INFO so prod (which runs at INFO) captures the signal mix without LOG_LEVEL=debug.
// Content-free: no body, no search_query (those are gated DEBUG/telemetry paths).
b.log.InfoContext(ctx, "classifier verdict",
"route", d.Route, "web_decided_by", d.WebDecidedBy, "needs_web", d.NeedsWeb,
"verifiable", d.Verifiable, "entity_obscure", d.EntityObscure,
"time_sensitive", d.TimeSensitive, "trivial", d.TrivialScore,
"confidence", d.Confidence, "lookup_hint", d.LookupHint, "paranoid", b.cfg.WebParanoid)
return d, nil
}
// extractJSON pulls the first {...} object out of a model reply, tolerating prose or

View file

@ -47,18 +47,6 @@ func TestRouteLayer0(t *testing.T) {
}
}
// TestNormalizeRoute checks the label → route mapping: the known labels map to their
// route constants, and anything else ("normal", garbage, empty) lands on grok_direct.
func TestNormalizeRoute(t *testing.T) {
	table := []struct {
		label string
		want  string
	}{
		{"trivial", routeTrivial},
		{"web", routeWebThenGrok},
		{"reason", routeReason},
		{"normal", routeGrokDirect},
		{"garbage", routeGrokDirect},
		{"", routeGrokDirect},
	}
	for _, tc := range table {
		got := normalizeRoute(tc.label)
		if got == tc.want {
			continue
		}
		t.Errorf("normalizeRoute(%q) = %q, want %q", tc.label, got, tc.want)
	}
}
func TestExtractJSON(t *testing.T) {
if got := extractJSON("prefix {\"route\":\"web\"} suffix"); got != `{"route":"web"}` {
t.Errorf("extractJSON = %q", got)

View file

@ -166,6 +166,27 @@ var migrations = []string{
date TEXT PRIMARY KEY,
n INTEGER NOT NULL DEFAULT 0
);`,
// v5 (router redesign §8): the classifier signals + web outcome the offline eval needs
// to MEASURE misroute / false-web / lie-rate / true-cost / rewrite-quality — none of
// which is derivable from the v3 columns. Append-only (never edit an earlier migration).
// Booleans/counts are metadata, always recorded when telemetry is on; search_query +
// answer_text are content, written ONLY when TELEMETRY_STORE_TEXT (NULL otherwise).
// classifier_confidence is NOT a new column — filter router_confidence on
// router_source='classifier'. grounding_fee_usd is the §7 booked per-prompt fee (it is
// ALSO folded into grounding_usd for the ceiling; this column is the analytics split).
`ALTER TABLE request_log ADD COLUMN IF NOT EXISTS needs_web BOOL DEFAULT false;
ALTER TABLE request_log ADD COLUMN IF NOT EXISTS entity_obscure BOOL DEFAULT false;
ALTER TABLE request_log ADD COLUMN IF NOT EXISTS time_sensitive BOOL DEFAULT false;
ALTER TABLE request_log ADD COLUMN IF NOT EXISTS verifiable BOOL DEFAULT false;
ALTER TABLE request_log ADD COLUMN IF NOT EXISTS trivial_score BOOL DEFAULT false;
ALTER TABLE request_log ADD COLUMN IF NOT EXISTS web_decided_by TEXT DEFAULT '';
ALTER TABLE request_log ADD COLUMN IF NOT EXISTS grounding_fee_usd DOUBLE PRECISION DEFAULT 0;
ALTER TABLE request_log ADD COLUMN IF NOT EXISTS rewrite_used BOOL DEFAULT false;
ALTER TABLE request_log ADD COLUMN IF NOT EXISTS web_grounded BOOL DEFAULT false;
ALTER TABLE request_log ADD COLUMN IF NOT EXISTS citation_count INT DEFAULT 0;
ALTER TABLE request_log ADD COLUMN IF NOT EXISTS search_query TEXT;
ALTER TABLE request_log ADD COLUMN IF NOT EXISTS answer_text TEXT;`,
}
// migrate runs all pending migrations on a single connection under a session
@ -412,9 +433,15 @@ func (s *Store) ReleaseReservation(mxid string, estimate float64) error {
// grounding it actually spent, releases the rest of the reservation, and refunds the
// request slot separately. GREATEST(0, …) keeps reserved_usd from underflowing.
// Atomic and commutative per row, so concurrent settles for one user sum correctly.
//
// The per-grounded-prompt FEE (cost.GroundingFee, §7 SG1) is folded into the committed
// grounding_usd column here — so it flows through committedUSDExpr and the $10 ceiling
// finally sees it WITHOUT a spend-table migration. request_log keeps the fee separately
// in grounding_fee_usd for the analytics split.
func (s *Store) Settle(mxid string, estimate float64, cost CostBreakdown) error {
ctx, cancel := opContext()
defer cancel()
grounding := cost.Grounding + cost.GroundingFee
_, err := s.pool.Exec(ctx,
`INSERT INTO spend (date, mxid, requests, usd, router_usd, grounding_usd, webtool_usd, reserved_usd)
VALUES ($1, $2, 0, $3, $4, $5, $6, 0)
@ -424,7 +451,7 @@ func (s *Store) Settle(mxid string, estimate float64, cost CostBreakdown) error
grounding_usd = spend.grounding_usd + excluded.grounding_usd,
webtool_usd = spend.webtool_usd + excluded.webtool_usd,
reserved_usd = GREATEST(0, spend.reserved_usd - $7)`,
todayUTC(), mxid, cost.Token, cost.Router, cost.Grounding, cost.WebTool, estimate)
todayUTC(), mxid, cost.Token, cost.Router, grounding, cost.WebTool, estimate)
return err
}
@ -444,32 +471,42 @@ func (s *Store) InsertRequestLog(rl RequestLog) error {
if err != nil {
return err
}
// query_text is NULL unless text capture is on (the struct carries "" otherwise),
// so the analytics table never holds message content by default.
var queryText any
if rl.QueryText != "" {
queryText = rl.QueryText
// Content columns are NULL unless text capture is on (the struct carries "" otherwise),
// so the analytics table never holds message/model content by default.
nullIfEmpty := func(s string) any {
if s == "" {
return nil
}
return s
}
// request_log.grounding_usd is the TOKEN cost only; the per-prompt FEE is split into its
// own grounding_fee_usd column (the spend ledger folds them — see Settle). total_usd is
// the full Total() including the fee, so the two grounding columns + total stay coherent.
_, err = s.pool.Exec(ctx, `
INSERT INTO request_log (
id, room_id, sender, route, router_source, router_confidence, models,
prompt_tokens, cached_tokens, completion_tokens,
token_usd, grounding_usd, router_usd, webtool_usd, total_usd,
latency_ms, stage_ms, escalated, fallback_fired, cache_hit, ceiling_hit,
per_user_cap_hit, prompt_version, provider_request_id, degraded, err, ok, query_text
per_user_cap_hit, prompt_version, provider_request_id, degraded, err, ok, query_text,
needs_web, entity_obscure, time_sensitive, verifiable, trivial_score, web_decided_by,
grounding_fee_usd, rewrite_used, web_grounded, citation_count, search_query, answer_text
) VALUES (
$1, $2, $3, $4, $5, $6, $7,
$8, $9, $10,
$11, $12, $13, $14, $15,
$16, $17, $18, $19, $20, $21,
$22, $23, $24, $25, $26, $27, $28
$22, $23, $24, $25, $26, $27, $28,
$29, $30, $31, $32, $33, $34,
$35, $36, $37, $38, $39, $40
) ON CONFLICT (id) DO NOTHING`,
rl.ID, rl.RoomID, rl.Sender, rl.Route, rl.RouterSource, rl.RouterConfidence, models,
rl.PromptTokens, rl.CachedTokens, rl.CompletionTokens,
rl.Cost.Token, rl.Cost.Grounding, rl.Cost.Router, rl.Cost.WebTool, rl.Cost.Total(),
rl.LatencyMS, stages, rl.Escalated, rl.FallbackFired, rl.CacheHit, rl.CeilingHit,
rl.PerUserCapHit, rl.PromptVersion, rl.ProviderRequestID, rl.Degraded, rl.Err, rl.OK, queryText)
rl.PerUserCapHit, rl.PromptVersion, rl.ProviderRequestID, rl.Degraded, rl.Err, rl.OK, nullIfEmpty(rl.QueryText),
rl.NeedsWeb, rl.EntityObscure, rl.TimeSensitive, rl.Verifiable, rl.TrivialScore, rl.WebDecidedBy,
rl.Cost.GroundingFee, rl.RewriteUsed, rl.WebGrounded, rl.CitationCount, nullIfEmpty(rl.SearchQuery), nullIfEmpty(rl.AnswerText))
return err
}
@ -509,6 +546,20 @@ func (s *Store) IncrGroundingIfUnder(cap int) (bool, error) {
return true, nil
}
// DecrGrounding gives back one grounding slot for today's row in grounding_count. It is
// meant for an admitted prompt that ended up without a usable grounded digest (no
// citations, or the fetch failed), so over-routing and failed fetches don't burn the
// day's grounded-answer budget (§7 SG4).
//
// The refund is a single UPDATE keyed on todayUTC() (no date argument), and
// GREATEST(0, …) clamps the counter so a double refund cannot drive it negative. Only
// the quota counter is touched here; no spend/ledger column is written.
//
// Returns the error from the UPDATE, or nil.
func (s *Store) DecrGrounding() error {
	const refundSlot = `UPDATE grounding_count SET n = GREATEST(0, n - 1) WHERE date = $1`

	ctx, cancel := opContext()
	defer cancel()

	if _, err := s.pool.Exec(ctx, refundSlot, todayUTC()); err != nil {
		return err
	}
	return nil
}
// HasWarnedEncrypted / SetWarnedEncrypted persist the one-shot "reacted 🔒 to this
// room because I can't read encryption" flag so a restart doesn't re-react on every
// message (F5). The bot never reacts to its own events: m.reaction is not an

View file

@ -443,6 +443,114 @@ func TestStoreGroundingCapConcurrent(t *testing.T) {
}
}
// TestStoreDecrGrounding covers the §7 SG4 cap refund: a refunded slot frees exactly one
// admission, and an over-refund clamps to 0 (never negative → no phantom headroom).
//
// Every IncrGroundingIfUnder error is checked explicitly: the call reports (false, err)
// style failures, and treating those as "denied" would let a broken store pass the
// at-cap assertion or skew the admitted count.
func TestStoreDecrGrounding(t *testing.T) {
	st := openTestStore(t)
	defer st.Close()

	// Named dailyCap rather than cap so the builtin cap() is not shadowed.
	const dailyCap = 3

	// Fill the day's quota.
	for i := 0; i < dailyCap; i++ {
		if ok, err := st.IncrGroundingIfUnder(dailyCap); err != nil || !ok {
			t.Fatalf("incr %d: (%v,%v)", i, ok, err)
		}
	}
	// At the cap the next admission must be denied — by the counter, not by an error.
	if ok, err := st.IncrGroundingIfUnder(dailyCap); err != nil {
		t.Fatalf("incr at cap: %v", err)
	} else if ok {
		t.Fatal("at cap, should be denied")
	}

	// Refund one → one more admitted.
	if err := st.DecrGrounding(); err != nil {
		t.Fatalf("decr: %v", err)
	}
	if ok, err := st.IncrGroundingIfUnder(dailyCap); err != nil || !ok {
		t.Fatalf("post-refund incr: (%v,%v), want admitted", ok, err)
	}

	// Over-refund must clamp at 0, not go negative.
	for i := 0; i < 10; i++ {
		if err := st.DecrGrounding(); err != nil {
			t.Fatalf("over-refund decr: %v", err)
		}
	}
	// From a clamped 0, exactly dailyCap admissions fit; a negative counter would
	// admit more than that.
	admitted := 0
	for i := 0; i < dailyCap+2; i++ {
		ok, err := st.IncrGroundingIfUnder(dailyCap)
		if err != nil {
			t.Fatalf("post-clamp incr %d: %v", i, err)
		}
		if ok {
			admitted++
		}
	}
	if admitted != dailyCap {
		t.Fatalf("after clamp, admitted %d, want %d (counter must have clamped to 0)", admitted, dailyCap)
	}
}
// TestStoreSettleBooksGroundingFee settles one request carrying both a grounding token
// cost and a per-grounded-prompt fee (§7 SG1), then checks that today's committed spend
// equals their sum — i.e. the fee reaches the figure the $10 ceiling is computed from.
func TestStoreSettleBooksGroundingFee(t *testing.T) {
	const (
		want = 0.0351 // 0.0001 grounding tokens + 0.035 per-prompt fee
		eps  = 1e-9   // float tolerance for the summed column
	)

	st := openTestStore(t)
	defer st.Close()

	booked := CostBreakdown{Grounding: 0.0001, GroundingFee: 0.035}
	if err := st.Settle("@u:vojo.chat", 0, booked); err != nil {
		t.Fatalf("settle: %v", err)
	}

	spent, err := st.SpentTodayUSD()
	if err != nil {
		t.Fatalf("spent: %v", err)
	}

	diff := spent - want
	if diff > eps || diff < -eps {
		t.Fatalf("committed = %v, want 0.0351 (grounding token + per-prompt fee)", spent)
	}
}
// TestStoreRequestLogClassifierColumns covers the §8 columns: the signal booleans, the
// fee split and the grounded outcome must round-trip through InsertRequestLog, and
// total_usd must include the fee.
//
// All five signal booleans are read back. The ones the fixture sets (needs_web,
// entity_obscure, verifiable) must be true and the ones it leaves unset (time_sensitive,
// trivial_score) must be false, so a positional mix-up between adjacent boolean
// parameters in the INSERT is caught rather than silently accepted.
func TestStoreRequestLogClassifierColumns(t *testing.T) {
	st := openTestStore(t)
	defer st.Close()

	rl := RequestLog{
		ID: "$ev-rl-sig", Route: routeWebThenGrok, RouterSource: "classifier",
		Models:        map[string]string{"final": "grok-x"},
		Cost:          CostBreakdown{Token: 0.002, Grounding: 0.00007, GroundingFee: 0.035},
		NeedsWeb:      true,
		EntityObscure: true,
		Verifiable:    true,
		WebDecidedBy:  "entity_obscure",
		RewriteUsed:   true,
		WebGrounded:   true,
		CitationCount: 3,
		SearchQuery:   "the resolved query",
		AnswerText:    "the answer",
		OK:            true,
	}
	if err := st.InsertRequestLog(rl); err != nil {
		t.Fatalf("insert: %v", err)
	}

	ctx, cancel := opContext()
	defer cancel()
	var (
		needsWeb, entityObscure, timeSensitive, verifiable, trivialScore bool
		webGrounded, rewriteUsed                                         bool
		webDecidedBy                                                     string
		fee, total                                                       float64
		cites                                                            int
		sq, ans                                                          *string // nullable content columns
	)
	if err := st.pool.QueryRow(ctx, `SELECT needs_web, entity_obscure, time_sensitive, verifiable, trivial_score,
web_decided_by, grounding_fee_usd, rewrite_used, web_grounded, citation_count,
search_query, answer_text, total_usd
FROM request_log WHERE id=$1`, rl.ID).Scan(&needsWeb, &entityObscure, &timeSensitive, &verifiable, &trivialScore,
		&webDecidedBy, &fee, &rewriteUsed, &webGrounded, &cites, &sq, &ans, &total); err != nil {
		t.Fatalf("read: %v", err)
	}

	// Signals set in the fixture must come back true …
	if !needsWeb || !entityObscure || !verifiable {
		t.Fatalf("set signals lost: needsWeb=%v obscure=%v verifiable=%v", needsWeb, entityObscure, verifiable)
	}
	// … and the ones left at their zero value must stay false.
	if timeSensitive || trivialScore {
		t.Fatalf("unset signals leaked: timeSensitive=%v trivialScore=%v", timeSensitive, trivialScore)
	}
	if webDecidedBy != "entity_obscure" || !rewriteUsed || !webGrounded || cites != 3 {
		t.Fatalf("outcome columns wrong: decidedBy=%q rewrite=%v grounded=%v cites=%d",
			webDecidedBy, rewriteUsed, webGrounded, cites)
	}
	if d := fee - 0.035; d > 1e-9 || d < -1e-9 {
		t.Fatalf("grounding_fee_usd = %v, want 0.035", fee)
	}
	if d := total - rl.Cost.Total(); d > 1e-9 || d < -1e-9 {
		t.Fatalf("total_usd = %v, want %v (incl. fee)", total, rl.Cost.Total())
	}
	// InsertRequestLog itself does not gate content; it stores what the struct carries.
	if sq == nil || *sq != "the resolved query" || ans == nil || *ans != "the answer" {
		t.Fatalf("InsertRequestLog should store content as given: sq=%v ans=%v", sq, ans)
	}
}
func TestStoreWarnedEncrypted(t *testing.T) {
st := openTestStore(t)
const room = "!enc:vojo.chat"

View file

@ -3,6 +3,8 @@ package main
import (
"context"
"time"
rd "vojo.chat/ai-bot/internal/routedecide"
)
// telemetry.go is the request_log analytics path: it captures route, cost, latency
@ -17,10 +19,10 @@ import (
// limiter denial).
const (
routeNone = "none"
routeGrokDirect = "grok_direct"
routeTrivial = "trivial_direct"
routeWebThenGrok = "web_then_grok"
routeReason = "reason_then_grok"
routeGrokDirect = rd.RouteGrokDirect
routeTrivial = rd.RouteTrivial
routeWebThenGrok = rd.RouteWeb
routeReason = rd.RouteReason
)
// Degrade/skip reason strings (request_log.degraded). Stable tokens so the analytics
@ -74,6 +76,24 @@ type RequestLog struct {
Err string
OK bool
QueryText string // stored only when TELEMETRY_STORE_TEXT; stripped otherwise
// Router/classifier signals + web outcome (§8) — the inputs the offline eval needs to
// measure misroute / false-web / lie-rate / true-cost / rewrite-quality. The boolean
// signals + WebDecidedBy are metadata (always stored when telemetry is on); SearchQuery
// and AnswerText are model-/user-derived content and are stripped unless
// TELEMETRY_STORE_TEXT (like QueryText). RouterConfidence above doubles as the
// classifier confidence (filter request_log on router_source='classifier').
NeedsWeb bool
EntityObscure bool
TimeSensitive bool
Verifiable bool
TrivialScore bool
WebDecidedBy string
RewriteUsed bool
WebGrounded bool
CitationCount int
SearchQuery string // resolved query sent to Fetch; stored only when TELEMETRY_STORE_TEXT
AnswerText string // the final answer; stored only when TELEMETRY_STORE_TEXT (lie-label input)
}
// recordTelemetry persists a row off the answer path. No-op unless TELEMETRY_ENABLED.
@ -85,7 +105,9 @@ func (b *Bot) recordTelemetry(ctx context.Context, rl RequestLog) {
return
}
if !b.cfg.TelemetryStoreText {
rl.QueryText = ""
// One text-gate governs ALL stored content: the user query, the model-authored
// search query, and the answer. Metadata signals (NeedsWeb, WebDecidedBy, …) stay.
rl.QueryText, rl.SearchQuery, rl.AnswerText = "", "", ""
}
b.safego(ctx, "telemetry", func() {
if err := b.st.InsertRequestLog(rl); err != nil {

View file

@ -55,6 +55,42 @@ func TestRecordSkipWritesRow(t *testing.T) {
}
}
// TestTelemetryStripsTextWhenStoreTextOff exercises the content gate in recordTelemetry.
// With TelemetryEnabled on and TelemetryStoreText off, the three content fields (user
// query, model-authored search query, answer) must be stored as NULL, while the metadata
// signals on the same row (needs_web, web_decided_by) must still be written.
func TestTelemetryStripsTextWhenStoreTextOff(t *testing.T) {
	st := openTestStore(t)
	defer st.Close()

	const rowID = "$strip-1"
	bot := newTestBot(st, &Config{TelemetryEnabled: true, TelemetryStoreText: false})
	bot.recordTelemetry(context.Background(), RequestLog{
		ID: rowID, Route: routeWebThenGrok, RouterSource: "classifier",
		QueryText: "secret query", SearchQuery: "secret search", AnswerText: "secret answer",
		NeedsWeb: true, WebDecidedBy: "classifier_needs_web", OK: true,
	})

	// The row is written off the calling goroutine, so poll (bounded at 2s) until a
	// row shows up before reading it back.
	deadline := time.Now().Add(2 * time.Second)
	for {
		if requestLogCount(t, st) != 0 {
			break
		}
		if !time.Now().Before(deadline) {
			break
		}
		time.Sleep(20 * time.Millisecond)
	}

	ctx, cancel := opContext()
	defer cancel()

	// Pointers so SQL NULL is distinguishable from an empty string.
	var (
		queryText, searchQuery, answerText, decidedBy *string
		needsWeb                                      bool
	)
	row := st.pool.QueryRow(ctx,
		`SELECT query_text, search_query, answer_text, web_decided_by, needs_web FROM request_log WHERE id=$1`,
		rowID)
	if err := row.Scan(&queryText, &searchQuery, &answerText, &decidedBy, &needsWeb); err != nil {
		t.Fatalf("read: %v", err)
	}

	contentStored := queryText != nil || searchQuery != nil || answerText != nil
	if contentStored {
		t.Fatalf("text columns must be NULL when store-text off: qt=%v sq=%v ans=%v", queryText, searchQuery, answerText)
	}

	// Metadata is still recorded (it is not content).
	metadataKept := needsWeb && decidedBy != nil && *decidedBy == "classifier_needs_web"
	if !metadataKept {
		t.Fatalf("metadata signals must survive: needsWeb=%v decidedBy=%v", needsWeb, decidedBy)
	}
}
// TestTelemetryDisabledWritesNothing proves the default (TELEMETRY_ENABLED off) adds
// no write path — strict "cascade-off == today".
func TestTelemetryDisabledWritesNothing(t *testing.T) {

View file

@ -20,9 +20,12 @@ import (
// API (/v1/responses). NB the older chat/completions Live Search `search_parameters`
// mechanism was RETIRED by xAI (now 410 Gone), and the web_search tool is not on
// chat/completions — hence the Responses endpoint. Billed $5/1k tool calls + tokens.
// - gemini_grounding: Gemini native v1beta google_search. Cheaper, but Gemini-3 only
// and silently ungrounds otherwise (F-EXT-3) — so it runs behind a citations
// verify-gate and degrades if it fails.
// - gemini_grounding: Gemini native v1beta google_search. Cheaper. Works on current
// models INCLUDING gemini-2.5-flash-lite (verified against ai.google.dev — the 2.5
// family supports google_search; only legacy models use google_search_retrieval).
// The F-EXT-3 "silently ungrounds" caveat is about the OpenAI-compat endpoint, NOT
// the model version — so this provider uses the NATIVE v1beta path and runs behind a
// citations verify-gate, degrading if no citations come back.
//
// The web call is bounded by a per-stage timeout (and gemini_grounding additionally by a
// durable daily cap), and either provider failing degrades the request to grok_direct
@ -196,12 +199,13 @@ func (p *grokWebSearch) Fetch(ctx context.Context, query string) (WebContext, er
return wc, nil
}
// --- gemini_grounding (Gemini-3 native only) --------------------------------------
// --- gemini_grounding (native v1beta google_search; current models incl. 2.5) ------
type geminiGrounding struct {
gem *geminiClient
st *Store
cfg *Config
gem *geminiClient
st *Store
cfg *Config
logger *slog.Logger
}
func (p *geminiGrounding) Fetch(ctx context.Context, query string) (WebContext, error) {
@ -213,11 +217,24 @@ func (p *geminiGrounding) Fetch(ctx context.Context, query string) (WebContext,
if ok, err := p.st.IncrGroundingIfUnder(p.cfg.WebGroundingDailyCap); err != nil {
return WebContext{}, err
} else if !ok {
return WebContext{}, errGroundingCapped
return WebContext{}, errGroundingCapped // hit BEFORE billing → no fee, no slot consumed
}
res, err := p.gem.groundedSearch(ctx, query) // errors (incl. no-citations) → caller degrades
cost := CostBreakdown{Grounding: computeUSD(p.cfg.GeminiModel, res.Usage, p.cfg)}
// SG1: the prompt is admitted, so treat it as billed — book the token cost AND the
// per-grounded-prompt fee, even on the error return. The fee is the money truth the
// $10 ceiling must see; it is kept separate from the cap quota below.
cost := CostBreakdown{
Grounding: computeUSD(p.cfg.GeminiModel, res.Usage, p.cfg),
GroundingFee: p.cfg.GeminiGroundingPerPrompt,
}
if err != nil {
// SG4: the admitted slot produced no usable grounding (no citations, or the call
// failed). Refund the cap slot so over-routing / failed fetches don't burn the
// day's grounded-answer budget — independent of the fee, which stays booked.
// Best-effort: a failed refund only slightly tightens the cap, never money.
if derr := p.st.DecrGrounding(); derr != nil && p.logger != nil {
p.logger.WarnContext(ctx, "grounding cap refund failed (non-fatal)", "err", derr)
}
return WebContext{Cost: cost, Usage: res.Usage}, err
}
return WebContext{Digest: res.Digest, Citations: res.Citations, Usage: res.Usage, Cost: cost}, nil