package main

import (
	"context"
	"encoding/json"
	"strings"
	"time"

	rd "vojo.chat/ai-bot/internal/routedecide"
)

// router.go classifies a message into a route. It runs INSIDE respond() — after the
// mention/media/foreign/single-flight gates (F-FUNC-7) — so a paid Layer-1 classifier
// is never spent on a message today's bot drops for free.
//
// Two layers; the decision MATH lives in the pure internal/routedecide package so the
// offline eval (cmd/routereval) replays the SAME function instead of a copy:
//   - Layer-0: free regex heuristics (RU+EN). Always runs when ROUTER_ENABLED.
//   - Layer-1: a cheap Gemini JSON classifier (ROUTER_CLASSIFIER_ENABLED). It now runs
//     on EVERY message (greetings + freshness hits included) so trivial can be
//     agreement-confirmed and follow-ups get a context-resolved search_query. Any
//     failure (incl. the 4s sub-deadline) falls back to the Layer-0 verdict — never an
//     ungrounded confident answer, never a degrade-to-web (the classifier is Gemini, so
//     a Gemini outage means the grounding fetch is down too, §4.4).

// RouterDecision is the route plus the signals behind it (logged + persisted for
// threshold calibration and misroute attribution, §8). Route/Source/Confidence drive
// behaviour; the epistemic signals + SearchQuery feed the web route and the analytics.
type RouterDecision struct {
	Route          string  // the chosen route (Layer-0 route, or the combined Layer-0 + Layer-1 route)
	Source         string  // heuristic | classifier | default | forced | degraded
	Confidence     float64 // fixed self-estimate on the heuristic path; the model's own value on the classifier path
	NeedsWeb       bool    // true for a Layer-0 web route, or the classifier's "needs_web" answer
	Freshness      string  // "recent" on a freshnessRe hit (read by factualMiss + logged)
	ReasoningLevel string  // "high" on the forced reason route (logged)

	// Classifier signals (§4) — populated only when Layer-1 ran. SearchQuery is the
	// self-contained, follow-up-resolved web query (carried to genWebThenGrok in DMs).
	// LookupHint is the exception: it is copied from the Layer-0 result on both the
	// heuristic-only and the classifier path.
	SearchQuery   string
	EntityObscure bool
	TimeSensitive bool
	Verifiable    bool
	TrivialScore  bool   // the classifier's raw "trivial" verdict
	LookupHint    bool   // Layer-0 soft hint (never sets the route on its own, §5)
	WebDecidedBy  string // which arm chose web — routedecide.WebBy* (request_log)
}

// routerStageTimeout bounds the classifier call independently of the overall budget
// (mirrors webStageTimeout, §4.4). It is derived from the parent genCtx so a budget
// cancel still propagates; its expiry is treated exactly like a classifier error → the
// Layer-0 verdict, never a terminal error.
const routerStageTimeout = 4 * time.Second

// classifierPrompt asks Gemini an EPISTEMIC-RISK question (not a topic label) and
// resolves follow-ups from the short conversation that is appended after it (rcx). Kept
// terse to bound tokens; extractJSON tolerates code fences.
//
// The literal is runtime text sent to the model verbatim: do not reflow or translate it.
//
// NOTE(review): several in-prompt examples look like they lost a placeholder token
// ("who acted in ,", "who is CEO of ,", "what year ,", "population of )", and the
// leading space in " 2024 фильм актёрский состав") — possibly angle-bracket
// placeholders stripped somewhere upstream. Confirm against the intended prompt.
const classifierPrompt = `You are a routing classifier for a Russian-speaking chat assistant. You do NOT answer the question. Read the short conversation; the LAST user line is the message to route, earlier lines are context to resolve pronouns and follow-ups. Reply with ONLY one JSON object, no prose. Your main job is an EPISTEMIC judgement, not a topic label: if the assistant answered the LAST message purely from its own memory (no web), how likely is it to state a WRONG checkable fact — a name, a film/book cast, a date or release year, a number, a price, a score, a population, a who-did-what about a SPECIFIC named person/film/company/place/event? Such facts are exactly what a model misremembers and states confidently. Decide: - "needs_web": true if a correct answer DEPENDS on such a checkable external fact, OR on anything time-sensitive (news, "сегодня"/today, "сейчас", latest, current price/rate/weather/score). Recency is sufficient but NOT necessary — a STATIC fact like a film's cast or a country's capital also counts. When in doubt, prefer TRUE: grounding is cheap, a confident wrong fact is not. 
FALSE for opinions, explanations, advice, casual chat, creative writing, code help, or transforming text the user already gave you. - "verifiable": true if the message is specifically a checkable fact about a NAMED entity (who acted in , who is CEO of , what year , population of ) — even if not about "today". A bare follow-up like "2024 года" inherits the entity from the previous turn. - "entity_obscure": true if the salient entity is plausibly long-tail / not a household name (a minor film, a non-famous person, a niche product) — these are where memory fails hardest. - "time_sensitive": true if the answer can change over time (news, prices, weather, standings, "current"/"latest"/"now"). - "trivial": true ONLY for a bare greeting, acknowledgement, or tiny arithmetic with no real question. - "search_query": a SELF-CONTAINED web search query for this message, with follow-ups resolved from context (a bare "2024 года" after discussing a film becomes " 2024 фильм актёрский состав"). Empty string ONLY if both needs_web and verifiable are false. - "confidence": 0.0-1.0, your honest certainty in needs_web. Schema: {"needs_web":bool,"verifiable":bool,"entity_obscure":bool,"time_sensitive":bool,"trivial":bool,"search_query":"","confidence":0.0-1.0} Conversation: `

// routeLayer0 is the free heuristic verdict (RouterDecision shape), built from the pure
// core. Used directly when the classifier is off, and exported here for the heuristic
// golden test. Confidence is a rough self-estimate, logging-only (not control flow).
func routeLayer0(body string) RouterDecision {
	// Classify with the pure Layer-0 core, then lift the result into a RouterDecision.
	return layer0Decision(rd.ClassifyLayer0(body))
}

// layer0Decision maps the pure routedecide.Layer0 onto a RouterDecision, attaching the
// logging-only confidence self-estimates the old heuristic used.
func layer0Decision(l0 rd.Layer0) RouterDecision { d := RouterDecision{Route: l0.Route, Source: "heuristic", LookupHint: l0.LookupHint, Freshness: l0.Freshness} switch l0.Route { case routeWebThenGrok: d.Confidence, d.NeedsWeb = 0.7, true case routeTrivial: d.Confidence = 0.85 default: d.Confidence = 0.6 } return d } // isTrivial reports a short greeting/ack or bare arithmetic (the Layer-0 regex). Kept // as a thin wrapper over the pure core for in-package callers/tests. func isTrivial(s string) bool { return rd.IsTrivial(strings.ToLower(strings.TrimSpace(s))) } // classify produces the final RouterDecision. The manual reasoning trigger is honoured // independently of the heuristic router (a deliberate user signal). rcx is the // privacy-minimised conversation window (DM-resolved; bare trigger in groups) appended // to the classifier prompt. Layer-1's cost, when it runs, accumulates into cost.Router. func (b *Bot) classify(ctx context.Context, body, rcx string, cost *CostBreakdown) RouterDecision { if b.cfg.ReasoningEnabled && containsTrigger(body, b.cfg.ReasoningTrigger) { return RouterDecision{Route: routeReason, Source: "forced", Confidence: 1, ReasoningLevel: "high"} } if !b.cfg.RouterEnabled { return RouterDecision{Route: routeGrokDirect, Source: "default"} } l0 := rd.ClassifyLayer0(body) d := layer0Decision(l0) // Drop the old "only on grok_direct" gate: the classifier now runs on every message // (when enabled) so it can raise a quiet factual question to web AND agreement-confirm // a trivial. With it disabled, the Layer-0 verdict stands (today's behaviour). if !b.cfg.RouterClassifierEnabled || b.gemini == nil { return d } // 4s router sub-deadline derived from genCtx (a budget cancel still propagates). rctx, cancel := context.WithTimeout(ctx, routerStageTimeout) defer cancel() refined, err := b.routeLayer1(rctx, rcx, l0, cost) if err != nil { // Classifier error / timeout / garbage → the Layer-0 verdict, exactly as today. 
// Only the deterministic freshnessRe (carried in d) survives a classifier outage. b.log.WarnContext(ctx, "layer-1 classifier failed; using heuristic", "err", err) return d } return refined } // routeLayer1 runs the Gemini classifier, parses its JSON into a routedecide.Verdict, // and resolves the route via the shared routedecide.Combine (WebParanoid-gated). A // non-JSON or transport error is returned so classify() degrades to the heuristic — the // cheap model never silently mis-routes by returning garbage. func (b *Bot) routeLayer1(ctx context.Context, rcx string, l0 rd.Layer0, cost *CostBreakdown) (RouterDecision, error) { resp, err := b.gemini.Complete(ctx, LLMRequest{ Model: b.cfg.GeminiModel, Messages: []Message{{Role: "user", Content: classifierPrompt + rcx}}, MaxTokens: 80, // was 60; the schema grew Temperature: 0, }) if err != nil { return RouterDecision{}, err } cost.Router += computeUSD(b.cfg.GeminiModel, resp.Usage, b.cfg) // The classifier schema IS routedecide.Verdict (tagged), so unmarshal straight into it. var v rd.Verdict if err := json.Unmarshal([]byte(extractJSON(resp.Text)), &v); err != nil { return RouterDecision{}, err } v.SearchQuery = strings.TrimSpace(v.SearchQuery) combined := rd.Combine(l0, v, b.cfg.WebParanoid) d := RouterDecision{ Route: combined.Route, Source: "classifier", Confidence: v.Confidence, NeedsWeb: v.NeedsWeb, Verifiable: v.Verifiable, EntityObscure: v.EntityObscure, TimeSensitive: v.TimeSensitive, TrivialScore: v.Trivial, SearchQuery: v.SearchQuery, LookupHint: l0.LookupHint, Freshness: l0.Freshness, WebDecidedBy: combined.WebDecidedBy, } // INFO so prod (which runs at INFO) captures the signal mix without LOG_LEVEL=debug. // Content-free: no body, no search_query (those are gated DEBUG/telemetry paths). 
b.log.InfoContext(ctx, "classifier verdict", "route", d.Route, "web_decided_by", d.WebDecidedBy, "needs_web", d.NeedsWeb, "verifiable", d.Verifiable, "entity_obscure", d.EntityObscure, "time_sensitive", d.TimeSensitive, "trivial", d.TrivialScore, "confidence", d.Confidence, "lookup_hint", d.LookupHint, "paranoid", b.cfg.WebParanoid) return d, nil } // extractJSON pulls the first {...} object out of a model reply, tolerating prose or // code fences around it. Returns "" if none (→ a parse error → degrade). func extractJSON(s string) string { i := strings.IndexByte(s, '{') j := strings.LastIndexByte(s, '}') if i < 0 || j < i { return "" } return s[i : j+1] } // containsTrigger reports whether body contains the manual trigger phrase // (case-insensitive, whitespace-trimmed). Empty trigger never matches. func containsTrigger(body, trigger string) bool { trigger = strings.TrimSpace(strings.ToLower(trigger)) if trigger == "" { return false } return strings.Contains(strings.ToLower(body), trigger) }