199 lines
11 KiB
Go
199 lines
11 KiB
Go
package main
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"strings"
|
|
"time"
|
|
|
|
rd "vojo.chat/ai-bot/internal/routedecide"
|
|
)
|
|
|
|
// router.go classifies a message into a route. It runs INSIDE respond() — after the
|
|
// mention/media/foreign/single-flight gates (F-FUNC-7) — so a paid Layer-1 classifier
|
|
// is never spent on a message today's bot drops for free.
|
|
//
|
|
// Two layers; the decision MATH lives in the pure internal/routedecide package so the
|
|
// offline eval (cmd/routereval) replays the SAME function instead of a copy:
|
|
// - Layer-0: free regex heuristics (RU+EN). Always runs when ROUTER_ENABLED.
|
|
// - Layer-1: a cheap Gemini JSON classifier (ROUTER_CLASSIFIER_ENABLED). It now runs
|
|
// on EVERY message (greetings + freshness hits included) so trivial can be
|
|
// agreement-confirmed and follow-ups get a context-resolved search_query. Any
|
|
// failure (incl. the 4s sub-deadline) falls back to the Layer-0 verdict — never an
|
|
// ungrounded confident answer, never a degrade-to-web (the classifier is Gemini, so
|
|
// a Gemini outage means the grounding fetch is down too, §4.4).
|
|
|
|
// RouterDecision bundles the chosen route with the signals that led to it. The signals
// are logged and persisted for threshold calibration and misroute attribution (§8).
// Route, Source and Confidence drive behaviour; the epistemic signals and SearchQuery
// feed the web route and the analytics.
type RouterDecision struct {
	// Route is the selected route.
	Route string
	// Source is one of: heuristic | classifier | default | forced | degraded.
	Source string
	// Confidence is the certainty attached to the decision.
	Confidence float64
	// NeedsWeb is the needs-web signal behind the decision.
	NeedsWeb bool
	// Freshness is "recent" on a freshnessRe hit (read by factualMiss + logged).
	Freshness string
	// ReasoningLevel is "high" on the forced reason route (logged).
	ReasoningLevel string

	// Classifier signals (§4) — populated only when Layer-1 ran.

	// SearchQuery is the self-contained, follow-up-resolved web query (carried to
	// genWebThenGrok in DMs).
	SearchQuery   string
	EntityObscure bool
	TimeSensitive bool
	Verifiable    bool
	// TrivialScore is the classifier's raw "trivial" verdict.
	TrivialScore bool
	// LookupHint is the Layer-0 soft hint (never sets the route on its own, §5).
	LookupHint bool
	// WebDecidedBy records which arm chose web — routedecide.WebBy* (request_log).
	WebDecidedBy string
}
|
|
|
|
// routerStageTimeout is the classifier call's own deadline, separate from the overall
// budget (mirrors webStageTimeout, §4.4). The timeout context is derived from the parent
// genCtx, so a budget cancel still propagates. Hitting this deadline is handled exactly
// like a classifier error: the Layer-0 verdict is used, never a terminal error.
const routerStageTimeout time.Duration = 4 * time.Second
|
|
|
|
// classifierPrompt is the instruction text for the Gemini classifier: it poses an
|
|
// EPISTEMIC-RISK question (not a topic label) and has follow-ups resolved from the short
|
|
// conversation (rcx) that routeLayer1 appends after it. Kept terse to bound tokens; extractJSON tolerates code fences.
|
|
const classifierPrompt = `You are a routing classifier for a multilingual chat assistant. You do NOT answer the question. Read the short conversation; the LAST user line is the message to route, earlier lines are context to resolve pronouns and follow-ups. Reply with ONLY one JSON object, no prose.
|
|
|
|
Your main job is an EPISTEMIC judgement, not a topic label: if the assistant answered the LAST message purely from its own memory (no web), how likely is it to state a WRONG checkable fact — a name, a film/book cast, a date or release year, a number, a price, a score, a population, a who-did-what about a SPECIFIC named person/film/company/place/event? Such facts are exactly what a model misremembers and states confidently.
|
|
|
|
Decide:
|
|
- "needs_web": true if a correct answer DEPENDS on such a checkable external fact, OR on anything time-sensitive (news, "сегодня"/today, "сейчас", latest, current price/rate/weather/score). Recency is sufficient but NOT necessary — a STATIC fact like a film's cast or a country's capital also counts. When in doubt, prefer TRUE: grounding is cheap, a confident wrong fact is not. FALSE for opinions, explanations, advice, casual chat, creative writing, code help, or transforming text the user already gave you.
|
|
- "verifiable": true if the message is specifically a checkable fact about a NAMED entity (who acted in <film>, who is CEO of <company>, what year <event>, population of <place>) — even if not about "today". A bare follow-up like "2024 года" inherits the entity from the previous turn.
|
|
- "entity_obscure": true if the salient entity is plausibly long-tail / not a household name (a minor film, a non-famous person, a niche product) — these are where memory fails hardest.
|
|
- "time_sensitive": true if the answer can change over time (news, prices, weather, standings, "current"/"latest"/"now").
|
|
- "trivial": true ONLY for a bare greeting, acknowledgement, or tiny arithmetic with no real question.
|
|
- "search_query": a SELF-CONTAINED web search query for this message, written in the LANGUAGE of the user's latest message (an English message → an English query; a Russian one → a Russian query) so the results match the user's language and region instead of defaulting to one country. Resolve follow-ups from context (a bare "2024 года" after discussing a film becomes "<film name> 2024 фильм актёрский состав"). For broad/region-neutral requests (e.g. "interesting news") keep it general and international, don't narrow it to a single country. Empty string ONLY if both needs_web and verifiable are false.
|
|
- "confidence": 0.0-1.0, your honest certainty in needs_web.
|
|
|
|
Schema: {"needs_web":bool,"verifiable":bool,"entity_obscure":bool,"time_sensitive":bool,"trivial":bool,"search_query":"<query or empty>","confidence":0.0-1.0}
|
|
Conversation:
|
|
`
|
|
|
|
// routeLayer0 is the free heuristic verdict (RouterDecision shape), built from the pure
|
|
// core. Used directly when the classifier is off, and exported here for the heuristic
|
|
// golden test. Confidence is a rough self-estimate, logging-only (not control flow).
|
|
func routeLayer0(body string) RouterDecision {
|
|
return layer0Decision(rd.ClassifyLayer0(body))
|
|
}
|
|
|
|
// layer0Decision maps the pure routedecide.Layer0 onto a RouterDecision, attaching the
|
|
// logging-only confidence self-estimates the old heuristic used.
|
|
func layer0Decision(l0 rd.Layer0) RouterDecision {
|
|
d := RouterDecision{Route: l0.Route, Source: "heuristic", LookupHint: l0.LookupHint, Freshness: l0.Freshness}
|
|
switch l0.Route {
|
|
case routeWebThenGrok:
|
|
d.Confidence, d.NeedsWeb = 0.7, true
|
|
case routeTrivial:
|
|
d.Confidence = 0.85
|
|
default:
|
|
d.Confidence = 0.6
|
|
}
|
|
return d
|
|
}
|
|
|
|
// isTrivial reports a short greeting/ack or bare arithmetic (the Layer-0 regex). Kept
|
|
// as a thin wrapper over the pure core for in-package callers/tests.
|
|
func isTrivial(s string) bool { return rd.IsTrivial(strings.ToLower(strings.TrimSpace(s))) }
|
|
|
|
// classify produces the final RouterDecision. The manual reasoning trigger is honoured
|
|
// independently of the heuristic router (a deliberate user signal). rcx is the
|
|
// privacy-minimised conversation window (DM-resolved; bare trigger in groups) appended
|
|
// to the classifier prompt. Layer-1's cost, when it runs, accumulates into cost.Router.
|
|
func (b *Bot) classify(ctx context.Context, body, rcx string, cost *CostBreakdown) RouterDecision {
|
|
if b.cfg.ReasoningEnabled && containsTrigger(body, b.cfg.ReasoningTrigger) {
|
|
return RouterDecision{Route: routeReason, Source: "forced", Confidence: 1, ReasoningLevel: "high"}
|
|
}
|
|
if !b.cfg.RouterEnabled {
|
|
return RouterDecision{Route: routeGrokDirect, Source: "default"}
|
|
}
|
|
l0 := rd.ClassifyLayer0(body)
|
|
d := layer0Decision(l0)
|
|
// Drop the old "only on grok_direct" gate: the classifier now runs on every message
|
|
// (when enabled) so it can raise a quiet factual question to web AND agreement-confirm
|
|
// a trivial. With it disabled, the Layer-0 verdict stands (today's behaviour).
|
|
if !b.cfg.RouterClassifierEnabled || b.gemini == nil {
|
|
return d
|
|
}
|
|
// 4s router sub-deadline derived from genCtx (a budget cancel still propagates).
|
|
rctx, cancel := context.WithTimeout(ctx, routerStageTimeout)
|
|
defer cancel()
|
|
refined, err := b.routeLayer1(rctx, rcx, l0, cost)
|
|
if err != nil {
|
|
// Classifier error / timeout / garbage → the Layer-0 verdict, exactly as today.
|
|
// Only the deterministic freshnessRe (carried in d) survives a classifier outage.
|
|
b.log.WarnContext(ctx, "layer-1 classifier failed; using heuristic", "err", err)
|
|
return d
|
|
}
|
|
return refined
|
|
}
|
|
|
|
// routeLayer1 runs the Gemini classifier, parses its JSON into a routedecide.Verdict,
|
|
// and resolves the route via the shared routedecide.Combine (WebParanoid-gated). A
|
|
// non-JSON or transport error is returned so classify() degrades to the heuristic — the
|
|
// cheap model never silently mis-routes by returning garbage.
|
|
func (b *Bot) routeLayer1(ctx context.Context, rcx string, l0 rd.Layer0, cost *CostBreakdown) (RouterDecision, error) {
|
|
resp, err := b.gemini.Complete(ctx, LLMRequest{
|
|
Model: b.cfg.GeminiModel,
|
|
Messages: []Message{{Role: "user", Content: classifierPrompt + rcx}},
|
|
MaxTokens: 80, // was 60; the schema grew
|
|
Temperature: 0,
|
|
})
|
|
if err != nil {
|
|
return RouterDecision{}, err
|
|
}
|
|
cost.Router += computeUSD(b.cfg.GeminiModel, resp.Usage, b.cfg)
|
|
|
|
// The classifier schema IS routedecide.Verdict (tagged), so unmarshal straight into it.
|
|
var v rd.Verdict
|
|
if err := json.Unmarshal([]byte(extractJSON(resp.Text)), &v); err != nil {
|
|
return RouterDecision{}, err
|
|
}
|
|
v.SearchQuery = strings.TrimSpace(v.SearchQuery)
|
|
combined := rd.Combine(l0, v, b.cfg.WebParanoid)
|
|
|
|
d := RouterDecision{
|
|
Route: combined.Route,
|
|
Source: "classifier",
|
|
Confidence: v.Confidence,
|
|
NeedsWeb: v.NeedsWeb,
|
|
Verifiable: v.Verifiable,
|
|
EntityObscure: v.EntityObscure,
|
|
TimeSensitive: v.TimeSensitive,
|
|
TrivialScore: v.Trivial,
|
|
SearchQuery: v.SearchQuery,
|
|
LookupHint: l0.LookupHint,
|
|
Freshness: l0.Freshness,
|
|
WebDecidedBy: combined.WebDecidedBy,
|
|
}
|
|
// INFO so prod (which runs at INFO) captures the signal mix without LOG_LEVEL=debug.
|
|
// Content-free: no body, no search_query (those are gated DEBUG/telemetry paths).
|
|
b.log.InfoContext(ctx, "classifier verdict",
|
|
"route", d.Route, "web_decided_by", d.WebDecidedBy, "needs_web", d.NeedsWeb,
|
|
"verifiable", d.Verifiable, "entity_obscure", d.EntityObscure,
|
|
"time_sensitive", d.TimeSensitive, "trivial", d.TrivialScore,
|
|
"confidence", d.Confidence, "lookup_hint", d.LookupHint, "paranoid", b.cfg.WebParanoid)
|
|
return d, nil
|
|
}
|
|
|
|
// extractJSON pulls the first {...} object out of a model reply, tolerating prose or
// code fences before AND after it.
//
// It decodes exactly one JSON value starting at the first '{'. This means braces inside
// string values, trailing prose that happens to contain '}', or a second object after
// the first no longer corrupt the result (the previous first-'{'-to-last-'}' slice
// returned invalid JSON for such replies and forced a needless degrade).
//
// If no well-formed object starts at the first '{' (truncated or garbled reply), the
// legacy first-'{'-to-last-'}' span is returned unchanged so the caller's
// json.Unmarshal fails with the real syntax error and degrades. Returns "" when there
// is no '{', or no '}' after it (→ a parse error → degrade).
func extractJSON(s string) string {
	start := strings.IndexByte(s, '{')
	if start < 0 {
		return ""
	}

	// Decode stops after one complete value, so anything following the object is ignored.
	var obj json.RawMessage
	if err := json.NewDecoder(strings.NewReader(s[start:])).Decode(&obj); err == nil {
		return string(obj)
	}

	// Not a complete object: keep the historical span so malformed replies behave as before.
	end := strings.LastIndexByte(s, '}')
	if end < start {
		return ""
	}
	return s[start : end+1]
}
|
|
|
|
// containsTrigger reports whether body contains the manual trigger phrase. The trigger
// is lower-cased and stripped of surrounding whitespace, then searched for in the
// lower-cased body. An empty (or whitespace-only) trigger never matches.
func containsTrigger(body, trigger string) bool {
	needle := strings.TrimSpace(strings.ToLower(trigger))
	return needle != "" && strings.Contains(strings.ToLower(body), needle)
}
|