vojo/apps/ai-bot/router.go

180 lines
7.6 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package main
import (
"context"
"encoding/json"
"regexp"
"strings"
)
// router.go classifies a message into a route. It runs INSIDE respond() — after the
// mention/media/foreign/single-flight gates (F-FUNC-7) — so a paid Layer-1 classifier
// is never spent on a message today's bot drops for free.
//
// Two layers, both conservative (doubt → grok_direct, the safe floor that keeps
// substantive questions on Grok, §8.6):
// - Layer-0: free regex heuristics (RU+EN). Always runs when ROUTER_ENABLED.
// - Layer-1: a cheap Gemini JSON classifier, consulted ONLY on Layer-0 grok_direct
// when ROUTER_CLASSIFIER_ENABLED. Any failure falls back to the Layer-0 verdict.
// RouterDecision is the outcome of routing one message: the chosen route plus the
// signals behind it, which are logged for threshold calibration. Only
// Route/Source/Confidence/NeedsWeb drive behaviour today; the remaining fields are
// recorded for the offline router-replay eval (§9).
type RouterDecision struct {
// Route is the selected route; the code in this file only ever assigns one of the
// route* constants (routeGrokDirect, routeWebThenGrok, routeTrivial, routeReason).
Route string
// Source names the stage that produced the verdict. This file assigns "heuristic",
// "classifier", "default" and "forced"; "degraded" is not assigned in the code
// visible here — presumably set elsewhere, TODO confirm.
Source string // heuristic | classifier | default | forced | degraded
// Confidence is a hard-coded self-estimate for heuristic verdicts and the
// model-reported value for classifier verdicts. It is left at zero for "default".
Confidence float64
// NeedsWeb is true when a freshness keyword matched, or when the classifier either
// reported needs_web or picked the web route.
NeedsWeb bool
// Freshness is set to "recent" by the Layer-0 freshness match; otherwise empty here.
Freshness string
// ReasoningLevel is set to "high" by the manual reasoning trigger; otherwise empty here.
ReasoningLevel string
// Domain is never populated by the code visible in this file.
Domain string
// Difficulty is set to "trivial" by the Layer-0 trivial match; otherwise empty here.
Difficulty string
}
// Heuristic patterns. Kept deliberately tight: a false "trivial" leaks a real question
// to the cheap model, so trivial fires only on short, unmistakable greetings/acks or
// bare arithmetic. Freshness words route to web (a false web-route only costs a fetch
// and degrades cleanly — never a wrong answer).
//
// All alternatives are written in lower case; routeLayer0 lower-cases and trims the
// message before matching, so the patterns carry no case-insensitivity flag.
var (
// greetingRe matches a message that is, in its entirety, one RU/EN greeting, thanks,
// acknowledgement or farewell, optionally followed by whitespace and any of "!.,)".
// It is anchored at both ends, so any extra word makes it fail.
greetingRe = regexp.MustCompile(`^(привет(ик)?|здравствуй(те)?|хай|прив|ку|добрый\s+(день|вечер|утро)|спасибо|спс|благодарю|пока|ок(ей)?|угу|ага|hello|hi|hey|yo|thanks|thank\s+you|thx|ty|bye|goodbye|ok|okay|cool|nice)[\s!.,)]*$`)
// arithmeticRe matches a message that is only an integer expression: optional leading
// whitespace/"(", then two or more digit runs joined by - + * / × ÷, then optional
// trailing whitespace, ")", "=" or "?". Decimals and inner parentheses do not match.
// NOTE(review): digit-and-dash strings such as dates also fit this shape.
arithmeticRe = regexp.MustCompile(`^[\s(]*\d+(\s*[-+*/×÷]\s*\d+)+[\s)=?]*$`)
// freshnessRe is an unanchored substring search for RU stems and EN words that hint
// at time-sensitive facts (news, weather, prices, schedules, scores, ...).
// NOTE(review): because there are no word boundaries, the EN terms also hit inside
// longer words (e.g. "score" in "underscore", "current" in "concurrent").
freshnessRe = regexp.MustCompile(`(новост|сегодня|сейчас|последн|курс\s|погод|котировк|расписани|прогноз|breaking|today|right now|latest|current(ly)?|news|weather|stock price|exchange rate|score)`)
)
// routeLayer0 is the zero-cost heuristic pass. The message is trimmed and lower-cased,
// then checked in a fixed order: empty → grok_direct, freshness keyword → web,
// trivial greeting/arithmetic → trivial, anything else → grok_direct.
//
// Every verdict carries Source "heuristic". Confidence is a rough self-estimate that
// exists only for logging and threshold tuning; it does not steer control flow.
func routeLayer0(body string) RouterDecision {
	verdict := RouterDecision{Source: "heuristic"}
	text := strings.ToLower(strings.TrimSpace(body))

	switch {
	case text == "":
		// Nothing to inspect: stay on the safe floor with the lowest confidence.
		verdict.Route = routeGrokDirect
		verdict.Confidence = 0.5
	case freshnessRe.MatchString(text):
		// Freshness is tested before triviality, so it wins when both would match.
		verdict.Route = routeWebThenGrok
		verdict.Confidence = 0.7
		verdict.NeedsWeb = true
		verdict.Freshness = "recent"
	case isTrivial(text):
		verdict.Route = routeTrivial
		verdict.Confidence = 0.85
		verdict.Difficulty = "trivial"
	default:
		verdict.Route = routeGrokDirect
		verdict.Confidence = 0.6
	}
	return verdict
}
// isTrivial reports whether s (expected already trimmed and lower-cased by the
// caller) is either a bare arithmetic expression or a short greeting/ack. The
// greeting branch additionally requires at most four whitespace-separated words, so
// "thanks, now explain quantum tunnelling" is NOT trivial.
func isTrivial(s string) bool {
	return arithmeticRe.MatchString(s) ||
		(len(strings.Fields(s)) <= 4 && greetingRe.MatchString(s))
}
// classify produces the final RouterDecision for a request.
//
// Order of precedence:
//  1. The manual reasoning trigger (when reasoning is enabled) forces the reason
//     route, independently of the router — it is a deliberate user signal.
//  2. With the router disabled, everything goes to grok_direct (Source "default").
//  3. Layer-0 heuristics run; any verdict other than grok_direct is final.
//  4. A grok_direct verdict is handed to the Layer-1 classifier, but only when the
//     classifier is enabled and a Gemini client exists. If Layer-1 fails, the
//     failure is logged and the Layer-0 verdict is returned unchanged.
//
// Layer-1's cost, when it runs, is accumulated into cost.Router by routeLayer1.
func (b *Bot) classify(ctx context.Context, body string, cost *CostBreakdown) RouterDecision {
	switch {
	case b.cfg.ReasoningEnabled && containsTrigger(body, b.cfg.ReasoningTrigger):
		return RouterDecision{Route: routeReason, Source: "forced", Confidence: 1, ReasoningLevel: "high"}
	case !b.cfg.RouterEnabled:
		return RouterDecision{Route: routeGrokDirect, Source: "default"}
	}

	heuristic := routeLayer0(body)

	// Only the uncertain grok_direct verdict is worth a paid second opinion.
	refinable := heuristic.Route == routeGrokDirect &&
		b.cfg.RouterClassifierEnabled &&
		b.gemini != nil
	if !refinable {
		return heuristic
	}

	if refined, err := b.routeLayer1(ctx, body, cost); err == nil {
		return refined
	} else {
		b.log.Warn("layer-1 classifier failed; using heuristic", "err", err)
	}
	return heuristic // degrade to the heuristic verdict
}
// classifierConfidenceFloor is the bar a Layer-1 escalation OFF the safe floor
// (trivial/web/reason) must clear. Below it, the verdict is treated as doubt and the
// request stays on grok_direct — the owner's "substantive stays on Grok" rule (§8.6).
// A low-confidence "trivial" is exactly the false-trivial voice leak we must not take.
//
// routeLayer1 compares with a strict "<", so a reported confidence of exactly 0.8
// clears the bar.
const classifierConfidenceFloor = 0.8
// classifierPrompt asks Gemini for a strict JSON verdict. Kept terse to bound tokens.
//
// routeLayer1 appends the raw user message directly after the trailing "Message: ",
// so the text must keep ending with that label. The prompt advertises the routes
// "trivial", "web" and "normal"; normalizeRoute has no explicit "normal" case, so
// that label lands on its default, grok_direct.
const classifierPrompt = `You are a router. Classify the user message into exactly one route and reply with ONLY a JSON object, no prose.
Routes: "trivial" (greeting/ack/tiny arithmetic), "web" (needs fresh/current facts: news, prices, weather, "today"), "normal" (everything else).
Schema: {"route":"trivial|web|normal","confidence":0.0-1.0,"needs_web":true|false}
Message: `
// routeLayer1 asks the Gemini classifier for a verdict and turns its JSON reply into
// a RouterDecision with Source "classifier".
//
// It returns an error — letting classify() degrade to the heuristic — when the
// completion call fails or when no parseable JSON object can be extracted from the
// reply. A reply that parses but carries an unrecognised or missing route label is
// NOT an error: normalizeRoute maps it to grok_direct, the safe floor.
//
// The call's cost is added to cost.Router as soon as the completion returns, i.e.
// even when the reply later fails to parse. cost must be non-nil.
func (b *Bot) routeLayer1(ctx context.Context, body string, cost *CostBreakdown) (RouterDecision, error) {
	request := LLMRequest{
		Model:       b.cfg.GeminiModel,
		Messages:    []Message{{Role: "user", Content: classifierPrompt + body}},
		MaxTokens:   60,
		Temperature: 0,
	}
	reply, err := b.gemini.Complete(ctx, request)
	if err != nil {
		return RouterDecision{}, err
	}
	cost.Router += computeUSD(b.cfg.GeminiModel, reply.Usage, b.cfg)

	var verdict struct {
		Route      string  `json:"route"`
		Confidence float64 `json:"confidence"`
		NeedsWeb   bool    `json:"needs_web"`
	}
	payload := extractJSON(reply.Text)
	if err := json.Unmarshal([]byte(payload), &verdict); err != nil {
		return RouterDecision{}, err
	}

	decision := RouterDecision{Source: "classifier", Confidence: verdict.Confidence}
	target := normalizeRoute(verdict.Route)

	// Safe floor: a low-confidence escalation off grok_direct is doubt — keep it on
	// Grok rather than leak a possibly-substantive question to the cheap model.
	escalates := target != routeGrokDirect
	if escalates && verdict.Confidence < classifierConfidenceFloor {
		decision.Route = routeGrokDirect
		return decision, nil
	}

	decision.Route = target
	// The web route implies a web fetch even if the model forgot to set needs_web.
	decision.NeedsWeb = verdict.NeedsWeb || target == routeWebThenGrok
	return decision, nil
}
// normalizeRoute translates a classifier label into a route constant. Matching is
// case-insensitive and ignores surrounding whitespace; both the short label and the
// full route name are accepted. Anything unrecognised (including "normal" and the
// empty string) resolves to grok_direct — the safe floor, so a confused classifier
// never escalates.
func normalizeRoute(label string) string {
	key := strings.ToLower(strings.TrimSpace(label))

	if key == "trivial" || key == "trivial_direct" {
		return routeTrivial
	}
	if key == "web" || key == "web_then_grok" {
		return routeWebThenGrok
	}
	if key == "reason" || key == "reason_then_grok" {
		return routeReason
	}
	return routeGrokDirect
}
// extractJSON pulls the first valid JSON object out of a model reply, tolerating
// prose, code fences, stray braces or further objects around it.
//
// It tries each '{' in s from left to right and returns the first span that decodes
// as a complete JSON object, byte-for-byte as it appears in s. Text after the
// object's closing brace is ignored. Returns "" when no position yields a valid
// object (→ a parse error in the caller → degrade).
//
// Scanning from every '{' is quadratic in the worst case, which is fine for the
// short classifier replies this is used on.
func extractJSON(s string) string {
	for from := 0; from < len(s); {
		rel := strings.IndexByte(s[from:], '{')
		if rel < 0 {
			break
		}
		start := from + rel

		// Decode stops at the end of the first complete value, so trailing prose,
		// fences or a second object do not invalidate the match. Because s[start]
		// is '{', a successful decode is necessarily an object.
		var obj json.RawMessage
		if err := json.NewDecoder(strings.NewReader(s[start:])).Decode(&obj); err == nil {
			return string(obj)
		}
		// Not JSON from this brace (e.g. "{draft}" in prose); try the next one.
		from = start + 1
	}
	return ""
}
// containsTrigger reports whether body contains the manual trigger phrase as a
// substring. The comparison is case-insensitive, and whitespace around the trigger
// is ignored. An empty (or all-whitespace) trigger never matches.
func containsTrigger(body, trigger string) bool {
	needle := strings.TrimSpace(strings.ToLower(trigger))
	return needle != "" && strings.Contains(strings.ToLower(body), needle)
}