vojo/apps/ai-bot/cascade_test.go

275 lines
10 KiB
Go

package main
import (
"context"
"errors"
"io"
"log/slog"
"testing"
)
// discardLog returns a text-format logger whose output is written to io.Discard,
// so code under test can log freely without producing any test output.
func discardLog() *slog.Logger {
	sink := slog.NewTextHandler(io.Discard, nil)
	return slog.New(sink)
}
// fakeLLM is a scriptable LLMClient for dispatch/degrade tests. A test sets the
// outcome fields up front and inspects the recording fields afterwards.
type fakeLLM struct {
	// Scripted outcome replayed by Complete.
	text  string // response text returned on success
	usage Usage  // usage attached to the successful response
	err   error  // when non-nil, Complete returns this instead of a response

	// Recorded by Complete on every call.
	calls   int        // number of Complete invocations so far
	lastReq LLMRequest // request passed to the most recent Complete call
}
// Complete counts the call, remembers req, and then replays the scripted outcome:
// the configured error when one is set, otherwise a response carrying the scripted
// text and usage with a fixed "fake" provider request ID. The context is ignored.
func (f *fakeLLM) Complete(_ context.Context, req LLMRequest) (*LLMResponse, error) {
	f.calls++
	f.lastReq = req

	if f.err != nil {
		return nil, f.err
	}

	resp := LLMResponse{
		Text:              f.text,
		Usage:             f.usage,
		ProviderRequestID: "fake",
	}
	return &resp, nil
}
// fakeWeb is a scriptable web-fetch stand-in: Fetch returns either the canned
// WebContext or the canned error, and counts how often it was called.
type fakeWeb struct {
	wc    WebContext // value returned by Fetch on success
	err   error      // when non-nil, Fetch fails with this error
	calls int        // number of Fetch invocations so far
}
// Fetch counts the call and replays the scripted outcome. Both the context and the
// string argument are ignored. On a scripted error it returns a zero WebContext.
func (f *fakeWeb) Fetch(_ context.Context, _ string) (WebContext, error) {
	f.calls++

	if failure := f.err; failure != nil {
		var none WebContext
		return none, failure
	}
	return f.wc, nil
}
// cascadeCfg returns the baseline test Config: model names, generation limits, the
// reasoning trigger/effort, the web provider and a price table for grok-x and
// gemini-x. No *Enabled flag is set here, so every cascade feature starts off;
// each test flips the flags it needs on its own copy.
func cascadeCfg() Config {
	prices := map[string]ModelPrice{
		"grok-x":   {InputPerM: 1, CachedPerM: 0.2, OutputPerM: 2},
		"gemini-x": {InputPerM: 0.1, CachedPerM: 0.1, OutputPerM: 0.4},
	}

	return Config{
		XAIModel:         "grok-x",
		GeminiModel:      "gemini-x",
		ReasoningModel:   "grok-reason",
		MaxOutTok:        100,
		XAITemp:          0.5,
		ReasoningTrigger: "подумай глубже",
		ReasoningEffort:  "high",
		WebProvider:      webProviderGrokWebSearch,
		Prices:           prices,
	}
}
// msgs builds the two-message conversation used by the tests: a fixed "SYS" system
// message followed by a user message whose content is body.
func msgs(body string) []Message {
	system := Message{Role: "system", Content: "SYS"}
	user := Message{Role: "user", Content: body}
	return []Message{system, user}
}
// TestGenerateAllFlagsOffIsGrokDirect is the cascade-off parity invariant: even a
// "trivial"-looking message goes to Grok, and Gemini is never touched, when the router
// is off.
func TestGenerateAllFlagsOffIsGrokDirect(t *testing.T) {
grok := &fakeLLM{text: "grok answer"}
gem := &fakeLLM{text: "should not run"}
cfg := cascadeCfg()
b := &Bot{cfg: &cfg, llm: grok, gemini: gem, log: discardLog()}
res, err := b.generate(context.Background(), "привет", msgs("привет"), "")
if err != nil {
t.Fatalf("generate: %v", err)
}
if res.route != routeGrokDirect || res.text != "grok answer" {
t.Fatalf("res = (%q,%q), want grok_direct/\"grok answer\"", res.route, res.text)
}
if res.decision.Source != "default" {
t.Fatalf("router source = %q, want default (router off)", res.decision.Source)
}
if grok.calls != 1 || gem.calls != 0 {
t.Fatalf("calls grok=%d gem=%d, want 1/0", grok.calls, gem.calls)
}
}
func TestGenerateTrivialOffload(t *testing.T) {
grok := &fakeLLM{text: "grok"}
gem := &fakeLLM{text: "gemini trivial"}
cfg := cascadeCfg()
cfg.RouterEnabled, cfg.TrivialOffloadEnabled = true, true
b := &Bot{cfg: &cfg, llm: grok, gemini: gem, log: discardLog()}
res, err := b.generate(context.Background(), "привет", msgs("привет"), "")
if err != nil {
t.Fatalf("generate: %v", err)
}
if res.route != routeTrivial || res.text != "gemini trivial" || res.finalModel != "gemini-x" {
t.Fatalf("res = (%q,%q,%q), want trivial/gemini", res.route, res.text, res.finalModel)
}
if gem.calls != 1 || grok.calls != 0 {
t.Fatalf("calls grok=%d gem=%d, want 0/1 (Gemini answered)", grok.calls, gem.calls)
}
}
// TestGenerateTrivialDegradesToGrok: Gemini failing on the trivial route must fall back
// to Grok, never go silent.
func TestGenerateTrivialDegradesToGrok(t *testing.T) {
grok := &fakeLLM{text: "grok fallback"}
gem := &fakeLLM{err: errors.New("gemini down")}
cfg := cascadeCfg()
cfg.RouterEnabled, cfg.TrivialOffloadEnabled = true, true
b := &Bot{cfg: &cfg, llm: grok, gemini: gem, log: discardLog()}
res, err := b.generate(context.Background(), "привет", msgs("привет"), "")
if err != nil {
t.Fatalf("generate: %v", err)
}
if res.route != routeGrokDirect || res.text != "grok fallback" {
t.Fatalf("res = (%q,%q), want grok_direct fallback", res.route, res.text)
}
if !res.fallback || res.degraded != degradeTrivial {
t.Fatalf("fallback=%v degraded=%q, want true/trivial_failed", res.fallback, res.degraded)
}
if gem.calls != 1 || grok.calls != 1 {
t.Fatalf("calls grok=%d gem=%d, want 1/1", grok.calls, gem.calls)
}
}
func TestGenerateWebThenGrok(t *testing.T) {
grok := &fakeLLM{text: "synthesised", usage: Usage{PromptTokens: 100, CompletionTokens: 50}}
web := &fakeWeb{wc: WebContext{Digest: "fresh facts", Citations: []string{"http://src"}, Cost: CostBreakdown{WebTool: 0.1}}}
cfg := cascadeCfg()
cfg.RouterEnabled, cfg.WebEnabled = true, true
b := &Bot{cfg: &cfg, llm: grok, web: web, log: discardLog()}
res, err := b.generate(context.Background(), "какие новости сегодня", msgs("какие новости сегодня"), "")
if err != nil {
t.Fatalf("generate: %v", err)
}
if res.route != routeWebThenGrok || res.text != "synthesised" {
t.Fatalf("res = (%q,%q), want web_then_grok/synthesised", res.route, res.text)
}
if res.cost.WebTool != 0.1 || res.cost.Token <= 0 {
t.Fatalf("cost = %+v, want WebTool 0.1 + Token>0", res.cost)
}
if web.calls != 1 || grok.calls != 1 {
t.Fatalf("calls web=%d grok=%d, want 1/1", web.calls, grok.calls)
}
}
// TestGenerateWebDegradesToGrok: a web fetch failure (provider down or cap hit) degrades
// to grok_direct and books no web cost.
func TestGenerateWebDegradesToGrok(t *testing.T) {
grok := &fakeLLM{text: "grok fallback"}
web := &fakeWeb{err: errGroundingCapped}
cfg := cascadeCfg()
cfg.RouterEnabled, cfg.WebEnabled = true, true
b := &Bot{cfg: &cfg, llm: grok, web: web, log: discardLog()}
res, err := b.generate(context.Background(), "новости сегодня", msgs("новости сегодня"), "")
if err != nil {
t.Fatalf("generate: %v", err)
}
if res.route != routeGrokDirect || res.text != "grok fallback" || !res.fallback {
t.Fatalf("res = (%q,%q,fallback=%v), want grok_direct fallback", res.route, res.text, res.fallback)
}
if res.degraded != degradeGroundCap {
t.Fatalf("degraded = %q, want grounding_cap (the specific reason)", res.degraded)
}
if res.cost.WebTool != 0 || res.cost.Grounding != 0 {
t.Fatalf("web cost = %+v, want 0 (fetch failed before billing)", res.cost)
}
}
// TestGenerateReasoningForced: the manual trigger routes to the reasoning model with
// reasoning_effort, independent of ROUTER_ENABLED.
func TestGenerateReasoningForced(t *testing.T) {
grok := &fakeLLM{text: "deep answer"}
cfg := cascadeCfg()
cfg.ReasoningEnabled = true // ROUTER_ENABLED deliberately left off
b := &Bot{cfg: &cfg, llm: grok, log: discardLog()}
res, err := b.generate(context.Background(), "подумай глубже про сознание", msgs("подумай глубже про сознание"), "")
if err != nil {
t.Fatalf("generate: %v", err)
}
if res.route != routeReason || res.decision.Source != "forced" {
t.Fatalf("res route=%q source=%q, want reason/forced", res.route, res.decision.Source)
}
if grok.lastReq.ReasoningEffort != "high" || grok.lastReq.Model != "grok-reason" {
t.Fatalf("reasoning req = (effort %q, model %q), want high/grok-reason", grok.lastReq.ReasoningEffort, grok.lastReq.Model)
}
}
// TestClassifierConfidenceFloor: a Layer-1 classifier label that escalates off the safe
// floor (trivial/web) is only honoured when it clears the confidence floor; otherwise
// the request stays on grok_direct. This is the false-trivial voice-leak guard (§8.6).
// The three scenarios reuse one Bot and re-script the Gemini fake between calls.
func TestClassifierConfidenceFloor(t *testing.T) {
	// Layer-0 sends this message to grok_direct on its own.
	const substantive = "напиши подробное эссе про историю римской империи"

	cfg := cascadeCfg()
	cfg.RouterEnabled = true
	cfg.RouterClassifierEnabled = true

	gem := &fakeLLM{}
	b := &Bot{cfg: &cfg, gemini: gem, log: discardLog()}
	ctx := context.Background()
	var cost CostBreakdown

	// Low-confidence escalation: must be ignored.
	gem.text = `{"route":"trivial","confidence":0.2}`
	d := b.classify(ctx, substantive, &cost)
	if d.Route != routeGrokDirect {
		t.Fatalf("low-confidence trivial must stay grok_direct (safe floor), got %q", d.Route)
	}

	// Confident escalation: must be honoured.
	gem.text = `{"route":"trivial","confidence":0.95}`
	d = b.classify(ctx, substantive, &cost)
	if d.Route != routeTrivial {
		t.Fatalf("high-confidence trivial should route trivial, got %q", d.Route)
	}

	// A classifier error degrades to the Layer-0 verdict (grok_direct), never silence.
	gem.text = ""
	gem.err = errors.New("gemini down")
	d = b.classify(ctx, substantive, &cost)
	if d.Route != routeGrokDirect {
		t.Fatalf("classifier failure must fall back to heuristic grok_direct, got %q", d.Route)
	}
}
// TestGrokReasoningEffort: GROK_REASONING_EFFORT is sent on grok_direct (so grok-4.3 can
// be kept fast with "none"), empty means not sent (compat with grok-4.20-non-reasoning),
// and the reason route always overrides to "high" regardless.
//
// Each scenario builds its own Bot and fake, so they run as independent subtests: a
// failure in one does not hide the others, and a single scenario can be selected with
// -run 'TestGrokReasoningEffort/<name>'.
func TestGrokReasoningEffort(t *testing.T) {
	// Configured effort reaches grok_direct.
	t.Run("configured", func(t *testing.T) {
		grok := &fakeLLM{text: "ok"}
		cfg := cascadeCfg()
		cfg.GrokReasoningEffort = "none"
		b := &Bot{cfg: &cfg, llm: grok, log: discardLog()}
		if _, err := b.generate(context.Background(), "hello", msgs("hello"), ""); err != nil {
			t.Fatal(err)
		}
		if grok.lastReq.ReasoningEffort != "none" {
			t.Fatalf("grok_direct effort = %q, want none", grok.lastReq.ReasoningEffort)
		}
	})

	// Empty default → not sent (so grok-4.20-non-reasoning keeps working).
	t.Run("default_empty", func(t *testing.T) {
		grok := &fakeLLM{text: "ok"}
		cfg := cascadeCfg() // GrokReasoningEffort == ""
		b := &Bot{cfg: &cfg, llm: grok, log: discardLog()}
		if _, err := b.generate(context.Background(), "hello", msgs("hello"), ""); err != nil {
			t.Fatal(err)
		}
		if grok.lastReq.ReasoningEffort != "" {
			t.Fatalf("default effort = %q, want empty (not sent)", grok.lastReq.ReasoningEffort)
		}
	})

	// The reason route ignores GROK_REASONING_EFFORT and uses the reasoning effort
	// ("high" in cascadeCfg) instead.
	t.Run("reason_route_overrides", func(t *testing.T) {
		grok := &fakeLLM{text: "deep"}
		cfg := cascadeCfg()
		cfg.GrokReasoningEffort = "none"
		cfg.ReasoningEnabled = true
		b := &Bot{cfg: &cfg, llm: grok, log: discardLog()}
		if _, err := b.generate(context.Background(), "подумай глубже про X", msgs("подумай глубже про X"), ""); err != nil {
			t.Fatal(err)
		}
		if grok.lastReq.ReasoningEffort != "high" {
			t.Fatalf("reason route effort = %q, want high (overrides GROK_REASONING_EFFORT)", grok.lastReq.ReasoningEffort)
		}
	})
}
// TestGenerateTerminalErrorPropagates: if even grok_direct fails, generate returns the
// error (respond turns it into refund + react), not a silent empty success.
func TestGenerateTerminalErrorPropagates(t *testing.T) {
grok := &fakeLLM{err: errors.New("xai down")}
cfg := cascadeCfg()
b := &Bot{cfg: &cfg, llm: grok, log: discardLog()}
if _, err := b.generate(context.Background(), "hello", msgs("hello"), ""); err == nil {
t.Fatal("want terminal error when grok_direct fails, got nil")
}
}