package main import ( "context" "errors" "io" "log/slog" "testing" ) func discardLog() *slog.Logger { return slog.New(slog.NewTextHandler(io.Discard, nil)) } // fakeLLM is a scriptable LLMClient for dispatch/degrade tests. type fakeLLM struct { text string usage Usage err error calls int lastReq LLMRequest } func (f *fakeLLM) Complete(_ context.Context, req LLMRequest) (*LLMResponse, error) { f.calls++ f.lastReq = req if f.err != nil { return nil, f.err } return &LLMResponse{Text: f.text, Usage: f.usage, ProviderRequestID: "fake"}, nil } type fakeWeb struct { wc WebContext err error calls int } func (f *fakeWeb) Fetch(_ context.Context, _ string) (WebContext, error) { f.calls++ if f.err != nil { return WebContext{}, f.err } return f.wc, nil } // cascadeCfg is a config with the model/price table set and EVERY cascade flag off. // Tests flip individual flags on a copy. func cascadeCfg() Config { return Config{ XAIModel: "grok-x", GeminiModel: "gemini-x", ReasoningModel: "grok-reason", MaxOutTok: 100, XAITemp: 0.5, ReasoningTrigger: "подумай глубже", ReasoningEffort: "high", WebProvider: webProviderGrokWebSearch, Prices: map[string]ModelPrice{ "grok-x": {InputPerM: 1, CachedPerM: 0.2, OutputPerM: 2}, "gemini-x": {InputPerM: 0.1, CachedPerM: 0.1, OutputPerM: 0.4}, }, } } func msgs(body string) []Message { return []Message{{Role: "system", Content: "SYS"}, {Role: "user", Content: body}} } // TestGenerateAllFlagsOffIsGrokDirect is the cascade-off parity invariant: even a // "trivial"-looking message goes to Grok, and Gemini is never touched, when the router // is off. func TestGenerateAllFlagsOffIsGrokDirect(t *testing.T) { grok := &fakeLLM{text: "grok answer"} gem := &fakeLLM{text: "should not run"} cfg := cascadeCfg() b := &Bot{cfg: &cfg, llm: grok, gemini: gem, log: discardLog()} res, err := b.generate(context.Background(), "привет", msgs("привет"), "") if err != nil { t.Fatalf("generate: %v", err) } if res.route != routeGrokDirect || res.text != "grok answer" { t.Fatalf("res = (%q,%q), want grok_direct/\"grok answer\"", res.route, res.text) } if res.decision.Source != "default" { t.Fatalf("router source = %q, want default (router off)", res.decision.Source) } if grok.calls != 1 || gem.calls != 0 { t.Fatalf("calls grok=%d gem=%d, want 1/0", grok.calls, gem.calls) } } func TestGenerateTrivialOffload(t *testing.T) { grok := &fakeLLM{text: "grok"} gem := &fakeLLM{text: "gemini trivial"} cfg := cascadeCfg() cfg.RouterEnabled, cfg.TrivialOffloadEnabled = true, true b := &Bot{cfg: &cfg, llm: grok, gemini: gem, log: discardLog()} res, err := b.generate(context.Background(), "привет", msgs("привет"), "") if err != nil { t.Fatalf("generate: %v", err) } if res.route != routeTrivial || res.text != "gemini trivial" || res.finalModel != "gemini-x" { t.Fatalf("res = (%q,%q,%q), want trivial/gemini", res.route, res.text, res.finalModel) } if gem.calls != 1 || grok.calls != 0 { t.Fatalf("calls grok=%d gem=%d, want 0/1 (Gemini answered)", grok.calls, gem.calls) } } // TestGenerateTrivialDegradesToGrok: Gemini failing on the trivial route must fall back // to Grok, never go silent. func TestGenerateTrivialDegradesToGrok(t *testing.T) { grok := &fakeLLM{text: "grok fallback"} gem := &fakeLLM{err: errors.New("gemini down")} cfg := cascadeCfg() cfg.RouterEnabled, cfg.TrivialOffloadEnabled = true, true b := &Bot{cfg: &cfg, llm: grok, gemini: gem, log: discardLog()} res, err := b.generate(context.Background(), "привет", msgs("привет"), "") if err != nil { t.Fatalf("generate: %v", err) } if res.route != routeGrokDirect || res.text != "grok fallback" { t.Fatalf("res = (%q,%q), want grok_direct fallback", res.route, res.text) } if !res.fallback || res.degraded != degradeTrivial { t.Fatalf("fallback=%v degraded=%q, want true/trivial_failed", res.fallback, res.degraded) } if gem.calls != 1 || grok.calls != 1 { t.Fatalf("calls grok=%d gem=%d, want 1/1", grok.calls, gem.calls) } } func TestGenerateWebThenGrok(t *testing.T) { grok := &fakeLLM{text: "synthesised", usage: Usage{PromptTokens: 100, CompletionTokens: 50}} web := &fakeWeb{wc: WebContext{Digest: "fresh facts", Citations: []string{"http://src"}, Cost: CostBreakdown{WebTool: 0.1}}} cfg := cascadeCfg() cfg.RouterEnabled, cfg.WebEnabled = true, true b := &Bot{cfg: &cfg, llm: grok, web: web, log: discardLog()} res, err := b.generate(context.Background(), "какие новости сегодня", msgs("какие новости сегодня"), "") if err != nil { t.Fatalf("generate: %v", err) } if res.route != routeWebThenGrok || res.text != "synthesised" { t.Fatalf("res = (%q,%q), want web_then_grok/synthesised", res.route, res.text) } if res.cost.WebTool != 0.1 || res.cost.Token <= 0 { t.Fatalf("cost = %+v, want WebTool 0.1 + Token>0", res.cost) } if web.calls != 1 || grok.calls != 1 { t.Fatalf("calls web=%d grok=%d, want 1/1", web.calls, grok.calls) } } // TestGenerateWebDegradesToGrok: a web fetch failure (provider down or cap hit) degrades // to grok_direct and books no web cost. func TestGenerateWebDegradesToGrok(t *testing.T) { grok := &fakeLLM{text: "grok fallback"} web := &fakeWeb{err: errGroundingCapped} cfg := cascadeCfg() cfg.RouterEnabled, cfg.WebEnabled = true, true b := &Bot{cfg: &cfg, llm: grok, web: web, log: discardLog()} res, err := b.generate(context.Background(), "новости сегодня", msgs("новости сегодня"), "") if err != nil { t.Fatalf("generate: %v", err) } if res.route != routeGrokDirect || res.text != "grok fallback" || !res.fallback { t.Fatalf("res = (%q,%q,fallback=%v), want grok_direct fallback", res.route, res.text, res.fallback) } if res.degraded != degradeGroundCap { t.Fatalf("degraded = %q, want grounding_cap (the specific reason)", res.degraded) } if res.cost.WebTool != 0 || res.cost.Grounding != 0 { t.Fatalf("web cost = %+v, want 0 (fetch failed before billing)", res.cost) } } // TestGenerateReasoningForced: the manual trigger routes to the reasoning model with // reasoning_effort, independent of ROUTER_ENABLED. func TestGenerateReasoningForced(t *testing.T) { grok := &fakeLLM{text: "deep answer"} cfg := cascadeCfg() cfg.ReasoningEnabled = true // ROUTER_ENABLED deliberately left off b := &Bot{cfg: &cfg, llm: grok, log: discardLog()} res, err := b.generate(context.Background(), "подумай глубже про сознание", msgs("подумай глубже про сознание"), "") if err != nil { t.Fatalf("generate: %v", err) } if res.route != routeReason || res.decision.Source != "forced" { t.Fatalf("res route=%q source=%q, want reason/forced", res.route, res.decision.Source) } if grok.lastReq.ReasoningEffort != "high" || grok.lastReq.Model != "grok-reason" { t.Fatalf("reasoning req = (effort %q, model %q), want high/grok-reason", grok.lastReq.ReasoningEffort, grok.lastReq.Model) } } // TestClassifierConfidenceFloor: a Layer-1 classifier label that escalates off the safe // floor (trivial/web) must clear the confidence floor, else the request stays on // grok_direct — the false-trivial voice-leak guard (§8.6). func TestClassifierConfidenceFloor(t *testing.T) { cfg := cascadeCfg() cfg.RouterEnabled, cfg.RouterClassifierEnabled = true, true gem := &fakeLLM{} b := &Bot{cfg: &cfg, gemini: gem, log: discardLog()} var cost CostBreakdown const substantive = "напиши подробное эссе про историю римской империи" // Layer-0 → grok_direct gem.text = `{"route":"trivial","confidence":0.2}` // low-confidence escalation if d := b.classify(context.Background(), substantive, &cost); d.Route != routeGrokDirect { t.Fatalf("low-confidence trivial must stay grok_direct (safe floor), got %q", d.Route) } gem.text = `{"route":"trivial","confidence":0.95}` // confident escalation is honoured if d := b.classify(context.Background(), substantive, &cost); d.Route != routeTrivial { t.Fatalf("high-confidence trivial should route trivial, got %q", d.Route) } // A classifier error degrades to the Layer-0 verdict (grok_direct), never silence. gem.text, gem.err = "", errors.New("gemini down") if d := b.classify(context.Background(), substantive, &cost); d.Route != routeGrokDirect { t.Fatalf("classifier failure must fall back to heuristic grok_direct, got %q", d.Route) } } // TestGrokReasoningEffort: GROK_REASONING_EFFORT is sent on grok_direct (so grok-4.3 can // be kept fast with "none"), empty means not sent (compat with grok-4.20-non-reasoning), // and the reason route always overrides to "high" regardless. func TestGrokReasoningEffort(t *testing.T) { // Configured effort reaches grok_direct. grok := &fakeLLM{text: "ok"} cfg := cascadeCfg() cfg.GrokReasoningEffort = "none" b := &Bot{cfg: &cfg, llm: grok, log: discardLog()} if _, err := b.generate(context.Background(), "hello", msgs("hello"), ""); err != nil { t.Fatal(err) } if grok.lastReq.ReasoningEffort != "none" { t.Fatalf("grok_direct effort = %q, want none", grok.lastReq.ReasoningEffort) } // Empty default → not sent (so grok-4.20-non-reasoning keeps working). grokDef := &fakeLLM{text: "ok"} cfgDef := cascadeCfg() // GrokReasoningEffort == "" bDef := &Bot{cfg: &cfgDef, llm: grokDef, log: discardLog()} if _, err := bDef.generate(context.Background(), "hello", msgs("hello"), ""); err != nil { t.Fatal(err) } if grokDef.lastReq.ReasoningEffort != "" { t.Fatalf("default effort = %q, want empty (not sent)", grokDef.lastReq.ReasoningEffort) } // The reason route ignores GROK_REASONING_EFFORT and always uses "high". grokR := &fakeLLM{text: "deep"} cfgR := cascadeCfg() cfgR.GrokReasoningEffort = "none" cfgR.ReasoningEnabled = true bR := &Bot{cfg: &cfgR, llm: grokR, log: discardLog()} if _, err := bR.generate(context.Background(), "подумай глубже про X", msgs("подумай глубже про X"), ""); err != nil { t.Fatal(err) } if grokR.lastReq.ReasoningEffort != "high" { t.Fatalf("reason route effort = %q, want high (overrides GROK_REASONING_EFFORT)", grokR.lastReq.ReasoningEffort) } } // TestGenerateTerminalErrorPropagates: if even grok_direct fails, generate returns the // error (respond turns it into refund + react), not a silent empty success. func TestGenerateTerminalErrorPropagates(t *testing.T) { grok := &fakeLLM{err: errors.New("xai down")} cfg := cascadeCfg() b := &Bot{cfg: &cfg, llm: grok, log: discardLog()} if _, err := b.generate(context.Background(), "hello", msgs("hello"), ""); err == nil { t.Fatal("want terminal error when grok_direct fails, got nil") } }