275 lines
10 KiB
Go
275 lines
10 KiB
Go
package main
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"io"
|
|
"log/slog"
|
|
"testing"
|
|
)
|
|
|
|
// discardLog returns a logger whose text handler writes to io.Discard, giving
// tests a usable *slog.Logger that produces no output.
func discardLog() *slog.Logger {
	handler := slog.NewTextHandler(io.Discard, nil)
	return slog.New(handler)
}
|
|
|
|
// fakeLLM is a scriptable LLMClient for dispatch/degrade tests.
|
|
type fakeLLM struct {
|
|
text string
|
|
usage Usage
|
|
err error
|
|
calls int
|
|
lastReq LLMRequest
|
|
}
|
|
|
|
func (f *fakeLLM) Complete(_ context.Context, req LLMRequest) (*LLMResponse, error) {
|
|
f.calls++
|
|
f.lastReq = req
|
|
if f.err != nil {
|
|
return nil, f.err
|
|
}
|
|
return &LLMResponse{Text: f.text, Usage: f.usage, ProviderRequestID: "fake"}, nil
|
|
}
|
|
|
|
type fakeWeb struct {
|
|
wc WebContext
|
|
err error
|
|
calls int
|
|
}
|
|
|
|
func (f *fakeWeb) Fetch(_ context.Context, _ string) (WebContext, error) {
|
|
f.calls++
|
|
if f.err != nil {
|
|
return WebContext{}, f.err
|
|
}
|
|
return f.wc, nil
|
|
}
|
|
|
|
// cascadeCfg is a config with the model/price table set and EVERY cascade flag off.
|
|
// Tests flip individual flags on a copy.
|
|
func cascadeCfg() Config {
|
|
return Config{
|
|
XAIModel: "grok-x", GeminiModel: "gemini-x", ReasoningModel: "grok-reason",
|
|
MaxOutTok: 100, XAITemp: 0.5,
|
|
ReasoningTrigger: "подумай глубже",
|
|
ReasoningEffort: "high",
|
|
WebProvider: webProviderGrokWebSearch,
|
|
Prices: map[string]ModelPrice{
|
|
"grok-x": {InputPerM: 1, CachedPerM: 0.2, OutputPerM: 2},
|
|
"gemini-x": {InputPerM: 0.1, CachedPerM: 0.1, OutputPerM: 0.4},
|
|
},
|
|
}
|
|
}
|
|
|
|
func msgs(body string) []Message {
|
|
return []Message{{Role: "system", Content: "SYS"}, {Role: "user", Content: body}}
|
|
}
|
|
|
|
// TestGenerateAllFlagsOffIsGrokDirect is the cascade-off parity invariant: even a
|
|
// "trivial"-looking message goes to Grok, and Gemini is never touched, when the router
|
|
// is off.
|
|
func TestGenerateAllFlagsOffIsGrokDirect(t *testing.T) {
|
|
grok := &fakeLLM{text: "grok answer"}
|
|
gem := &fakeLLM{text: "should not run"}
|
|
cfg := cascadeCfg()
|
|
b := &Bot{cfg: &cfg, llm: grok, gemini: gem, log: discardLog()}
|
|
|
|
res, err := b.generate(context.Background(), "привет", msgs("привет"), "")
|
|
if err != nil {
|
|
t.Fatalf("generate: %v", err)
|
|
}
|
|
if res.route != routeGrokDirect || res.text != "grok answer" {
|
|
t.Fatalf("res = (%q,%q), want grok_direct/\"grok answer\"", res.route, res.text)
|
|
}
|
|
if res.decision.Source != "default" {
|
|
t.Fatalf("router source = %q, want default (router off)", res.decision.Source)
|
|
}
|
|
if grok.calls != 1 || gem.calls != 0 {
|
|
t.Fatalf("calls grok=%d gem=%d, want 1/0", grok.calls, gem.calls)
|
|
}
|
|
}
|
|
|
|
func TestGenerateTrivialOffload(t *testing.T) {
|
|
grok := &fakeLLM{text: "grok"}
|
|
gem := &fakeLLM{text: "gemini trivial"}
|
|
cfg := cascadeCfg()
|
|
cfg.RouterEnabled, cfg.TrivialOffloadEnabled = true, true
|
|
b := &Bot{cfg: &cfg, llm: grok, gemini: gem, log: discardLog()}
|
|
|
|
res, err := b.generate(context.Background(), "привет", msgs("привет"), "")
|
|
if err != nil {
|
|
t.Fatalf("generate: %v", err)
|
|
}
|
|
if res.route != routeTrivial || res.text != "gemini trivial" || res.finalModel != "gemini-x" {
|
|
t.Fatalf("res = (%q,%q,%q), want trivial/gemini", res.route, res.text, res.finalModel)
|
|
}
|
|
if gem.calls != 1 || grok.calls != 0 {
|
|
t.Fatalf("calls grok=%d gem=%d, want 0/1 (Gemini answered)", grok.calls, gem.calls)
|
|
}
|
|
}
|
|
|
|
// TestGenerateTrivialDegradesToGrok: Gemini failing on the trivial route must fall back
|
|
// to Grok, never go silent.
|
|
func TestGenerateTrivialDegradesToGrok(t *testing.T) {
|
|
grok := &fakeLLM{text: "grok fallback"}
|
|
gem := &fakeLLM{err: errors.New("gemini down")}
|
|
cfg := cascadeCfg()
|
|
cfg.RouterEnabled, cfg.TrivialOffloadEnabled = true, true
|
|
b := &Bot{cfg: &cfg, llm: grok, gemini: gem, log: discardLog()}
|
|
|
|
res, err := b.generate(context.Background(), "привет", msgs("привет"), "")
|
|
if err != nil {
|
|
t.Fatalf("generate: %v", err)
|
|
}
|
|
if res.route != routeGrokDirect || res.text != "grok fallback" {
|
|
t.Fatalf("res = (%q,%q), want grok_direct fallback", res.route, res.text)
|
|
}
|
|
if !res.fallback || res.degraded != degradeTrivial {
|
|
t.Fatalf("fallback=%v degraded=%q, want true/trivial_failed", res.fallback, res.degraded)
|
|
}
|
|
if gem.calls != 1 || grok.calls != 1 {
|
|
t.Fatalf("calls grok=%d gem=%d, want 1/1", grok.calls, gem.calls)
|
|
}
|
|
}
|
|
|
|
func TestGenerateWebThenGrok(t *testing.T) {
|
|
grok := &fakeLLM{text: "synthesised", usage: Usage{PromptTokens: 100, CompletionTokens: 50}}
|
|
web := &fakeWeb{wc: WebContext{Digest: "fresh facts", Citations: []string{"http://src"}, Cost: CostBreakdown{WebTool: 0.1}}}
|
|
cfg := cascadeCfg()
|
|
cfg.RouterEnabled, cfg.WebEnabled = true, true
|
|
b := &Bot{cfg: &cfg, llm: grok, web: web, log: discardLog()}
|
|
|
|
res, err := b.generate(context.Background(), "какие новости сегодня", msgs("какие новости сегодня"), "")
|
|
if err != nil {
|
|
t.Fatalf("generate: %v", err)
|
|
}
|
|
if res.route != routeWebThenGrok || res.text != "synthesised" {
|
|
t.Fatalf("res = (%q,%q), want web_then_grok/synthesised", res.route, res.text)
|
|
}
|
|
if res.cost.WebTool != 0.1 || res.cost.Token <= 0 {
|
|
t.Fatalf("cost = %+v, want WebTool 0.1 + Token>0", res.cost)
|
|
}
|
|
if web.calls != 1 || grok.calls != 1 {
|
|
t.Fatalf("calls web=%d grok=%d, want 1/1", web.calls, grok.calls)
|
|
}
|
|
}
|
|
|
|
// TestGenerateWebDegradesToGrok: a web fetch failure (provider down or cap hit) degrades
|
|
// to grok_direct and books no web cost.
|
|
func TestGenerateWebDegradesToGrok(t *testing.T) {
|
|
grok := &fakeLLM{text: "grok fallback"}
|
|
web := &fakeWeb{err: errGroundingCapped}
|
|
cfg := cascadeCfg()
|
|
cfg.RouterEnabled, cfg.WebEnabled = true, true
|
|
b := &Bot{cfg: &cfg, llm: grok, web: web, log: discardLog()}
|
|
|
|
res, err := b.generate(context.Background(), "новости сегодня", msgs("новости сегодня"), "")
|
|
if err != nil {
|
|
t.Fatalf("generate: %v", err)
|
|
}
|
|
if res.route != routeGrokDirect || res.text != "grok fallback" || !res.fallback {
|
|
t.Fatalf("res = (%q,%q,fallback=%v), want grok_direct fallback", res.route, res.text, res.fallback)
|
|
}
|
|
if res.degraded != degradeGroundCap {
|
|
t.Fatalf("degraded = %q, want grounding_cap (the specific reason)", res.degraded)
|
|
}
|
|
if res.cost.WebTool != 0 || res.cost.Grounding != 0 {
|
|
t.Fatalf("web cost = %+v, want 0 (fetch failed before billing)", res.cost)
|
|
}
|
|
}
|
|
|
|
// TestGenerateReasoningForced: the manual trigger routes to the reasoning model with
|
|
// reasoning_effort, independent of ROUTER_ENABLED.
|
|
func TestGenerateReasoningForced(t *testing.T) {
|
|
grok := &fakeLLM{text: "deep answer"}
|
|
cfg := cascadeCfg()
|
|
cfg.ReasoningEnabled = true // ROUTER_ENABLED deliberately left off
|
|
b := &Bot{cfg: &cfg, llm: grok, log: discardLog()}
|
|
|
|
res, err := b.generate(context.Background(), "подумай глубже про сознание", msgs("подумай глубже про сознание"), "")
|
|
if err != nil {
|
|
t.Fatalf("generate: %v", err)
|
|
}
|
|
if res.route != routeReason || res.decision.Source != "forced" {
|
|
t.Fatalf("res route=%q source=%q, want reason/forced", res.route, res.decision.Source)
|
|
}
|
|
if grok.lastReq.ReasoningEffort != "high" || grok.lastReq.Model != "grok-reason" {
|
|
t.Fatalf("reasoning req = (effort %q, model %q), want high/grok-reason", grok.lastReq.ReasoningEffort, grok.lastReq.Model)
|
|
}
|
|
}
|
|
|
|
// TestClassifierConfidenceFloor: a Layer-1 classifier label that escalates off the safe
|
|
// floor (trivial/web) must clear the confidence floor, else the request stays on
|
|
// grok_direct — the false-trivial voice-leak guard (§8.6).
|
|
func TestClassifierConfidenceFloor(t *testing.T) {
|
|
cfg := cascadeCfg()
|
|
cfg.RouterEnabled, cfg.RouterClassifierEnabled = true, true
|
|
gem := &fakeLLM{}
|
|
b := &Bot{cfg: &cfg, gemini: gem, log: discardLog()}
|
|
var cost CostBreakdown
|
|
const substantive = "напиши подробное эссе про историю римской империи" // Layer-0 → grok_direct
|
|
|
|
gem.text = `{"route":"trivial","confidence":0.2}` // low-confidence escalation
|
|
if d := b.classify(context.Background(), substantive, &cost); d.Route != routeGrokDirect {
|
|
t.Fatalf("low-confidence trivial must stay grok_direct (safe floor), got %q", d.Route)
|
|
}
|
|
gem.text = `{"route":"trivial","confidence":0.95}` // confident escalation is honoured
|
|
if d := b.classify(context.Background(), substantive, &cost); d.Route != routeTrivial {
|
|
t.Fatalf("high-confidence trivial should route trivial, got %q", d.Route)
|
|
}
|
|
// A classifier error degrades to the Layer-0 verdict (grok_direct), never silence.
|
|
gem.text, gem.err = "", errors.New("gemini down")
|
|
if d := b.classify(context.Background(), substantive, &cost); d.Route != routeGrokDirect {
|
|
t.Fatalf("classifier failure must fall back to heuristic grok_direct, got %q", d.Route)
|
|
}
|
|
}
|
|
|
|
// TestGrokReasoningEffort: GROK_REASONING_EFFORT is sent on grok_direct (so grok-4.3 can
|
|
// be kept fast with "none"), empty means not sent (compat with grok-4.20-non-reasoning),
|
|
// and the reason route always overrides to "high" regardless.
|
|
func TestGrokReasoningEffort(t *testing.T) {
|
|
// Configured effort reaches grok_direct.
|
|
grok := &fakeLLM{text: "ok"}
|
|
cfg := cascadeCfg()
|
|
cfg.GrokReasoningEffort = "none"
|
|
b := &Bot{cfg: &cfg, llm: grok, log: discardLog()}
|
|
if _, err := b.generate(context.Background(), "hello", msgs("hello"), ""); err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
if grok.lastReq.ReasoningEffort != "none" {
|
|
t.Fatalf("grok_direct effort = %q, want none", grok.lastReq.ReasoningEffort)
|
|
}
|
|
|
|
// Empty default → not sent (so grok-4.20-non-reasoning keeps working).
|
|
grokDef := &fakeLLM{text: "ok"}
|
|
cfgDef := cascadeCfg() // GrokReasoningEffort == ""
|
|
bDef := &Bot{cfg: &cfgDef, llm: grokDef, log: discardLog()}
|
|
if _, err := bDef.generate(context.Background(), "hello", msgs("hello"), ""); err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
if grokDef.lastReq.ReasoningEffort != "" {
|
|
t.Fatalf("default effort = %q, want empty (not sent)", grokDef.lastReq.ReasoningEffort)
|
|
}
|
|
|
|
// The reason route ignores GROK_REASONING_EFFORT and always uses "high".
|
|
grokR := &fakeLLM{text: "deep"}
|
|
cfgR := cascadeCfg()
|
|
cfgR.GrokReasoningEffort = "none"
|
|
cfgR.ReasoningEnabled = true
|
|
bR := &Bot{cfg: &cfgR, llm: grokR, log: discardLog()}
|
|
if _, err := bR.generate(context.Background(), "подумай глубже про X", msgs("подумай глубже про X"), ""); err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
if grokR.lastReq.ReasoningEffort != "high" {
|
|
t.Fatalf("reason route effort = %q, want high (overrides GROK_REASONING_EFFORT)", grokR.lastReq.ReasoningEffort)
|
|
}
|
|
}
|
|
|
|
// TestGenerateTerminalErrorPropagates: if even grok_direct fails, generate returns the
|
|
// error (respond turns it into refund + react), not a silent empty success.
|
|
func TestGenerateTerminalErrorPropagates(t *testing.T) {
|
|
grok := &fakeLLM{err: errors.New("xai down")}
|
|
cfg := cascadeCfg()
|
|
b := &Bot{cfg: &cfg, llm: grok, log: discardLog()}
|
|
|
|
if _, err := b.generate(context.Background(), "hello", msgs("hello"), ""); err == nil {
|
|
t.Fatal("want terminal error when grok_direct fails, got nil")
|
|
}
|
|
}
|