535 lines
23 KiB
Go
535 lines
23 KiB
Go
package main
|
||
|
||
import (
|
||
"context"
|
||
"errors"
|
||
"io"
|
||
"log/slog"
|
||
"strings"
|
||
"testing"
|
||
)
|
||
|
||
// discardLog returns a logger whose text handler writes to io.Discard, so tests can
// hand a non-nil *slog.Logger to the code under test without producing any output.
func discardLog() *slog.Logger {
	sink := slog.NewTextHandler(io.Discard, nil)
	return slog.New(sink)
}
|
||
|
||
// fakeLLM is a scriptable LLMClient for dispatch/degrade tests.
|
||
type fakeLLM struct {
|
||
text string
|
||
usage Usage
|
||
err error
|
||
calls int
|
||
lastReq LLMRequest
|
||
}
|
||
|
||
func (f *fakeLLM) Complete(_ context.Context, req LLMRequest) (*LLMResponse, error) {
|
||
f.calls++
|
||
f.lastReq = req
|
||
if f.err != nil {
|
||
return nil, f.err
|
||
}
|
||
return &LLMResponse{Text: f.text, Usage: f.usage, ProviderRequestID: "fake"}, nil
|
||
}
|
||
|
||
type fakeWeb struct {
|
||
wc WebContext
|
||
err error
|
||
calls int
|
||
lastQuery string
|
||
}
|
||
|
||
func (f *fakeWeb) Fetch(_ context.Context, q string) (WebContext, error) {
|
||
f.calls++
|
||
f.lastQuery = q
|
||
if f.err != nil {
|
||
return WebContext{}, f.err
|
||
}
|
||
return f.wc, nil
|
||
}
|
||
|
||
// cascadeCfg is a config with the model/price table set and EVERY cascade flag off.
|
||
// Tests flip individual flags on a copy.
|
||
func cascadeCfg() Config {
|
||
return Config{
|
||
XAIModel: "grok-x", GeminiModel: "gemini-x", ReasoningModel: "grok-reason",
|
||
MaxOutTok: 100, XAITemp: 0.5,
|
||
ReasoningTrigger: "подумай глубже",
|
||
ReasoningEffort: "high",
|
||
WebProvider: webProviderGrokWebSearch,
|
||
Prices: map[string]ModelPrice{
|
||
"grok-x": {InputPerM: 1, CachedPerM: 0.2, OutputPerM: 2},
|
||
"gemini-x": {InputPerM: 0.1, CachedPerM: 0.1, OutputPerM: 0.4},
|
||
},
|
||
}
|
||
}
|
||
|
||
func msgs(body string) []Message {
|
||
return []Message{{Role: "system", Content: "SYS"}, {Role: "user", Content: body}}
|
||
}
|
||
|
||
// TestGenerateAllFlagsOffIsGrokDirect is the cascade-off parity invariant: even a
|
||
// "trivial"-looking message goes to Grok, and Gemini is never touched, when the router
|
||
// is off.
|
||
func TestGenerateAllFlagsOffIsGrokDirect(t *testing.T) {
|
||
grok := &fakeLLM{text: "grok answer"}
|
||
gem := &fakeLLM{text: "should not run"}
|
||
cfg := cascadeCfg()
|
||
b := &Bot{cfg: &cfg, llm: grok, gemini: gem, log: discardLog()}
|
||
|
||
res, err := b.generate(context.Background(), "привет", msgs("привет"), "", true)
|
||
if err != nil {
|
||
t.Fatalf("generate: %v", err)
|
||
}
|
||
if res.route != routeGrokDirect || res.text != "grok answer" {
|
||
t.Fatalf("res = (%q,%q), want grok_direct/\"grok answer\"", res.route, res.text)
|
||
}
|
||
if res.decision.Source != "default" {
|
||
t.Fatalf("router source = %q, want default (router off)", res.decision.Source)
|
||
}
|
||
if grok.calls != 1 || gem.calls != 0 {
|
||
t.Fatalf("calls grok=%d gem=%d, want 1/0", grok.calls, gem.calls)
|
||
}
|
||
}
|
||
|
||
func TestGenerateTrivialOffload(t *testing.T) {
|
||
grok := &fakeLLM{text: "grok"}
|
||
gem := &fakeLLM{text: "gemini trivial"}
|
||
cfg := cascadeCfg()
|
||
cfg.RouterEnabled, cfg.TrivialOffloadEnabled = true, true
|
||
b := &Bot{cfg: &cfg, llm: grok, gemini: gem, log: discardLog()}
|
||
|
||
res, err := b.generate(context.Background(), "привет", msgs("привет"), "", true)
|
||
if err != nil {
|
||
t.Fatalf("generate: %v", err)
|
||
}
|
||
if res.route != routeTrivial || res.text != "gemini trivial" || res.finalModel != "gemini-x" {
|
||
t.Fatalf("res = (%q,%q,%q), want trivial/gemini", res.route, res.text, res.finalModel)
|
||
}
|
||
if gem.calls != 1 || grok.calls != 0 {
|
||
t.Fatalf("calls grok=%d gem=%d, want 0/1 (Gemini answered)", grok.calls, gem.calls)
|
||
}
|
||
}
|
||
|
||
// TestGenerateTrivialDegradesToGrok: Gemini failing on the trivial route must fall back
|
||
// to Grok, never go silent.
|
||
func TestGenerateTrivialDegradesToGrok(t *testing.T) {
|
||
grok := &fakeLLM{text: "grok fallback"}
|
||
gem := &fakeLLM{err: errors.New("gemini down")}
|
||
cfg := cascadeCfg()
|
||
cfg.RouterEnabled, cfg.TrivialOffloadEnabled = true, true
|
||
b := &Bot{cfg: &cfg, llm: grok, gemini: gem, log: discardLog()}
|
||
|
||
res, err := b.generate(context.Background(), "привет", msgs("привет"), "", true)
|
||
if err != nil {
|
||
t.Fatalf("generate: %v", err)
|
||
}
|
||
if res.route != routeGrokDirect || res.text != "grok fallback" {
|
||
t.Fatalf("res = (%q,%q), want grok_direct fallback", res.route, res.text)
|
||
}
|
||
if !res.fallback || res.degraded != degradeTrivial {
|
||
t.Fatalf("fallback=%v degraded=%q, want true/trivial_failed", res.fallback, res.degraded)
|
||
}
|
||
if gem.calls != 1 || grok.calls != 1 {
|
||
t.Fatalf("calls grok=%d gem=%d, want 1/1", grok.calls, gem.calls)
|
||
}
|
||
}
|
||
|
||
// TestGenerateWebThenGrok: a freshness query (classifier off → Layer-0 web) fetches then
|
||
// has Grok synthesise, booking both calls' tokens + the web fee.
|
||
func TestGenerateWebThenGrok(t *testing.T) {
|
||
grok := &fakeLLM{text: "synthesised", usage: Usage{PromptTokens: 100, CompletionTokens: 50}}
|
||
web := &fakeWeb{wc: WebContext{Digest: "fresh facts", Citations: []string{"http://src"}, Cost: CostBreakdown{WebTool: 0.1}}}
|
||
cfg := cascadeCfg()
|
||
cfg.RouterEnabled, cfg.WebEnabled = true, true
|
||
b := &Bot{cfg: &cfg, llm: grok, web: web, log: discardLog()}
|
||
|
||
res, err := b.generate(context.Background(), "какие новости сегодня", msgs("какие новости сегодня"), "", true)
|
||
if err != nil {
|
||
t.Fatalf("generate: %v", err)
|
||
}
|
||
if res.route != routeWebThenGrok || res.text != "synthesised" {
|
||
t.Fatalf("res = (%q,%q), want web_then_grok/synthesised", res.route, res.text)
|
||
}
|
||
if res.cost.WebTool != 0.1 || res.cost.Token <= 0 {
|
||
t.Fatalf("cost = %+v, want WebTool 0.1 + Token>0", res.cost)
|
||
}
|
||
if !res.webGrounded || res.citationCount != 1 {
|
||
t.Fatalf("webGrounded=%v citations=%d, want true/1", res.webGrounded, res.citationCount)
|
||
}
|
||
if web.calls != 1 || grok.calls != 1 {
|
||
t.Fatalf("calls web=%d grok=%d, want 1/1", web.calls, grok.calls)
|
||
}
|
||
}
|
||
|
||
// TestGenerateWebDegradesToGrok: a web fetch failure (cap hit) degrades to grok_direct,
|
||
// books no web cost, and — being a RECENCY query — uses the staleness hedge, not abstain.
|
||
func TestGenerateWebDegradesToGrok(t *testing.T) {
|
||
grok := &fakeLLM{text: "grok fallback"}
|
||
web := &fakeWeb{err: errGroundingCapped}
|
||
cfg := cascadeCfg()
|
||
cfg.RouterEnabled, cfg.WebEnabled = true, true
|
||
b := &Bot{cfg: &cfg, llm: grok, web: web, log: discardLog()}
|
||
|
||
res, err := b.generate(context.Background(), "новости сегодня", msgs("новости сегодня"), "", true)
|
||
if err != nil {
|
||
t.Fatalf("generate: %v", err)
|
||
}
|
||
if res.route != routeGrokDirect || res.text != "grok fallback" || !res.fallback {
|
||
t.Fatalf("res = (%q,%q,fallback=%v), want grok_direct fallback", res.route, res.text, res.fallback)
|
||
}
|
||
if res.degraded != degradeGroundCap {
|
||
t.Fatalf("degraded = %q, want grounding_cap (the specific reason)", res.degraded)
|
||
}
|
||
if res.cost.WebTool != 0 || res.cost.Grounding != 0 {
|
||
t.Fatalf("web cost = %+v, want 0 (fetch failed before billing)", res.cost)
|
||
}
|
||
// Recency miss → staleness hedge ("устаревшими"), not the factual-abstain hedge.
|
||
if !hedgeContains(grok.lastReq.Messages, "устаревш") {
|
||
t.Fatalf("freshness degrade should use the staleness hedge; messages = %+v", grok.lastReq.Messages)
|
||
}
|
||
}
|
||
|
||
// TestGenerateReasoningForced: the manual trigger routes to the reasoning model with
|
||
// reasoning_effort, independent of ROUTER_ENABLED.
|
||
func TestGenerateReasoningForced(t *testing.T) {
|
||
grok := &fakeLLM{text: "deep answer"}
|
||
cfg := cascadeCfg()
|
||
cfg.ReasoningEnabled = true // ROUTER_ENABLED deliberately left off
|
||
b := &Bot{cfg: &cfg, llm: grok, log: discardLog()}
|
||
|
||
res, err := b.generate(context.Background(), "подумай глубже про сознание", msgs("подумай глубже про сознание"), "", true)
|
||
if err != nil {
|
||
t.Fatalf("generate: %v", err)
|
||
}
|
||
if res.route != routeReason || res.decision.Source != "forced" {
|
||
t.Fatalf("res route=%q source=%q, want reason/forced", res.route, res.decision.Source)
|
||
}
|
||
if grok.lastReq.ReasoningEffort != "high" || grok.lastReq.Model != "grok-reason" {
|
||
t.Fatalf("reasoning req = (effort %q, model %q), want high/grok-reason", grok.lastReq.ReasoningEffort, grok.lastReq.Model)
|
||
}
|
||
}
|
||
|
||
// TestClassifyTrivialAgreementGate: a trivial route requires the Layer-0 candidate AND
|
||
// classifier.trivial AND confidence ≥ trivialFloor. A low-confidence "trivial" or a
|
||
// classifier that disagrees stays on grok_direct (no voice leak).
|
||
func TestClassifyTrivialAgreementGate(t *testing.T) {
|
||
cfg := cascadeCfg()
|
||
cfg.RouterEnabled, cfg.RouterClassifierEnabled = true, true
|
||
gem := &fakeLLM{}
|
||
b := &Bot{cfg: &cfg, gemini: gem, log: discardLog()}
|
||
var cost CostBreakdown
|
||
|
||
gem.text = `{"trivial":true,"needs_web":false,"confidence":0.95}`
|
||
if d := b.classify(context.Background(), "привет", "USER: привет", &cost); d.Route != routeTrivial {
|
||
t.Fatalf("agreed high-confidence trivial = %q, want trivial", d.Route)
|
||
}
|
||
gem.text = `{"trivial":true,"needs_web":false,"confidence":0.5}`
|
||
if d := b.classify(context.Background(), "привет", "USER: привет", &cost); d.Route != routeGrokDirect {
|
||
t.Fatalf("low-confidence trivial = %q, want grok_direct (no leak)", d.Route)
|
||
}
|
||
// A non-trivial body can never be trivial even if the classifier claims so.
|
||
gem.text = `{"trivial":true,"needs_web":false,"confidence":0.99}`
|
||
const substantive = "напиши подробное эссе про историю римской империи"
|
||
if d := b.classify(context.Background(), substantive, "USER: …", &cost); d.Route != routeGrokDirect {
|
||
t.Fatalf("classifier.trivial on a substantive body = %q, want grok_direct", d.Route)
|
||
}
|
||
}
|
||
|
||
// TestClassifyClassifierErrorFallsBackToLayer0: a classifier error/garbage degrades to the
|
||
// deterministic Layer-0 verdict — grok_direct for a substantive body, web for a freshness
|
||
// body — never an ungrounded confident answer, never a degrade-to-web.
|
||
func TestClassifyClassifierErrorFallsBackToLayer0(t *testing.T) {
|
||
cfg := cascadeCfg()
|
||
cfg.RouterEnabled, cfg.RouterClassifierEnabled, cfg.WebParanoid = true, true, true
|
||
gem := &fakeLLM{}
|
||
b := &Bot{cfg: &cfg, gemini: gem, log: discardLog()}
|
||
var cost CostBreakdown
|
||
|
||
// Transport error → Layer-0.
|
||
gem.err = errors.New("gemini down")
|
||
if d := b.classify(context.Background(), "напиши эссе про рим", "USER: …", &cost); d.Route != routeGrokDirect {
|
||
t.Fatalf("classifier error on substantive body = %q, want grok_direct (Layer-0)", d.Route)
|
||
}
|
||
if d := b.classify(context.Background(), "новости сегодня", "USER: …", &cost); d.Route != routeWebThenGrok {
|
||
t.Fatalf("classifier error on freshness body = %q, want web (deterministic Layer-0 survives)", d.Route)
|
||
}
|
||
// Garbage JSON (no transport error) → also Layer-0.
|
||
gem.err, gem.text = nil, "not json at all"
|
||
if d := b.classify(context.Background(), "напиши эссе про рим", "USER: …", &cost); d.Route != routeGrokDirect {
|
||
t.Fatalf("garbage classifier JSON = %q, want grok_direct (Layer-0)", d.Route)
|
||
}
|
||
}
|
||
|
||
// TestGenerateRoadHouseWebParanoidDM is the headline regression: an obscure-entity factual
|
||
// lookup in a DM, with the classifier + WEB_PARANOID on, routes to web AND the fetch uses
|
||
// the classifier's context-resolved search_query (the follow-up rewrite). With paranoid
|
||
// off it correctly stays grok_direct (the canary-neutral baseline).
|
||
func TestGenerateRoadHouseWebParanoidDM(t *testing.T) {
|
||
const verdict = `{"needs_web":true,"verifiable":true,"entity_obscure":true,"time_sensitive":false,"trivial":false,"search_query":"Дом у дороги 2024 фильм актёрский состав","confidence":0.7}`
|
||
mk := func(paranoid bool) (*fakeLLM, *fakeWeb, genResult) {
|
||
grok := &fakeLLM{text: "voiced", usage: Usage{PromptTokens: 10, CompletionTokens: 5}}
|
||
gem := &fakeLLM{text: verdict}
|
||
web := &fakeWeb{wc: WebContext{Digest: "cast: Patrick Swayze…", Citations: []string{"http://imdb"}}}
|
||
cfg := cascadeCfg()
|
||
cfg.RouterEnabled, cfg.RouterClassifierEnabled, cfg.WebEnabled, cfg.WebParanoid = true, true, true, paranoid
|
||
b := &Bot{cfg: &cfg, llm: grok, gemini: gem, web: web, log: discardLog()}
|
||
res, err := b.generate(context.Background(), "2024 года", []Message{
|
||
{Role: "system", Content: "SYS"},
|
||
{Role: "user", Content: "кто снимался в фильме дом у дороги"},
|
||
{Role: "assistant", Content: "В фильме 1989 года…"},
|
||
{Role: "user", Content: "2024 года"},
|
||
}, "", true)
|
||
if err != nil {
|
||
t.Fatalf("generate: %v", err)
|
||
}
|
||
return grok, web, res
|
||
}
|
||
|
||
_, web, res := mk(true)
|
||
if res.route != routeWebThenGrok {
|
||
t.Fatalf("paranoid DM road-house = %q, want web_then_grok (the fix)", res.route)
|
||
}
|
||
if !res.rewriteUsed || web.lastQuery != "Дом у дороги 2024 фильм актёрский состав" {
|
||
t.Fatalf("fetch should use the rewritten query: rewriteUsed=%v lastQuery=%q", res.rewriteUsed, web.lastQuery)
|
||
}
|
||
|
||
_, _, resOff := mk(false)
|
||
if resOff.route != routeGrokDirect {
|
||
t.Fatalf("paranoid OFF road-house = %q, want grok_direct (baseline)", resOff.route)
|
||
}
|
||
}
|
||
|
||
// TestGenerateFollowupGroupUsesBareBody: in a GROUP the context-resolved rewrite is
|
||
// suppressed — the fetch uses the bare (sanitised) body, never the classifier's
|
||
// search_query, so a member's follow-up can't ground the wrong prior subject.
|
||
func TestGenerateFollowupGroupUsesBareBody(t *testing.T) {
|
||
const verdict = `{"needs_web":true,"verifiable":true,"entity_obscure":true,"search_query":"какой-то чужой фильм 2024","confidence":0.7}`
|
||
grok := &fakeLLM{text: "voiced"}
|
||
gem := &fakeLLM{text: verdict}
|
||
web := &fakeWeb{wc: WebContext{Digest: "d", Citations: []string{"http://s"}}}
|
||
cfg := cascadeCfg()
|
||
cfg.RouterEnabled, cfg.RouterClassifierEnabled, cfg.WebEnabled, cfg.WebParanoid = true, true, true, true
|
||
b := &Bot{cfg: &cfg, llm: grok, gemini: gem, web: web, log: discardLog()}
|
||
|
||
res, err := b.generate(context.Background(), "2024 года", msgs("2024 года"), "", false /* group */)
|
||
if err != nil {
|
||
t.Fatalf("generate: %v", err)
|
||
}
|
||
if res.route != routeWebThenGrok {
|
||
t.Fatalf("group route = %q, want web_then_grok", res.route)
|
||
}
|
||
if res.rewriteUsed || web.lastQuery != "2024 года" {
|
||
t.Fatalf("group must use the bare body, not the rewrite: rewriteUsed=%v lastQuery=%q", res.rewriteUsed, web.lastQuery)
|
||
}
|
||
}
|
||
|
||
// TestGenerateWebEmptySearchQueryFallsBackToBody: the rewrite-with-fallback contract's
|
||
// empty arm (§6/§12). A DM web route whose classifier returned an empty search_query must
|
||
// fetch the bare (sanitised) body and report rewriteUsed=false — never an empty query.
|
||
func TestGenerateWebEmptySearchQueryFallsBackToBody(t *testing.T) {
|
||
// verifiable:true so it genuinely routes web (the needs_web arm requires verifiable);
|
||
// search_query empty is the point — the fetch must fall back to the bare body.
|
||
const verdict = `{"needs_web":true,"verifiable":true,"entity_obscure":false,"search_query":"","confidence":0.7}`
|
||
grok := &fakeLLM{text: "voiced"}
|
||
gem := &fakeLLM{text: verdict}
|
||
web := &fakeWeb{wc: WebContext{Digest: "d", Citations: []string{"http://s"}}}
|
||
cfg := cascadeCfg()
|
||
cfg.RouterEnabled, cfg.RouterClassifierEnabled, cfg.WebEnabled, cfg.WebParanoid = true, true, true, true
|
||
b := &Bot{cfg: &cfg, llm: grok, gemini: gem, web: web, log: discardLog()}
|
||
|
||
const body = "в каком году основан Рим"
|
||
res, err := b.generate(context.Background(), body, msgs(body), "", true /* DM */)
|
||
if err != nil {
|
||
t.Fatalf("generate: %v", err)
|
||
}
|
||
if res.route != routeWebThenGrok {
|
||
t.Fatalf("route = %q, want web_then_grok", res.route)
|
||
}
|
||
if res.rewriteUsed || web.lastQuery != body {
|
||
t.Fatalf("empty search_query must fall back to the bare body: rewriteUsed=%v lastQuery=%q", res.rewriteUsed, web.lastQuery)
|
||
}
|
||
}
|
||
|
||
// TestGenerateFreshnessTrapDesignedWeb: a freshness lexeme in a rumination
|
||
// ("сегодня…") still hard-routes to web (the accepted, designed cheap false-web, §14.1).
|
||
func TestGenerateFreshnessTrapDesignedWeb(t *testing.T) {
|
||
grok := &fakeLLM{text: "x"}
|
||
web := &fakeWeb{wc: WebContext{Digest: "d", Citations: []string{"http://s"}}}
|
||
cfg := cascadeCfg()
|
||
cfg.RouterEnabled, cfg.WebEnabled = true, true // classifier off — freshness alone routes
|
||
b := &Bot{cfg: &cfg, llm: grok, web: web, log: discardLog()}
|
||
|
||
res, err := b.generate(context.Background(), "сегодня я думаю о смысле жизни", msgs("сегодня я думаю о смысле жизни"), "", true)
|
||
if err != nil {
|
||
t.Fatalf("generate: %v", err)
|
||
}
|
||
if res.route != routeWebThenGrok {
|
||
t.Fatalf("freshness rumination = %q, want web_then_grok (designed)", res.route)
|
||
}
|
||
}
|
||
|
||
// TestGenerateWebDegradeFactualAbstain: a STATIC verifiable-fact web miss uses the
|
||
// factual-abstain hedge (not the staleness caveat), so Grok abstains on names/dates
|
||
// rather than shipping a confident guess.
|
||
func TestGenerateWebDegradeFactualAbstain(t *testing.T) {
|
||
const verdict = `{"needs_web":true,"verifiable":true,"entity_obscure":true,"time_sensitive":false,"search_query":"q","confidence":0.7}`
|
||
grok := &fakeLLM{text: "honest answer"}
|
||
gem := &fakeLLM{text: verdict}
|
||
web := &fakeWeb{err: errors.New("fetch boom")}
|
||
cfg := cascadeCfg()
|
||
cfg.RouterEnabled, cfg.RouterClassifierEnabled, cfg.WebEnabled, cfg.WebParanoid = true, true, true, true
|
||
b := &Bot{cfg: &cfg, llm: grok, gemini: gem, web: web, log: discardLog()}
|
||
|
||
res, err := b.generate(context.Background(), "кто снимался в фильме дом у дороги", msgs("кто снимался в фильме дом у дороги"), "", true)
|
||
if err != nil {
|
||
t.Fatalf("generate: %v", err)
|
||
}
|
||
if res.route != routeGrokDirect || !res.fallback {
|
||
t.Fatalf("res route=%q fallback=%v, want grok_direct fallback", res.route, res.fallback)
|
||
}
|
||
if !hedgeContains(grok.lastReq.Messages, "Не удалось проверить") {
|
||
t.Fatalf("factual miss should use the abstain hedge; messages = %+v", grok.lastReq.Messages)
|
||
}
|
||
if hedgeContains(grok.lastReq.Messages, "устаревш") {
|
||
t.Fatalf("factual miss must NOT use the staleness hedge")
|
||
}
|
||
}
|
||
|
||
// TestFactualMissHedge: the web-degrade hedge selection. A recency signal (Freshness or
|
||
// time_sensitive) → staleness (factualMiss=false); a static checkable-fact signal
|
||
// (verifiable / entity_obscure / a non-recency needs_web) → abstain (factualMiss=true).
|
||
func TestFactualMissHedge(t *testing.T) {
|
||
cases := []struct {
|
||
d RouterDecision
|
||
want bool // true => abstain hedge
|
||
}{
|
||
{RouterDecision{Freshness: "recent"}, false},
|
||
{RouterDecision{TimeSensitive: true}, false},
|
||
{RouterDecision{Verifiable: true}, true},
|
||
{RouterDecision{EntityObscure: true}, true},
|
||
{RouterDecision{NeedsWeb: true}, true}, // off-spec needs_web-only → abstain (Q3)
|
||
{RouterDecision{NeedsWeb: true, TimeSensitive: true}, false}, // recency still wins
|
||
{RouterDecision{}, false},
|
||
}
|
||
for _, c := range cases {
|
||
if got := c.d.factualMiss(); got != c.want {
|
||
t.Errorf("factualMiss(%+v) = %v, want %v", c.d, got, c.want)
|
||
}
|
||
}
|
||
}
|
||
|
||
// TestReserveEstimate: flags off → exactly grok_direct's estimate; with gemini grounding +
|
||
// classifier on, it includes the per-prompt fee AND the always-on classifier leg (§7).
|
||
func TestReserveEstimate(t *testing.T) {
|
||
cfg := cascadeCfg()
|
||
b := &Bot{cfg: &cfg, log: discardLog()}
|
||
base := b.estimateUSD("grok-x")
|
||
if got := b.reserveEstimate(); !approxEq(got, base) {
|
||
t.Fatalf("flags-off reserve = %v, want grok_direct estimate %v", got, base)
|
||
}
|
||
|
||
cfg2 := cascadeCfg()
|
||
cfg2.WebEnabled, cfg2.WebProvider = true, webProviderGeminiGrounding
|
||
cfg2.RouterEnabled, cfg2.RouterClassifierEnabled = true, true
|
||
cfg2.GeminiGroundingPerPrompt = 0.035
|
||
b2 := &Bot{cfg: &cfg2, log: discardLog()}
|
||
want := b2.estimateUSD("grok-x") + b2.estimateUSD("gemini-x") + 0.035 + b2.estimateUSD("gemini-x")
|
||
if got := b2.reserveEstimate(); !approxEq(got, want) {
|
||
t.Fatalf("web+classifier reserve = %v, want %v (XAI + gemini fetch + $0.035 fee + classifier leg)", got, want)
|
||
}
|
||
// The fee must actually move the envelope (regression guard for an unbooked fee).
|
||
cfg3 := cfg2
|
||
cfg3.GeminiGroundingPerPrompt = 0
|
||
b3 := &Bot{cfg: &cfg3, log: discardLog()}
|
||
if b2.reserveEstimate()-b3.reserveEstimate() < 0.0349 {
|
||
t.Fatalf("the grounding fee must raise the reservation by ~0.035")
|
||
}
|
||
}
|
||
|
||
// TestGrokReasoningEffort: GROK_REASONING_EFFORT is sent on grok_direct (so grok-4.3 can
|
||
// be kept fast with "none"), empty means not sent (compat with grok-4.20-non-reasoning),
|
||
// and the reason route always overrides to "high" regardless.
|
||
func TestGrokReasoningEffort(t *testing.T) {
|
||
grok := &fakeLLM{text: "ok"}
|
||
cfg := cascadeCfg()
|
||
cfg.GrokReasoningEffort = "none"
|
||
b := &Bot{cfg: &cfg, llm: grok, log: discardLog()}
|
||
if _, err := b.generate(context.Background(), "hello", msgs("hello"), "", true); err != nil {
|
||
t.Fatal(err)
|
||
}
|
||
if grok.lastReq.ReasoningEffort != "none" {
|
||
t.Fatalf("grok_direct effort = %q, want none", grok.lastReq.ReasoningEffort)
|
||
}
|
||
|
||
grokDef := &fakeLLM{text: "ok"}
|
||
cfgDef := cascadeCfg() // GrokReasoningEffort == ""
|
||
bDef := &Bot{cfg: &cfgDef, llm: grokDef, log: discardLog()}
|
||
if _, err := bDef.generate(context.Background(), "hello", msgs("hello"), "", true); err != nil {
|
||
t.Fatal(err)
|
||
}
|
||
if grokDef.lastReq.ReasoningEffort != "" {
|
||
t.Fatalf("default effort = %q, want empty (not sent)", grokDef.lastReq.ReasoningEffort)
|
||
}
|
||
|
||
grokR := &fakeLLM{text: "deep"}
|
||
cfgR := cascadeCfg()
|
||
cfgR.GrokReasoningEffort = "none"
|
||
cfgR.ReasoningEnabled = true
|
||
bR := &Bot{cfg: &cfgR, llm: grokR, log: discardLog()}
|
||
if _, err := bR.generate(context.Background(), "подумай глубже про X", msgs("подумай глубже про X"), "", true); err != nil {
|
||
t.Fatal(err)
|
||
}
|
||
if grokR.lastReq.ReasoningEffort != "high" {
|
||
t.Fatalf("reason route effort = %q, want high (overrides GROK_REASONING_EFFORT)", grokR.lastReq.ReasoningEffort)
|
||
}
|
||
}
|
||
|
||
// TestGenerateTerminalErrorPropagates: if even grok_direct fails, generate returns the
|
||
// error (respond turns it into refund + react), not a silent empty success.
|
||
func TestGenerateTerminalErrorPropagates(t *testing.T) {
|
||
grok := &fakeLLM{err: errors.New("xai down")}
|
||
cfg := cascadeCfg()
|
||
b := &Bot{cfg: &cfg, llm: grok, log: discardLog()}
|
||
|
||
if _, err := b.generate(context.Background(), "hello", msgs("hello"), "", true); err == nil {
|
||
t.Fatal("want terminal error when grok_direct fails, got nil")
|
||
}
|
||
}
|
||
|
||
// TestWebSynthMessagesNoRawURLs guards the source-leak fix: the grounded digest is
|
||
// injected, but the raw gemini-grounding redirect URLs must NOT reach the synth prompt
|
||
// (Grok was pasting vertexaisearch.../grounding-api-redirect/... links into the reply).
|
||
func TestWebSynthMessagesNoRawURLs(t *testing.T) {
|
||
wc := WebContext{
|
||
Digest: "Титаник вышел в 1997, режиссёр Джеймс Кэмерон.",
|
||
Citations: []string{"https://vertexaisearch.cloud.google.com/grounding-api-redirect/AUZIYQabc123"},
|
||
}
|
||
out := webSynthMessages(msgs("в каком году титаник"), wc)
|
||
var note string
|
||
for _, m := range out {
|
||
if m.Role == "system" && strings.Contains(m.Content, "Свежие данные") {
|
||
note = m.Content
|
||
}
|
||
}
|
||
if note == "" {
|
||
t.Fatal("web synth note missing")
|
||
}
|
||
if !strings.Contains(note, "Титаник вышел в 1997") {
|
||
t.Fatalf("digest not injected: %q", note)
|
||
}
|
||
if strings.Contains(note, "vertexaisearch") || strings.Contains(note, "grounding-api-redirect") || strings.Contains(note, "http") {
|
||
t.Fatalf("raw citation URL leaked into the synth prompt: %q", note)
|
||
}
|
||
}
|
||
|
||
func hedgeContains(ms []Message, sub string) bool {
|
||
for _, m := range ms {
|
||
if strings.Contains(m.Content, sub) {
|
||
return true
|
||
}
|
||
}
|
||
return false
|
||
}
|
||
|
||
// approxEq reports whether a and b differ by strictly less than 1e-9. A NaN difference
// compares false.
func approxEq(a, b float64) bool {
	diff := a - b
	if diff < 0 {
		diff = -diff
	}
	return diff < 1e-9
}
|