package routedecide import "testing" // TestClassifyLayer0 is the free-heuristic golden set: freshness → web (WebForce), // short greetings/acks/bare-arithmetic → trivial candidate, everything else → // grok_direct, with substantive messages never trivial. func TestClassifyLayer0(t *testing.T) { cases := []struct { body string wantRoute string wantWebForce bool wantTrivial bool }{ {"привет", RouteTrivial, false, true}, {"спасибо", RouteTrivial, false, true}, {"2+2", RouteTrivial, false, true}, {"12 / 4 - 1", RouteTrivial, false, true}, {"hello", RouteTrivial, false, true}, {"какие новости сегодня?", RouteWeb, true, false}, {"курс доллара сегодня", RouteWeb, true, false}, {"what's the weather today", RouteWeb, true, false}, {"посоветуй фильм на вечер", RouteGrokDirect, false, false}, {"explain how TCP works", RouteGrokDirect, false, false}, {"спасибо, а теперь подробно объясни квантовую запутанность", RouteGrokDirect, false, false}, {"", RouteGrokDirect, false, false}, } for _, c := range cases { l0 := ClassifyLayer0(c.body) if l0.Route != c.wantRoute || l0.WebForce != c.wantWebForce || l0.Trivial != c.wantTrivial { t.Errorf("ClassifyLayer0(%q) = {route:%q webForce:%v trivial:%v}, want {%q %v %v}", c.body, l0.Route, l0.WebForce, l0.Trivial, c.wantRoute, c.wantWebForce, c.wantTrivial) } } } // TestFreshnessWordBoundaries guards the §7-#7 \b tightening: English freshness tokens // fire on whole words only — never inside scoreboard / concurrent / weathering — while // genuine freshness phrases still force web, and Russian stems stay stem-matched. func TestFreshnessWordBoundaries(t *testing.T) { shouldForceWeb := []string{ "what's the weather today", "latest news on AI", "current bitcoin price", "какие новости сегодня", // RU stems unchanged "курс доллара сегодня", } for _, s := range shouldForceWeb { if !ClassifyLayer0(s).WebForce { t.Errorf("expected WebForce on freshness phrase: %q", s) } } shouldNotForceWeb := []string{ "the scoreboard shows 3:1", // score inside scoreboard "concurrent programming in Go", // current inside concurrent "weathering the storm, metaphorically", // weather inside weathering "subscribe to my newsletter please", // news inside newsletter } for _, s := range shouldNotForceWeb { if ClassifyLayer0(s).WebForce { t.Errorf("freshness false-positive (substring match) on: %q", s) } } } // TestLookupHintFalsePositiveCorpus is the §5 guarantee: the soft lookup-intent regex // must NOT fire on greetings/vocatives/idioms/non-lookup interrogatives — it is anchored // on interrogative + lookup-verb, never on a capitalised word or a guillemet. A false // LookupHint can only ever bias the classifier (and only when WEB_PARANOID + verifiable), // but we still hold the regex itself to near-zero false positives. func TestLookupHintFalsePositiveCorpus(t *testing.T) { falsePositives := []string{ "Привет, Москва!", // vocative, no interrogative "«Война и мир» — топ", // guillemets are not a trigger "ну ты прям Эйнштейн", // proper noun, no «кто такой» "кто это сделал?", // «кто» not followed by a lookup-verb "когда ты придёшь?", // «когда» needs a release/birth verb "спасибо большое", // ack "расскажи что-нибудь", // imperative, no lookup interrogative "I love this movie", // English, no interrogative "who cares", // «who» not followed by is/was/starred/… } for _, s := range falsePositives { if l0 := ClassifyLayer0(s); l0.LookupHint { t.Errorf("lookupHint fired on a false-positive trap: %q", s) } } // And it MUST fire on genuine lookup intent (otherwise it's useless). truePositives := []string{ "кто снимался в фильме дом у дороги", "кто написал войну и мир", "в каком году вышел фильм матрица", "who directed Inception", "in what year was the Matrix released", "how many seasons of breaking bad", } for _, s := range truePositives { if l0 := ClassifyLayer0(s); !l0.LookupHint { t.Errorf("lookupHint should fire on genuine lookup intent: %q", s) } } } // TestRecommendationFreshnessCarveOut: a recommendation/advice request must NOT hard-route to // web even with a freshness lexeme ("сегодня"/"today"/"right now") — the web synth parrots an // SEO listicle and recommends nothing (observed live). It falls to grok_direct/classifier; // genuine non-recommendation freshness queries still force web. func TestRecommendationFreshnessCarveOut(t *testing.T) { noForce := []string{ "посоветуй фильм на сегодня вечер", "что посмотреть сегодня вечером", "чем заняться сегодня", "что приготовить сегодня на ужин", "recommend a movie today", "what to watch right now", } for _, s := range noForce { if ClassifyLayer0(s).WebForce { t.Errorf("recommendation with a freshness lexeme must NOT force web: %q", s) } } stillForce := []string{ "какие новости сегодня", "курс доллара сейчас", "what's the weather today", "сегодня я думаю о смысле жизни", // non-recommendation rumination — designed cheap false-web } for _, s := range stillForce { if !ClassifyLayer0(s).WebForce { t.Errorf("non-recommendation freshness must still force web: %q", s) } } } // TestCombineFreshnessAlwaysWeb: a freshnessRe hit (WebForce) routes to web regardless of // WEB_PARANOID and regardless of the classifier verdict — the deterministic signal that // survives the classifier being down (§4.4). func TestCombineFreshnessAlwaysWeb(t *testing.T) { l0 := Layer0{Route: RouteWeb, WebForce: true, Freshness: "recent"} v := Verdict{NeedsWeb: false, Confidence: 0.1} // classifier disagrees for _, paranoid := range []bool{true, false} { if got := Combine(l0, v, paranoid).Route; got != RouteWeb { t.Errorf("freshness with paranoid=%v = %q, want web", paranoid, got) } } } // TestCombineParanoidGating is the Design-X invariant (§15): with WEB_PARANOID OFF, only // freshness routes to web — the classifier's needs_web/entity/time/lookup signals are // recorded but do NOT change the route. With it ON, those arms activate. func TestCombineParanoidGating(t *testing.T) { l0 := Layer0{Route: RouteGrokDirect, LookupHint: true} // no freshness arms := []Verdict{ {NeedsWeb: true, Verifiable: true, Confidence: 0.9}, // classifier_needs_web (needs verifiable) {EntityObscure: true, Confidence: 0.4}, // entity_obscure {TimeSensitive: true, Confidence: 0.4}, // time_sensitive {Verifiable: true, Confidence: 0.4}, // lookup_hint && verifiable } for i, v := range arms { if got := Combine(l0, v, false).Route; got != RouteGrokDirect { t.Errorf("arm %d with paranoid OFF = %q, want grok_direct (web is freshness-only)", i, got) } if got := Combine(l0, v, true).Route; got != RouteWeb { t.Errorf("arm %d with paranoid ON = %q, want web", i, got) } } } // TestCombineWebFloor: the needs_web arm only fires at/above WebNeedsWebFloor (paranoid). func TestCombineWebFloor(t *testing.T) { l0 := Layer0{Route: RouteGrokDirect} below := Verdict{NeedsWeb: true, Verifiable: true, Confidence: WebNeedsWebFloor - 0.01} atFloor := Verdict{NeedsWeb: true, Verifiable: true, Confidence: WebNeedsWebFloor} if got := Combine(l0, below, true).Route; got != RouteGrokDirect { t.Errorf("needs_web below floor = %q, want grok_direct", got) } if got := Combine(l0, atFloor, true).Route; got != RouteWeb { t.Errorf("needs_web at floor = %q, want web", got) } } // TestCombineNeedsWebRequiresVerifiable is the false-web fix (observed live): the needs_web // arm fires ONLY when the classifier also flagged a checkable named-entity fact // (verifiable). A high-confidence needs_web on a non-verifiable query — an opinion or // explanation the small flash-lite over-eagerly marked needs_web=true ("посоветуй фильм", // "объясни goroutines") — stays on grok_direct. Recency (time_sensitive/freshness) and // obscurity (entity_obscure) keep their own arms, so no genuine grounding is lost. func TestCombineNeedsWebRequiresVerifiable(t *testing.T) { l0 := Layer0{Route: RouteGrokDirect} if got := Combine(l0, Verdict{NeedsWeb: true, Verifiable: false, Confidence: 1.0}, true).Route; got != RouteGrokDirect { t.Errorf("needs_web && !verifiable = %q, want grok_direct (false-web fix)", got) } if got := Combine(l0, Verdict{NeedsWeb: true, Verifiable: true, Confidence: 0.6}, true).Route; got != RouteWeb { t.Errorf("needs_web && verifiable = %q, want web", got) } // A non-verifiable needs_web that is ALSO entity_obscure still grounds (obscure arm). if got := Combine(l0, Verdict{NeedsWeb: true, Verifiable: false, EntityObscure: true, Confidence: 0.1}, true).Route; got != RouteWeb { t.Errorf("entity_obscure must still route web regardless of verifiable, got %q", got) } } // TestCombineTrivialAgreementGate: trivial requires BOTH the Layer-0 candidate AND // classifier.trivial AND confidence ≥ TrivialFloor. A lone signal stays on grok_direct. func TestCombineTrivialAgreementGate(t *testing.T) { trivialL0 := Layer0{Route: RouteTrivial, Trivial: true} nonTrivialL0 := Layer0{Route: RouteGrokDirect} if got := Combine(trivialL0, Verdict{Trivial: true, Confidence: 0.95}, true).Route; got != RouteTrivial { t.Errorf("agreed high-confidence trivial = %q, want trivial", got) } if got := Combine(trivialL0, Verdict{Trivial: true, Confidence: 0.5}, true).Route; got != RouteGrokDirect { t.Errorf("low-confidence trivial = %q, want grok_direct (no voice leak)", got) } if got := Combine(trivialL0, Verdict{Trivial: false, Confidence: 0.95}, true).Route; got != RouteGrokDirect { t.Errorf("classifier disagrees on trivial = %q, want grok_direct", got) } // Never trust classifier.trivial alone: without the Layer-0 candidate it stays grok. if got := Combine(nonTrivialL0, Verdict{Trivial: true, Confidence: 0.99}, true).Route; got == RouteTrivial { t.Errorf("classifier.trivial alone routed to trivial; must require the Layer-0 candidate") } } // TestCombineRoadHouse is the regression: the hallucinated-cast bug. With WEB_PARANOID on // and the classifier flagging the (obscure, verifiable) entity, both the first turn and // the resolved follow-up route to web; with paranoid off they fall to grok_direct (the // canary-neutral baseline). func TestCombineRoadHouse(t *testing.T) { first := ClassifyLayer0("кто снимался в фильме дом у дороги") followup := ClassifyLayer0("2024 года") // bare; the classifier resolves via context v := Verdict{NeedsWeb: true, Verifiable: true, EntityObscure: true, Confidence: 0.7} for _, l0 := range []Layer0{first, followup} { if got := Combine(l0, v, true).Route; got != RouteWeb { t.Errorf("road house with paranoid ON = %q, want web (the hallucination fix)", got) } if got := Combine(l0, v, false).Route; got != RouteGrokDirect { t.Errorf("road house with paranoid OFF = %q, want grok_direct (baseline)", got) } } } // TestWebDecidedByAttribution: the switch order attributes the right arm (for tuning 0.55). func TestWebDecidedByAttribution(t *testing.T) { cases := []struct { l0 Layer0 v Verdict want string }{ {Layer0{WebForce: true}, Verdict{}, WebByFreshness}, {Layer0{}, Verdict{NeedsWeb: true, Verifiable: true, Confidence: 0.9}, WebByNeedsWeb}, {Layer0{}, Verdict{EntityObscure: true, Confidence: 0.1}, WebByObscure}, {Layer0{}, Verdict{TimeSensitive: true, Confidence: 0.1}, WebByTime}, {Layer0{LookupHint: true}, Verdict{Verifiable: true, Confidence: 0.1}, WebByLookupHint}, {Layer0{Route: RouteGrokDirect}, Verdict{Confidence: 0.1}, WebByNone}, } for _, c := range cases { if got := Combine(c.l0, c.v, true).WebDecidedBy; got != c.want { t.Errorf("web_decided_by(%+v,%+v) = %q, want %q", c.l0, c.v, got, c.want) } } } // TestProjectGateOnAboutProject: the project route trusts the classifier — it fires when // AboutProject is set and not otherwise. There is no Layer-0 hint requirement (live traffic // showed it blocked correct context-resolved follow-ups). Independent of WEB_PARANOID. func TestProjectGateOnAboutProject(t *testing.T) { l0 := Layer0{Route: RouteGrokDirect} for _, paranoid := range []bool{true, false} { if got := Combine(l0, Verdict{AboutProject: true}, paranoid).Route; got != RouteProject { t.Errorf("AboutProject=true (paranoid=%v) = %q, want project_then_grok", paranoid, got) } if got := Combine(l0, Verdict{AboutProject: false}, paranoid).Route; got == RouteProject { t.Errorf("AboutProject=false (paranoid=%v) routed to project; must not", paranoid) } } } // TestProjectBeatsWebArms: the project arm is case #0 — it out-prioritizes even the hard // freshness (WebForce) arm and the classifier web arms, because the curated KB, not the // web, is the authoritative source for product facts ("какие новости у Vojo" trips // freshness yet is a product question). func TestProjectBeatsWebArms(t *testing.T) { l0 := Layer0{Route: RouteWeb, WebForce: true} // freshness hit v := Verdict{AboutProject: true, NeedsWeb: true, Verifiable: true, TimeSensitive: true, Confidence: 0.9} for _, paranoid := range []bool{true, false} { got := Combine(l0, v, paranoid) if got.Route != RouteProject { t.Errorf("project must beat web arms (paranoid=%v) = %q, want project_then_grok", paranoid, got.Route) } if got.WebDecidedBy != WebByNone { t.Errorf("project route web_decided_by = %q, want none", got.WebDecidedBy) } } }