// Command routereval is the OFFLINE router-replay harness for the §11 P1 gate. It reads // a golden set of (message, recorded classifier verdict, expected route, factual flag), // replays each item through the REAL decision functions (routedecide.ClassifyLayer0 + // CombineWithFloors — the same code package main uses, never a copy), and reports the // confusion matrix + the four P1 metrics: false-grok-on-factual (the lie metric), // false-web, trivial-leak, misroute. It is fully deterministic and needs no network: it // measures the ROUTING LAYER given a verdict, so you can sweep WEB_PARANOID and the // floors instantly. (Classifier accuracy itself is a separate LIVE check — §11 P2.) // // The lie label on the web path uses the citation-presence proxy by convention: a golden // item's `factual:true` + `expected_route:web_then_grok` marks "this MUST ground"; an // LLM-judge over query+answer is the higher-fidelity option to wire later (§14.6/§15). // // Usage: // // go run ./cmd/routereval -golden cmd/routereval/golden_sample.json // go run ./cmd/routereval -golden set.json -web-floor 0.7 # sweep the needs_web floor // // NOTE: golden_sample.json is labelled for the PRODUCTION config (paranoid ON) — its // expected_route values assume the epistemic web arms are active. Running -paranoid=false // against it is a what-if sweep that WILL report NO-GO (the entity facts fall to grok by // design); it is NOT a passing baseline. To evaluate the paranoid-off behaviour, label a // separate set whose expected_route reflects freshness-only web routing. package main import ( "encoding/json" "flag" "fmt" "os" rd "vojo.chat/ai-bot/internal/routedecide" ) // goldenItem is one labelled row. Message drives the free Layer-0; Verdict is the // recorded classifier output; ExpectedRoute + Factual are the ground-truth labels. 
type goldenItem struct { Name string `json:"name"` Message string `json:"message"` Verdict rd.Verdict `json:"verdict"` ExpectedRoute string `json:"expected_route"` Factual bool `json:"factual"` // a checkable-fact query that MUST ground } func main() { goldenPath := flag.String("golden", "cmd/routereval/golden_sample.json", "path to the golden-set JSON") paranoid := flag.Bool("paranoid", true, "apply the WEB_PARANOID classifier-driven web arms") webFloor := flag.Float64("web-floor", rd.WebNeedsWebFloor, "needs_web confidence floor to sweep") trivialFloor := flag.Float64("trivial-floor", rd.TrivialFloor, "trivial confidence floor") verbose := flag.Bool("v", false, "print every item, not just the mismatches") flag.Parse() raw, err := os.ReadFile(*goldenPath) if err != nil { fmt.Fprintf(os.Stderr, "read golden set: %v\n", err) os.Exit(2) } var items []goldenItem if err := json.Unmarshal(raw, &items); err != nil { fmt.Fprintf(os.Stderr, "parse golden set: %v\n", err) os.Exit(2) } if len(items) == 0 { fmt.Fprintln(os.Stderr, "golden set is empty") os.Exit(2) } floors := rd.Floors{WebNeedsWeb: *webFloor, Trivial: *trivialFloor} fmt.Printf("routereval: %d items | paranoid=%v web-floor=%.2f trivial-floor=%.2f\n\n", len(items), *paranoid, *webFloor, *trivialFloor) var ( correct int factualWeb, factualWebMissed int // denominator/numerator of false-grok-on-factual nonWebExpected, falseWeb int nonTrivialExpected, trivialLeak int ) roadHouseSeen := false roadHousePass := true for _, it := range items { l0 := rd.ClassifyLayer0(it.Message) got := rd.CombineWithFloors(l0, it.Verdict, *paranoid, floors).Route ok := got == it.ExpectedRoute if ok { correct++ } if it.Factual && it.ExpectedRoute == rd.RouteWeb { factualWeb++ if got == rd.RouteGrokDirect { factualWebMissed++ // a confident-lie risk: a checkable fact answered from memory } } if it.ExpectedRoute != rd.RouteWeb { nonWebExpected++ if got == rd.RouteWeb { falseWeb++ } } if it.ExpectedRoute != rd.RouteTrivial { 
nonTrivialExpected++ if got == rd.RouteTrivial { trivialLeak++ } } // The Road House regression pair must pass (its name carries "road house"). if contains(it.Name, "road house") { roadHouseSeen = true if !ok { roadHousePass = false } } if *verbose || !ok { flag := "ok " if !ok { flag = "MISS" } fmt.Printf(" [%s] %-40s want=%-16s got=%-16s\n", flag, trunc(it.Name, 40), it.ExpectedRoute, got) } } rate := func(num, den int) float64 { if den == 0 { return 0 } return float64(num) / float64(den) } misroute := 1 - rate(correct, len(items)) lie := rate(factualWebMissed, factualWeb) fw := rate(falseWeb, nonWebExpected) leak := rate(trivialLeak, nonTrivialExpected) fmt.Printf("\n— metrics (§11 P1 gates) —\n") fmt.Printf(" false-grok-on-FACTUAL : %5.1f%% (%d/%d) gate < 5%% %s\n", lie*100, factualWebMissed, factualWeb, pass(lie < 0.05)) fmt.Printf(" false-web : %5.1f%% (%d/%d) gate ≤ 15%% %s\n", fw*100, falseWeb, nonWebExpected, pass(fw <= 0.15)) fmt.Printf(" trivial-leak : %5.1f%% (%d/%d) gate ~ 0%% %s\n", leak*100, trivialLeak, nonTrivialExpected, pass(leak == 0)) fmt.Printf(" misroute : %5.1f%% (%d/%d) gate < 3%% %s\n", misroute*100, len(items)-correct, len(items), pass(misroute < 0.03)) if roadHouseSeen { fmt.Printf(" road-house pair : %s\n", pass(roadHousePass)) } // Exit non-zero if any gate fails, so the harness is CI/owner-runnable as a go/no-go. if lie >= 0.05 || fw > 0.15 || leak > 0 || misroute >= 0.03 || (roadHouseSeen && !roadHousePass) { fmt.Println("\nRESULT: NO-GO (a P1 gate failed)") os.Exit(1) } fmt.Println("\nRESULT: GO") } func pass(ok bool) string { if ok { return "PASS" } return "FAIL" } func contains(s, sub string) bool { return len(sub) == 0 || indexFold(s, sub) >= 0 } // indexFold is a tiny case-insensitive substring search (avoids importing strings just // for ToLower+Index in this small tool). 
// It returns the byte offset of the first match of sub in s, or -1 when there is none.
// Only ASCII letters are folded (see toLower), so offsets into the folded copy are
// valid offsets into s as well.
func indexFold(s, sub string) int {
	ls, lsub := toLower(s), toLower(sub)
	for i := 0; i+len(lsub) <= len(ls); i++ {
		if ls[i:i+len(lsub)] == lsub {
			return i
		}
	}
	return -1
}

// toLower returns s with the ASCII letters 'A'..'Z' lowered; every other byte,
// including the bytes of multi-byte UTF-8 sequences, is copied through unchanged.
func toLower(s string) string {
	b := []byte(s)
	for i, c := range b {
		if 'A' <= c && c <= 'Z' {
			b[i] = c + ('a' - 'A')
		}
	}
	return string(b)
}

// trunc shortens s to at most n runes, replacing the tail with "…" when it has to cut.
// Strings that already fit are returned unchanged. A non-positive n yields "" instead
// of panicking on the negative slice bound that n-1 would otherwise produce.
func trunc(s string, n int) string {
	if n <= 0 {
		return ""
	}
	r := []rune(s) // count and cut in runes so multi-byte characters are never split
	if len(r) <= n {
		return s
	}
	return string(r[:n-1]) + "…"
}