vojo/apps/ai-bot/cmd/routereval/golden_sample.json

291 lines
7.9 KiB
JSON
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

[
{
"name": "road house first turn (obscure cast)",
"message": "кто снимался в фильме дом у дороги",
"verdict": {
"needs_web": true,
"verifiable": true,
"entity_obscure": true,
"time_sensitive": false,
"trivial": false,
"search_query": "Дом у дороги фильм актёрский состав",
"confidence": 0.7
},
"expected_route": "web_then_grok",
"factual": true
},
{
"name": "road house follow-up (DM, resolved)",
"message": "2024 года",
"verdict": {
"needs_web": true,
"verifiable": true,
"entity_obscure": true,
"time_sensitive": false,
"trivial": false,
"search_query": "Дом у дороги 2024 фильм актёрский состав",
"confidence": 0.65
},
"expected_route": "web_then_grok",
"factual": true
},
{
"name": "weather (freshness lexeme, forced web)",
"message": "погода сегодня в Москве",
"verdict": {
"needs_web": true,
"verifiable": false,
"entity_obscure": false,
"time_sensitive": true,
"trivial": false,
"search_query": "погода сегодня Москва",
"confidence": 0.95
},
"expected_route": "web_then_grok",
"factual": false
},
{
"name": "freshness rumination (accepted designed false-web, §14.1)",
"message": "сегодня я думаю о смысле жизни",
"verdict": {
"needs_web": false,
"verifiable": false,
"entity_obscure": false,
"time_sensitive": false,
"trivial": false,
"search_query": "",
"confidence": 0.2
},
"expected_route": "web_then_grok",
"factual": false
},
{
"name": "obscure entity founder (no freshness word)",
"message": "кто основал компанию Acme Widgets",
"verdict": {
"needs_web": true,
"verifiable": true,
"entity_obscure": true,
"time_sensitive": false,
"trivial": false,
"search_query": "Acme Widgets основатель компании",
"confidence": 0.6
},
"expected_route": "web_then_grok",
"factual": true
},
{
"name": "static famous fact (author lookup)",
"message": "кто написал войну и мир",
"verdict": {
"needs_web": true,
"verifiable": true,
"entity_obscure": false,
"time_sensitive": false,
"trivial": false,
"search_query": "Война и мир автор",
"confidence": 0.62
},
"expected_route": "web_then_grok",
"factual": true
},
{
"name": "current CEO (time-sensitive, sub-floor needs_web)",
"message": "кто возглавляет Tesla",
"verdict": {
"needs_web": true,
"verifiable": true,
"entity_obscure": false,
"time_sensitive": true,
"trivial": false,
"search_query": "Tesla CEO",
"confidence": 0.5
},
"expected_route": "web_then_grok",
"factual": false
},
{
"name": "greeting (trivial, high confidence)",
"message": "привет",
"verdict": {
"needs_web": false,
"verifiable": false,
"entity_obscure": false,
"time_sensitive": false,
"trivial": true,
"search_query": "",
"confidence": 0.95
},
"expected_route": "trivial_direct",
"factual": false
},
{
"name": "ack low-confidence trivial (no voice leak → grok)",
"message": "спасибо",
"verdict": {
"needs_web": false,
"verifiable": false,
"entity_obscure": false,
"time_sensitive": false,
"trivial": true,
"search_query": "",
"confidence": 0.5
},
"expected_route": "grok_direct",
"factual": false
},
{
"name": "opinion / recommendation (safe floor)",
"message": "посоветуй фильм на вечер",
"verdict": {
"needs_web": false,
"verifiable": false,
"entity_obscure": false,
"time_sensitive": false,
"trivial": false,
"search_query": "",
"confidence": 0.82
},
"expected_route": "grok_direct",
"factual": false
},
{
"name": "code help (safe floor)",
"message": "напиши функцию сортировки на python",
"verdict": {
"needs_web": false,
"verifiable": false,
"entity_obscure": false,
"time_sensitive": false,
"trivial": false,
"search_query": "",
"confidence": 0.9
},
"expected_route": "grok_direct",
"factual": false
},
{
"name": "vague needs_web below floor (stays grok)",
"message": "что ты думаешь о криптовалютах",
"verdict": {
"needs_web": true,
"verifiable": false,
"entity_obscure": false,
"time_sensitive": false,
"trivial": false,
"search_query": "",
"confidence": 0.4
},
"expected_route": "grok_direct",
"factual": false
},
{
"name": "explanation over-flagged needs_web but NOT verifiable (false-web fix)",
"message": "объясни как работают горутины в Go",
"verdict": {
"needs_web": true,
"verifiable": false,
"entity_obscure": false,
"time_sensitive": false,
"trivial": false,
"search_query": "",
"confidence": 0.9
},
"expected_route": "grok_direct",
"factual": false
},
{
"name": "ack-prefixed long real question (not trivial, safe floor)",
"message": "спасибо, а теперь подробно объясни квантовую запутанность",
"verdict": {
"needs_web": false,
"verifiable": false,
"entity_obscure": false,
"time_sensitive": false,
"trivial": false,
"search_query": "",
"confidence": 0.85
},
"expected_route": "grok_direct",
"factual": false
},
{
"name": "bare follow-up in a GROUP (no resolvable subject → grok)",
"message": "2024 года",
"verdict": {
"needs_web": false,
"verifiable": false,
"entity_obscure": false,
"time_sensitive": false,
"trivial": false,
"search_query": "",
"confidence": 0.3
},
"expected_route": "grok_direct",
"factual": false
},
{
"name": "project: what can Vojo do (name hint + about_project)",
"message": "что умеет vojo",
"verdict": {
"needs_web": false,
"verifiable": false,
"entity_obscure": false,
"time_sensitive": false,
"trivial": false,
"about_project": true,
"search_query": "",
"confidence": 0.9
},
"expected_route": "project_then_grok",
"factual": false
},
{
"name": "project: app how-to (intent hint + about_project)",
"message": "как в этом приложении включить шифрование",
"verdict": {
"needs_web": false,
"verifiable": false,
"entity_obscure": false,
"time_sensitive": false,
"trivial": false,
"about_project": true,
"search_query": "",
"confidence": 0.85
},
"expected_route": "project_then_grok",
"factual": false
},
{
"name": "venting about the app, classifier says not-about-project (about_project=false → grok)",
"message": "vojo упал опять?",
"verdict": {
"needs_web": false,
"verifiable": false,
"entity_obscure": false,
"time_sensitive": false,
"trivial": false,
"about_project": false,
"search_query": "",
"confidence": 0.4
},
"expected_route": "grok_direct",
"factual": false
},
{
"name": "project: context follow-up, no literal name (classifier resolves it)",
"message": "Про этот",
"verdict": {
"needs_web": false,
"verifiable": false,
"entity_obscure": false,
"time_sensitive": false,
"trivial": false,
"about_project": true,
"search_query": "",
"confidence": 1.0
},
"expected_route": "project_then_grok",
"factual": false
}
]