227 lines
6.2 KiB
JSON
227 lines
6.2 KiB
JSON
[
|
||
{
|
||
"name": "road house first turn (obscure cast)",
|
||
"message": "кто снимался в фильме дом у дороги",
|
||
"verdict": {
|
||
"needs_web": true,
|
||
"verifiable": true,
|
||
"entity_obscure": true,
|
||
"time_sensitive": false,
|
||
"trivial": false,
|
||
"search_query": "Дом у дороги фильм актёрский состав",
|
||
"confidence": 0.7
|
||
},
|
||
"expected_route": "web_then_grok",
|
||
"factual": true
|
||
},
|
||
{
|
||
"name": "road house follow-up (DM, resolved)",
|
||
"message": "2024 года",
|
||
"verdict": {
|
||
"needs_web": true,
|
||
"verifiable": true,
|
||
"entity_obscure": true,
|
||
"time_sensitive": false,
|
||
"trivial": false,
|
||
"search_query": "Дом у дороги 2024 фильм актёрский состав",
|
||
"confidence": 0.65
|
||
},
|
||
"expected_route": "web_then_grok",
|
||
"factual": true
|
||
},
|
||
{
|
||
"name": "weather (freshness lexeme, forced web)",
|
||
"message": "погода сегодня в Москве",
|
||
"verdict": {
|
||
"needs_web": true,
|
||
"verifiable": false,
|
||
"entity_obscure": false,
|
||
"time_sensitive": true,
|
||
"trivial": false,
|
||
"search_query": "погода сегодня Москва",
|
||
"confidence": 0.95
|
||
},
|
||
"expected_route": "web_then_grok",
|
||
"factual": false
|
||
},
|
||
{
|
||
"name": "freshness rumination (accepted designed false-web, §14.1)",
|
||
"message": "сегодня я думаю о смысле жизни",
|
||
"verdict": {
|
||
"needs_web": false,
|
||
"verifiable": false,
|
||
"entity_obscure": false,
|
||
"time_sensitive": false,
|
||
"trivial": false,
|
||
"search_query": "",
|
||
"confidence": 0.2
|
||
},
|
||
"expected_route": "web_then_grok",
|
||
"factual": false
|
||
},
|
||
{
|
||
"name": "obscure entity founder (no freshness word)",
|
||
"message": "кто основал компанию Acme Widgets",
|
||
"verdict": {
|
||
"needs_web": true,
|
||
"verifiable": true,
|
||
"entity_obscure": true,
|
||
"time_sensitive": false,
|
||
"trivial": false,
|
||
"search_query": "Acme Widgets основатель компании",
|
||
"confidence": 0.6
|
||
},
|
||
"expected_route": "web_then_grok",
|
||
"factual": true
|
||
},
|
||
{
|
||
"name": "static famous fact (author lookup)",
|
||
"message": "кто написал войну и мир",
|
||
"verdict": {
|
||
"needs_web": true,
|
||
"verifiable": true,
|
||
"entity_obscure": false,
|
||
"time_sensitive": false,
|
||
"trivial": false,
|
||
"search_query": "Война и мир автор",
|
||
"confidence": 0.62
|
||
},
|
||
"expected_route": "web_then_grok",
|
||
"factual": true
|
||
},
|
||
{
|
||
"name": "current CEO (time-sensitive, sub-floor needs_web)",
|
||
"message": "кто возглавляет Tesla",
|
||
"verdict": {
|
||
"needs_web": true,
|
||
"verifiable": true,
|
||
"entity_obscure": false,
|
||
"time_sensitive": true,
|
||
"trivial": false,
|
||
"search_query": "Tesla CEO",
|
||
"confidence": 0.5
|
||
},
|
||
"expected_route": "web_then_grok",
|
||
"factual": false
|
||
},
|
||
{
|
||
"name": "greeting (trivial, high confidence)",
|
||
"message": "привет",
|
||
"verdict": {
|
||
"needs_web": false,
|
||
"verifiable": false,
|
||
"entity_obscure": false,
|
||
"time_sensitive": false,
|
||
"trivial": true,
|
||
"search_query": "",
|
||
"confidence": 0.95
|
||
},
|
||
"expected_route": "trivial_direct",
|
||
"factual": false
|
||
},
|
||
{
|
||
"name": "ack low-confidence trivial (no voice leak → grok)",
|
||
"message": "спасибо",
|
||
"verdict": {
|
||
"needs_web": false,
|
||
"verifiable": false,
|
||
"entity_obscure": false,
|
||
"time_sensitive": false,
|
||
"trivial": true,
|
||
"search_query": "",
|
||
"confidence": 0.5
|
||
},
|
||
"expected_route": "grok_direct",
|
||
"factual": false
|
||
},
|
||
{
|
||
"name": "opinion / recommendation (safe floor)",
|
||
"message": "посоветуй фильм на вечер",
|
||
"verdict": {
|
||
"needs_web": false,
|
||
"verifiable": false,
|
||
"entity_obscure": false,
|
||
"time_sensitive": false,
|
||
"trivial": false,
|
||
"search_query": "",
|
||
"confidence": 0.82
|
||
},
|
||
"expected_route": "grok_direct",
|
||
"factual": false
|
||
},
|
||
{
|
||
"name": "code help (safe floor)",
|
||
"message": "напиши функцию сортировки на python",
|
||
"verdict": {
|
||
"needs_web": false,
|
||
"verifiable": false,
|
||
"entity_obscure": false,
|
||
"time_sensitive": false,
|
||
"trivial": false,
|
||
"search_query": "",
|
||
"confidence": 0.9
|
||
},
|
||
"expected_route": "grok_direct",
|
||
"factual": false
|
||
},
|
||
{
|
||
"name": "vague needs_web below floor (stays grok)",
|
||
"message": "что ты думаешь о криптовалютах",
|
||
"verdict": {
|
||
"needs_web": true,
|
||
"verifiable": false,
|
||
"entity_obscure": false,
|
||
"time_sensitive": false,
|
||
"trivial": false,
|
||
"search_query": "",
|
||
"confidence": 0.4
|
||
},
|
||
"expected_route": "grok_direct",
|
||
"factual": false
|
||
},
|
||
{
|
||
"name": "explanation over-flagged needs_web but NOT verifiable (false-web fix)",
|
||
"message": "объясни как работают горутины в Go",
|
||
"verdict": {
|
||
"needs_web": true,
|
||
"verifiable": false,
|
||
"entity_obscure": false,
|
||
"time_sensitive": false,
|
||
"trivial": false,
|
||
"search_query": "",
|
||
"confidence": 0.9
|
||
},
|
||
"expected_route": "grok_direct",
|
||
"factual": false
|
||
},
|
||
{
|
||
"name": "ack-prefixed long real question (not trivial, safe floor)",
|
||
"message": "спасибо, а теперь подробно объясни квантовую запутанность",
|
||
"verdict": {
|
||
"needs_web": false,
|
||
"verifiable": false,
|
||
"entity_obscure": false,
|
||
"time_sensitive": false,
|
||
"trivial": false,
|
||
"search_query": "",
|
||
"confidence": 0.85
|
||
},
|
||
"expected_route": "grok_direct",
|
||
"factual": false
|
||
},
|
||
{
|
||
"name": "bare follow-up in a GROUP (no resolvable subject → grok)",
|
||
"message": "2024 года",
|
||
"verdict": {
|
||
"needs_web": false,
|
||
"verifiable": false,
|
||
"entity_obscure": false,
|
||
"time_sensitive": false,
|
||
"trivial": false,
|
||
"search_query": "",
|
||
"confidence": 0.3
|
||
},
|
||
"expected_route": "grok_direct",
|
||
"factual": false
|
||
}
|
||
]
|