From 9beb5a19bda162f88e3c0d4622f09017cf77a2ce Mon Sep 17 00:00:00 2001 From: heaven Date: Sat, 6 Jun 2026 02:27:05 +0300 Subject: [PATCH] fix(ai-bot): keep recommendation requests on grok_direct instead of force-routing them to web on a freshness word --- .../internal/routedecide/routedecide.go | 23 ++++++++++++-- .../internal/routedecide/routedecide_test.go | 31 +++++++++++++++++++ apps/ai-bot/router.go | 6 ++-- 3 files changed, 55 insertions(+), 5 deletions(-) diff --git a/apps/ai-bot/internal/routedecide/routedecide.go b/apps/ai-bot/internal/routedecide/routedecide.go index 72960f22..98dbb9b5 100644 --- a/apps/ai-bot/internal/routedecide/routedecide.go +++ b/apps/ai-bot/internal/routedecide/routedecide.go @@ -118,6 +118,16 @@ var ( // The leading [\s«"„(] class is only an OPTIONAL left boundary, never a trigger. lookupIntentRe_RU = regexp.MustCompile(`(?i)(^|[\s«"„(])(кто\s+(так(ой|ая|ие)|снимал(ся|ась|ись)|играл|написал|основал|изобрёл|изобрел|режисс[её]р|автор)|в\s+как(ом|ой)\s+(год[уе]|фильм[еа]|сериал[еа]|книг[еи]|игр[еы])|когда\s+(вышел|вышла|вышло|выйдет|основан[аы]?|родил(ся|ась)|умер(ла)?|состоял(ся|ась)|был[аои]?\s+выпущен)|в\s+каком\s+году|сколько\s+(лет|стоит\s+бил|серий|сезонов|эпизодов)|чем\s+(закончил|известен|знаменит))`) lookupIntentRe_EN = regexp.MustCompile(`(?i)(^|[\s"'(])(who\s+(is|are|was|were|starred|played|directed|wrote|founded|invented|created)\s|in\s+(what|which)\s+(year|film|movie|show|series|book|game)\b|when\s+(did|was|were|does|is)\b.*\b(release|released|come\s+out|came\s+out|born|die|died|found|founded|launch|launched|air|aired)\b|what\s+year\b|how\s+many\s+(seasons|episodes|films|movies|books))`) + + // recommendationRe — a recommendation/advice request ("посоветуй фильм", "что посмотреть", + // "what to watch"). 
Used ONLY to suppress the freshness WebForce (see ClassifyLayer0): such + // requests are answered from the model's own taste/knowledge, and force-routing them to web + // is actively harmful — the web synth ("answer strictly from the digest") makes Grok parrot a + // generic SEO listicle and recommend nothing (observed live: "посоветуй фильм … в этот вечер" + // → a "домашний спа/почитать книгу" non-answer). Kept tight: only explicit recommend/advice + // verbs and "что/чем/во что/куда + activity", never bare interrogatives, so it can't swallow a + // genuine fresh lookup. Cyrillic stems unanchored, English \b-anchored; (?i) like the sibling lookupIntentRe_* patterns, so matching does not depend on the caller lowercasing the input. + recommendationRe = regexp.MustCompile(`(?i)(посовету|порекоменд|что\s+(посмотреть|глянуть|почитать|приготовить|послушать|подарить|поиграть)|чем\s+(себя\s+)?заня|во\s+что\s+(поиграть|сыграть)|куда\s+(сходить|пойти)|\brecommend|\bsuggest|what\s+(to|should\s+i)\s+(watch|read|cook|do|play|listen|make|see)|what\s+(movie|film|book|show|series|game)s?\s+(to|should|do\s+you))`) ) // NOTE: the project route used to require a Layer-0 lexical hint (literal "vojo" / an @@ -140,7 +150,14 @@ func ClassifyLayer0(body string) Layer0 { return Layer0{Route: RouteGrokDirect} } lookupHint := lookupIntentRe_RU.MatchString(s) || lookupIntentRe_EN.MatchString(s) - if freshnessRe.MatchString(s) { + // Freshness forces web — EXCEPT for a recommendation/advice request that merely happens to + // carry a freshness lexeme ("посоветуй фильм … сегодня вечером"). Those are answered from the + // model's own knowledge; force-routing them to web makes the synth parrot an SEO listicle and + // recommend nothing (see recommendationRe). They fall through to the classifier, which keeps + // them on grok_direct and still sends genuine "новинки"/"latest" recommendations to web via + // time_sensitive. A non-recommendation freshness rumination ("сегодня я думаю…") still + // force-routes — the accepted, designed cheap false-web. 
+ if freshnessRe.MatchString(s) && !recommendationRe.MatchString(s) { return Layer0{Route: RouteWeb, WebForce: true, Freshness: "recent", LookupHint: lookupHint} } if IsTrivial(s) { @@ -182,7 +199,9 @@ type Combined struct { // gates EXECUTION on PROJECT_KB_ENABLED (mirroring how WebEnabled gates the web route), so // with the flag off a RouteProject decision cleanly falls through to grok_direct. // - freshnessRe (WebForce) is a HARD web signal, always honoured (it survives the -// classifier being down). +// classifier being down). The ONE carve-out is applied upstream in ClassifyLayer0: +// a recommendation/advice request ("посоветуй фильм … сегодня") does NOT set WebForce, +// because force-routing a recommendation to web makes the synth parrot an SEO listicle. // - Every OTHER web arm (the classifier's needs_web≥floor AND verifiable, // entity_obscure, time_sensitive, lookupHint && verifiable) is gated by `paranoid` // (WEB_PARANOID). The needs_web arm additionally requires `verifiable`: on a small diff --git a/apps/ai-bot/internal/routedecide/routedecide_test.go b/apps/ai-bot/internal/routedecide/routedecide_test.go index 6946880b..62212de8 100644 --- a/apps/ai-bot/internal/routedecide/routedecide_test.go +++ b/apps/ai-bot/internal/routedecide/routedecide_test.go @@ -101,6 +101,37 @@ func TestLookupHintFalsePositiveCorpus(t *testing.T) { } } +// TestRecommendationFreshnessCarveOut: a recommendation/advice request must NOT hard-route to +// web even with a freshness lexeme ("сегодня"/"today"/"right now") — the web synth parrots an +// SEO listicle and recommends nothing (observed live). It falls to grok_direct/classifier; +// genuine non-recommendation freshness queries still force web. 
+func TestRecommendationFreshnessCarveOut(t *testing.T) { + noForce := []string{ + "посоветуй фильм на сегодня вечер", + "что посмотреть сегодня вечером", + "чем заняться сегодня", + "что приготовить сегодня на ужин", + "recommend a movie today", + "what to watch right now", + } + for _, s := range noForce { + if ClassifyLayer0(s).WebForce { + t.Errorf("recommendation with a freshness lexeme must NOT force web: %q", s) + } + } + stillForce := []string{ + "какие новости сегодня", + "курс доллара сейчас", + "what's the weather today", + "сегодня я думаю о смысле жизни", // non-recommendation rumination — designed cheap false-web + } + for _, s := range stillForce { + if !ClassifyLayer0(s).WebForce { + t.Errorf("non-recommendation freshness must still force web: %q", s) + } + } +} + // TestCombineFreshnessAlwaysWeb: a freshnessRe hit (WebForce) routes to web regardless of // WEB_PARANOID and regardless of the classifier verdict — the deterministic signal that // survives the classifier being down (§4.4). diff --git a/apps/ai-bot/router.go b/apps/ai-bot/router.go index 25f556ed..59cc41c0 100644 --- a/apps/ai-bot/router.go +++ b/apps/ai-bot/router.go @@ -60,10 +60,10 @@ const classifierPrompt = `You are a routing classifier for a multilingual chat a Your main job is an EPISTEMIC judgement, not a topic label: if the assistant answered the LAST message purely from its own memory (no web), how likely is it to state a WRONG checkable fact — a name, a film/book cast, a date or release year, a number, a price, a score, a population, a who-did-what about a SPECIFIC named person/film/company/place/event? Such facts are exactly what a model misremembers and states confidently. Decide: -- "needs_web": true if a correct answer DEPENDS on such a checkable external fact, OR on anything time-sensitive (news, "сегодня"/today, "сейчас", latest, current price/rate/weather/score). 
Recency is sufficient but NOT necessary — a STATIC fact like a film's cast or a country's capital also counts. When in doubt, prefer TRUE: grounding is cheap, a confident wrong fact is not. FALSE for opinions, explanations, advice, casual chat, creative writing, code help, or transforming text the user already gave you. +- "needs_web": true if a correct answer DEPENDS on such a checkable external fact, OR on anything time-sensitive (news, "сегодня"/today, "сейчас", latest, current price/rate/weather/score). Recency is sufficient but NOT necessary — a STATIC fact like a film's cast or a country's capital also counts. When in doubt, prefer TRUE: grounding is cheap, a confident wrong fact is not. FALSE for opinions, explanations, advice, casual chat, creative writing, code help, or transforming text the user already gave you. Recommendations and suggestions — what to watch, read, cook, play, or do ("посоветуй фильм", "что посмотреть", "чем заняться вечером") — are ADVICE: answer from your own knowledge, so needs_web=FALSE even when the user says "сегодня"/"tonight"/"this evening" (that is WHEN they will act, not a need for fresh data). The ONLY exception is a request explicitly about NEW or CURRENT releases / what is on right now ("новинки", "что вышло", "what's new", "now playing", "latest") — that is needs_web=TRUE AND time_sensitive=TRUE (so a new-release recommendation actually routes to fresh web results). - "verifiable": true if the message is specifically a checkable fact about a NAMED entity (who acted in , who is CEO of , what year , population of ) — even if not about "today". A bare follow-up like "2024 года" inherits the entity from the previous turn. - "entity_obscure": true if the salient entity is plausibly long-tail / not a household name (a minor film, a non-famous person, a niche product) — these are where memory fails hardest. -- "time_sensitive": true if the answer can change over time (news, prices, weather, standings, "current"/"latest"/"now"). 
+- "time_sensitive": true if the answer can change over time (news, prices, weather, standings, "current"/"latest"/"now"). But a plan to DO or WATCH something "tonight"/"this evening"/"сегодня вечером" is NOT time-sensitive — the timeframe is when the user acts, not a fact that changes. - "trivial": true ONLY for a bare greeting, acknowledgement, or tiny arithmetic with no real question. - "about_project": true ONLY if the user is asking about THIS chat app itself, called Vojo — its concrete features, how to do something inside the app (calls, encryption, settings, rooms, channels), its limits, privacy, or pricing. Examples: "что ты умеешь", "what can this app do", "как включить шифрование здесь", "does Vojo support video calls". FALSE for any general-knowledge question that merely mentions a product or place name (including one coincidentally called Vojo that is not this app), and FALSE for a generic "what can an AI assistant do". When unsure, prefer FALSE. - "search_query": a SELF-CONTAINED web search query for this message, written in the LANGUAGE of the user's latest message (an English message → an English query; a Russian one → a Russian query) so the results match the user's language and region instead of defaulting to one country. Resolve follow-ups from context (a bare "2024 года" after discussing a film becomes " 2024 фильм актёрский состав"). For broad/region-neutral requests (e.g. "interesting news") keep it general and international, don't narrow it to a single country. Empty string ONLY if both needs_web and verifiable are false. 
@@ -139,7 +139,7 @@ func (b *Bot) routeLayer1(ctx context.Context, rcx string, l0 rd.Layer0, cost *C resp, err := b.gemini.Complete(ctx, LLMRequest{ Model: b.cfg.GeminiModel, Messages: []Message{{Role: "user", Content: classifierPrompt + rcx}}, - MaxTokens: 110, // was 80; the schema grew (about_project added) — must not truncate + MaxTokens: 160, // headroom for a long Cyrillic context-resolved search_query; a cut mid-query yields invalid JSON → safe degrade to the Layer-0 heuristic, but we'd lose the verdict, so leave slack Temperature: 0, }) if err != nil {