From 9beb5a19bda162f88e3c0d4622f09017cf77a2ce Mon Sep 17 00:00:00 2001
From: heaven <vojochatdev@gmail.com>
Date: Sat, 6 Jun 2026 02:27:05 +0300
Subject: [PATCH] fix(ai-bot): keep recommendation requests on grok_direct
 instead of force-routing them to web on a freshness word

---
 .../internal/routedecide/routedecide.go       | 23 ++++++++++++--
 .../internal/routedecide/routedecide_test.go  | 31 +++++++++++++++++++
 apps/ai-bot/router.go                         |  6 ++--
 3 files changed, 55 insertions(+), 5 deletions(-)

diff --git a/apps/ai-bot/internal/routedecide/routedecide.go b/apps/ai-bot/internal/routedecide/routedecide.go
index 72960f22..98dbb9b5 100644
--- a/apps/ai-bot/internal/routedecide/routedecide.go
+++ b/apps/ai-bot/internal/routedecide/routedecide.go
@@ -118,6 +118,16 @@ var (
 	// The leading [\s«"„(] class is only an OPTIONAL left boundary, never a trigger.
 	lookupIntentRe_RU = regexp.MustCompile(`(?i)(^|[\s«"„(])(кто\s+(так(ой|ая|ие)|снимал(ся|ась|ись)|играл|написал|основал|изобрёл|изобрел|режисс[её]р|автор)|в\s+как(ом|ой)\s+(год[уе]|фильм[еа]|сериал[еа]|книг[еи]|игр[еы])|когда\s+(вышел|вышла|вышло|выйдет|основан[аы]?|родил(ся|ась)|умер(ла)?|состоял(ся|ась)|был[аои]?\s+выпущен)|в\s+каком\s+году|сколько\s+(лет|стоит\s+бил|серий|сезонов|эпизодов)|чем\s+(закончил|известен|знаменит))`)
 	lookupIntentRe_EN = regexp.MustCompile(`(?i)(^|[\s"'(])(who\s+(is|are|was|were|starred|played|directed|wrote|founded|invented|created)\s|in\s+(what|which)\s+(year|film|movie|show|series|book|game)\b|when\s+(did|was|were|does|is)\b.*\b(release|released|come\s+out|came\s+out|born|die|died|found|founded|launch|launched|air|aired)\b|what\s+year\b|how\s+many\s+(seasons|episodes|films|movies|books))`)
+
+	// recommendationRe — a recommendation/advice request ("посоветуй фильм", "что посмотреть",
+	// "what to watch"). Used ONLY to suppress the freshness WebForce (see ClassifyLayer0): such
+	// requests are answered from the model's own taste/knowledge, and force-routing them to web
+	// is actively harmful — the web synth ("answer strictly from the digest") makes Grok parrot a
+	// generic SEO listicle and recommend nothing (observed live: "посоветуй фильм … в этот вечер"
+	// → a "домашний спа/почитать книгу" non-answer). Kept tight: only explicit recommend/advice
+	// verbs and "что/чем/во что/куда + activity", never bare interrogatives, so it can't swallow a
+	// genuine fresh lookup. Expects lowercased input (no (?i)); only recommend/suggest are \b-anchored.
+	recommendationRe = regexp.MustCompile(`(посовету|порекоменд|что\s+(посмотреть|глянуть|почитать|приготовить|послушать|подарить|поиграть)|чем\s+(себя\s+)?заня|во\s+что\s+(поиграть|сыграть)|куда\s+(сходить|пойти)|\brecommend|\bsuggest|what\s+(to|should\s+i)\s+(watch|read|cook|do|play|listen|make|see)|what\s+(movie|film|book|show|series|game)s?\s+(to|should|do\s+you))`)
 )
 
 // NOTE: the project route used to require a Layer-0 lexical hint (literal "vojo" / an
@@ -140,7 +150,14 @@ func ClassifyLayer0(body string) Layer0 {
 		return Layer0{Route: RouteGrokDirect}
 	}
 	lookupHint := lookupIntentRe_RU.MatchString(s) || lookupIntentRe_EN.MatchString(s)
-	if freshnessRe.MatchString(s) {
+	// Freshness forces web — EXCEPT for a recommendation/advice request that merely happens to
+	// carry a freshness lexeme ("посоветуй фильм … сегодня вечером"). Those are answered from the
+	// model's own knowledge; force-routing them to web makes the synth parrot an SEO listicle and
+	// recommend nothing (see recommendationRe). They fall through to the classifier, which keeps
+	// them on grok_direct and still sends genuine "новинки"/"latest" recommendations to web via
+	// time_sensitive. A non-recommendation freshness rumination ("сегодня я думаю…") still
+	// force-routes — the accepted, designed cheap false-web.
+	if freshnessRe.MatchString(s) && !recommendationRe.MatchString(s) {
 		return Layer0{Route: RouteWeb, WebForce: true, Freshness: "recent", LookupHint: lookupHint}
 	}
 	if IsTrivial(s) {
@@ -182,7 +199,9 @@ type Combined struct {
 //     gates EXECUTION on PROJECT_KB_ENABLED (mirroring how WebEnabled gates the web route), so
 //     with the flag off a RouteProject decision cleanly falls through to grok_direct.
 //   - freshnessRe (WebForce) is a HARD web signal, always honoured (it survives the
-//     classifier being down).
+//     classifier being down). The ONE carve-out is applied upstream in ClassifyLayer0:
+//     a recommendation/advice request ("посоветуй фильм … сегодня") does NOT set WebForce,
+//     because force-routing a recommendation to web makes the synth parrot an SEO listicle.
 //   - Every OTHER web arm (the classifier's needs_web≥floor AND verifiable,
 //     entity_obscure, time_sensitive, lookupHint && verifiable) is gated by `paranoid`
 //     (WEB_PARANOID). The needs_web arm additionally requires `verifiable`: on a small
diff --git a/apps/ai-bot/internal/routedecide/routedecide_test.go b/apps/ai-bot/internal/routedecide/routedecide_test.go
index 6946880b..62212de8 100644
--- a/apps/ai-bot/internal/routedecide/routedecide_test.go
+++ b/apps/ai-bot/internal/routedecide/routedecide_test.go
@@ -101,6 +101,37 @@ func TestLookupHintFalsePositiveCorpus(t *testing.T) {
 	}
 }
 
+// TestRecommendationFreshnessCarveOut pins the Layer-0 carve-out: ClassifyLayer0 must leave
+// WebForce unset for a recommendation/advice request even when the text carries a freshness
+// lexeme ("сегодня"/"today"/"right now"), and must still set WebForce for freshness queries
+// that are not recommendations. Only the WebForce flag is asserted here, not the final Route.
+func TestRecommendationFreshnessCarveOut(t *testing.T) {
+	noForce := []string{ // recommendation/advice phrasings that also contain "сегодня"/"today"/"right now"
+		"посоветуй фильм на сегодня вечер",
+		"что посмотреть сегодня вечером",
+		"чем заняться сегодня",
+		"что приготовить сегодня на ужин",
+		"recommend a movie today",
+		"what to watch right now",
+	}
+	for _, s := range noForce { // NOTE(review): does not assert freshnessRe matches s, so a case could pass vacuously — confirm
+		if ClassifyLayer0(s).WebForce {
+			t.Errorf("recommendation with a freshness lexeme must NOT force web: %q", s)
+		}
+	}
+	stillForce := []string{ // freshness queries with no recommendation/advice wording
+		"какие новости сегодня",
+		"курс доллара сейчас",
+		"what's the weather today",
+		"сегодня я думаю о смысле жизни", // not a recommendation, just musing — the accepted cheap false-positive web route
+	}
+	for _, s := range stillForce { // the carve-out must not weaken the hard freshness signal for these inputs
+		if !ClassifyLayer0(s).WebForce {
+			t.Errorf("non-recommendation freshness must still force web: %q", s)
+		}
+	}
+}
+
 // TestCombineFreshnessAlwaysWeb: a freshnessRe hit (WebForce) routes to web regardless of
 // WEB_PARANOID and regardless of the classifier verdict — the deterministic signal that
 // survives the classifier being down (§4.4).
diff --git a/apps/ai-bot/router.go b/apps/ai-bot/router.go
index 25f556ed..59cc41c0 100644
--- a/apps/ai-bot/router.go
+++ b/apps/ai-bot/router.go
@@ -60,10 +60,10 @@ const classifierPrompt = `You are a routing classifier for a multilingual chat a
 Your main job is an EPISTEMIC judgement, not a topic label: if the assistant answered the LAST message purely from its own memory (no web), how likely is it to state a WRONG checkable fact — a name, a film/book cast, a date or release year, a number, a price, a score, a population, a who-did-what about a SPECIFIC named person/film/company/place/event? Such facts are exactly what a model misremembers and states confidently.
 
 Decide:
-- "needs_web": true if a correct answer DEPENDS on such a checkable external fact, OR on anything time-sensitive (news, "сегодня"/today, "сейчас", latest, current price/rate/weather/score). Recency is sufficient but NOT necessary — a STATIC fact like a film's cast or a country's capital also counts. When in doubt, prefer TRUE: grounding is cheap, a confident wrong fact is not. FALSE for opinions, explanations, advice, casual chat, creative writing, code help, or transforming text the user already gave you.
+- "needs_web": true if a correct answer DEPENDS on such a checkable external fact, OR on anything time-sensitive (news, "сегодня"/today, "сейчас", latest, current price/rate/weather/score). Recency is sufficient but NOT necessary — a STATIC fact like a film's cast or a country's capital also counts. When in doubt, prefer TRUE: grounding is cheap, a confident wrong fact is not. FALSE for opinions, explanations, advice, casual chat, creative writing, code help, or transforming text the user already gave you. Recommendations and suggestions — what to watch, read, cook, play, or do ("посоветуй фильм", "что посмотреть", "чем заняться вечером") — are ADVICE: answer from your own knowledge, so needs_web=FALSE even when the user says "сегодня"/"tonight"/"this evening" (that is WHEN they will act, not a need for fresh data). The ONLY exception is a request explicitly about NEW or CURRENT releases / what is on right now ("новинки", "что вышло", "what's new", "now playing", "latest") — that is needs_web=TRUE AND time_sensitive=TRUE (so a new-release recommendation actually routes to fresh web results).
 - "verifiable": true if the message is specifically a checkable fact about a NAMED entity (who acted in <film>, who is CEO of <company>, what year <event>, population of <place>) — even if not about "today". A bare follow-up like "2024 года" inherits the entity from the previous turn.
 - "entity_obscure": true if the salient entity is plausibly long-tail / not a household name (a minor film, a non-famous person, a niche product) — these are where memory fails hardest.
-- "time_sensitive": true if the answer can change over time (news, prices, weather, standings, "current"/"latest"/"now").
+- "time_sensitive": true if the answer can change over time (news, prices, weather, standings, "current"/"latest"/"now"). But a plan to DO or WATCH something "tonight"/"this evening"/"сегодня вечером" is NOT time-sensitive — the timeframe is when the user acts, not a fact that changes.
 - "trivial": true ONLY for a bare greeting, acknowledgement, or tiny arithmetic with no real question.
 - "about_project": true ONLY if the user is asking about THIS chat app itself, called Vojo — its concrete features, how to do something inside the app (calls, encryption, settings, rooms, channels), its limits, privacy, or pricing. Examples: "что ты умеешь", "what can this app do", "как включить шифрование здесь", "does Vojo support video calls". FALSE for any general-knowledge question that merely mentions a product or place name (including one coincidentally called Vojo that is not this app), and FALSE for a generic "what can an AI assistant do". When unsure, prefer FALSE.
 - "search_query": a SELF-CONTAINED web search query for this message, written in the LANGUAGE of the user's latest message (an English message → an English query; a Russian one → a Russian query) so the results match the user's language and region instead of defaulting to one country. Resolve follow-ups from context (a bare "2024 года" after discussing a film becomes "<film name> 2024 фильм актёрский состав"). For broad/region-neutral requests (e.g. "interesting news") keep it general and international, don't narrow it to a single country. Empty string ONLY if both needs_web and verifiable are false.
@@ -139,7 +139,7 @@ func (b *Bot) routeLayer1(ctx context.Context, rcx string, l0 rd.Layer0, cost *C
 	resp, err := b.gemini.Complete(ctx, LLMRequest{
 		Model:       b.cfg.GeminiModel,
 		Messages:    []Message{{Role: "user", Content: classifierPrompt + rcx}},
-		MaxTokens:   110, // was 80; the schema grew (about_project added) — must not truncate
+		MaxTokens:   160, // headroom for a long Cyrillic context-resolved search_query; a cut mid-query yields invalid JSON → safe degrade to the Layer-0 heuristic, but we'd lose the verdict, so leave slack
 		Temperature: 0,
 	})
 	if err != nil {