fix(ai-bot): keep recommendation requests on grok_direct instead of force-routing them to web on a freshness word
This commit is contained in:
parent
ef5a9f5013
commit
9beb5a19bd
3 changed files with 55 additions and 5 deletions
|
|
@ -118,6 +118,16 @@ var (
|
||||||
// The leading [\s«"„(] class is only an OPTIONAL left boundary, never a trigger.
|
// The leading [\s«"„(] class is only an OPTIONAL left boundary, never a trigger.
|
||||||
lookupIntentRe_RU = regexp.MustCompile(`(?i)(^|[\s«"„(])(кто\s+(так(ой|ая|ие)|снимал(ся|ась|ись)|играл|написал|основал|изобрёл|изобрел|режисс[её]р|автор)|в\s+как(ом|ой)\s+(год[уе]|фильм[еа]|сериал[еа]|книг[еи]|игр[еы])|когда\s+(вышел|вышла|вышло|выйдет|основан[аы]?|родил(ся|ась)|умер(ла)?|состоял(ся|ась)|был[аои]?\s+выпущен)|в\s+каком\s+году|сколько\s+(лет|стоит\s+бил|серий|сезонов|эпизодов)|чем\s+(закончил|известен|знаменит))`)
|
lookupIntentRe_RU = regexp.MustCompile(`(?i)(^|[\s«"„(])(кто\s+(так(ой|ая|ие)|снимал(ся|ась|ись)|играл|написал|основал|изобрёл|изобрел|режисс[её]р|автор)|в\s+как(ом|ой)\s+(год[уе]|фильм[еа]|сериал[еа]|книг[еи]|игр[еы])|когда\s+(вышел|вышла|вышло|выйдет|основан[аы]?|родил(ся|ась)|умер(ла)?|состоял(ся|ась)|был[аои]?\s+выпущен)|в\s+каком\s+году|сколько\s+(лет|стоит\s+бил|серий|сезонов|эпизодов)|чем\s+(закончил|известен|знаменит))`)
|
||||||
lookupIntentRe_EN = regexp.MustCompile(`(?i)(^|[\s"'(])(who\s+(is|are|was|were|starred|played|directed|wrote|founded|invented|created)\s|in\s+(what|which)\s+(year|film|movie|show|series|book|game)\b|when\s+(did|was|were|does|is)\b.*\b(release|released|come\s+out|came\s+out|born|die|died|found|founded|launch|launched|air|aired)\b|what\s+year\b|how\s+many\s+(seasons|episodes|films|movies|books))`)
|
lookupIntentRe_EN = regexp.MustCompile(`(?i)(^|[\s"'(])(who\s+(is|are|was|were|starred|played|directed|wrote|founded|invented|created)\s|in\s+(what|which)\s+(year|film|movie|show|series|book|game)\b|when\s+(did|was|were|does|is)\b.*\b(release|released|come\s+out|came\s+out|born|die|died|found|founded|launch|launched|air|aired)\b|what\s+year\b|how\s+many\s+(seasons|episodes|films|movies|books))`)
|
||||||
|
|
||||||
|
// recommendationRe — a recommendation/advice request ("посоветуй фильм", "что посмотреть",
|
||||||
|
// "what to watch"). Used ONLY to suppress the freshness WebForce (see ClassifyLayer0): such
|
||||||
|
// requests are answered from the model's own taste/knowledge, and force-routing them to web
|
||||||
|
// is actively harmful — the web synth ("answer strictly from the digest") makes Grok parrot a
|
||||||
|
// generic SEO listicle and recommend nothing (observed live: "посоветуй фильм … в этот вечер"
|
||||||
|
// → a "домашний спа/почитать книгу" non-answer). Kept tight: only explicit recommend/advice
|
||||||
|
// verbs and "что/чем/во что/куда + activity", never bare interrogatives, so it can't swallow a
|
||||||
|
// genuine fresh lookup. Cyrillic stems are unanchored (the input is lowercased); of the English arms only recommend/suggest are \b-anchored — the "what …" phrases are not.
|
||||||
|
recommendationRe = regexp.MustCompile(`(посовету|порекоменд|что\s+(посмотреть|глянуть|почитать|приготовить|послушать|подарить|поиграть)|чем\s+(себя\s+)?заня|во\s+что\s+(поиграть|сыграть)|куда\s+(сходить|пойти)|\brecommend|\bsuggest|what\s+(to|should\s+i)\s+(watch|read|cook|do|play|listen|make|see)|what\s+(movie|film|book|show|series|game)s?\s+(to|should|do\s+you))`)
|
||||||
)
|
)
|
||||||
|
|
||||||
// NOTE: the project route used to require a Layer-0 lexical hint (literal "vojo" / an
|
// NOTE: the project route used to require a Layer-0 lexical hint (literal "vojo" / an
|
||||||
|
|
@ -140,7 +150,14 @@ func ClassifyLayer0(body string) Layer0 {
|
||||||
return Layer0{Route: RouteGrokDirect}
|
return Layer0{Route: RouteGrokDirect}
|
||||||
}
|
}
|
||||||
lookupHint := lookupIntentRe_RU.MatchString(s) || lookupIntentRe_EN.MatchString(s)
|
lookupHint := lookupIntentRe_RU.MatchString(s) || lookupIntentRe_EN.MatchString(s)
|
||||||
if freshnessRe.MatchString(s) {
|
// Freshness forces web — EXCEPT for a recommendation/advice request that merely happens to
|
||||||
|
// carry a freshness lexeme ("посоветуй фильм … сегодня вечером"). Those are answered from the
|
||||||
|
// model's own knowledge; force-routing them to web makes the synth parrot an SEO listicle and
|
||||||
|
// recommend nothing (see recommendationRe). They fall through to the classifier, which keeps
|
||||||
|
// them on grok_direct and still sends genuine "новинки"/"latest" recommendations to web via
|
||||||
|
// time_sensitive. A non-recommendation freshness rumination ("сегодня я думаю…") still
|
||||||
|
// force-routes — the accepted, designed cheap false-web.
|
||||||
|
if freshnessRe.MatchString(s) && !recommendationRe.MatchString(s) {
|
||||||
return Layer0{Route: RouteWeb, WebForce: true, Freshness: "recent", LookupHint: lookupHint}
|
return Layer0{Route: RouteWeb, WebForce: true, Freshness: "recent", LookupHint: lookupHint}
|
||||||
}
|
}
|
||||||
if IsTrivial(s) {
|
if IsTrivial(s) {
|
||||||
|
|
@ -182,7 +199,9 @@ type Combined struct {
|
||||||
// gates EXECUTION on PROJECT_KB_ENABLED (mirroring how WebEnabled gates the web route), so
|
// gates EXECUTION on PROJECT_KB_ENABLED (mirroring how WebEnabled gates the web route), so
|
||||||
// with the flag off a RouteProject decision cleanly falls through to grok_direct.
|
// with the flag off a RouteProject decision cleanly falls through to grok_direct.
|
||||||
// - freshnessRe (WebForce) is a HARD web signal, always honoured (it survives the
|
// - freshnessRe (WebForce) is a HARD web signal, always honoured (it survives the
|
||||||
// classifier being down).
|
// classifier being down). The ONE carve-out is applied upstream in ClassifyLayer0:
|
||||||
|
// a recommendation/advice request ("посоветуй фильм … сегодня") does NOT set WebForce,
|
||||||
|
// because force-routing a recommendation to web makes the synth parrot an SEO listicle.
|
||||||
// - Every OTHER web arm (the classifier's needs_web≥floor AND verifiable,
|
// - Every OTHER web arm (the classifier's needs_web≥floor AND verifiable,
|
||||||
// entity_obscure, time_sensitive, lookupHint && verifiable) is gated by `paranoid`
|
// entity_obscure, time_sensitive, lookupHint && verifiable) is gated by `paranoid`
|
||||||
// (WEB_PARANOID). The needs_web arm additionally requires `verifiable`: on a small
|
// (WEB_PARANOID). The needs_web arm additionally requires `verifiable`: on a small
|
||||||
|
|
|
||||||
|
|
@ -101,6 +101,37 @@ func TestLookupHintFalsePositiveCorpus(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TestRecommendationFreshnessCarveOut: a recommendation/advice request must NOT hard-route to
|
||||||
|
// web even with a freshness lexeme ("сегодня"/"today"/"right now") — the web synth parrots an
|
||||||
|
// SEO listicle and recommends nothing (observed live). It falls to grok_direct/classifier;
|
||||||
|
// genuine non-recommendation freshness queries still force web.
|
||||||
|
func TestRecommendationFreshnessCarveOut(t *testing.T) {
|
||||||

// noForce: recommendation/advice phrasings (RU and EN) that each also carry a time word
// such as "сегодня", "today" or "right now". ClassifyLayer0 must leave WebForce unset
// for every one of them.
noForce := []string{
|
||||||

"посоветуй фильм на сегодня вечер",
|
||||||

"что посмотреть сегодня вечером",
|
||||||

"чем заняться сегодня",
|
||||||

"что приготовить сегодня на ужин",
|
||||||

"recommend a movie today",
|
||||||

"what to watch right now",
|
||||||

}
|
||||||

for _, s := range noForce {
|
||||||

// Only the WebForce flag is checked; the Route value itself is not asserted here.
if ClassifyLayer0(s).WebForce {
|
||||||

t.Errorf("recommendation with a freshness lexeme must NOT force web: %q", s)
|
||||||

}
|
||||||

}
|
||||||

// stillForce: messages with a time word but no recommendation phrasing. The carve-out
// must not apply to them, so ClassifyLayer0 is expected to set WebForce.
stillForce := []string{
|
||||||

"какие новости сегодня",
|
||||||

"курс доллара сейчас",
|
||||||

"what's the weather today",
|
||||||

"сегодня я думаю о смысле жизни", // non-recommendation rumination — designed cheap false-web
|
||||||

}
|
||||||

for _, s := range stillForce {
|
||||||

// t.Errorf (not Fatalf) so every failing phrase is reported in a single run.
if !ClassifyLayer0(s).WebForce {
|
||||||

t.Errorf("non-recommendation freshness must still force web: %q", s)
|
||||||

}
|
||||||

}
|
||||||

}
|
||||||
|
|
||||||
// TestCombineFreshnessAlwaysWeb: a freshnessRe hit (WebForce) routes to web regardless of
|
// TestCombineFreshnessAlwaysWeb: a freshnessRe hit (WebForce) routes to web regardless of
|
||||||
// WEB_PARANOID and regardless of the classifier verdict — the deterministic signal that
|
// WEB_PARANOID and regardless of the classifier verdict — the deterministic signal that
|
||||||
// survives the classifier being down (§4.4).
|
// survives the classifier being down (§4.4).
|
||||||
|
|
|
||||||
|
|
@ -60,10 +60,10 @@ const classifierPrompt = `You are a routing classifier for a multilingual chat a
|
||||||
Your main job is an EPISTEMIC judgement, not a topic label: if the assistant answered the LAST message purely from its own memory (no web), how likely is it to state a WRONG checkable fact — a name, a film/book cast, a date or release year, a number, a price, a score, a population, a who-did-what about a SPECIFIC named person/film/company/place/event? Such facts are exactly what a model misremembers and states confidently.
|
Your main job is an EPISTEMIC judgement, not a topic label: if the assistant answered the LAST message purely from its own memory (no web), how likely is it to state a WRONG checkable fact — a name, a film/book cast, a date or release year, a number, a price, a score, a population, a who-did-what about a SPECIFIC named person/film/company/place/event? Such facts are exactly what a model misremembers and states confidently.
|
||||||
|
|
||||||
Decide:
|
Decide:
|
||||||
- "needs_web": true if a correct answer DEPENDS on such a checkable external fact, OR on anything time-sensitive (news, "сегодня"/today, "сейчас", latest, current price/rate/weather/score). Recency is sufficient but NOT necessary — a STATIC fact like a film's cast or a country's capital also counts. When in doubt, prefer TRUE: grounding is cheap, a confident wrong fact is not. FALSE for opinions, explanations, advice, casual chat, creative writing, code help, or transforming text the user already gave you.
|
- "needs_web": true if a correct answer DEPENDS on such a checkable external fact, OR on anything time-sensitive (news, "сегодня"/today, "сейчас", latest, current price/rate/weather/score). Recency is sufficient but NOT necessary — a STATIC fact like a film's cast or a country's capital also counts. When in doubt, prefer TRUE: grounding is cheap, a confident wrong fact is not. FALSE for opinions, explanations, advice, casual chat, creative writing, code help, or transforming text the user already gave you. Recommendations and suggestions — what to watch, read, cook, play, or do ("посоветуй фильм", "что посмотреть", "чем заняться вечером") — are ADVICE: answer from your own knowledge, so needs_web=FALSE even when the user says "сегодня"/"tonight"/"this evening" (that is WHEN they will act, not a need for fresh data). The ONLY exception is a request explicitly about NEW or CURRENT releases / what is on right now ("новинки", "что вышло", "what's new", "now playing", "latest") — that is needs_web=TRUE AND time_sensitive=TRUE (so a new-release recommendation actually routes to fresh web results).
|
||||||
- "verifiable": true if the message is specifically a checkable fact about a NAMED entity (who acted in <film>, who is CEO of <company>, what year <event>, population of <place>) — even if not about "today". A bare follow-up like "2024 года" inherits the entity from the previous turn.
|
- "verifiable": true if the message is specifically a checkable fact about a NAMED entity (who acted in <film>, who is CEO of <company>, what year <event>, population of <place>) — even if not about "today". A bare follow-up like "2024 года" inherits the entity from the previous turn.
|
||||||
- "entity_obscure": true if the salient entity is plausibly long-tail / not a household name (a minor film, a non-famous person, a niche product) — these are where memory fails hardest.
|
- "entity_obscure": true if the salient entity is plausibly long-tail / not a household name (a minor film, a non-famous person, a niche product) — these are where memory fails hardest.
|
||||||
- "time_sensitive": true if the answer can change over time (news, prices, weather, standings, "current"/"latest"/"now").
|
- "time_sensitive": true if the answer can change over time (news, prices, weather, standings, "current"/"latest"/"now"). But a plan to DO or WATCH something "tonight"/"this evening"/"сегодня вечером" is NOT time-sensitive — the timeframe is when the user acts, not a fact that changes.
|
||||||
- "trivial": true ONLY for a bare greeting, acknowledgement, or tiny arithmetic with no real question.
|
- "trivial": true ONLY for a bare greeting, acknowledgement, or tiny arithmetic with no real question.
|
||||||
- "about_project": true ONLY if the user is asking about THIS chat app itself, called Vojo — its concrete features, how to do something inside the app (calls, encryption, settings, rooms, channels), its limits, privacy, or pricing. Examples: "что ты умеешь", "what can this app do", "как включить шифрование здесь", "does Vojo support video calls". FALSE for any general-knowledge question that merely mentions a product or place name (including one coincidentally called Vojo that is not this app), and FALSE for a generic "what can an AI assistant do". When unsure, prefer FALSE.
|
- "about_project": true ONLY if the user is asking about THIS chat app itself, called Vojo — its concrete features, how to do something inside the app (calls, encryption, settings, rooms, channels), its limits, privacy, or pricing. Examples: "что ты умеешь", "what can this app do", "как включить шифрование здесь", "does Vojo support video calls". FALSE for any general-knowledge question that merely mentions a product or place name (including one coincidentally called Vojo that is not this app), and FALSE for a generic "what can an AI assistant do". When unsure, prefer FALSE.
|
||||||
- "search_query": a SELF-CONTAINED web search query for this message, written in the LANGUAGE of the user's latest message (an English message → an English query; a Russian one → a Russian query) so the results match the user's language and region instead of defaulting to one country. Resolve follow-ups from context (a bare "2024 года" after discussing a film becomes "<film name> 2024 фильм актёрский состав"). For broad/region-neutral requests (e.g. "interesting news") keep it general and international, don't narrow it to a single country. Empty string ONLY if both needs_web and verifiable are false.
|
- "search_query": a SELF-CONTAINED web search query for this message, written in the LANGUAGE of the user's latest message (an English message → an English query; a Russian one → a Russian query) so the results match the user's language and region instead of defaulting to one country. Resolve follow-ups from context (a bare "2024 года" after discussing a film becomes "<film name> 2024 фильм актёрский состав"). For broad/region-neutral requests (e.g. "interesting news") keep it general and international, don't narrow it to a single country. Empty string ONLY if both needs_web and verifiable are false.
|
||||||
|
|
@ -139,7 +139,7 @@ func (b *Bot) routeLayer1(ctx context.Context, rcx string, l0 rd.Layer0, cost *C
|
||||||
resp, err := b.gemini.Complete(ctx, LLMRequest{
|
resp, err := b.gemini.Complete(ctx, LLMRequest{
|
||||||
Model: b.cfg.GeminiModel,
|
Model: b.cfg.GeminiModel,
|
||||||
Messages: []Message{{Role: "user", Content: classifierPrompt + rcx}},
|
Messages: []Message{{Role: "user", Content: classifierPrompt + rcx}},
|
||||||
MaxTokens: 110, // was 80; the schema grew (about_project added) — must not truncate
|
MaxTokens: 160, // headroom for a long Cyrillic context-resolved search_query; a cut mid-query yields invalid JSON → safe degrade to the Layer-0 heuristic, but we'd lose the verdict, so leave slack
|
||||||
Temperature: 0,
|
Temperature: 0,
|
||||||
})
|
})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue