148 lines
5 KiB
Go
148 lines
5 KiB
Go
package main
|
|
|
|
import "strings"
|
|
|
|
// bufferedMsg is a single earlier room message that the bot kept so it can be
// replayed later as conversation context.
type bufferedMsg struct {
	// sender and body record who wrote the message and its raw text.
	// buildContext reads only body (after removing any reply-fallback quote)
	// and never copies sender into the messages it builds.
	sender, body string
	// isBot is true when the bot itself wrote the message; buildContext replays
	// such entries as "assistant" turns.
	isBot bool
}
|
|
|
|
// buildContext assembles the provider-neutral message list under the owner's
|
|
// minimisation rule ("trigger + bot replies only", §6/F8):
|
|
//
|
|
// - GROUP rooms: send ONLY the bot's own prior replies (assistant turns) plus
|
|
// the single triggering message (user turn). Other participants' messages and
|
|
// display names never reach the model — the third-party-consent mitigation.
|
|
// - 1:1 rooms: there are no third parties, so the peer's recent turns are
|
|
// included too for coherence. Still no display names (pseudo "user").
|
|
//
|
|
// `history` is the recent room window EXCLUDING the trigger; `triggerBody` is the
|
|
// message that addressed the bot. Bodies are stripped of reply-fallback quotes so
|
|
// quoted third-party text doesn't leak.
|
|
func buildContext(system string, history []bufferedMsg, isDM bool, triggerBody string, maxEvents, maxTokens int) []Message {
|
|
msgs := []Message{{Role: "system", Content: system}}
|
|
|
|
// Keep at most the last maxEvents history items.
|
|
if len(history) > maxEvents {
|
|
history = history[len(history)-maxEvents:]
|
|
}
|
|
for _, h := range history {
|
|
body := stripReplyFallback(h.body)
|
|
if body == "" {
|
|
continue
|
|
}
|
|
if h.isBot {
|
|
msgs = append(msgs, Message{Role: "assistant", Content: body})
|
|
continue
|
|
}
|
|
if isDM {
|
|
msgs = append(msgs, Message{Role: "user", Content: body})
|
|
}
|
|
// group + non-bot history → dropped (privacy minimisation)
|
|
}
|
|
|
|
msgs = append(msgs, Message{Role: "user", Content: stripReplyFallback(triggerBody)})
|
|
return truncateToTokens(msgs, maxTokens)
|
|
}
|
|
|
|
const (
	// routerContextMaxRunes is the per-line rune limit applied to text handed to
	// the classifier/rewrite step, so that one long buffered turn cannot exhaust
	// the router's token budget. Roughly 200 runes is enough to resolve a
	// follow-up.
	routerContextMaxRunes = 200
)
|
|
|
|
// routerContext returns the privacy-minimised conversation window the Layer-1 classifier
|
|
// and the follow-up rewrite read, drawn ONLY from the already-minimised `msgs` (a strict
|
|
// subset of what the final Grok call sees — no new privacy surface, §6):
|
|
//
|
|
// - DM: the last ≤2 bot (assistant) turns plus the interleaved/final user turns, so a
|
|
// bare follow-up like "2024 года" carries the prior film name into search_query.
|
|
// - GROUP: ONLY the final user line. The per-(room,thread) buffer interleaves different
|
|
// members' topics (it is keyed by room+thread, not sender), so resolving a follow-up
|
|
// against prior turns could ground a confidently-wrong answer about the WRONG subject.
|
|
//
|
|
// Formatted "BOT: …\nUSER: …", each line truncated to routerContextMaxRunes. Empty when
|
|
// there is nothing to send.
|
|
func routerContext(msgs []Message, isDM bool) string {
|
|
conv := msgs
|
|
if len(conv) > 0 && conv[0].Role == "system" {
|
|
conv = conv[1:]
|
|
}
|
|
if len(conv) == 0 {
|
|
return ""
|
|
}
|
|
start := len(conv) - 1 // group default: only the final user line
|
|
if isDM {
|
|
// Walk back to include up to the 2 most recent assistant turns before the trigger.
|
|
const maxAssistant = 2
|
|
seen := 0
|
|
for i := len(conv) - 1; i >= 0; i-- {
|
|
start = i
|
|
if conv[i].Role == "assistant" {
|
|
if seen++; seen >= maxAssistant {
|
|
break
|
|
}
|
|
}
|
|
}
|
|
}
|
|
var b strings.Builder
|
|
for _, m := range conv[start:] {
|
|
text := strings.TrimSpace(m.Content)
|
|
if text == "" {
|
|
continue
|
|
}
|
|
if r := []rune(text); len(r) > routerContextMaxRunes {
|
|
text = string(r[:routerContextMaxRunes])
|
|
}
|
|
label := "USER"
|
|
if m.Role == "assistant" {
|
|
label = "BOT"
|
|
}
|
|
b.WriteString(label)
|
|
b.WriteString(": ")
|
|
b.WriteString(text)
|
|
b.WriteByte('\n')
|
|
}
|
|
return strings.TrimRight(b.String(), "\n")
|
|
}
|
|
|
|
// estimateTokens returns a cheap, rough token estimate for s: one token per
// four runes (integer division) plus a fixed overhead of four per message.
// It is meant only for bounding request size, not for billing (billing reads
// the usage figures returned by the API).
func estimateTokens(s string) int {
	const (
		runesPerToken   = 4
		messageOverhead = 4
	)
	// Ranging over a string yields one iteration per rune.
	runes := 0
	for range s {
		runes++
	}
	return runes/runesPerToken + messageOverhead
}
|
|
|
|
// truncateToTokens drops the oldest non-system, non-final messages until the
|
|
// estimate fits maxTokens. The system prompt (index 0) and the final user
|
|
// trigger are always preserved.
|
|
func truncateToTokens(msgs []Message, maxTokens int) []Message {
|
|
total := 0
|
|
for _, m := range msgs {
|
|
total += estimateTokens(m.Content)
|
|
}
|
|
// Drop from index 1 upward (after system), never the last (trigger).
|
|
for total > maxTokens && len(msgs) > 2 {
|
|
total -= estimateTokens(msgs[1].Content)
|
|
msgs = append(msgs[:1], msgs[2:]...)
|
|
}
|
|
return msgs
|
|
}
|
|
|
|
// stripReplyFallback removes a leading Matrix rich-reply fallback from body and
// returns the remaining text with surrounding whitespace trimmed.
//
// A fallback is recognised only when body begins with "> ". In that case every
// leading line that starts with ">" (the quoted parent) is discarded, along
// with the blank separator lines that follow. Any other body is just trimmed.
// This keeps quoted third-party text out of the prompt and de-noises it.
func stripReplyFallback(body string) string {
	rest := body
	if strings.HasPrefix(rest, "> ") {
		// Consume quoted lines one at a time.
		for strings.HasPrefix(rest, ">") {
			nl := strings.IndexByte(rest, '\n')
			if nl < 0 {
				// The quote ran to the end of the body: nothing is left.
				return ""
			}
			rest = rest[nl+1:]
		}
	}
	// TrimSpace also swallows the blank separator line(s) after the quote.
	return strings.TrimSpace(rest)
}
|