vojo/apps/ai-bot/bot.go

851 lines
33 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package main
import (
"context"
"fmt"
"log/slog"
"sync"
"sync/atomic"
"time"
)
// roomMeta is the cached per-room classification consulted when handling a
// message: the joined/invited member counts behind the 1:1 test (F3), whether any
// member sits outside ALLOWED_SERVERS, and the encryption state (F15). Values are
// fetched lazily from the CS-API on first need (appservice transactions carry no
// room summary) and the counts are INVALIDATED whenever a third party's membership
// changes, so a 1:1 that gains a member is reclassified out of DM mode and a newly
// added foreign member is caught. Every field is guarded by Bot.mu — never read or
// written without it, because the slow generation and the lazy CS-API probes run
// in concurrent per-room goroutines.
type roomMeta struct {
	joined      int  // members currently joined
	invited     int  // members currently invited
	countsKnown bool // joined/invited/foreign reflect a successful probe
	foreign     bool // a joined/invited member is outside ALLOWED_SERVERS
	encrypted   bool // room has encryption enabled
	encKnown    bool // encrypted reflects a probe or an m.room.encryption event
}

// isDM reports whether the room is a 1:1: the counts must be known and
// joined+invited must total exactly two.
func (m *roomMeta) isDM() bool {
	if !m.countsKnown {
		return false
	}
	return m.joined+m.invited == 2
}
// Bot bundles the bot's dependencies (config, logger, Matrix client, LLM backends,
// store) with the in-memory per-room state the event handlers share. The state
// below mu is touched by several goroutines at once, so it must only be accessed
// through the locked helpers.
type Bot struct {
// cfg is the runtime configuration; log is the structured logger.
cfg *Config
log *slog.Logger
// mx is the Matrix client used for sends, typing notifications and room probes.
mx *MatrixClient
// llm is the xAI client built by NewXAIClient.
llm LLMClient
// st is the store opened by OpenStore (dedup, limiter, warned flags).
st *Store
// gemini is the cheap chat backend for the trivial route and the Layer-1 classifier
// (an LLMClient so tests can fake it); nil unless a layer that uses it is enabled.
// web is the web-freshness provider, built only when WEB_ENABLED. Both nil → the
// cascade can only ever produce grok_direct.
gemini LLMClient
web WebProvider
// promptVersion is a short stable hash of the system prompt, logged with each
// request so prompt changes are visible in the analytics (A/B + regressions).
promptVersion string
// telemetryWrites paces the retention trim (every telemetryTrimEvery writes).
telemetryWrites atomic.Uint64
// mu guards the in-memory maps/sets below. Each transaction is acked to Synapse
// immediately (appservice.go) and its events are processed in transaction order,
// but the slow xAI generation runs in a per-room goroutine and the lazy probes run
// off-lock, so several goroutines touch this shared state at once. mu is held only
// for short map operations and is NEVER held across a network or xAI call — that
// head-of-line hold was the root cause of the multi-minute silence.
mu sync.Mutex
seen *lruSet // event ids already handled (dedup within a session; self-locking)
botSent *lruSet // event ids the bot itself sent (reply-parent detection; self-locking)
// meta is the per-room classification cache, keyed by room id.
meta map[string]*roomMeta
// buf is the per-room conversation buffer, keyed by room id.
buf map[string][]bufferedMsg
inflight map[string]bool // roomID currently generating a reply (per-room single-flight)
}
// NewBot builds a Bot from cfg: it creates the Matrix and xAI clients, opens the
// store, wires the optional cascade backends, and verifies that the appservice
// token resolves to BOT_MXID before returning.
//
// It returns an error if the store cannot be opened or the identity check fails;
// in the latter case the already-opened store is closed again (a close failure is
// logged rather than dropped, since the identity error is the one returned).
// Setting the display name is best-effort and never fails construction.
func NewBot(ctx context.Context, cfg *Config, logger *slog.Logger) (*Bot, error) {
	mx := NewMatrixClient(cfg.HomeserverURL, cfg.ASToken, cfg.BotMXID)
	llm := NewXAIClient(cfg.XAIBaseURL, cfg.XAIAPIKey, logger)
	st, err := OpenStore(cfg.DatabaseURL)
	if err != nil {
		return nil, err
	}
	b := &Bot{
		cfg:           cfg,
		log:           logger,
		mx:            mx,
		llm:           llm,
		st:            st,
		promptVersion: fmt.Sprintf("%08x", hashString(cfg.SystemPrompt)),
		seen:          newLRUSet(5000),
		botSent:       newLRUSet(5000),
		meta:          make(map[string]*roomMeta),
		buf:           make(map[string][]bufferedMsg),
		inflight:      make(map[string]bool),
	}
	// Build the cascade backends only for enabled layers (config already fail-fast
	// validated that the keys exist). With every cascade flag off these stay nil and
	// generate() can only produce grok_direct — today's bot. The grounding web provider
	// needs the concrete client (for the native generateContent call), so keep a typed
	// handle alongside the LLMClient face.
	var gc *geminiClient
	if cfg.needsGemini() {
		gc = NewGeminiClient(cfg.GeminiBaseURL, cfg.GeminiAPIKey, cfg.GeminiModel, logger)
		b.gemini = gc
	}
	if cfg.WebEnabled {
		if cfg.WebProvider == webProviderGeminiGrounding {
			b.web = &geminiGrounding{gem: gc, st: st, cfg: cfg}
		} else {
			b.web = newGrokWebSearch(cfg, logger)
		}
	}
	// Confirm the as_token + user_id resolves to BOT_MXID before serving.
	if err := b.verifyIdentity(ctx); err != nil {
		// The identity error is what the caller needs; a failure to close the store
		// on this path is surfaced in the log instead of being silently discarded.
		if cerr := st.Close(); cerr != nil {
			logger.Warn("close store after failed identity check", "err", cerr)
		}
		return nil, err
	}
	// F23: ensure the profile has a display name (best-effort, idempotent).
	if err := mx.SetDisplayName(ctx, cfg.BotDisplayName); err != nil {
		logger.Warn("set display name failed (non-fatal)", "err", err)
	}
	return b, nil
}
// Close releases the bot's store, if one was opened. The store's close result is
// deliberately discarded: this is shutdown and there is nothing left to do with it.
func (b *Bot) Close() {
	if b.st == nil {
		return
	}
	_ = b.st.Close()
}
// verifyIdentity asks the homeserver who the configured token acts as and fails
// unless the answer is exactly BOT_MXID. On success the resolved MXID is logged.
func (b *Bot) verifyIdentity(ctx context.Context) error {
	resolved, err := b.mx.Whoami(ctx)
	switch {
	case err != nil:
		return err
	case resolved != b.cfg.BotMXID:
		return fmt.Errorf("as_token resolves to %q but BOT_MXID is %q", resolved, b.cfg.BotMXID)
	}
	b.log.Info("authenticated", "mxid", resolved)
	return nil
}
// Run builds the appservice transaction server with handleTransaction as its
// callback and returns whatever Serve returns for ctx.
func (b *Bot) Run(ctx context.Context) error {
	return NewAppService(b.cfg, b.log, b.st, b.handleTransaction).Serve(ctx)
}
// handleTransaction walks one already-acked transaction's events strictly in
// order, handing each to handleEvent. It runs in a background goroutine (the 200
// has already gone back to Synapse). Dedup and classification happen synchronously
// here, so the per-room single-flight claim is taken in arrival order; only the
// slow xAI generation is spawned per room. Different rooms therefore answer
// concurrently and a slow call never blocks the next event, while ordering within
// a room is preserved.
func (b *Bot) handleTransaction(ctx context.Context, events []Event) {
	for idx := 0; idx < len(events); idx++ {
		// Pass a pointer to the slice element itself, not to a loop copy.
		b.handleEvent(ctx, &events[idx])
	}
}
// safego launches fn on its own goroutine behind a panic guard. Per-room work is
// detached from the HTTP handler, so a panic escaping it would kill the whole
// process and silence the bot in EVERY room; instead the panic is recovered and
// logged under the label what, so one malformed event cannot take the bot down.
func (b *Bot) safego(what string, fn func()) {
	guarded := func() {
		defer func() {
			r := recover()
			if r == nil {
				return
			}
			b.log.Error("recovered panic in handler goroutine", "what", what, "panic", r)
		}()
		fn()
	}
	go guarded()
}
// handleEvent dispatches a single event by type. Events lacking an event id or a
// room id, and events already recorded by markSeen, are ignored.
func (b *Bot) handleEvent(ctx context.Context, ev *Event) {
	// markSeen is only consulted once both ids are present (short-circuit order).
	if ev.EventID == "" || ev.RoomID == "" || !b.markSeen(ev.EventID) {
		return // malformed, or already handled (in-memory or durable dedup)
	}
	b.log.Debug("event", "type", ev.Type, "room", ev.RoomID, "sender", ev.Sender, "id", ev.EventID)

	switch ev.Type {
	case "m.room.message":
		// Synchronous (in transaction order) up to the single-flight claim; only the
		// slow generation inside handleMessage runs in a goroutine. The earlier
		// message in a room wins the claim, yet different rooms still run concurrently.
		b.handleMessage(ctx, ev)
	case "m.room.encryption":
		b.setEncrypted(ev.RoomID)
	case "m.room.member":
		aboutSelf := ev.StateKey != nil && *ev.StateKey == b.cfg.BotMXID
		if !aboutSelf {
			// Someone else's membership changed: the cached counts + foreign flag are
			// stale. Re-probe on the next message so a 1:1 that gains a member leaves
			// DM mode (no third-party leak) and a new foreign member is caught.
			b.invalidateCounts(ev.RoomID)
			return
		}
		b.safego("self-membership", func() { b.handleSelfMembership(ctx, ev) })
	}
}
// markSeen registers eventID in the in-memory set and then in the durable store,
// and reports whether this is the first time the id has been seen. An id already
// in the in-memory set returns false without touching the store. If the durable
// check errors, the failure is logged and the event is treated as new — the
// in-memory set still guards this session.
func (b *Bot) markSeen(eventID string) bool {
	if fresh := b.seen.Add(eventID); !fresh {
		return false
	}
	firstTime, err := b.st.SeenEvent(eventID)
	if err == nil {
		return firstTime
	}
	b.log.Error("durable dedup check failed", "id", eventID, "err", err)
	return true
}
// handleSelfMembership handles a membership event whose target is the bot itself.
// An invite is accepted only when the inviter's server is in ALLOWED_SERVERS (F11)
// and rejected (by leaving) otherwise; after a successful join the room is probed
// and left again if it holds a foreign member. A leave or ban drops the room's
// cached state. It is run on its own goroutine because join/leave are network calls.
func (b *Bot) handleSelfMembership(ctx context.Context, ev *Event) {
	membership := ev.membershipOf()
	if membership == "leave" || membership == "ban" {
		b.forgetRoom(ev.RoomID)
		return
	}
	if membership != "invite" {
		return
	}

	if allowed := b.cfg.AllowedServers[serverOf(ev.Sender)]; !allowed {
		b.log.Warn("rejecting invite (server not allowed)", "room", ev.RoomID, "sender", ev.Sender)
		err := b.mx.LeaveRoom(ctx, ev.RoomID)
		if err != nil {
			b.log.Error("leave (reject) failed", "room", ev.RoomID, "err", err)
		}
		return
	}

	b.log.Info("accepting invite", "room", ev.RoomID, "sender", ev.Sender)
	err := b.mx.JoinRoom(ctx, ev.RoomID)
	if err != nil {
		b.log.Error("join failed", "room", ev.RoomID, "err", err)
		return
	}
	// Fully-on-allowed-servers gate: an allowed inviter can still pull the bot into
	// a room that already holds federated third parties — leave immediately.
	_, _, hasForeign := b.ensureCounts(ctx, ev.RoomID)
	if hasForeign {
		b.leaveForeign(ctx, ev.RoomID)
	}
}
// leaveForeign logs a warning and leaves roomID because one of its members is
// outside ALLOWED_SERVERS; the bot only operates in rooms hosted entirely on
// allowed homeservers. A failed leave is logged, not returned.
func (b *Bot) leaveForeign(ctx context.Context, roomID string) {
	b.log.Warn("leaving room — a member is outside ALLOWED_SERVERS", "room", roomID)
	err := b.mx.LeaveRoom(ctx, roomID)
	if err == nil {
		return
	}
	b.log.Error("leave (foreign) failed", "room", roomID, "err", err)
}
// handleMessage decides whether an m.room.message is addressed to the bot and, if
// so, claims the room and spawns the reply generation. Everything up to the claim
// runs synchronously in transaction order; only respond runs on its own goroutine.
//
// The checks run in this order: encrypted room → undecodable content → edit / own
// message / notice → foreign member → addressed (DM or mention) → media →
// single-flight claim.
func (b *Bot) handleMessage(ctx context.Context, ev *Event) {
	roomID := ev.RoomID

	// A9/F15: re-check encryption; if (or once) encrypted, react once and skip — the
	// bot can't read it. The probe itself runs without the lock.
	if encrypted := b.ensureEncryption(ctx, roomID); encrypted {
		b.log.Debug("skip: encrypted room", "room", roomID)
		// Record the skip only when we actually react (once per room), so an encrypted
		// room doesn't flood request_log with one row per message.
		if reacted := b.reactEncryptedOnce(ctx, roomID, ev.EventID); reacted {
			b.recordSkip(ev, degradeEncrypted)
		}
		return
	}

	mc, decoded := ev.DecodeMessage()
	if !decoded {
		// An undecodable message shouldn't vanish without a trace (no silent drops):
		// WARN keeps it visible at the default log level.
		b.log.Warn("skip: undecodable message content", "room", roomID, "sender", ev.Sender, "id", ev.EventID)
		return
	}

	switch {
	case mc.IsReplace():
		return // edits re-carry m.mentions; never re-trigger or replay them (F16)
	case ev.Sender == b.cfg.BotMXID:
		return // our own message (the reply is buffered when we send it, not on echo-back)
	case mc.MsgType == "m.notice":
		return // anti-loop: ignore notices (ours and other bots')
	}

	// Media / non-text is acted on only after we know the message is addressed, so a
	// stray image in a group where the bot isn't mentioned stays silent, while media
	// in a 1:1 or with an @-mention gets a clear "text only" reaction.
	textual := mc.MsgType == "m.text" || mc.MsgType == "m.emote"

	countsKnown, isDM, foreign := b.ensureCounts(ctx, roomID)
	// Stay only in rooms hosted entirely on allowed servers — never operate in (or
	// "leak" the bot into) a federated room with non-consenting third parties.
	if foreign {
		b.leaveForeign(ctx, roomID)
		b.recordSkip(ev, degradeForeign)
		return
	}

	replyParentIsBot := false
	if rel := mc.RelatesTo; rel != nil && rel.InReplyTo != nil {
		replyParentIsBot = b.botSent.Has(rel.InReplyTo.EventID)
	}
	mentioned := mentionsBot(mc, b.cfg.BotMXID, replyParentIsBot)

	if !isDM && !mentioned {
		if countsKnown {
			b.log.Debug("skip: not addressed (group without mention)", "room", roomID, "sender", ev.Sender,
				"dm", isDM, "mentioned", mentioned)
		} else {
			// The member probe failed and this isn't an explicit mention, so a 1:1
			// (answer everything) can't be told from a group (answer on mention only).
			// WARN rather than a silent Debug drop; no reaction, because reacting in a
			// group where the bot isn't addressed would be wrong. The room is re-probed
			// on the next message.
			b.log.Warn("skip: room unclassified (member probe failed), message not an explicit mention",
				"room", roomID, "sender", ev.Sender)
		}
		return
	}

	// Addressed but not text: react "text only" (no silent drop).
	if !textual {
		b.log.Debug("skip: non-text msgtype (reacted)", "room", roomID, "sender", ev.Sender, "msgtype", mc.MsgType)
		b.react(ctx, roomID, ev.EventID, reactMedia)
		b.recordSkip(ev, degradeMedia)
		return
	}

	// Per-room single-flight: while a generation is in flight for this room, further
	// messages are dropped (no queue, no new session, no token burn). No reaction:
	// the typing indicator already showing for the room is the "I'm busy" signal.
	// The claim is taken here, synchronously and in transaction order, so the FIRST
	// message for a room wins and later ones are dropped until release.
	if won := b.tryClaim(roomID); !won {
		b.log.Debug("drop: room busy generating", "room", roomID, "sender", ev.Sender)
		return
	}

	// Snapshot the room history (which excludes this trigger) under the claim, then
	// run the slow generation on its own goroutine so the rest of this transaction
	// and other rooms aren't blocked. The claim is released when respond returns.
	history := b.snapshotBuf(roomID)
	generate := func() {
		defer b.release(roomID)
		b.respond(ctx, roomID, isDM, ev, mc, history)
	}
	b.safego("respond", generate)
}
// unlimitedCap (2^30) is the per-user request cap applied to UNLIMITED_USERS: large
// enough that the per-user gate never trips, while the global DAILY_USD_CEILING
// continues to apply.
const unlimitedCap = 0x4000_0000
// respond runs one admitted request end to end for the trigger event ev in roomID:
// it reserves budget with the store, shows the typing indicator, generates an
// answer under the RequestBudget deadline, settles the actual cost, and sends the
// reply. history is the room's buffered conversation (without the trigger) and isDM
// is passed through to buildContext.
//
// It returns nothing; every outcome is reported through a reaction on the trigger
// and/or a single RequestLog row emitted by the first deferred function. Deferred
// functions run in reverse order of registration: cancel, stopTyping, the
// unsettled-reservation guard, then the telemetry write.
func (b *Bot) respond(ctx context.Context, roomID string, isDM bool, ev *Event, mc *MessageContent, history []bufferedMsg) {
started := time.Now()
// One telemetry row per request, populated as the flow decides its outcome and
// emitted once via defer — so every exit (deny, error, empty, paid silence, success)
// is recorded without scattering writes (F-FUNC-5). It starts as route=none/ok=false;
// proceeding to the model sets the route, success sets ok=true.
rl := RequestLog{
ID: ev.EventID, RoomID: roomID, Sender: ev.Sender,
Route: routeNone, RouterSource: "default",
PromptVersion: b.promptVersion,
QueryText: mc.Body,
Models: map[string]string{"final": b.cfg.XAIModel},
}
defer func() {
rl.LatencyMS = int(time.Since(started).Milliseconds())
b.recordTelemetry(rl)
}()
perUserCap := b.cfg.PerUserDailyCap
perUserUSD := b.cfg.PerUserDailyUSD
if b.cfg.UnlimitedUsers[ev.Sender] {
perUserCap = unlimitedCap
perUserUSD = 0 // exempt from both per-user gates; the global ceiling still applies
}
// Reserve the route's estimated max-cost (not $0) so the global ceiling counts
// this in-flight call BEFORE it returns — the TOCTOU fix (§8.1). The envelope covers
// the most expensive ENABLED route, so whichever the router picks is admitted within
// the reservation; with the cascade off it is exactly grok_direct's estimate.
estimate := b.reserveEstimate()
// res/err here are scoped to the switch; the generate() result below declares its own.
switch res, err := b.st.Reserve(ev.Sender, perUserCap, perUserUSD, b.cfg.DailyUSDCeiling, estimate); {
case err != nil:
// A limiter failure is on our side — don't leave the user wondering.
b.log.Error("limiter reserve failed", "sender", ev.Sender, "err", err)
rl.Degraded, rl.Err = degradeReserveErr, err.Error()
b.react(ctx, roomID, ev.EventID, reactError)
return
case res == reserveDeniedUser:
// Per-user cap (anti-abuse, F24): stop answering, but always signal the limit —
// no message addressed to the bot is left without feedback.
b.log.Info("per-user daily cap reached; reacting", "sender", ev.Sender)
rl.PerUserCapHit = true
b.react(ctx, roomID, ev.EventID, reactRateLimit)
return
case res == reserveDeniedGlobal:
// Global USD ceiling. A reaction is cheap and non-intrusive (unlike the old
// once-per-day text notice), so signal every affected message rather than
// going silent after the first.
b.log.Warn("global daily USD ceiling reached", "room", roomID, "sender", ev.Sender)
rl.CeilingHit = true
b.react(ctx, roomID, ev.EventID, reactRateLimit)
return
}
// Past admission, a reservation + request slot are held. Guarantee they're freed on
// ANY exit that didn't settle — including a panic in generate() (recovered by safego)
// — so a leaked reservation can never permanently drift the global ceiling down
// (§8.1c). The normal paths set settled=true at Settle, so this defer then no-ops; it
// fires only on the panic/unexpected-return path, where it also reacts so the failure
// isn't silent.
settled := false
defer func() {
if settled {
return
}
rl.Degraded, rl.Err = "panic", "generation panicked or returned without settling"
if rerr := b.st.ReleaseReservation(ev.Sender, estimate); rerr != nil {
b.log.Error("release reservation (unsettled) failed", "sender", ev.Sender, "err", rerr)
}
if rerr := b.st.RefundRequest(ev.Sender); rerr != nil {
b.log.Error("refund (unsettled) failed", "sender", ev.Sender, "err", rerr)
}
b.react(ctx, roomID, ev.EventID, reactError)
}()
// Show "Vojo AI is typing…" for the whole generation. The keepalive refreshes the
// typing notification every 20s (the server expires it after 30s) so the indicator
// never lapses on a slow/retried answer, and the deferred stop clears it on exit.
stopTyping := b.startTypingKeepalive(ctx, roomID)
defer stopTyping()
// Overall per-request deadline (§8.2.2): every model call in the cascade shares this
// single budget (genCtx), so a multi-stage route can't accrete minutes the way
// per-stage 3×60s retries would. react/send/store ops use the live room ctx, NOT
// genCtx, so a budget timeout still surfaces as a ⚠️ react, never silence.
genCtx, cancel := context.WithTimeout(ctx, b.cfg.RequestBudget)
defer cancel()
msgs := buildContext(b.cfg.SystemPrompt, history, isDM, mc.Body, b.cfg.MaxCtxEvent, maxPromptTokens)
res, err := b.generate(genCtx, mc.Body, msgs, b.convID(roomID))
// Record what the routing + generation actually did, whatever the outcome.
rl.Route = res.route
rl.RouterSource = res.decision.Source
rl.RouterConfidence = res.decision.Confidence
rl.FallbackFired = res.fallback
rl.Escalated = res.route == routeReason
rl.Cost = res.cost
if res.stageMS != nil {
rl.StageMS = res.stageMS
}
if res.finalModel != "" {
rl.Models["final"] = res.finalModel
}
if res.decision.Source == "classifier" {
rl.Models["router"] = b.cfg.GeminiModel
}
if res.degraded != "" {
rl.Degraded = res.degraded
}
if err != nil {
// Terminal: even grok_direct failed. Settle whatever the cascade ACTUALLY spent
// (e.g. a paid web fetch before the failure) and release the rest of the
// reservation in one step, then refund the request slot so an outage doesn't burn
// the cap, and react (never silent). Settle with an all-zero cost is just a
// release, so a pure grok_direct failure books nothing — exactly as before.
b.log.Error("generation failed", "sender", ev.Sender, "route", res.route, "err", err)
rl.Err = err.Error()
if serr := b.st.Settle(ev.Sender, estimate, res.cost); serr != nil {
b.log.Error("settle (failed request) failed", "sender", ev.Sender, "err", serr)
}
// Marked settled even if Settle errored, so the unsettled guard does not also
// release the reservation and refund the slot a second time.
settled = true
if rerr := b.st.RefundRequest(ev.Sender); rerr != nil {
b.log.Error("refund failed", "sender", ev.Sender, "err", rerr)
}
b.react(ctx, roomID, ev.EventID, reactError)
return
}
// Success from some route. Settle: release the reservation and book the real
// per-component cost, so both caps see grounding/tool fees too — not just tokens.
if err := b.st.Settle(ev.Sender, estimate, res.cost); err != nil {
b.log.Error("settle spend failed", "sender", ev.Sender, "err", err)
}
settled = true
rl.PromptTokens, rl.CachedTokens, rl.CompletionTokens =
res.usage.PromptTokens, res.usage.CachedTokens, res.usage.CompletionTokens
rl.CacheHit = res.usage.CachedTokens > 0
rl.ProviderRequestID = res.providerID
text := res.text
if text == "" {
// Billed but no usable text (content filter / length cap / empty choices). Never
// leave a billed request without feedback — react "couldn't answer". The slot
// stays consumed (the 2xx was real); no refund, or an empty reply could be forced
// to dodge the cap.
b.log.Warn("empty completion (billed, reacting)", "sender", ev.Sender, "usd", res.cost.Total())
rl.Degraded = degradeEmpty
b.react(ctx, roomID, ev.EventID, reactError)
return
}
b.log.Info("answered", "room", roomID, "sender", ev.Sender, "dm", isDM, "route", res.route,
"usd", res.cost.Total(), "prompt_tokens", res.usage.PromptTokens, "completion_tokens", res.usage.CompletionTokens)
if err := b.sendReply(ctx, roomID, ev, mc, text); err != nil {
// Paid silence (§8.1): the spend is real (USD is kept — refunding it would
// under-count the ceiling), but the reply never landed. Refund the request SLOT
// so the user can retry, and react ⚠️ so the failure isn't silent.
b.log.Error("send reply failed after billing; refunding slot + reacting", "sender", ev.Sender, "err", err)
rl.Degraded, rl.Err = degradeSendFailed, err.Error()
if rerr := b.st.RefundRequest(ev.Sender); rerr != nil {
b.log.Error("refund failed", "sender", ev.Sender, "err", rerr)
}
b.react(ctx, roomID, ev.EventID, reactError)
return
}
// Only a delivered reply counts as a successful request in telemetry.
rl.OK = true
}
// maxPromptTokens is the token bound handed to buildContext for the assembled
// prompt (history is trimmed to fit) and also the prompt size priced by
// estimateUSD, so the two never disagree about how large a request can be.
const maxPromptTokens = 8_000
// estimateUSD returns the conservative max cost, in USD, reserved for a call to
// model before it is made, so the global ceiling can account for an in-flight
// request (§8.1). It prices a full prompt (maxPromptTokens) at the model's input
// rate plus the configured maximum output at its output rate — an upper-ish bound,
// since real calls send fewer tokens and may get the cheaper cached rate. Settle
// books the authoritative actual cost afterwards, so a slightly-off estimate only
// nudges admission, never the final accounting.
func (b *Bot) estimateUSD(model string) float64 {
	// Prices are quoted per million tokens.
	const tokensPerMillion = 1e6
	rates := b.cfg.priceFor(model)
	return float64(maxPromptTokens)/tokensPerMillion*rates.InputPerM +
		float64(b.cfg.MaxOutTok)/tokensPerMillion*rates.OutputPerM
}
// convID builds the prompt-cache routing hint sent as x-grok-conv-id: "vojo-"
// followed by an 8-hex-digit hash of the room id. It returns "" when
// GROK_PROMPT_CACHE is off. Grok caches prompt prefixes automatically; the header
// only pins a conversation to the same backend to raise the hit rate (docs.x.ai),
// so a stable per-room id is the right unit — every turn in a room shares the
// system prompt and history prefix. Hashing keeps the value compact and avoids
// sending the raw room id.
func (b *Bot) convID(roomID string) string {
	if b.cfg.GrokPromptCache {
		return fmt.Sprintf("vojo-%08x", hashString(roomID))
	}
	return ""
}
// computeUSD returns the USD cost of one call from the API-reported token usage u
// and cfg's price table entry for model. Prompt tokens are split into cached and
// non-cached portions (the non-cached count is floored at zero), each priced at
// its own per-million rate, and completion tokens are priced at the output rate.
// Pricing from returned usage keeps the hard ceiling tied to real consumption, and
// a call is booked at the price of the model that actually served it.
func computeUSD(model string, u Usage, cfg *Config) float64 {
	rates := cfg.priceFor(model)
	// Guard against a provider reporting more cached tokens than prompt tokens.
	uncached := max(u.PromptTokens-u.CachedTokens, 0)
	return float64(uncached)/1e6*rates.InputPerM +
		float64(u.CachedTokens)/1e6*rates.CachedPerM +
		float64(u.CompletionTokens)/1e6*rates.OutputPerM
}
// react annotates eventID in roomID with emoji by sending an m.reaction event —
// the bot's language-free way to signal a system state (error / rate limit /
// encrypted / media) that it can't express as a model-generated answer. It is
// best-effort: a failed send is logged, not retried or returned. Because the
// event type is m.reaction rather than m.room.message, reactions never re-enter
// handleMessage and need no anti-loop tracking.
func (b *Bot) react(ctx context.Context, roomID, eventID, emoji string) {
	annotation := map[string]any{
		"rel_type": "m.annotation",
		"event_id": eventID,
		"key":      emoji,
	}
	payload := map[string]any{"m.relates_to": annotation}
	_, err := b.mx.SendEvent(ctx, roomID, "m.reaction", payload)
	if err == nil {
		return
	}
	b.log.Error("react failed", "room", roomID, "emoji", emoji, "err", err)
}
// reactEncryptedOnce reacts with the "encrypted" emoji to eventID unless the store
// says this room was already warned, then persists the warned flag so a restart
// doesn't re-react (F5). Vojo disables E2EE by default, so this is a near-dead
// safety path; the once-gate keeps the bot from annotating every message in the
// rare encrypted room.
//
// It returns true only when it reacted on this call, so the caller can log the
// skip exactly once too. A failure to read the flag returns false without
// reacting; a failure to persist it is logged and the call still returns true.
func (b *Bot) reactEncryptedOnce(ctx context.Context, roomID, eventID string) bool {
	alreadyWarned, err := b.st.HasWarnedEncrypted(roomID)
	switch {
	case err != nil:
		b.log.Error("warned-flag read failed", "room", roomID, "err", err)
		return false
	case alreadyWarned:
		return false
	}

	b.react(ctx, roomID, eventID, reactEncrypted)
	if perr := b.st.SetWarnedEncrypted(roomID); perr != nil {
		b.log.Error("persist warned-flag failed", "room", roomID, "err", perr)
	}
	return true
}
// sendReply delivers the model's answer and, once it has been sent, appends the
// completed exchange (user trigger, then bot answer) to the room's conversation
// buffer so the next turn has context. The send error is RETURNED so the caller
// can handle paid silence (§8.1): a billed answer that failed to deliver must
// refund the slot and react, not vanish. Nothing is buffered on failure.
func (b *Bot) sendReply(ctx context.Context, roomID string, trigger *Event, triggerMC *MessageContent, body string) error {
	err := b.sendMessage(ctx, roomID, trigger, triggerMC, body)
	if err != nil {
		return err
	}
	// Both turns are recorded together and only AFTER a successful send, so a failed
	// or empty generation never leaves a dangling user turn (a question with no
	// reply) in the buffer. Single-flight guarantees no other turn for this room
	// interleaves between the two appends.
	exchange := [...]bufferedMsg{
		{sender: trigger.Sender, body: triggerMC.Body, isBot: false},
		{sender: b.cfg.BotMXID, body: body, isBot: true},
	}
	for _, turn := range exchange {
		b.appendBuf(roomID, turn)
	}
	return nil
}
// sendMessage builds the m.notice reply content for the trigger, sends it as an
// m.room.message, and remembers the resulting event id in botSent. It returns the
// send error (nil on success) so the caller can detect a failed delivery; the
// failure is also logged here.
func (b *Bot) sendMessage(ctx context.Context, roomID string, trigger *Event, triggerMC *MessageContent, body string) error {
	payload := buildNoticeContent(trigger.EventID, trigger.Sender, triggerMC.RelatesTo, body)
	sentID, err := b.mx.SendEvent(ctx, roomID, "m.room.message", payload)
	if err == nil {
		// Track our own reply so a later reply-to-it is recognised as addressing us.
		b.botSent.Add(sentID)
		return nil
	}
	b.log.Error("send failed", "room", roomID, "err", err)
	return err
}
// startTypingKeepalive turns the typing indicator on for roomID and keeps it alive
// for the whole generation: the notification is sent with a 30s server-side
// timeout (see setTyping), so it is refreshed every 20s. The refresh loop ends
// when ctx is cancelled or the returned stop function is called.
//
// The returned stop function clears the indicator. It is idempotent (guarded by a
// sync.Once) and safe to call via defer. stop first waits for the refresh
// goroutine to exit and only then sends typing=false, so a refresh that was
// already in flight — or a tick racing the stop signal — can never land AFTER the
// clear and leave the indicator showing for up to 30s once the answer is out. The
// wait is bounded by the same network call the clear itself has to make.
//
// The refresh loop runs under safego like every other detached goroutine in this
// file, so a panic in it is logged instead of crashing the process. Typing is
// best-effort UX — failures are non-fatal.
func (b *Bot) startTypingKeepalive(ctx context.Context, roomID string) func() {
	b.setTyping(ctx, roomID, true)

	done := make(chan struct{})   // closed by stop to end the refresh loop
	exited := make(chan struct{}) // closed by the refresh loop when it has returned

	b.safego("typing-keepalive", func() {
		// Runs even if the loop panics (before safego's recover), so stop never hangs.
		defer close(exited)
		t := time.NewTicker(20 * time.Second)
		defer t.Stop()
		for {
			select {
			case <-ctx.Done():
				return
			case <-done:
				return
			case <-t.C:
				// select chooses at random among ready cases, so a tick can win even
				// when stop has already been requested; re-check before refreshing.
				select {
				case <-done:
					return
				default:
				}
				b.setTyping(ctx, roomID, true)
			}
		}
	})

	var once sync.Once
	return func() {
		once.Do(func() {
			close(done)
			<-exited // no refresh can be sent past this point
			b.setTyping(ctx, roomID, false)
		})
	}
}
// setTyping turns the bot's typing indicator in roomID on or off, passing a
// 30000 ms timeout to the server (refreshed by startTypingKeepalive). It is
// best-effort UX: a failure is logged at Debug level and otherwise ignored.
func (b *Bot) setTyping(ctx context.Context, roomID string, typing bool) {
	err := b.mx.SendTyping(ctx, roomID, typing, 30000)
	if err == nil {
		return
	}
	b.log.Debug("set typing failed", "room", roomID, "typing", typing, "err", err)
}
// buildNoticeContent assembles the event content for the bot's answer: an m.notice
// (not m.text, so the anti-loop skip catches our own output) that replies to the
// event id replyTo and mentions sender. It is thread-aware (F27): when the trigger
// carries an m.thread relation with a non-empty root event id, the answer gets the
// same thread relation so it lands in the thread rather than the main timeline.
//
// body is always sent as the plain-text body; a formatted_body is attached only
// when markdownToHTML reports that the text actually contains formatting.
func buildNoticeContent(replyTo, sender string, triggerRelates *RelatesTo, body string) map[string]any {
	// Every answer replies to the trigger, threaded or not.
	relation := map[string]any{
		"m.in_reply_to": map[string]any{"event_id": replyTo},
	}
	inThread := triggerRelates != nil && triggerRelates.RelType == "m.thread" && triggerRelates.EventID != ""
	if inThread {
		relation["rel_type"] = "m.thread"
		relation["event_id"] = triggerRelates.EventID
		relation["is_falling_back"] = true
	}

	notice := map[string]any{
		"msgtype":      "m.notice",
		"body":         body,
		"m.mentions":   map[string]any{"user_ids": []string{sender}},
		"m.relates_to": relation,
	}

	// The model answers in markdown; render it to org.matrix.custom.html so clients
	// show formatting instead of raw `**`, `#`, lists, code fences. A plain answer
	// keeps rendering from `body` alone.
	rendered, formatted := markdownToHTML(body)
	if formatted {
		notice["format"] = matrixHTMLFormat
		notice["formatted_body"] = rendered
	}
	return notice
}
// --- per-room single-flight ----------------------------------------------------
// tryClaim attempts to take the per-room generation claim for roomID. It returns
// true when no generation was in flight (the room is now marked busy) and false
// when one already was, in which case the caller must drop its message.
func (b *Bot) tryClaim(roomID string) bool {
	b.mu.Lock()
	defer b.mu.Unlock()
	busy := b.inflight[roomID]
	if !busy {
		b.inflight[roomID] = true
	}
	return !busy
}
// release drops the per-room generation claim for roomID so the next message in
// that room can be answered. Releasing an unclaimed room is a no-op.
func (b *Bot) release(roomID string) {
	b.mu.Lock()
	delete(b.inflight, roomID)
	b.mu.Unlock()
}
// --- per-room metadata helpers (all guarded by b.mu; probes run outside it) -----
// getMetaLocked looks up roomID's meta, inserting an empty entry when the room has
// none yet, and returns it. The caller MUST already hold b.mu.
func (b *Bot) getMetaLocked(roomID string) *roomMeta {
	if existing := b.meta[roomID]; existing != nil {
		return existing
	}
	fresh := new(roomMeta)
	b.meta[roomID] = fresh
	return fresh
}
// invalidateCounts marks roomID's cached member counts as unknown so they are
// probed again on the next need. Rooms with no cached meta are left untouched.
func (b *Bot) invalidateCounts(roomID string) {
	b.mu.Lock()
	defer b.mu.Unlock()
	m := b.meta[roomID]
	if m == nil {
		return
	}
	m.countsKnown = false
}
// setEncrypted records that roomID is encrypted and that this state is known,
// creating the room's meta entry if it does not exist yet.
func (b *Bot) setEncrypted(roomID string) {
	b.mu.Lock()
	defer b.mu.Unlock()
	entry := b.getMetaLocked(roomID)
	entry.encrypted = true
	entry.encKnown = true
}
// forgetRoom discards all in-memory state kept for roomID — its cached meta, its
// conversation buffer, and any in-flight claim — in one critical section.
func (b *Bot) forgetRoom(roomID string) {
	b.mu.Lock()
	delete(b.inflight, roomID)
	delete(b.buf, roomID)
	delete(b.meta, roomID)
	b.mu.Unlock()
}
// ensureEncryption reports whether roomID is encrypted. A cached answer is
// returned directly; otherwise the CS-API is probed WITHOUT holding the lock and
// the result is cached. If the probe fails it returns false (treated as
// not-encrypted for this message) and leaves the state unknown so the next message
// probes again.
func (b *Bot) ensureEncryption(ctx context.Context, roomID string) bool {
	b.mu.Lock()
	cached := b.getMetaLocked(roomID)
	known, encrypted := cached.encKnown, cached.encrypted
	b.mu.Unlock()
	if known {
		return encrypted
	}

	probed, err := b.mx.RoomEncrypted(ctx, roomID)
	if err != nil {
		b.log.Warn("encryption probe failed", "room", roomID, "err", err)
		return false // leave unknown; re-probed on the next message
	}

	// Look the meta up again under the lock rather than writing through the pointer
	// obtained before the unlocked probe: if the room was forgotten (leave/ban)
	// mid-probe its meta is gone, and writing to the old pointer would resurrect a
	// dead room.
	b.mu.Lock()
	if current := b.meta[roomID]; current != nil {
		current.encrypted = probed
		current.encKnown = true
	}
	b.mu.Unlock()
	return probed
}
// ensureCounts returns the room's classification as (countsKnown, isDM, foreign).
// When the counts are not cached it probes room membership WITHOUT holding the
// lock, derives the foreign flag from the member servers, and caches the result.
// If the probe fails it returns (false, false, false): the caller treats an
// unclassified room conservatively and logs a visible WARN rather than silently
// dropping. The same triple is returned when the room's meta has disappeared
// (forgotten) by the time the result is read back.
func (b *Bot) ensureCounts(ctx context.Context, roomID string) (countsKnown, isDM, foreign bool) {
	b.mu.Lock()
	cached := b.getMetaLocked(roomID).countsKnown
	b.mu.Unlock()

	if !cached {
		nJoined, nInvited, servers, err := b.mx.RoomMembership(ctx, roomID)
		if err != nil {
			b.log.Warn("member probe failed", "room", roomID, "err", err)
			return false, false, false
		}
		// One server outside ALLOWED_SERVERS is enough to flag the room.
		outsider := false
		for server := range servers {
			if outsider = !b.cfg.AllowedServers[server]; outsider {
				break
			}
		}
		// Look the meta up again under the lock instead of writing through a pointer
		// taken before the unlocked probe: a leave/ban mid-probe must not be undone
		// by resurrecting the room's meta.
		b.mu.Lock()
		if current := b.meta[roomID]; current != nil {
			current.joined = nJoined
			current.invited = nInvited
			current.foreign = outsider
			current.countsKnown = true
		}
		b.mu.Unlock()
	}

	b.mu.Lock()
	defer b.mu.Unlock()
	current := b.meta[roomID]
	if current == nil {
		return false, false, false
	}
	return current.countsKnown, current.isDM(), current.foreign
}
// snapshotBuf returns an independent copy of roomID's conversation buffer, taken
// under the lock, or nil when the room has no buffered messages. The copy lets the
// caller use the history off-lock while the buffer keeps changing.
func (b *Bot) snapshotBuf(roomID string) []bufferedMsg {
	b.mu.Lock()
	defer b.mu.Unlock()
	live := b.buf[roomID]
	n := len(live)
	if n == 0 {
		return nil
	}
	snapshot := make([]bufferedMsg, n)
	copy(snapshot, live)
	return snapshot
}
// appendBuf adds msg to the end of roomID's conversation buffer and trims the
// buffer from the front so it never holds more than twice MaxCtxEvent entries
// (with a floor of 8).
func (b *Bot) appendBuf(roomID string, msg bufferedMsg) {
	b.mu.Lock()
	defer b.mu.Unlock()
	limit := max(b.cfg.MaxCtxEvent*2, 8)
	grown := append(b.buf[roomID], msg)
	// Keep only the newest `limit` entries.
	if excess := len(grown) - limit; excess > 0 {
		grown = grown[excess:]
	}
	b.buf[roomID] = grown
}