vojo/apps/ai-bot/config.go

533 lines
22 KiB
Go

package main
import (
	"fmt"
	"os"
	"sort"
	"strconv"
	"strings"
	"time"
)
// Config is the fully-resolved runtime configuration, parsed once from the
// environment at startup by LoadConfig. Secrets (AS_TOKEN, HS_TOKEN, XAI_API_KEY,
// GEMINI_API_KEY) live ONLY here — never in config.json or any client bundle.
type Config struct {
// HomeserverURL is HOMESERVER_URL with trailing slashes trimmed. Required.
HomeserverURL string
// BotMXID is the bot's Matrix user ID (BOT_MXID). Required. When a registration
// file is used, its localpart must match the registration's sender_localpart.
BotMXID string
// BotDisplayName comes from BOT_DISPLAY_NAME (default "Vojo AI").
BotDisplayName string
// Appservice auth, from the Synapse registration.yaml. `as_token`
// authenticates the bot TO the homeserver (used as the access token, with
// ?user_id=BOT_MXID identity assertion); `hs_token` authenticates the
// homeserver's transaction pushes TO us. Neither expires — no rotation.
ASToken string
HSToken string
// Listen address for the transaction-push HTTP server (the `url` in the
// registration points here, e.g. http://ai-bot:8009). AS_ADDR, default ":8009".
ASAddr string
// When set, as_token/hs_token are read from this generated registration.yaml
// (the mautrix idiom — one file shared with Synapse), overriding the env
// AS_TOKEN/HS_TOKEN. Empty → use the env tokens.
RegistrationPath string
// XAIAPIKey is the xAI API key (XAI_API_KEY or XAI_API_KEY_FILE). Required.
XAIAPIKey string
// XAIBaseURL is XAI_BASE_URL with trailing slashes trimmed
// (default https://api.x.ai/v1).
XAIBaseURL string
// XAIModel is the default (final-voice) model, XAI_MODEL
// (default "grok-4.20-0309-non-reasoning"). It keys the default entry in Prices.
XAIModel string
// XAITemp is XAI_TEMPERATURE (default 0.6).
XAITemp float64
// MaxOutTok is MAX_OUTPUT_TOKENS (default 320).
MaxOutTok int
// MaxCtxEvent is MAX_CONTEXT_EVENTS (default 20).
MaxCtxEvent int
// GrokReasoningEffort is the reasoning_effort sent on the normal Grok voice calls
// (grok_direct + web synthesis). Empty = don't send it (the default — required for
// grok-4.20-non-reasoning, which rejects the param). On a unified model like
// grok-4.3 the API otherwise defaults to "low" (it thinks on every reply); set this
// to "none" to keep the default voice fast/cheap. The reason_then_grok route ignores
// this and always uses "high". Accepted: "" | none | low | medium | high.
// LoadConfig lower-cases and trims the value before validating it.
GrokReasoningEffort string
// Allowlist of homeservers whose users may pull the bot into a room. Gates
// the *inviter* (F11). Comma-separated env (ALLOWED_SERVERS), stored as a set.
// Required: LoadConfig rejects an empty allowlist.
AllowedServers map[string]bool
// DailyUSDCeiling is the global daily spend ceiling in USD, DAILY_USD_CEILING
// (default 10).
DailyUSDCeiling float64
// PerUserDailyCap is the per-user daily request-count cap, PER_USER_DAILY_CAP
// (default 30).
PerUserDailyCap int
// PerUserDailyUSD is an optional per-user daily $ quota (0 = off) on top of the
// request count cap, so one user on expensive routes can't drain the shared global
// ceiling and deny everyone else. Checked against the user's own committed+reserved
// spend in Reserve.
PerUserDailyUSD float64
// mxids exempt from PER_USER_DAILY_CAP (e.g. the owner/admins testing). Still
// subject to the global DAILY_USD_CEILING, so the wallet stays protected.
// Parsed from UNLIMITED_USERS (comma-separated).
UnlimitedUsers map[string]bool
// USD-per-1M-token prices for the default (final-voice) model, applied to the
// API-returned token usage so the hard ceiling tracks real usage even if the
// model/price changes. Kept as the back-compat XAI_PRICE_* source; folded into
// Prices below. Defaults: input 1.25, cached 0.20, output 2.50.
PriceInputPerM float64
PriceCachedPerM float64
PriceOutputPerM float64
// Prices is the per-model price table (LiteLLM pattern) read by priceFor(model),
// so a call books at the price of the model that actually served it. Built in
// LoadConfig: the default model's entry comes from the XAI_PRICE_* envs, the
// Gemini model's from GEMINI_PRICE_* (cached priced as input), and the reasoning
// model's from REASONING_PRICE_* (defaulting to the XAI_PRICE_* values).
Prices map[string]ModelPrice
// RequestBudget bounds one whole request (all model calls share it), so a slow or
// retried call — or a multi-stage cascade — can't accrete minutes. The default
// matches the previous effective ceiling for a single grok_direct call.
// Parsed from REQUEST_BUDGET_SECONDS (default 180).
RequestBudget time.Duration
// GrokPromptCache, when true, sends the x-grok-conv-id routing header to raise the
// prompt-cache hit rate (Grok caches automatically; the header only pins routing).
GrokPromptCache bool
// TelemetryEnabled writes the request_log analytics row for every request. Default
// off so the cascade-off path adds no extra write; turned on to measure the base.
// Its write is isolated — a failure logs a WARN, never drops the answer.
TelemetryEnabled bool
// TelemetryStoreText additionally stores the query text in request_log (for offline
// eval). Default off — only metadata is kept.
TelemetryStoreText bool
// TelemetryRetention trims request_log rows older than this (time-based, since the
// analytics are a time series). 0 disables trimming. Parsed from
// TELEMETRY_RETENTION_DAYS (default 30).
TelemetryRetention time.Duration
// --- Cascade (Phase 2-4). EVERY flag defaults OFF, so an unset environment is
// exactly today's bot: one grok_direct call. Any layer off or failing degrades to
// grok_direct (§8.2). None of these is enabled in prod until the offline-eval gate
// (§9) passes. ---
// RouterEnabled turns on the Layer-0 heuristic router; off → everything is
// grok_direct. RouterClassifierEnabled additionally consults the Gemini Layer-1
// classifier on cases the heuristic left as grok_direct. LoadConfig rejects
// RouterClassifierEnabled without RouterEnabled.
RouterEnabled bool
RouterClassifierEnabled bool
// TrivialOffloadEnabled lets the trivial route answer with Gemini; off → trivial
// still goes to Grok.
TrivialOffloadEnabled bool
// WebEnabled turns on the web_then_grok route. WebProvider selects the source:
// grok_web_search (default, works on chat/completions via Live Search) or
// gemini_grounding (Gemini-3 native only — see F-EXT-3). WebProvider is only
// validated when WebEnabled is true.
WebEnabled bool
WebProvider string
// WebGroundingDailyCap caps grounded prompts/day (durable counter) before falling
// back, guarding the $/1k grounding overage. WebGroundingTier records the Gemini
// plan the cap reflects. Defaults: cap 450, tier "free".
WebGroundingDailyCap int
WebGroundingTier string
// Reasoning route: a manual "think harder" trigger. ReasoningModel must be a
// reasoning-capable model (the default grok-4.20-non-reasoning is NOT — see the
// docs.x.ai finding); set REASONING_MODEL to e.g. grok-4.3 to use it.
ReasoningEnabled bool
ReasoningTrigger string
ReasoningModel string
// ReasoningEffort is the reasoning_effort the reason_then_grok route sends on the
// manual "think harder" trigger. Default "high". Accepted: none|low|medium|high.
ReasoningEffort string
// CanaryPercent routes a fraction of traffic through the new path for A/B before a
// full enable. 0 = off (scaffold; not yet consulted by the dispatch).
CanaryPercent int
// Gemini backend (the cheap/router/grounding model). The API key is required only
// when a layer that uses Gemini is enabled (validated in LoadConfig); the base URL
// and model always have defaults.
GeminiBaseURL string
GeminiAPIKey string
GeminiModel string
// SystemPromptPath is SYSTEM_PROMPT_PATH (default "prompts/system_ru.txt").
SystemPromptPath string
// SystemPrompt is not populated by LoadConfig.
// NOTE(review): presumably filled from SystemPromptPath by the caller — confirm.
SystemPrompt string
// StateDir is STATE_DIR with trailing slashes trimmed (default "/state").
StateDir string
// DatabaseURL is the libpq/pgx DSN of the bot's dedicated Postgres database
// (`vojo_ai`), e.g. postgres://vojo_ai:***@postgres:5432/vojo_ai?sslmode=disable.
// It holds only operational state (txn/event dedup, the daily spend ledger, the
// encrypted-warned set) — never message content. Required (AI_BOT_DATABASE_URL).
DatabaseURL string
// LogBodiesUsers is the allowlist of sender mxids whose model request/response
// BODIES are logged in full (truncated, at DEBUG) for debugging — everyone else gets
// routing + metadata logs only. Empty (default) = nobody, so message content never
// enters the logs unless an operator opts a specific user in AND runs at
// LOG_LEVEL=debug. Parsed from LOG_BODIES_USERS (comma-separated mxids).
LogBodiesUsers map[string]bool
}
// getenv returns the value of the environment variable key, or def when the
// variable is unset, empty, or contains only whitespace. A non-blank value is
// returned exactly as stored (it is NOT trimmed).
func getenv(key, def string) string {
	value := os.Getenv(key) // "" for an unset variable, which falls through to def
	if strings.TrimSpace(value) == "" {
		return def
	}
	return value
}
// getSecret resolves a secret with optional file indirection. When `<key>_FILE`
// names a path, the secret is the content of that file with surrounding
// whitespace trimmed — the standard Docker-secret / mounted-file convention, so
// tokens can live in a separate read-only mount instead of inline in the config
// env (and never enter `docker inspect` or `/proc/<pid>/environ`). Otherwise the
// plain `<key>` env value is used ("" when unset or blank).
//
// An unreadable file yields an error wrapping the underlying cause.
func getSecret(key string) (string, error) {
	secretPath := strings.TrimSpace(os.Getenv(key + "_FILE"))
	if secretPath == "" {
		// No file indirection configured: fall back to the inline env value.
		return getenv(key, ""), nil
	}
	content, readErr := os.ReadFile(secretPath)
	if readErr != nil {
		return "", fmt.Errorf("%s_FILE (%s): %w", key, secretPath, readErr)
	}
	return strings.TrimSpace(string(content)), nil
}
// getenvInt reads key as a base-10 integer. An unset or blank variable yields
// def; surrounding whitespace is ignored. A value that does not parse returns
// 0 and an error naming the variable and the offending raw value.
func getenvInt(key string, def int) (int, error) {
	raw := getenv(key, "")
	if raw == "" {
		return def, nil
	}
	parsed, convErr := strconv.Atoi(strings.TrimSpace(raw))
	if convErr == nil {
		return parsed, nil
	}
	return 0, fmt.Errorf("%s must be an integer, got %q", key, raw)
}
// getenvFloat reads key as a 64-bit float. An unset or blank variable yields
// def; surrounding whitespace is ignored. A value that does not parse returns
// 0 and an error naming the variable and the offending raw value.
func getenvFloat(key string, def float64) (float64, error) {
	raw := getenv(key, "")
	if raw == "" {
		return def, nil
	}
	parsed, convErr := strconv.ParseFloat(strings.TrimSpace(raw), 64)
	if convErr == nil {
		return parsed, nil
	}
	return 0, fmt.Errorf("%s must be a number, got %q", key, raw)
}
// getenvBool reads key as a boolean flag. The accepted spellings are
// 1/0, true/false, yes/no and on/off, compared case-insensitively after
// trimming whitespace. An unset or blank variable yields def — every cascade
// flag passes def=false, so a missing env keeps today's behaviour. Any other
// value returns false and an error naming the variable and the value.
func getenvBool(key string, def bool) (bool, error) {
	value := strings.TrimSpace(getenv(key, ""))
	if value == "" {
		return def, nil
	}
	switch lowered := strings.ToLower(value); lowered {
	case "true", "1", "yes", "on":
		return true, nil
	case "false", "0", "no", "off":
		return false, nil
	default:
		return false, fmt.Errorf("%s must be a boolean (true/false), got %q", key, value)
	}
}
// parseServerSet turns a comma-separated list into a set. Each entry is
// whitespace-trimmed; empty entries (stray or doubled commas, blank input) are
// dropped. The result is never nil, so callers may index it or take len()
// without a nil check.
func parseServerSet(raw string) map[string]bool {
	isComma := func(r rune) bool { return r == ',' }
	members := map[string]bool{}
	for _, field := range strings.FieldsFunc(raw, isComma) {
		if name := strings.TrimSpace(field); name != "" {
			members[name] = true
		}
	}
	return members
}
// LoadConfig parses and validates the process environment into a Config.
//
// Every problem found — a missing required value, an unparsable number or flag,
// a value out of range, inconsistent cascade wiring — is collected and reported
// in ONE error, so the operator fixes them in a single pass rather than
// discovering them one container-restart at a time. On error the returned
// *Config is nil.
func LoadConfig() (*Config, error) {
	cfg := &Config{
		HomeserverURL:    strings.TrimRight(getenv("HOMESERVER_URL", ""), "/"),
		BotMXID:          getenv("BOT_MXID", ""),
		BotDisplayName:   getenv("BOT_DISPLAY_NAME", "Vojo AI"),
		ASAddr:           getenv("AS_ADDR", ":8009"),
		RegistrationPath: getenv("REGISTRATION_PATH", ""),
		XAIBaseURL:       strings.TrimRight(getenv("XAI_BASE_URL", "https://api.x.ai/v1"), "/"),
		XAIModel:         getenv("XAI_MODEL", "grok-4.20-0309-non-reasoning"),
		SystemPromptPath: getenv("SYSTEM_PROMPT_PATH", "prompts/system_ru.txt"),
		StateDir:         strings.TrimRight(getenv("STATE_DIR", "/state"), "/"),
		DatabaseURL:      getenv("AI_BOT_DATABASE_URL", ""),
		AllowedServers:   parseServerSet(getenv("ALLOWED_SERVERS", "")),
		UnlimitedUsers:   parseServerSet(getenv("UNLIMITED_USERS", "")),
		LogBodiesUsers:   parseServerSet(getenv("LOG_BODIES_USERS", "")),
		// Cascade string-valued config (flags/ints/secrets parsed below).
		GrokReasoningEffort: strings.ToLower(strings.TrimSpace(getenv("GROK_REASONING_EFFORT", ""))),
		WebProvider:         getenv("WEB_PROVIDER", webProviderGrokWebSearch),
		WebGroundingTier:    getenv("WEB_GROUNDING_TIER", "free"),
		ReasoningTrigger:    getenv("REASONING_TRIGGER", "подумай глубже"),
		ReasoningModel:      getenv("REASONING_MODEL", "grok-4.3"),
		ReasoningEffort:     strings.ToLower(strings.TrimSpace(getenv("REASONING_EFFORT", "high"))),
		GeminiBaseURL:       strings.TrimRight(getenv("GEMINI_BASE_URL", "https://generativelanguage.googleapis.com/v1beta/openai"), "/"),
		GeminiModel:         getenv("GEMINI_MODEL", "gemini-2.5-flash-lite"),
	}

	var problems []string

	// note records a failure without aborting, so later fields are still checked.
	note := func(err error) {
		if err != nil {
			problems = append(problems, err.Error())
		}
	}
	// The *Var helpers parse one env var into dst. On a parse error the problem is
	// recorded and dst receives the zero value the getenv* helper returned.
	intVar := func(dst *int, key string, def int) {
		v, err := getenvInt(key, def)
		note(err)
		*dst = v
	}
	floatVar := func(dst *float64, key string, def float64) {
		v, err := getenvFloat(key, def)
		note(err)
		*dst = v
	}
	boolVar := func(dst *bool, key string, def bool) {
		v, err := getenvBool(key, def)
		note(err)
		*dst = v
	}
	// usdVar is floatVar for money (ceilings, quotas, per-token prices). It also
	// rejects negative values and NaN: strconv.ParseFloat accepts "NaN", and a NaN
	// price or ceiling makes every spend comparison false, silently disabling the
	// hard ceiling. A parse failure leaves 0, which passes, so it is reported once.
	usdVar := func(dst *float64, key string, def float64) {
		floatVar(dst, key, def)
		if v := *dst; !(v >= 0) { // written this way so NaN is caught too
			problems = append(problems, fmt.Sprintf("%s must be a non-negative number, got %g", key, v))
		}
	}

	// Secrets support *_FILE indirection so they can be separate mounts / Docker
	// secrets, decoupled from the non-secret config env.
	unreadable := make(map[string]bool) // secrets whose *_FILE could not be read
	for _, s := range []struct {
		key  string
		dest *string
	}{
		{"AS_TOKEN", &cfg.ASToken},
		{"HS_TOKEN", &cfg.HSToken},
		{"XAI_API_KEY", &cfg.XAIAPIKey},
		{"GEMINI_API_KEY", &cfg.GeminiAPIKey}, // optional; required only if a Gemini layer is on
	} {
		v, err := getSecret(s.key)
		if err != nil {
			problems = append(problems, err.Error())
			unreadable[s.key] = true
			continue
		}
		*s.dest = v
	}

	// A generated registration.yaml, when provided, is the source of truth for
	// the appservice tokens (mautrix idiom — the same file Synapse reads),
	// overriding any env AS_TOKEN/HS_TOKEN.
	if cfg.RegistrationPath != "" {
		reg, err := LoadRegistration(cfg.RegistrationPath)
		if err != nil {
			problems = append(problems, err.Error())
		} else {
			cfg.ASToken, cfg.HSToken = reg.ASToken, reg.HSToken
			if lp := localpartOf(cfg.BotMXID); lp != "" && reg.SenderLocalpart != "" && lp != reg.SenderLocalpart {
				problems = append(problems, fmt.Sprintf(
					"registration sender_localpart %q != BOT_MXID localpart %q", reg.SenderLocalpart, lp))
			}
		}
	}

	req := func(name, val string) {
		// A secret whose *_FILE was unreadable is already reported with the precise
		// cause; don't stack a misleading "is required" on top of it.
		if val == "" && !unreadable[name] {
			problems = append(problems, name+" is required")
		}
	}
	req("HOMESERVER_URL", cfg.HomeserverURL)
	req("BOT_MXID", cfg.BotMXID)
	req("AS_TOKEN", cfg.ASToken)
	req("HS_TOKEN", cfg.HSToken)
	req("XAI_API_KEY", cfg.XAIAPIKey)
	req("AI_BOT_DATABASE_URL", cfg.DatabaseURL)
	if len(cfg.AllowedServers) == 0 {
		problems = append(problems, "ALLOWED_SERVERS is required (comma-separated homeserver allowlist)")
	}

	floatVar(&cfg.XAITemp, "XAI_TEMPERATURE", 0.6)
	intVar(&cfg.MaxOutTok, "MAX_OUTPUT_TOKENS", 320)
	intVar(&cfg.MaxCtxEvent, "MAX_CONTEXT_EVENTS", 20)
	usdVar(&cfg.DailyUSDCeiling, "DAILY_USD_CEILING", 10)
	intVar(&cfg.PerUserDailyCap, "PER_USER_DAILY_CAP", 30)
	usdVar(&cfg.PerUserDailyUSD, "PER_USER_DAILY_USD", 0)
	usdVar(&cfg.PriceInputPerM, "XAI_PRICE_INPUT_PER_M", 1.25)
	usdVar(&cfg.PriceCachedPerM, "XAI_PRICE_CACHED_PER_M", 0.20)
	usdVar(&cfg.PriceOutputPerM, "XAI_PRICE_OUTPUT_PER_M", 2.50)

	// Per-model price table. The default (final-voice) model is priced from the
	// XAI_PRICE_* envs; the Gemini and reasoning models register their own entries
	// below. priceFor falls back to this default model for an unknown model.
	cfg.Prices = map[string]ModelPrice{
		cfg.XAIModel: {
			InputPerM:  cfg.PriceInputPerM,
			CachedPerM: cfg.PriceCachedPerM,
			OutputPerM: cfg.PriceOutputPerM,
		},
	}

	var budgetSec, retentionDays int
	intVar(&budgetSec, "REQUEST_BUDGET_SECONDS", 180)
	if budgetSec < 0 {
		problems = append(problems, fmt.Sprintf("REQUEST_BUDGET_SECONDS must not be negative, got %d", budgetSec))
	}
	cfg.RequestBudget = time.Duration(budgetSec) * time.Second

	boolVar(&cfg.GrokPromptCache, "GROK_PROMPT_CACHE", false)
	boolVar(&cfg.TelemetryEnabled, "TELEMETRY_ENABLED", false)
	boolVar(&cfg.TelemetryStoreText, "TELEMETRY_STORE_TEXT", false)
	intVar(&retentionDays, "TELEMETRY_RETENTION_DAYS", 30)
	if retentionDays < 0 {
		// 0 is the documented "no trimming" value; a negative age has no meaning.
		problems = append(problems, fmt.Sprintf("TELEMETRY_RETENTION_DAYS must not be negative, got %d", retentionDays))
	}
	cfg.TelemetryRetention = time.Duration(retentionDays) * 24 * time.Hour

	// Cascade flags — every one defaults false, so an unset env is today's bot.
	boolVar(&cfg.RouterEnabled, "ROUTER_ENABLED", false)
	boolVar(&cfg.RouterClassifierEnabled, "ROUTER_CLASSIFIER_ENABLED", false)
	boolVar(&cfg.TrivialOffloadEnabled, "TRIVIAL_OFFLOAD_ENABLED", false)
	boolVar(&cfg.WebEnabled, "WEB_ENABLED", false)
	boolVar(&cfg.ReasoningEnabled, "REASONING_ENABLED", false)

	intVar(&cfg.WebGroundingDailyCap, "WEB_GROUNDING_DAILY_CAP", 450)
	intVar(&cfg.CanaryPercent, "CANARY_PERCENT", 0)
	if cfg.CanaryPercent < 0 || cfg.CanaryPercent > 100 {
		problems = append(problems, fmt.Sprintf("CANARY_PERCENT must be between 0 and 100, got %d", cfg.CanaryPercent))
	}

	// Gemini pricing → the per-model table (defaults: gemini-2.5-flash-lite $0.10/$0.40
	// per 1M; cached priced as input, a conservative over-count). Lets the ceiling and
	// request_log price Gemini calls at Gemini rates.
	var gIn, gOut float64
	usdVar(&gIn, "GEMINI_PRICE_INPUT_PER_M", 0.10)
	usdVar(&gOut, "GEMINI_PRICE_OUTPUT_PER_M", 0.40)
	cfg.Prices[cfg.GeminiModel] = ModelPrice{InputPerM: gIn, CachedPerM: gIn, OutputPerM: gOut}

	// Reasoning model price (defaults to the final-voice grok rates — grok-4.3 ≈ 4.20),
	// so the reasoning route reserves/bills at its own price instead of falling back.
	var rIn, rOut float64
	usdVar(&rIn, "REASONING_PRICE_INPUT_PER_M", cfg.PriceInputPerM)
	usdVar(&rOut, "REASONING_PRICE_OUTPUT_PER_M", cfg.PriceOutputPerM)
	cfg.Prices[cfg.ReasoningModel] = ModelPrice{InputPerM: rIn, CachedPerM: cfg.PriceCachedPerM, OutputPerM: rOut}

	// Fail-fast on broken cascade wiring (§5/F-FUNC-9), at EVERY start (not just
	// check-config): a layer that needs Gemini but has no key would silently never
	// fire. Better to refuse to start than to quietly run degraded. The predicate
	// is the shared needsGemini method, so this check cannot drift from the code
	// that decides whether the Gemini client is built.
	if cfg.needsGemini() && cfg.GeminiAPIKey == "" {
		problems = append(problems, "GEMINI_API_KEY is required when TRIVIAL_OFFLOAD_ENABLED, ROUTER_CLASSIFIER_ENABLED, or WEB_ENABLED with gemini_grounding is set")
	}
	if cfg.RouterClassifierEnabled && !cfg.RouterEnabled {
		problems = append(problems, "ROUTER_CLASSIFIER_ENABLED requires ROUTER_ENABLED")
	}
	if cfg.WebEnabled && cfg.WebProvider != webProviderGrokWebSearch && cfg.WebProvider != webProviderGeminiGrounding {
		problems = append(problems, fmt.Sprintf("WEB_PROVIDER must be %q or %q, got %q",
			webProviderGrokWebSearch, webProviderGeminiGrounding, cfg.WebProvider))
	}
	if cfg.ReasoningEnabled && cfg.ReasoningModel == "" {
		problems = append(problems, "REASONING_MODEL is required when REASONING_ENABLED is set")
	}
	switch cfg.GrokReasoningEffort {
	case "", "none", "low", "medium", "high":
	default:
		problems = append(problems, fmt.Sprintf(
			"GROK_REASONING_EFFORT must be one of none/low/medium/high (or empty), got %q", cfg.GrokReasoningEffort))
	}
	switch cfg.ReasoningEffort {
	case "none", "low", "medium", "high":
	default:
		problems = append(problems, fmt.Sprintf(
			"REASONING_EFFORT must be one of none/low/medium/high, got %q", cfg.ReasoningEffort))
	}

	if len(problems) > 0 {
		return nil, fmt.Errorf("invalid configuration:\n - %s", strings.Join(problems, "\n - "))
	}
	return cfg, nil
}
// needsGemini reports whether any enabled layer requires the Gemini backend:
// the trivial-offload route, the Layer-1 router classifier, or the web route
// when its provider is Gemini-native grounding. It is the single predicate
// behind both the fail-fast API-key check and the decision to build the Gemini
// client at all.
func (c *Config) needsGemini() bool {
	if c.TrivialOffloadEnabled || c.RouterClassifierEnabled {
		return true
	}
	// The web route only needs Gemini when it is on AND grounded through Gemini.
	return c.WebEnabled && c.WebProvider == webProviderGeminiGrounding
}
// Summary returns a human-readable, SECRET-REDACTED dump for the startup log.
//
// Tokens, API keys and the database DSN are never printed: each appears only as
// "set(N chars)" or "(unset)". Set-valued options (ALLOWED_SERVERS,
// UNLIMITED_USERS, LOG_BODIES_USERS) are listed in sorted order, so the output
// is identical across runs and can be diffed between deployments despite Go's
// randomized map iteration.
func (c *Config) Summary() string {
	// sortedKeys flattens a set into a deterministic, sorted slice.
	sortedKeys := func(set map[string]bool) []string {
		keys := make([]string, 0, len(set))
		for k := range set {
			keys = append(keys, k)
		}
		sort.Strings(keys)
		return keys
	}
	// redact reveals only whether a secret is present and how long it is.
	redact := func(s string) string {
		if s == "" {
			return "(unset)"
		}
		return "set(" + strconv.Itoa(len(s)) + " chars)"
	}
	// orElse substitutes a placeholder for an empty optional setting.
	orElse := func(v, placeholder string) string {
		if v == "" {
			return placeholder
		}
		return v
	}

	servers := sortedKeys(c.AllowedServers)
	unlimited := sortedKeys(c.UnlimitedUsers)
	bodyUsers := sortedKeys(c.LogBodiesUsers)

	bodies := "(none — bodies never logged)"
	if len(bodyUsers) != 0 {
		bodies = strings.Join(bodyUsers, ",")
	}

	return strings.Join([]string{
		"ai-bot config:",
		" HOMESERVER_URL = " + c.HomeserverURL,
		" BOT_MXID = " + c.BotMXID,
		" BOT_DISPLAY_NAME = " + c.BotDisplayName,
		" AS_ADDR = " + c.ASAddr,
		" REGISTRATION_PATH = " + orElse(c.RegistrationPath, "(unset — using env tokens)"),
		" AS_TOKEN = " + redact(c.ASToken),
		" HS_TOKEN = " + redact(c.HSToken),
		" XAI_BASE_URL = " + c.XAIBaseURL,
		" XAI_MODEL = " + c.XAIModel,
		" GROK_REASONING_EFFORT = " + orElse(c.GrokReasoningEffort, "(unset — not sent; provider default)"),
		" XAI_API_KEY = " + redact(c.XAIAPIKey),
		fmt.Sprintf(" XAI_TEMPERATURE = %g", c.XAITemp),
		fmt.Sprintf(" MAX_OUTPUT_TOKENS = %d", c.MaxOutTok),
		fmt.Sprintf(" MAX_CONTEXT_EVENTS = %d", c.MaxCtxEvent),
		" ALLOWED_SERVERS = " + strings.Join(servers, ","),
		fmt.Sprintf(" DAILY_USD_CEILING = %g", c.DailyUSDCeiling),
		fmt.Sprintf(" PER_USER_DAILY_CAP = %d", c.PerUserDailyCap),
		// Previously omitted: the per-user $ quota is a spend control an operator
		// needs to see next to the other limits.
		fmt.Sprintf(" PER_USER_DAILY_USD = %g", c.PerUserDailyUSD),
		" UNLIMITED_USERS = " + strings.Join(unlimited, ","),
		fmt.Sprintf(" PRICES /1M (in/cached/out) = %g / %g / %g",
			c.PriceInputPerM, c.PriceCachedPerM, c.PriceOutputPerM),
		" SYSTEM_PROMPT_PATH = " + c.SystemPromptPath,
		" STATE_DIR = " + c.StateDir,
		" AI_BOT_DATABASE_URL= " + redact(c.DatabaseURL),
		fmt.Sprintf(" REQUEST_BUDGET = %s", c.RequestBudget),
		fmt.Sprintf(" GROK_PROMPT_CACHE = %t", c.GrokPromptCache),
		fmt.Sprintf(" TELEMETRY_ENABLED = %t (store_text=%t, retention=%s)",
			c.TelemetryEnabled, c.TelemetryStoreText, c.TelemetryRetention),
		fmt.Sprintf(" LOG_BODIES_USERS = %s (needs LOG_LEVEL=debug)", bodies),
		fmt.Sprintf(" CASCADE: router=%t classifier=%t trivial=%t web=%t(%s, cap=%d) reason=%t(%s)",
			c.RouterEnabled, c.RouterClassifierEnabled, c.TrivialOffloadEnabled,
			c.WebEnabled, c.WebProvider, c.WebGroundingDailyCap, c.ReasoningEnabled, c.ReasoningEffort),
		// Previously omitted: which model the reasoning route actually calls.
		" REASONING_MODEL = " + c.ReasoningModel,
		" GEMINI_MODEL = " + c.GeminiModel,
		" GEMINI_API_KEY = " + redact(c.GeminiAPIKey),
	}, "\n")
}