591 lines
26 KiB
Go
591 lines
26 KiB
Go
package main
|
||
|
||
import (
	"fmt"
	"math"
	"os"
	"sort"
	"strconv"
	"strings"
	"time"
)
|
||
|
||
// Config is the fully-resolved runtime configuration, parsed once from the
|
||
// environment at startup. Secrets (AS_TOKEN, HS_TOKEN, XAI_API_KEY) live ONLY
|
||
// here — never in config.json or any client bundle.
|
||
type Config struct {
|
||
HomeserverURL string
|
||
BotMXID string
|
||
BotDisplayName string
|
||
|
||
// Appservice auth, from the Synapse registration.yaml. `as_token`
|
||
// authenticates the bot TO the homeserver (used as the access token, with
|
||
// ?user_id=BOT_MXID identity assertion); `hs_token` authenticates the
|
||
// homeserver's transaction pushes TO us. Neither expires — no rotation.
|
||
ASToken string
|
||
HSToken string
|
||
// Listen address for the transaction-push HTTP server (the `url` in the
|
||
// registration points here, e.g. http://ai-bot:8009).
|
||
ASAddr string
|
||
// When set, as_token/hs_token are read from this generated registration.yaml
|
||
// (the mautrix idiom — one file shared with Synapse), overriding the env
|
||
// AS_TOKEN/HS_TOKEN. Empty → use the env tokens.
|
||
RegistrationPath string
|
||
|
||
XAIAPIKey string
|
||
XAIBaseURL string
|
||
XAIModel string
|
||
XAITemp float64
|
||
MaxOutTok int
|
||
MaxCtxEvent int
|
||
|
||
// GrokReasoningEffort is the reasoning_effort sent on the normal Grok voice calls
|
||
// (grok_direct + web synthesis). Empty = don't send it (the default — required for
|
||
// grok-4.20-non-reasoning, which rejects the param). On a unified model like
|
||
// grok-4.3 the API otherwise defaults to "low" (it thinks on every reply); set this
|
||
// to "none" to keep the default voice fast/cheap. The reason_then_grok route ignores
|
||
// this and always uses "high". Accepted: "" | none | low | medium | high.
|
||
GrokReasoningEffort string
|
||
|
||
// Allowlist of homeservers whose users may pull the bot into a room. Gates
|
||
// the *inviter* (F11). Comma-separated env, stored as a set.
|
||
AllowedServers map[string]bool
|
||
|
||
DailyUSDCeiling float64
|
||
PerUserDailyCap int
|
||
// PerUserDailyUSD is an optional per-user daily $ quota (0 = off) on top of the
|
||
// request count cap, so one user on expensive routes can't drain the shared global
|
||
// ceiling and deny everyone else. Checked against the user's own committed+reserved
|
||
// spend in Reserve.
|
||
PerUserDailyUSD float64
|
||
|
||
// mxids exempt from PER_USER_DAILY_CAP (e.g. the owner/admins testing). Still
|
||
// subject to the global DAILY_USD_CEILING, so the wallet stays protected.
|
||
UnlimitedUsers map[string]bool
|
||
|
||
// USD-per-1M-token prices for the default (final-voice) model, applied to the
|
||
// API-returned token usage so the hard ceiling tracks real usage even if the
|
||
// model/price changes. Kept as the back-compat XAI_PRICE_* source; folded into
|
||
// Prices below.
|
||
PriceInputPerM float64
|
||
PriceCachedPerM float64
|
||
PriceOutputPerM float64
|
||
|
||
// Prices is the per-model price table (LiteLLM pattern) read by priceFor(model),
|
||
// so a call books at the price of the model that actually served it. Built in
|
||
// LoadConfig; the default model's entry comes from the XAI_PRICE_* envs, and a
|
||
// second model (Gemini) adds its own entry when that layer lands.
|
||
Prices map[string]ModelPrice
|
||
|
||
// RequestBudget bounds one whole request (all model calls share it), so a slow or
|
||
// retried call — or a multi-stage cascade — can't accrete minutes. The default
|
||
// matches the previous effective ceiling for a single grok_direct call.
|
||
RequestBudget time.Duration
|
||
|
||
// GrokPromptCache, when true, sends the x-grok-conv-id routing header to raise the
|
||
// prompt-cache hit rate (Grok caches automatically; the header only pins routing).
|
||
GrokPromptCache bool
|
||
|
||
// TelemetryEnabled writes the request_log analytics row for every request. Default
|
||
// off so the cascade-off path adds no extra write; turned on to measure the base.
|
||
// Its write is isolated — a failure logs a WARN, never drops the answer.
|
||
TelemetryEnabled bool
|
||
// TelemetryStoreText additionally stores the query text in request_log (for offline
|
||
// eval). Default off — only metadata is kept.
|
||
TelemetryStoreText bool
|
||
// TelemetryRetention trims request_log rows older than this (time-based, since the
|
||
// analytics are a time series). 0 disables trimming.
|
||
TelemetryRetention time.Duration
|
||
|
||
// --- Cascade (Phase 2-4). EVERY flag defaults OFF, so an unset environment is
|
||
// exactly today's bot: one grok_direct call. Any layer off or failing degrades to
|
||
// grok_direct (§8.2). None of these is enabled in prod until the offline-eval gate
|
||
// (§9) passes. ---
|
||
|
||
// RouterEnabled turns on the Layer-0 heuristic router; off → everything is
|
||
// grok_direct. RouterClassifierEnabled additionally consults the Gemini Layer-1
|
||
// classifier on cases the heuristic left as grok_direct.
|
||
RouterEnabled bool
|
||
RouterClassifierEnabled bool
|
||
// TrivialOffloadEnabled lets the trivial route answer with Gemini; off → trivial
|
||
// still goes to Grok.
|
||
TrivialOffloadEnabled bool
|
||
// WebEnabled turns on the web_then_grok route. WebProvider selects the source:
|
||
// grok_web_search (default, the xAI web_search tool on the Responses API) or
|
||
// gemini_grounding (native v1beta google_search — current models incl. 2.5; the
|
||
// F-EXT-3 caveat is OpenAI-compat-only, not a model-version limit).
|
||
WebEnabled bool
|
||
WebProvider string
|
||
// WebParanoid biases the router toward grounding: beyond freshnessRe, it unlocks the
|
||
// classifier-driven web arms (needs_web≥0.55, entity_obscure, time_sensitive,
|
||
// lookupHint && verifiable). Off (default) → web routing is freshness-only (today's
|
||
// behaviour), so enabling the classifier is web-routing-neutral and this is the single
|
||
// switch that activates epistemic grounding (§3/§15). Requires gemini_grounding.
|
||
WebParanoid bool
|
||
// WebGroundingDailyCap caps grounded prompts/day (durable counter) before falling
|
||
// back, guarding the $/1k grounding overage.
|
||
WebGroundingDailyCap int
|
||
// WebGroundingTier is a documentation-only label of which Gemini plan the operator is
|
||
// on; it is NOT read by any logic. The money knob is GeminiGroundingPerPrompt
|
||
// (GEMINI_GROUNDING_PER_PROMPT_USD) — that is what the ledger/ceiling actually use.
|
||
WebGroundingTier string
|
||
// GeminiGroundingPerPrompt is the per-grounded-prompt FEE booked into the ledger so the
|
||
// daily ceiling sees it (§7 SG1). Default 0.035 (the paid-tier $35/1k overage); set 0
|
||
// ONLY when genuinely on the free grounded-prompt tier. Booked even on the error return.
|
||
GeminiGroundingPerPrompt float64
|
||
// Reasoning route: a manual "think harder" trigger. ReasoningModel must be a
|
||
// reasoning-capable model (the default grok-4.20-non-reasoning is NOT — see the
|
||
// docs.x.ai finding); set REASONING_MODEL to e.g. grok-4.3 to use it.
|
||
ReasoningEnabled bool
|
||
ReasoningTrigger string
|
||
ReasoningModel string
|
||
// ReasoningEffort is the reasoning_effort the reason_then_grok route sends on the
|
||
// manual "think harder" trigger. Default "high". Accepted: none|low|medium|high.
|
||
ReasoningEffort string
|
||
// CanaryPercent routes a fraction of traffic through the new path for A/B before a
|
||
// full enable. 0 = off (scaffold; not yet consulted by the dispatch).
|
||
CanaryPercent int
|
||
|
||
// Gemini backend (the cheap/router/grounding model). Required only when a layer
|
||
// that uses it is enabled (validated below).
|
||
GeminiBaseURL string
|
||
GeminiAPIKey string
|
||
GeminiModel string
|
||
|
||
SystemPromptPath string
|
||
SystemPrompt string
|
||
StateDir string
|
||
|
||
// Project-knowledge route (project_then_grok). ProjectKB is the curated Vojo product
|
||
// knowledge base injected behind the about_project gate so Grok answers product questions
|
||
// from facts instead of empty parametric memory. It is OPERATOR DATA loaded once at
|
||
// startup from ProjectKBPath (like SystemPrompt — no hot-reload), never Go constants. Off
|
||
// (default) → the route is unreachable and the bot is byte-identical to today. Requires
|
||
// ROUTER_CLASSIFIER_ENABLED (the about_project gate is a classifier signal).
|
||
ProjectKBEnabled bool
|
||
ProjectKBPath string
|
||
ProjectKB string
|
||
|
||
// DatabaseURL is the libpq/pgx DSN of the bot's dedicated Postgres database
|
||
// (`vojo_ai`), e.g. postgres://vojo_ai:***@postgres:5432/vojo_ai?sslmode=disable.
|
||
// It holds only operational state (txn/event dedup, the daily spend ledger, the
|
||
// encrypted-warned set) — never message content. Required.
|
||
DatabaseURL string
|
||
|
||
// LogBodiesUsers is the allowlist of sender mxids whose model request/response
|
||
// BODIES are logged in full (truncated, at DEBUG) for debugging — everyone else gets
|
||
// routing + metadata logs only. Empty (default) = nobody, so message content never
|
||
// enters the logs unless an operator opts a specific user in AND runs at
|
||
// LOG_LEVEL=debug. Parsed from LOG_BODIES_USERS (comma-separated mxids).
|
||
LogBodiesUsers map[string]bool
|
||
}
|
||
|
||
// getenv returns the value of the environment variable key, or def when the
// variable is unset or holds only whitespace. A non-blank value is returned
// exactly as stored — surrounding whitespace is NOT stripped — so callers that
// need a canonical form must trim it themselves.
func getenv(key, def string) string {
	v, ok := os.LookupEnv(key)
	if !ok || strings.TrimSpace(v) == "" {
		return def
	}
	return v
}
|
||
|
||
// getSecret resolves a secret with optional file indirection: if `<key>_FILE`
|
||
// is set, the value is read from that file (trailing whitespace trimmed) — the
|
||
// standard Docker-secret / mounted-file convention, so the tokens can live in a
|
||
// separate read-only mount instead of inline in the config env (and never enter
|
||
// `docker inspect`/`/proc/<pid>/environ`). Falls back to the plain `<key>` env.
|
||
func getSecret(key string) (string, error) {
|
||
if path := strings.TrimSpace(os.Getenv(key + "_FILE")); path != "" {
|
||
b, err := os.ReadFile(path)
|
||
if err != nil {
|
||
return "", fmt.Errorf("%s_FILE (%s): %w", key, path, err)
|
||
}
|
||
return strings.TrimSpace(string(b)), nil
|
||
}
|
||
return getenv(key, ""), nil
|
||
}
|
||
|
||
func getenvInt(key string, def int) (int, error) {
|
||
raw := getenv(key, "")
|
||
if raw == "" {
|
||
return def, nil
|
||
}
|
||
n, err := strconv.Atoi(strings.TrimSpace(raw))
|
||
if err != nil {
|
||
return 0, fmt.Errorf("%s must be an integer, got %q", key, raw)
|
||
}
|
||
return n, nil
|
||
}
|
||
|
||
func getenvFloat(key string, def float64) (float64, error) {
|
||
raw := getenv(key, "")
|
||
if raw == "" {
|
||
return def, nil
|
||
}
|
||
f, err := strconv.ParseFloat(strings.TrimSpace(raw), 64)
|
||
if err != nil {
|
||
return 0, fmt.Errorf("%s must be a number, got %q", key, raw)
|
||
}
|
||
return f, nil
|
||
}
|
||
|
||
// getenvBool parses a boolean flag. Accepts the usual 1/0/true/false/yes/no/on/off
|
||
// (case-insensitive); empty → default. Every cascade flag defaults false, so an unset
|
||
// or blank env keeps today's behaviour.
|
||
func getenvBool(key string, def bool) (bool, error) {
|
||
raw := strings.TrimSpace(getenv(key, ""))
|
||
if raw == "" {
|
||
return def, nil
|
||
}
|
||
switch strings.ToLower(raw) {
|
||
case "1", "true", "yes", "on":
|
||
return true, nil
|
||
case "0", "false", "no", "off":
|
||
return false, nil
|
||
}
|
||
return false, fmt.Errorf("%s must be a boolean (true/false), got %q", key, raw)
|
||
}
|
||
|
||
// parseServerSet turns a comma-separated list into a set. Each entry is
// whitespace-trimmed and empty entries are dropped, so "a, ,b," yields {a, b}.
// The result is always a non-nil map (empty for blank input). Despite the name
// it is also used for mxid lists (UNLIMITED_USERS, LOG_BODIES_USERS).
func parseServerSet(raw string) map[string]bool {
	set := make(map[string]bool)
	isComma := func(r rune) bool { return r == ',' }
	for _, field := range strings.FieldsFunc(raw, isComma) {
		if name := strings.TrimSpace(field); name != "" {
			set[name] = true
		}
	}
	return set
}
|
||
|
||
// LoadConfig parses and validates the environment. It returns an error listing
|
||
// every missing/invalid required field at once so the operator fixes them in a
|
||
// single pass rather than discovering them one container-restart at a time.
|
||
func LoadConfig() (*Config, error) {
|
||
cfg := &Config{
|
||
HomeserverURL: strings.TrimRight(getenv("HOMESERVER_URL", ""), "/"),
|
||
BotMXID: getenv("BOT_MXID", ""),
|
||
BotDisplayName: getenv("BOT_DISPLAY_NAME", "Vojo AI"),
|
||
ASAddr: getenv("AS_ADDR", ":8009"),
|
||
RegistrationPath: getenv("REGISTRATION_PATH", ""),
|
||
XAIBaseURL: strings.TrimRight(getenv("XAI_BASE_URL", "https://api.x.ai/v1"), "/"),
|
||
XAIModel: getenv("XAI_MODEL", "grok-4.20-0309-non-reasoning"),
|
||
SystemPromptPath: getenv("SYSTEM_PROMPT_PATH", "prompts/system_prompt.txt"),
|
||
// Defaults to the KB that ships in the image (Dockerfile bakes prompts/), like
|
||
// SYSTEM_PROMPT_PATH — so enabling the route needs ONLY PROJECT_KB_ENABLED=true.
|
||
ProjectKBPath: getenv("PROJECT_KB_PATH", "prompts/vojo_kb.txt"),
|
||
StateDir: strings.TrimRight(getenv("STATE_DIR", "/state"), "/"),
|
||
DatabaseURL: getenv("AI_BOT_DATABASE_URL", ""),
|
||
AllowedServers: parseServerSet(getenv("ALLOWED_SERVERS", "")),
|
||
UnlimitedUsers: parseServerSet(getenv("UNLIMITED_USERS", "")),
|
||
LogBodiesUsers: parseServerSet(getenv("LOG_BODIES_USERS", "")),
|
||
|
||
// Cascade string-valued config (flags/ints/secrets parsed below).
|
||
GrokReasoningEffort: strings.ToLower(strings.TrimSpace(getenv("GROK_REASONING_EFFORT", ""))),
|
||
WebProvider: getenv("WEB_PROVIDER", webProviderGrokWebSearch),
|
||
WebGroundingTier: getenv("WEB_GROUNDING_TIER", "free"),
|
||
ReasoningTrigger: getenv("REASONING_TRIGGER", "подумай глубже"),
|
||
ReasoningModel: getenv("REASONING_MODEL", "grok-4.3"),
|
||
ReasoningEffort: strings.ToLower(strings.TrimSpace(getenv("REASONING_EFFORT", "high"))),
|
||
GeminiBaseURL: strings.TrimRight(getenv("GEMINI_BASE_URL", "https://generativelanguage.googleapis.com/v1beta/openai"), "/"),
|
||
GeminiModel: getenv("GEMINI_MODEL", "gemini-2.5-flash-lite"),
|
||
}
|
||
|
||
var problems []string
|
||
|
||
// Secrets support *_FILE indirection so they can be separate mounts / Docker
|
||
// secrets, decoupled from the non-secret config env.
|
||
for _, s := range []struct {
|
||
key string
|
||
dest *string
|
||
}{
|
||
{"AS_TOKEN", &cfg.ASToken},
|
||
{"HS_TOKEN", &cfg.HSToken},
|
||
{"XAI_API_KEY", &cfg.XAIAPIKey},
|
||
{"GEMINI_API_KEY", &cfg.GeminiAPIKey}, // optional; required only if a Gemini layer is on
|
||
} {
|
||
v, err := getSecret(s.key)
|
||
if err != nil {
|
||
problems = append(problems, err.Error())
|
||
}
|
||
*s.dest = v
|
||
}
|
||
|
||
// A generated registration.yaml, when provided, is the source of truth for
|
||
// the appservice tokens (mautrix idiom — the same file Synapse reads),
|
||
// overriding any env AS_TOKEN/HS_TOKEN.
|
||
if cfg.RegistrationPath != "" {
|
||
reg, err := LoadRegistration(cfg.RegistrationPath)
|
||
if err != nil {
|
||
problems = append(problems, err.Error())
|
||
} else {
|
||
cfg.ASToken, cfg.HSToken = reg.ASToken, reg.HSToken
|
||
if lp := localpartOf(cfg.BotMXID); lp != "" && reg.SenderLocalpart != "" && lp != reg.SenderLocalpart {
|
||
problems = append(problems, fmt.Sprintf(
|
||
"registration sender_localpart %q != BOT_MXID localpart %q", reg.SenderLocalpart, lp))
|
||
}
|
||
}
|
||
}
|
||
|
||
req := func(name, val string) {
|
||
if val == "" {
|
||
problems = append(problems, name+" is required")
|
||
}
|
||
}
|
||
req("HOMESERVER_URL", cfg.HomeserverURL)
|
||
req("BOT_MXID", cfg.BotMXID)
|
||
req("AS_TOKEN", cfg.ASToken)
|
||
req("HS_TOKEN", cfg.HSToken)
|
||
req("XAI_API_KEY", cfg.XAIAPIKey)
|
||
req("AI_BOT_DATABASE_URL", cfg.DatabaseURL)
|
||
if len(cfg.AllowedServers) == 0 {
|
||
problems = append(problems, "ALLOWED_SERVERS is required (comma-separated homeserver allowlist)")
|
||
}
|
||
|
||
var err error
|
||
if cfg.XAITemp, err = getenvFloat("XAI_TEMPERATURE", 0.6); err != nil {
|
||
problems = append(problems, err.Error())
|
||
}
|
||
if cfg.MaxOutTok, err = getenvInt("MAX_OUTPUT_TOKENS", 320); err != nil {
|
||
problems = append(problems, err.Error())
|
||
}
|
||
if cfg.MaxCtxEvent, err = getenvInt("MAX_CONTEXT_EVENTS", 20); err != nil {
|
||
problems = append(problems, err.Error())
|
||
}
|
||
if cfg.DailyUSDCeiling, err = getenvFloat("DAILY_USD_CEILING", 10); err != nil {
|
||
problems = append(problems, err.Error())
|
||
}
|
||
if cfg.PerUserDailyCap, err = getenvInt("PER_USER_DAILY_CAP", 30); err != nil {
|
||
problems = append(problems, err.Error())
|
||
}
|
||
if cfg.PerUserDailyUSD, err = getenvFloat("PER_USER_DAILY_USD", 0); err != nil {
|
||
problems = append(problems, err.Error())
|
||
}
|
||
if cfg.PriceInputPerM, err = getenvFloat("XAI_PRICE_INPUT_PER_M", 1.25); err != nil {
|
||
problems = append(problems, err.Error())
|
||
}
|
||
if cfg.PriceCachedPerM, err = getenvFloat("XAI_PRICE_CACHED_PER_M", 0.20); err != nil {
|
||
problems = append(problems, err.Error())
|
||
}
|
||
if cfg.PriceOutputPerM, err = getenvFloat("XAI_PRICE_OUTPUT_PER_M", 2.50); err != nil {
|
||
problems = append(problems, err.Error())
|
||
}
|
||
// Per-model price table. The default (final-voice) model is priced from the
|
||
// XAI_PRICE_* envs; additional models register their own entry as their layer
|
||
// lands. priceFor falls back to this default model for an unknown model.
|
||
cfg.Prices = map[string]ModelPrice{
|
||
cfg.XAIModel: {
|
||
InputPerM: cfg.PriceInputPerM,
|
||
CachedPerM: cfg.PriceCachedPerM,
|
||
OutputPerM: cfg.PriceOutputPerM,
|
||
},
|
||
}
|
||
|
||
var budgetSec, retentionDays int
|
||
if budgetSec, err = getenvInt("REQUEST_BUDGET_SECONDS", 180); err != nil {
|
||
problems = append(problems, err.Error())
|
||
}
|
||
cfg.RequestBudget = time.Duration(budgetSec) * time.Second
|
||
if cfg.GrokPromptCache, err = getenvBool("GROK_PROMPT_CACHE", false); err != nil {
|
||
problems = append(problems, err.Error())
|
||
}
|
||
if cfg.TelemetryEnabled, err = getenvBool("TELEMETRY_ENABLED", false); err != nil {
|
||
problems = append(problems, err.Error())
|
||
}
|
||
if cfg.TelemetryStoreText, err = getenvBool("TELEMETRY_STORE_TEXT", false); err != nil {
|
||
problems = append(problems, err.Error())
|
||
}
|
||
if retentionDays, err = getenvInt("TELEMETRY_RETENTION_DAYS", 30); err != nil {
|
||
problems = append(problems, err.Error())
|
||
}
|
||
cfg.TelemetryRetention = time.Duration(retentionDays) * 24 * time.Hour
|
||
|
||
// Cascade flags — every one defaults false, so an unset env is today's bot.
|
||
for _, f := range []struct {
|
||
key string
|
||
dest *bool
|
||
}{
|
||
{"ROUTER_ENABLED", &cfg.RouterEnabled},
|
||
{"ROUTER_CLASSIFIER_ENABLED", &cfg.RouterClassifierEnabled},
|
||
{"TRIVIAL_OFFLOAD_ENABLED", &cfg.TrivialOffloadEnabled},
|
||
{"WEB_ENABLED", &cfg.WebEnabled},
|
||
{"WEB_PARANOID", &cfg.WebParanoid},
|
||
{"REASONING_ENABLED", &cfg.ReasoningEnabled},
|
||
{"PROJECT_KB_ENABLED", &cfg.ProjectKBEnabled},
|
||
} {
|
||
if *f.dest, err = getenvBool(f.key, false); err != nil {
|
||
problems = append(problems, err.Error())
|
||
}
|
||
}
|
||
if cfg.WebGroundingDailyCap, err = getenvInt("WEB_GROUNDING_DAILY_CAP", 450); err != nil {
|
||
problems = append(problems, err.Error())
|
||
}
|
||
// The per-grounded-prompt fee booked into the ledger (§7 SG1). Default 0.035 (paid
|
||
// tier). An operator on the free tier sets 0 deliberately.
|
||
if cfg.GeminiGroundingPerPrompt, err = getenvFloat("GEMINI_GROUNDING_PER_PROMPT_USD", 0.035); err != nil {
|
||
problems = append(problems, err.Error())
|
||
}
|
||
if cfg.CanaryPercent, err = getenvInt("CANARY_PERCENT", 0); err != nil {
|
||
problems = append(problems, err.Error())
|
||
}
|
||
|
||
// Gemini pricing → the per-model table (defaults: gemini-2.5-flash-lite $0.10/$0.40
|
||
// per 1M; cached priced as input, a conservative over-count). Lets the ceiling and
|
||
// request_log price Gemini calls at Gemini rates.
|
||
var gIn, gOut float64
|
||
if gIn, err = getenvFloat("GEMINI_PRICE_INPUT_PER_M", 0.10); err != nil {
|
||
problems = append(problems, err.Error())
|
||
}
|
||
if gOut, err = getenvFloat("GEMINI_PRICE_OUTPUT_PER_M", 0.40); err != nil {
|
||
problems = append(problems, err.Error())
|
||
}
|
||
cfg.Prices[cfg.GeminiModel] = ModelPrice{InputPerM: gIn, CachedPerM: gIn, OutputPerM: gOut}
|
||
// Reasoning model price (defaults to the final-voice grok rates — grok-4.3 ≈ 4.20),
|
||
// so the reasoning route reserves/bills at its own price instead of falling back.
|
||
var rIn, rOut float64
|
||
if rIn, err = getenvFloat("REASONING_PRICE_INPUT_PER_M", cfg.PriceInputPerM); err != nil {
|
||
problems = append(problems, err.Error())
|
||
}
|
||
if rOut, err = getenvFloat("REASONING_PRICE_OUTPUT_PER_M", cfg.PriceOutputPerM); err != nil {
|
||
problems = append(problems, err.Error())
|
||
}
|
||
cfg.Prices[cfg.ReasoningModel] = ModelPrice{InputPerM: rIn, CachedPerM: cfg.PriceCachedPerM, OutputPerM: rOut}
|
||
|
||
// Fail-fast on broken cascade wiring (§5/F-FUNC-9), at EVERY start (not just
|
||
// check-config): a layer that needs Gemini but has no key would silently never
|
||
// fire. Better to refuse to start than to quietly run degraded.
|
||
needsGemini := cfg.TrivialOffloadEnabled || cfg.RouterClassifierEnabled ||
|
||
(cfg.WebEnabled && cfg.WebProvider == webProviderGeminiGrounding)
|
||
if needsGemini && cfg.GeminiAPIKey == "" {
|
||
problems = append(problems, "GEMINI_API_KEY is required when TRIVIAL_OFFLOAD_ENABLED, ROUTER_CLASSIFIER_ENABLED, or WEB_ENABLED with gemini_grounding is set")
|
||
}
|
||
if cfg.RouterClassifierEnabled && !cfg.RouterEnabled {
|
||
problems = append(problems, "ROUTER_CLASSIFIER_ENABLED requires ROUTER_ENABLED")
|
||
}
|
||
if cfg.WebEnabled && cfg.WebProvider != webProviderGrokWebSearch && cfg.WebProvider != webProviderGeminiGrounding {
|
||
problems = append(problems, fmt.Sprintf("WEB_PROVIDER must be %q or %q, got %q",
|
||
webProviderGrokWebSearch, webProviderGeminiGrounding, cfg.WebProvider))
|
||
}
|
||
// §7 SG3: paranoid web requires gemini_grounding. grok_web_search has no daily cap and
|
||
// costs 10–18× per request — letting the paranoid bias drive it would only be backstopped
|
||
// by the $10 ceiling. Refuse to boot (consistent with the other fail-fast blocks).
|
||
if cfg.WebEnabled && cfg.WebParanoid && cfg.WebProvider == webProviderGrokWebSearch {
|
||
problems = append(problems, "WEB_PARANOID requires WEB_PROVIDER=gemini_grounding (grok_web_search has no daily cap and is far costlier)")
|
||
}
|
||
// §7 SG5: a non-positive grounding cap silently disables grounding (IncrGroundingIfUnder
|
||
// denies everything), so every query would degrade — refuse it for gemini_grounding.
|
||
if cfg.WebEnabled && cfg.WebProvider == webProviderGeminiGrounding && cfg.WebGroundingDailyCap <= 0 {
|
||
problems = append(problems, "WEB_GROUNDING_DAILY_CAP must be > 0 for gemini_grounding (a non-positive cap silently disables grounding)")
|
||
}
|
||
if cfg.ReasoningEnabled && cfg.ReasoningModel == "" {
|
||
problems = append(problems, "REASONING_MODEL is required when REASONING_ENABLED is set")
|
||
}
|
||
// Project-KB route: the about_project gate is a classifier signal, so the classifier (and
|
||
// transitively the router + Gemini key) must be on, else the route can never fire.
|
||
// PROJECT_KB_PATH always has a value (defaults to the bundled KB); main.go does the file
|
||
// read + non-empty + size check (file I/O lives there, like SYSTEM_PROMPT_PATH).
|
||
if cfg.ProjectKBEnabled && !cfg.RouterClassifierEnabled {
|
||
problems = append(problems, "PROJECT_KB_ENABLED requires ROUTER_CLASSIFIER_ENABLED (the about_project gate is a classifier signal)")
|
||
}
|
||
switch cfg.GrokReasoningEffort {
|
||
case "", "none", "low", "medium", "high":
|
||
default:
|
||
problems = append(problems, fmt.Sprintf(
|
||
"GROK_REASONING_EFFORT must be one of none/low/medium/high (or empty), got %q", cfg.GrokReasoningEffort))
|
||
}
|
||
switch cfg.ReasoningEffort {
|
||
case "none", "low", "medium", "high":
|
||
default:
|
||
problems = append(problems, fmt.Sprintf(
|
||
"REASONING_EFFORT must be one of none/low/medium/high, got %q", cfg.ReasoningEffort))
|
||
}
|
||
|
||
if len(problems) > 0 {
|
||
return nil, fmt.Errorf("invalid configuration:\n - %s", strings.Join(problems, "\n - "))
|
||
}
|
||
return cfg, nil
|
||
}
|
||
|
||
// needsGemini reports whether any enabled layer requires the Gemini backend — the
|
||
// cheap trivial route, the Layer-1 classifier, or Gemini-native web grounding. Drives
|
||
// both the fail-fast key check and whether the client is built at all.
|
||
func (c *Config) needsGemini() bool {
|
||
return c.TrivialOffloadEnabled || c.RouterClassifierEnabled ||
|
||
(c.WebEnabled && c.WebProvider == webProviderGeminiGrounding)
|
||
}
|
||
|
||
// Summary returns a human-readable, SECRET-REDACTED dump for the startup log.
|
||
func (c *Config) Summary() string {
|
||
servers := make([]string, 0, len(c.AllowedServers))
|
||
for s := range c.AllowedServers {
|
||
servers = append(servers, s)
|
||
}
|
||
unlimited := make([]string, 0, len(c.UnlimitedUsers))
|
||
for u := range c.UnlimitedUsers {
|
||
unlimited = append(unlimited, u)
|
||
}
|
||
bodyUsers := make([]string, 0, len(c.LogBodiesUsers))
|
||
for u := range c.LogBodiesUsers {
|
||
bodyUsers = append(bodyUsers, u)
|
||
}
|
||
redact := func(s string) string {
|
||
if s == "" {
|
||
return "(unset)"
|
||
}
|
||
return "set(" + strconv.Itoa(len(s)) + " chars)"
|
||
}
|
||
return strings.Join([]string{
|
||
"ai-bot config:",
|
||
" HOMESERVER_URL = " + c.HomeserverURL,
|
||
" BOT_MXID = " + c.BotMXID,
|
||
" BOT_DISPLAY_NAME = " + c.BotDisplayName,
|
||
" AS_ADDR = " + c.ASAddr,
|
||
" REGISTRATION_PATH = " + func() string {
|
||
if c.RegistrationPath == "" {
|
||
return "(unset — using env tokens)"
|
||
}
|
||
return c.RegistrationPath
|
||
}(),
|
||
" AS_TOKEN = " + redact(c.ASToken),
|
||
" HS_TOKEN = " + redact(c.HSToken),
|
||
" XAI_BASE_URL = " + c.XAIBaseURL,
|
||
" XAI_MODEL = " + c.XAIModel,
|
||
" GROK_REASONING_EFFORT = " + func() string {
|
||
if c.GrokReasoningEffort == "" {
|
||
return "(unset — not sent; provider default)"
|
||
}
|
||
return c.GrokReasoningEffort
|
||
}(),
|
||
" XAI_API_KEY = " + redact(c.XAIAPIKey),
|
||
fmt.Sprintf(" XAI_TEMPERATURE = %g", c.XAITemp),
|
||
fmt.Sprintf(" MAX_OUTPUT_TOKENS = %d", c.MaxOutTok),
|
||
fmt.Sprintf(" MAX_CONTEXT_EVENTS = %d", c.MaxCtxEvent),
|
||
" ALLOWED_SERVERS = " + strings.Join(servers, ","),
|
||
fmt.Sprintf(" DAILY_USD_CEILING = %g", c.DailyUSDCeiling),
|
||
fmt.Sprintf(" PER_USER_DAILY_CAP = %d", c.PerUserDailyCap),
|
||
" UNLIMITED_USERS = " + strings.Join(unlimited, ","),
|
||
fmt.Sprintf(" PRICES /1M (in/cached/out) = %g / %g / %g",
|
||
c.PriceInputPerM, c.PriceCachedPerM, c.PriceOutputPerM),
|
||
" SYSTEM_PROMPT_PATH = " + c.SystemPromptPath,
|
||
" STATE_DIR = " + c.StateDir,
|
||
" AI_BOT_DATABASE_URL= " + redact(c.DatabaseURL),
|
||
fmt.Sprintf(" REQUEST_BUDGET = %s", c.RequestBudget),
|
||
fmt.Sprintf(" GROK_PROMPT_CACHE = %t", c.GrokPromptCache),
|
||
fmt.Sprintf(" TELEMETRY_ENABLED = %t (store_text=%t, retention=%s)",
|
||
c.TelemetryEnabled, c.TelemetryStoreText, c.TelemetryRetention),
|
||
fmt.Sprintf(" LOG_BODIES_USERS = %s (needs LOG_LEVEL=debug)",
|
||
func() string {
|
||
if len(bodyUsers) == 0 {
|
||
return "(none — bodies never logged)"
|
||
}
|
||
return strings.Join(bodyUsers, ",")
|
||
}()),
|
||
fmt.Sprintf(" CASCADE: router=%t classifier=%t trivial=%t web=%t(%s, paranoid=%t, cap=%d, fee=$%g/prompt) reason=%t(%s)",
|
||
c.RouterEnabled, c.RouterClassifierEnabled, c.TrivialOffloadEnabled,
|
||
c.WebEnabled, c.WebProvider, c.WebParanoid, c.WebGroundingDailyCap,
|
||
c.GeminiGroundingPerPrompt, c.ReasoningEnabled, c.ReasoningEffort),
|
||
fmt.Sprintf(" PROJECT_KB = enabled=%t path=%s", c.ProjectKBEnabled, func() string {
|
||
if c.ProjectKBPath == "" {
|
||
return "(unset)"
|
||
}
|
||
return c.ProjectKBPath
|
||
}()),
|
||
" GEMINI_MODEL = " + c.GeminiModel,
|
||
" GEMINI_API_KEY = " + redact(c.GeminiAPIKey),
|
||
}, "\n")
|
||
}
|