// vojo/apps/ai-bot/llm.go

package main

import "context"

// llm.go is the provider-neutral seam between the bot's business logic and the
// concrete model backends. Nothing here names a vendor: the bot composes its
// context, prices usage, and books spend against these types, and a thin adapter
// (provider_xai.go, provider_gemini.go) maps them to/from each backend's wire
// format. This is what lets a second model (Gemini) slot in behind a flag without
// the business logic learning a new shape.

// Message holds a single chat turn in a shape that belongs to no particular
// provider.
type Message struct {
	// Role is one of "system", "user", or "assistant".
	Role string
	// Content is the body of the turn.
	Content string
}

// Usage carries, in provider-neutral form, the token accounting that comes back
// with a completion. Billing (computeUSD) is driven by it: the counts are the
// API's own, so they remain authoritative even if our price constants drift.
type Usage struct {
	// PromptTokens is the prompt-side token count.
	PromptTokens int
	// CachedTokens is the subset of PromptTokens that was served from the
	// provider's prompt cache.
	CachedTokens int
	// CompletionTokens is the completion-side token count.
	CompletionTokens int
}

// Tool describes, without naming a vendor, something the model may invoke (web
// search, for example). It is unpopulated today; the web-freshness layer
// (Phase 3) is what fills it in. It is declared here already so that the request
// type stays stable across phases.
type Tool struct {
	// Type is the tool's name, e.g. "web_search". Each adapter translates it
	// into its own backend's tool wire shape.
	Type string
}

// LLMRequest describes a completion request without reference to any provider.
// The newer optional fields (Tools, ReasoningEffort) serialize away when empty,
// which means a plain grok_direct call yields exactly the wire body it yielded
// before this seam existed.
type LLMRequest struct {
	Model       string
	Messages    []Message
	MaxTokens   int
	Temperature float64
	// Tools is optional; the web layer populates it.
	Tools []Tool
	// ReasoningEffort is optional. "" means the default; values such as "low"
	// or "high" are for the reasoning route.
	ReasoningEffort string
	// ConvID is an optional routing hint for prompt caching. Adapters that
	// support it (xAI's x-grok-conv-id) pin a conversation to one backend to
	// raise the cache hit rate; "" means don't send it. Because it travels as a
	// header rather than in the request body, it never changes the wire body,
	// and leaving it unset is a no-op.
	ConvID string
}

// LLMResponse is the result of a completion, expressed without reference to any
// provider.
type LLMResponse struct {
	Text  string
	Usage Usage
	// ProviderRequestID is the backend's response id; it is logged for
	// support/debug purposes.
	ProviderRequestID string
}

// LLMClient is satisfied by any chat-completion backend (Grok, Gemini, …). An
// implementation is a thin adapter over one wire protocol. Since the bot depends
// on nothing but this interface, Bot.llm can be swapped or routed while the
// business logic stays untouched.
type LLMClient interface {
	// Complete takes a provider-neutral request and returns the
	// provider-neutral response, or an error.
	Complete(ctx context.Context, req LLMRequest) (*LLMResponse, error)
}