vojo/apps/ai-bot/llm.go

65 lines
2.6 KiB
Go

package main
import "context"
// llm.go is the provider-neutral seam between the bot's business logic and the
// concrete model backends. Nothing here names a vendor: the bot composes its
// context, prices usage, and books spend against these types, and a thin adapter
// (provider_xai.go, provider_gemini.go) maps them to/from each backend's wire
// format. This is what lets a second model (Gemini) slot in behind a flag without
// the business logic learning a new shape.
// Message represents a single chat turn in a shape that does not depend on any
// particular model provider.
type Message struct {
	// Role identifies the speaker: "system", "user", or "assistant".
	Role string
	// Content is the body of the turn.
	Content string
}
// Usage carries the provider-neutral token counts that accompany a completion.
// Billing (computeUSD) is derived from these numbers; they are the API's own
// figures and stay authoritative even if our price constants drift.
type Usage struct {
	// PromptTokens is the prompt-side token count.
	PromptTokens int
	// CachedTokens is the subset of PromptTokens served from the provider's
	// prompt cache.
	CachedTokens int
	// CompletionTokens is the completion-side token count.
	CompletionTokens int
}
// Tool describes, in provider-neutral terms, a tool the model is allowed to
// invoke (web search, for example). Nothing populates it today; the
// web-freshness layer (Phase 3) will. It is declared here so the request type
// keeps a stable shape across phases.
type Tool struct {
	// Type is the tool's name, e.g. "web_search". Each adapter translates it
	// into its backend's tool wire shape.
	Type string
}
// LLMRequest describes a completion request without reference to any vendor.
// The newer optional fields (Tools, ReasoningEffort) serialize away when left
// empty, so a plain grok_direct call yields exactly the wire body it produced
// before this seam existed.
type LLMRequest struct {
	Model       string
	Messages    []Message
	MaxTokens   int
	Temperature float64

	// Tools is optional; the web layer populates it.
	Tools []Tool

	// ReasoningEffort is optional. "" means the default; the reasoning route
	// uses values such as "low" or "high".
	ReasoningEffort string

	// ConvID is an optional prompt-cache routing hint. Adapters that support
	// it (xAI's x-grok-conv-id) pin a conversation to one backend to raise the
	// cache hit rate; "" means don't send it. It travels as a header rather
	// than in the request body, so it never alters the wire body and leaving
	// it unset is a no-op.
	ConvID string
}
// LLMResponse holds the outcome of a completion in provider-neutral form.
type LLMResponse struct {
	Text  string
	Usage Usage

	// ProviderRequestID is the backend's response id, logged for
	// support/debug.
	ProviderRequestID string
}
// LLMClient abstracts over chat-completion backends (Grok, Gemini, …). Each
// implementation is a thin adapter over a wire protocol. The bot depends on
// nothing but this interface, which is what allows Bot.llm to be swapped or
// routed without touching business logic.
type LLMClient interface {
	// Complete takes a provider-neutral request and returns the completion
	// result or an error.
	Complete(ctx context.Context, req LLMRequest) (*LLMResponse, error)
}