vojo/apps/ai-bot/xai.go

171 lines
4.7 KiB
Go

package main
import (
"bytes"
"context"
"encoding/json"
"fmt"
"io"
"log/slog"
"math/rand"
"net/http"
"time"
)
// XAIClient is a client for an OpenAI-compatible Chat Completions API. Every
// request is POSTed to {base}/chat/completions and authenticated with a Bearer
// key.
type XAIClient struct {
	base, key string       // API root that "/chat/completions" is appended to, and the Bearer token
	http      *http.Client // HTTP client used for every attempt
	maxTry    int          // total attempts per Complete call, the first one included
	log       *slog.Logger // optional; when nil, retry warnings are not logged
}
// NewXAIClient builds a client for the API rooted at base, authenticating with
// key. The client gets its own default http.Client and allows three attempts
// per request. logger may be nil, in which case retry warnings are not logged.
func NewXAIClient(base, key string, logger *slog.Logger) *XAIClient {
	client := new(XAIClient)
	client.base, client.key = base, key
	client.http = new(http.Client)
	client.maxTry = 3
	client.log = logger
	return client
}
// xaiMessage is one entry of the "messages" array in a chat request: a role
// name paired with the text for that turn.
type xaiMessage struct {
	Role    string `json:"role"`
	Content string `json:"content"`
}
// xaiRequest is the JSON body sent to the chat/completions endpoint. Field
// order is the order in which the keys are marshalled.
type xaiRequest struct {
	Model       string       `json:"model"`
	Messages    []xaiMessage `json:"messages"`
	MaxTokens   int          `json:"max_tokens"`
	Temperature float64      `json:"temperature"`
	Stream      bool         `json:"stream"` // Complete always sends false
}
// xaiUsage holds the token counters decoded from the "usage" object of a
// response.
type xaiUsage struct {
	PromptTokens     int `json:"prompt_tokens"`
	CompletionTokens int `json:"completion_tokens"`

	// NOTE(review): presumably the part of PromptTokens that was served from
	// a provider-side cache — confirm against the API documentation.
	PromptTokensDetails struct {
		CachedTokens int `json:"cached_tokens"`
	} `json:"prompt_tokens_details"`
}
// xaiResponse is the subset of a chat/completions reply that this package
// decodes: the list of choices and the usage counters.
type xaiResponse struct {
	Choices []struct {
		Message struct {
			Content string `json:"content"`
		} `json:"message"`
		FinishReason string `json:"finish_reason"`
	} `json:"choices"`

	Usage xaiUsage `json:"usage"`
}
// Text returns the message content of the first choice, or the empty string
// when the response carries no choices.
func (r *xaiResponse) Text() string {
	if len(r.Choices) > 0 {
		return r.Choices[0].Message.Content
	}
	return ""
}
// Complete calls Chat Completions (non-streaming) with retry on transient
// failures: 429, 5xx, and network errors/timeouts are retried with exponential
// backoff plus jitter, up to x.maxTry attempts in total. Non-retryable 4xx fail
// immediately with the error from that attempt.
//
// If ctx is cancelled or expires, whether during a backoff wait or during an
// attempt, Complete returns ctx.Err(). When every attempt fails the returned
// error wraps the last attempt's error. On exhaustion the caller refunds the
// reserved request and notifies the user, so a transient failure is never
// silently swallowed (F6).
func (x *XAIClient) Complete(ctx context.Context, model string, msgs []xaiMessage, maxTokens int, temp float64) (*xaiResponse, error) {
	payload, err := json.Marshal(xaiRequest{
		Model:       model,
		Messages:    msgs,
		MaxTokens:   maxTokens,
		Temperature: temp,
		Stream:      false,
	})
	if err != nil {
		return nil, err
	}

	var lastErr error
	for attempt := 0; attempt < x.maxTry; attempt++ {
		if attempt > 0 {
			// 0.5s, 1s, 2s … capped at 8s, plus up to 250ms jitter. The cap is
			// applied to the exponent rather than to the shifted value, so a
			// large attempt number cannot overflow the shift into a zero or
			// negative delay.
			backoff := 8 * time.Second
			if shift := attempt - 1; shift < 4 {
				backoff = time.Duration(500<<uint(shift)) * time.Millisecond
			}
			backoff += time.Duration(rand.Intn(250)) * time.Millisecond

			// An explicit timer (instead of time.After) can be stopped when
			// the wait is abandoned, so cancellation does not leave it pending
			// for the remainder of the backoff.
			timer := time.NewTimer(backoff)
			select {
			case <-ctx.Done():
				timer.Stop()
				return nil, ctx.Err()
			case <-timer.C:
			}
		}

		resp, retryable, err := x.attempt(ctx, payload)
		if err == nil {
			return resp, nil
		}
		lastErr = err
		if ctx.Err() != nil {
			return nil, ctx.Err()
		}
		if !retryable {
			return nil, err
		}
		// Only announce a retry when one will actually happen; the failure of
		// the final attempt is reported through the returned error instead.
		if x.log != nil && attempt+1 < x.maxTry {
			x.log.Warn("xai attempt failed, will retry", "attempt", attempt+1, "max", x.maxTry, "err", err)
		}
	}
	return nil, fmt.Errorf("xai: exhausted %d attempts: %w", x.maxTry, lastErr)
}
// attempt performs one HTTP call: a POST of payload to {base}/chat/completions
// with JSON content type and Bearer authorization, bounded by a 60-second
// per-attempt deadline derived from ctx.
//
// It returns (response, retryable, err):
//   - 2xx with a decodable body: the response and a nil error.
//   - 429 or 5xx: retryable=true.
//   - any other non-2xx status: retryable=false (terminal 4xx and the like).
//   - network errors, including a body that could not be read in full:
//     retryable=true unless the parent ctx is already done.
//   - a request that cannot be built, or a 2xx body that is not valid JSON:
//     retryable=false.
func (x *XAIClient) attempt(ctx context.Context, payload []byte) (*xaiResponse, bool, error) {
	// Per-attempt deadline so a hung connection doesn't block the whole loop.
	attemptCtx, cancel := context.WithTimeout(ctx, 60*time.Second)
	defer cancel()

	req, err := http.NewRequestWithContext(attemptCtx, http.MethodPost, x.base+"/chat/completions", bytes.NewReader(payload))
	if err != nil {
		return nil, false, err
	}
	req.Header.Set("Content-Type", "application/json")
	req.Header.Set("Authorization", "Bearer "+x.key)

	resp, err := x.http.Do(req)
	if err != nil {
		// Network error / timeout — retryable (unless the parent ctx is done).
		return nil, ctx.Err() == nil, err
	}
	defer resp.Body.Close()

	// Keep the read error instead of discarding it. For error statuses the
	// (possibly partial) body only feeds the message snippet, so the status
	// code still decides the outcome first.
	data, readErr := io.ReadAll(resp.Body)

	switch {
	case resp.StatusCode == http.StatusTooManyRequests || resp.StatusCode >= 500:
		return nil, true, fmt.Errorf("xai http %d: %s", resp.StatusCode, snippet(data))
	case resp.StatusCode < 200 || resp.StatusCode >= 300:
		return nil, false, fmt.Errorf("xai http %d: %s", resp.StatusCode, snippet(data))
	}

	if readErr != nil {
		// The headers arrived but the body did not (connection reset, or the
		// per-attempt deadline fired mid-body). That is a transport failure,
		// not a malformed reply, so it follows the same retry rule as a failed
		// Do instead of surfacing as a terminal decode error.
		return nil, ctx.Err() == nil, fmt.Errorf("xai read body: %w", readErr)
	}

	var out xaiResponse
	if err := json.Unmarshal(data, &out); err != nil {
		return nil, false, fmt.Errorf("xai decode: %w", err)
	}
	// A 2xx is a billed call even when the model returns empty content (content
	// filter, finish_reason=length with no text, or no choices). Return it as a
	// success so the caller books the real cost via Reconcile instead of refunding
	// the slot and losing the spend — which would let empty replies bypass BOTH the
	// per-user cap and the global ceiling. The caller just won't send an empty body.
	return &out, false, nil
}
// snippet renders b as a string for use in an error message. Input of up to
// 300 bytes is returned unchanged; longer input is cut to at most 300 bytes
// and suffixed with "…".
//
// The cut is moved left by up to three bytes when it would otherwise land
// inside a multi-byte UTF-8 sequence, so truncating valid UTF-8 never yields a
// broken trailing rune.
func snippet(b []byte) string {
	// Named limit rather than max, which would shadow the Go 1.21 builtin.
	const limit = 300
	if len(b) <= limit {
		return string(b)
	}

	cut := limit
	// b[cut] is the first byte that gets dropped. While it is a continuation
	// byte (bit pattern 10xxxxxx) the rune it belongs to began earlier, so the
	// cut has to move left. A rune has at most three continuation bytes, which
	// also bounds the walk when b is not valid UTF-8.
	for back := 0; back < 3 && b[cut]&0xC0 == 0x80; back++ {
		cut--
	}
	return string(b[:cut]) + "…"
}