122 lines
4.6 KiB
Go
122 lines
4.6 KiB
Go
package main
|
|
|
|
import (
|
|
"context"
|
|
"io"
|
|
"net/http"
|
|
"net/http/httptest"
|
|
"strings"
|
|
"testing"
|
|
)
|
|
|
|
// TestCompleteReasoningEffortSelfHeal verifies that when a model rejects the
|
|
// reasoning_effort param (the HTTP 400 a non-reasoning Grok model returns), the transport
|
|
// strips the param and retries once — so switching XAI_MODEL to a non-reasoning model
|
|
// degrades gracefully instead of hard-failing every request into a react.
|
|
func TestCompleteReasoningEffortSelfHeal(t *testing.T) {
|
|
var calls int
|
|
var sawEffortFirst, sawEffortSecond bool
|
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
body, _ := io.ReadAll(r.Body)
|
|
hasEffort := strings.Contains(string(body), "reasoning_effort")
|
|
calls++
|
|
if calls == 1 {
|
|
sawEffortFirst = hasEffort
|
|
w.WriteHeader(http.StatusBadRequest)
|
|
io.WriteString(w, `{"code":"Client specified an invalid argument","error":"Model grok-x-non-reasoning does not support parameter reasoningEffort."}`)
|
|
return
|
|
}
|
|
sawEffortSecond = hasEffort
|
|
w.Header().Set("Content-Type", "application/json")
|
|
io.WriteString(w, `{"id":"ok","choices":[{"message":{"content":"hi"},"finish_reason":"stop"}],"usage":{"prompt_tokens":1,"completion_tokens":1}}`)
|
|
}))
|
|
defer srv.Close()
|
|
|
|
c := newOpenAIClient("xai", srv.URL, "key", nil, discardLog())
|
|
resp, err := c.complete(context.Background(), openAIRequest{
|
|
Model: "grok-x-non-reasoning",
|
|
Messages: []openAIMessage{{Role: "user", Content: "hi"}},
|
|
MaxTokens: 10,
|
|
Temperature: 0.6,
|
|
ReasoningEffort: "low",
|
|
}, nil)
|
|
if err != nil {
|
|
t.Fatalf("complete returned error, want self-heal success: %v", err)
|
|
}
|
|
if resp.Text() != "hi" {
|
|
t.Fatalf("got %q, want %q", resp.Text(), "hi")
|
|
}
|
|
if calls != 2 {
|
|
t.Fatalf("expected exactly 2 calls (400, then stripped retry), got %d", calls)
|
|
}
|
|
if !sawEffortFirst {
|
|
t.Fatal("first call should have sent reasoning_effort")
|
|
}
|
|
if sawEffortSecond {
|
|
t.Fatal("retry must NOT send reasoning_effort")
|
|
}
|
|
}
|
|
|
|
// TestCompleteReasoningEffortCachedAfterFirst: after the first 400+heal, the client
|
|
// remembers the model rejects reasoning_effort and drops the param UP FRONT on later calls —
|
|
// so a misconfigured GROK_REASONING_EFFORT costs one 400 per process, not one per message.
|
|
func TestCompleteReasoningEffortCachedAfterFirst(t *testing.T) {
|
|
var calls, rejected int
|
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
body, _ := io.ReadAll(r.Body)
|
|
calls++
|
|
if strings.Contains(string(body), "reasoning_effort") {
|
|
rejected++
|
|
w.WriteHeader(http.StatusBadRequest)
|
|
io.WriteString(w, `{"error":"Model m does not support parameter reasoningEffort."}`)
|
|
return
|
|
}
|
|
w.Header().Set("Content-Type", "application/json")
|
|
io.WriteString(w, `{"id":"ok","choices":[{"message":{"content":"hi"},"finish_reason":"stop"}],"usage":{"prompt_tokens":1,"completion_tokens":1}}`)
|
|
}))
|
|
defer srv.Close()
|
|
|
|
c := newOpenAIClient("xai", srv.URL, "key", nil, discardLog())
|
|
req := func() openAIRequest {
|
|
return openAIRequest{Model: "m", Messages: []openAIMessage{{Role: "user", Content: "hi"}}, ReasoningEffort: "low"}
|
|
}
|
|
// First call: 400 (with effort) then a stripped retry → 2 HTTP calls, 1 rejection.
|
|
if _, err := c.complete(context.Background(), req(), nil); err != nil {
|
|
t.Fatalf("first complete: %v", err)
|
|
}
|
|
// Second call: the param is dropped up front → exactly 1 HTTP call, no new rejection.
|
|
if _, err := c.complete(context.Background(), req(), nil); err != nil {
|
|
t.Fatalf("second complete: %v", err)
|
|
}
|
|
if calls != 3 {
|
|
t.Fatalf("want 3 HTTP calls total (400+retry, then cached single), got %d", calls)
|
|
}
|
|
if rejected != 1 {
|
|
t.Fatalf("want exactly 1 reasoning_effort rejection (cached after), got %d", rejected)
|
|
}
|
|
}
|
|
|
|
// TestCompleteTerminal4xxNoSelfHeal guards that the strip-and-retry is scoped to the
|
|
// reasoning_effort 400 only: an unrelated 400 still fails fast (no spurious retry).
|
|
func TestCompleteTerminal4xxNoSelfHeal(t *testing.T) {
|
|
var calls int
|
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
calls++
|
|
w.WriteHeader(http.StatusBadRequest)
|
|
io.WriteString(w, `{"error":"some other invalid argument"}`)
|
|
}))
|
|
defer srv.Close()
|
|
|
|
c := newOpenAIClient("xai", srv.URL, "key", nil, discardLog())
|
|
_, err := c.complete(context.Background(), openAIRequest{
|
|
Model: "grok-x-non-reasoning",
|
|
Messages: []openAIMessage{{Role: "user", Content: "hi"}},
|
|
ReasoningEffort: "low",
|
|
}, nil)
|
|
if err == nil {
|
|
t.Fatal("expected a terminal error on an unrelated 400")
|
|
}
|
|
if calls != 1 {
|
|
t.Fatalf("unrelated 400 must fail fast (1 call), got %d", calls)
|
|
}
|
|
}
|