fix: preserve completion tokens on json retry

2026-05-15 22:30:38 +08:00 · 2026-03-27 21:33:06 +08:00
parent 473a382557
commit b7b2dc445d
2 changed files with 79 additions and 5 deletions
--- a/llm.js
+++ b/llm.js
@@ -12,7 +12,6 @@ const MODULE_NAME = "st_bme";
 const LLM_REQUEST_TIMEOUT_MS = 300000;
 const DEFAULT_TEXT_COMPLETION_TOKENS = 64000;
 const DEFAULT_JSON_COMPLETION_TOKENS = 64000;
-const RETRY_JSON_COMPLETION_TOKENS = 3200;
 const STREAM_DEBUG_PREVIEW_MAX_CHARS = 1200;
 const STREAM_DEBUG_UPDATE_INTERVAL_MS = 120;
 const SENSITIVE_DEBUG_KEY_PATTERN =
@@ -1389,10 +1388,7 @@ export async function callLLMForJSON({
        taskType,
        requestSource: privateRequestSource,
        onStreamProgress,
-        maxCompletionTokens:
-          attempt === 0
-            ? DEFAULT_JSON_COMPLETION_TOKENS
-            : RETRY_JSON_COMPLETION_TOKENS,
+        maxCompletionTokens: DEFAULT_JSON_COMPLETION_TOKENS,
      });
      const responseText = response?.content || "";
      const outputCleanup = applyTaskOutputRegexStages(taskType, responseText);
--- a/tests/llm-streaming.mjs
+++ b/tests/llm-streaming.mjs
@@ -335,8 +335,86 @@ async function testDedicatedStreamingAbortDoesNotLeaveActiveState() {
  }
 }

+async function testJsonRetryKeepsProfileCompletionTokens() { // Checks that a callLLMForJSON call needing one retry still reports max_completion_tokens 7777 in its snapshot.
+  const originalFetch = globalThis.fetch; // saved so the finally block can restore it
+  let fetchCount = 0; // number of times the stubbed fetch has been invoked
+
+  globalThis.fetch = async () => { // stub: ignores its arguments and returns canned responses
+    fetchCount += 1;
+
+    if (fetchCount === 1) { // first call: reply with content that is not valid JSON (presumably what triggers the retry — confirm in callLLMForJSON)
+      return new Response(
+        JSON.stringify({
+          choices: [
+            {
+              message: {
+                content: "not-json",
+              },
+              finish_reason: "stop",
+            },
+          ],
+        }),
+        {
+          status: 200,
+          headers: {
+            "Content-Type": "application/json",
+          },
+        },
+      );
+    }
+
+    return new Response( // every later call: reply with content that parses as a JSON object
+      JSON.stringify({
+        choices: [
+          {
+            message: {
+              content: '{"ok":true}',
+            },
+            finish_reason: "stop",
+          },
+        ],
+      }),
+      {
+        status: 200,
+        headers: {
+          "Content-Type": "application/json",
+        },
+      },
+    );
+  };
+
+  try {
+    await withStreamingSettings(
+      {
+        stream: false,
+        max_completion_tokens: 7777, // distinctive value that the snapshot assertions below look for
+      },
+      async () => {
+        const result = await llm.callLLMForJSON({
+          systemPrompt: "system",
+          userPrompt: "user",
+          maxRetries: 1, // the test expects exactly one retry (two fetch calls in total)
+          taskType: "extract",
+          requestSource: "test:json-retry-keeps-profile-tokens",
+        });
+
+        assert.deepEqual(result, { ok: true }); // the parsed payload of the second canned response
+        assert.equal(fetchCount, 2); // initial attempt plus one retry
+
+        const snapshot = getSnapshot("extract"); // helper not shown in this hunk; presumably returns the latest recorded request snapshot for this task type — TODO confirm
+        assert.ok(snapshot);
+        assert.equal(snapshot.requestBody?.max_completion_tokens, 7777); // value on the recorded request body
+        assert.equal(snapshot.filteredGeneration?.max_completion_tokens, 7777); // same value on the recorded filteredGeneration field
+      },
+    );
+  } finally {
+    globalThis.fetch = originalFetch; // always restore the real fetch, even if an assertion throws
+  }
+}
+
 await testDedicatedStreamingSuccess();
 await testDedicatedStreamingFallsBackToNonStream();
 await testDedicatedStreamingAbortDoesNotLeaveActiveState();
+await testJsonRetryKeepsProfileCompletionTokens();

 console.log("llm-streaming tests passed");