From b7b2dc445d8b7266d2082e4d32fd1c8a63163532 Mon Sep 17 00:00:00 2001
From: Youzini-afk <13153778771cx@gmail.com>
Date: Fri, 27 Mar 2026 21:33:06 +0800
Subject: [PATCH] fix: preserve completion tokens on json retry

---
 llm.js                  |  6 +---
 tests/llm-streaming.mjs | 78 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 79 insertions(+), 5 deletions(-)

diff --git a/llm.js b/llm.js
index fdbbc49..924416d 100644
--- a/llm.js
+++ b/llm.js
@@ -12,7 +12,6 @@ const MODULE_NAME = "st_bme";
 const LLM_REQUEST_TIMEOUT_MS = 300000;
 const DEFAULT_TEXT_COMPLETION_TOKENS = 64000;
 const DEFAULT_JSON_COMPLETION_TOKENS = 64000;
-const RETRY_JSON_COMPLETION_TOKENS = 3200;
 const STREAM_DEBUG_PREVIEW_MAX_CHARS = 1200;
 const STREAM_DEBUG_UPDATE_INTERVAL_MS = 120;
 const SENSITIVE_DEBUG_KEY_PATTERN =
@@ -1389,10 +1388,7 @@ export async function callLLMForJSON({
       taskType,
       requestSource: privateRequestSource,
       onStreamProgress,
-      maxCompletionTokens:
-        attempt === 0
-          ? DEFAULT_JSON_COMPLETION_TOKENS
-          : RETRY_JSON_COMPLETION_TOKENS,
+      maxCompletionTokens: DEFAULT_JSON_COMPLETION_TOKENS,
     });
     const responseText = response?.content || "";
     const outputCleanup = applyTaskOutputRegexStages(taskType, responseText);
diff --git a/tests/llm-streaming.mjs b/tests/llm-streaming.mjs
index f4a3f0a..55d705e 100644
--- a/tests/llm-streaming.mjs
+++ b/tests/llm-streaming.mjs
@@ -335,8 +335,86 @@ async function testDedicatedStreamingAbortDoesNotLeaveActiveState() {
   }
 }
 
+async function testJsonRetryKeepsProfileCompletionTokens() {
+  const originalFetch = globalThis.fetch;
+  let fetchCount = 0;
+
+  globalThis.fetch = async () => {
+    fetchCount += 1;
+
+    if (fetchCount === 1) {
+      return new Response(
+        JSON.stringify({
+          choices: [
+            {
+              message: {
+                content: "not-json",
+              },
+              finish_reason: "stop",
+            },
+          ],
+        }),
+        {
+          status: 200,
+          headers: {
+            "Content-Type": "application/json",
+          },
+        },
+      );
+    }
+
+    return new Response(
+      JSON.stringify({
+        choices: [
+          {
+            message: {
+              content: '{"ok":true}',
+            },
+            finish_reason: "stop",
+          },
+        ],
+      }),
+      {
+        status: 200,
+        headers: {
+          "Content-Type": "application/json",
+        },
+      },
+    );
+  };
+
+  try {
+    await withStreamingSettings(
+      {
+        stream: false,
+        max_completion_tokens: 7777,
+      },
+      async () => {
+        const result = await llm.callLLMForJSON({
+          systemPrompt: "system",
+          userPrompt: "user",
+          maxRetries: 1,
+          taskType: "extract",
+          requestSource: "test:json-retry-keeps-profile-tokens",
+        });
+
+        assert.deepEqual(result, { ok: true });
+        assert.equal(fetchCount, 2);
+
+        const snapshot = getSnapshot("extract");
+        assert.ok(snapshot);
+        assert.equal(snapshot.requestBody?.max_completion_tokens, 7777);
+        assert.equal(snapshot.filteredGeneration?.max_completion_tokens, 7777);
+      },
+    );
+  } finally {
+    globalThis.fetch = originalFetch;
+  }
+}
+
 await testDedicatedStreamingSuccess();
 await testDedicatedStreamingFallsBackToNonStream();
 await testDedicatedStreamingAbortDoesNotLeaveActiveState();
+await testJsonRetryKeepsProfileCompletionTokens();
 
 console.log("llm-streaming tests passed");