From b7b2dc445d8b7266d2082e4d32fd1c8a63163532 Mon Sep 17 00:00:00 2001
From: Youzini-afk <13153778771cx@gmail.com>
Date: Fri, 27 Mar 2026 21:33:06 +0800
Subject: [PATCH] fix: preserve completion tokens on json retry

---
 llm.js                  |  6 +---
 tests/llm-streaming.mjs | 78 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 79 insertions(+), 5 deletions(-)

diff --git a/llm.js b/llm.js
index fdbbc49..924416d 100644
--- a/llm.js
+++ b/llm.js
@@ -12,7 +12,6 @@ const MODULE_NAME = "st_bme";
 const LLM_REQUEST_TIMEOUT_MS = 300000;
 const DEFAULT_TEXT_COMPLETION_TOKENS = 64000;
 const DEFAULT_JSON_COMPLETION_TOKENS = 64000;
-const RETRY_JSON_COMPLETION_TOKENS = 3200;
 const STREAM_DEBUG_PREVIEW_MAX_CHARS = 1200;
 const STREAM_DEBUG_UPDATE_INTERVAL_MS = 120;
 const SENSITIVE_DEBUG_KEY_PATTERN =
@@ -1389,10 +1388,7 @@ export async function callLLMForJSON({
       taskType,
       requestSource: privateRequestSource,
       onStreamProgress,
-      maxCompletionTokens:
-        attempt === 0
-          ? DEFAULT_JSON_COMPLETION_TOKENS
-          : RETRY_JSON_COMPLETION_TOKENS,
+      maxCompletionTokens: DEFAULT_JSON_COMPLETION_TOKENS,
     });
     const responseText = response?.content || "";
     const outputCleanup = applyTaskOutputRegexStages(taskType, responseText);
diff --git a/tests/llm-streaming.mjs b/tests/llm-streaming.mjs
index f4a3f0a..55d705e 100644
--- a/tests/llm-streaming.mjs
+++ b/tests/llm-streaming.mjs
@@ -335,8 +335,86 @@ async function testDedicatedStreamingAbortDoesNotLeaveActiveState() {
   }
 }
 
+async function testJsonRetryKeepsProfileCompletionTokens() {
+  const originalFetch = globalThis.fetch;
+  let fetchCount = 0;
+
+  globalThis.fetch = async () => {
+    fetchCount += 1;
+
+    if (fetchCount === 1) {
+      return new Response(
+        JSON.stringify({
+          choices: [
+            {
+              message: {
+                content: "not-json",
+              },
+              finish_reason: "stop",
+            },
+          ],
+        }),
+        {
+          status: 200,
+          headers: {
+            "Content-Type": "application/json",
+          },
+        },
+      );
+    }
+
+    return new Response(
+      JSON.stringify({
+        choices: [
+          {
+            message: {
+              content: '{"ok":true}',
+            },
+            finish_reason: "stop",
+          },
+        ],
+      }),
+      {
+        status: 200,
+        headers: {
+          "Content-Type": "application/json",
+        },
+      },
+    );
+  };
+
+  try {
+    await withStreamingSettings(
+      {
+        stream: false,
+        max_completion_tokens: 7777,
+      },
+      async () => {
+        const result = await llm.callLLMForJSON({
+          systemPrompt: "system",
+          userPrompt: "user",
+          maxRetries: 1,
+          taskType: "extract",
+          requestSource: "test:json-retry-keeps-profile-tokens",
+        });
+
+        assert.deepEqual(result, { ok: true });
+        assert.equal(fetchCount, 2);
+
+        const snapshot = getSnapshot("extract");
+        assert.ok(snapshot);
+        assert.equal(snapshot.requestBody?.max_completion_tokens, 7777);
+        assert.equal(snapshot.filteredGeneration?.max_completion_tokens, 7777);
+      },
+    );
+  } finally {
+    globalThis.fetch = originalFetch;
+  }
+}
+
 await testDedicatedStreamingSuccess();
 await testDedicatedStreamingFallsBackToNonStream();
 await testDedicatedStreamingAbortDoesNotLeaveActiveState();
+await testJsonRetryKeepsProfileCompletionTokens();
 
 console.log("llm-streaming tests passed");