fix: preserve completion tokens on json retry

This commit is contained in:
Youzini-afk
2026-03-27 21:33:06 +08:00
parent 473a382557
commit b7b2dc445d
2 changed files with 79 additions and 5 deletions

6
llm.js
View File

@@ -12,7 +12,6 @@ const MODULE_NAME = "st_bme";
const LLM_REQUEST_TIMEOUT_MS = 300000; const LLM_REQUEST_TIMEOUT_MS = 300000;
const DEFAULT_TEXT_COMPLETION_TOKENS = 64000; const DEFAULT_TEXT_COMPLETION_TOKENS = 64000;
const DEFAULT_JSON_COMPLETION_TOKENS = 64000; const DEFAULT_JSON_COMPLETION_TOKENS = 64000;
const RETRY_JSON_COMPLETION_TOKENS = 3200;
const STREAM_DEBUG_PREVIEW_MAX_CHARS = 1200; const STREAM_DEBUG_PREVIEW_MAX_CHARS = 1200;
const STREAM_DEBUG_UPDATE_INTERVAL_MS = 120; const STREAM_DEBUG_UPDATE_INTERVAL_MS = 120;
const SENSITIVE_DEBUG_KEY_PATTERN = const SENSITIVE_DEBUG_KEY_PATTERN =
@@ -1389,10 +1388,7 @@ export async function callLLMForJSON({
taskType, taskType,
requestSource: privateRequestSource, requestSource: privateRequestSource,
onStreamProgress, onStreamProgress,
maxCompletionTokens: maxCompletionTokens: DEFAULT_JSON_COMPLETION_TOKENS,
attempt === 0
? DEFAULT_JSON_COMPLETION_TOKENS
: RETRY_JSON_COMPLETION_TOKENS,
}); });
const responseText = response?.content || ""; const responseText = response?.content || "";
const outputCleanup = applyTaskOutputRegexStages(taskType, responseText); const outputCleanup = applyTaskOutputRegexStages(taskType, responseText);

View File

@@ -335,8 +335,86 @@ async function testDedicatedStreamingAbortDoesNotLeaveActiveState() {
} }
} }
// Regression test: when the first JSON attempt returns unparseable content,
// the retry must still be issued with the profile's max_completion_tokens
// (7777 here) instead of a reduced retry-specific budget.
async function testJsonRetryKeepsProfileCompletionTokens() {
  const realFetch = globalThis.fetch;
  let callCount = 0;

  // Build a minimal OpenAI-style chat-completion HTTP response for `content`.
  const buildResponse = (content) =>
    new Response(
      JSON.stringify({
        choices: [
          {
            message: { content },
            finish_reason: "stop",
          },
        ],
      }),
      {
        status: 200,
        headers: {
          "Content-Type": "application/json",
        },
      },
    );

  globalThis.fetch = async () => {
    callCount += 1;
    // First attempt: malformed JSON to force a retry; second: valid payload.
    if (callCount === 1) {
      return buildResponse("not-json");
    }
    return buildResponse('{"ok":true}');
  };

  try {
    await withStreamingSettings(
      {
        stream: false,
        max_completion_tokens: 7777,
      },
      async () => {
        const result = await llm.callLLMForJSON({
          systemPrompt: "system",
          userPrompt: "user",
          maxRetries: 1,
          taskType: "extract",
          requestSource: "test:json-retry-keeps-profile-tokens",
        });
        assert.deepEqual(result, { ok: true });
        // Exactly two fetches: the failed first attempt plus one retry.
        assert.equal(callCount, 2);
        const snapshot = getSnapshot("extract");
        assert.ok(snapshot);
        // The retry request must carry the profile's token budget untouched.
        assert.equal(snapshot.requestBody?.max_completion_tokens, 7777);
        assert.equal(snapshot.filteredGeneration?.max_completion_tokens, 7777);
      },
    );
  } finally {
    // Always restore the real fetch, even if an assertion throws.
    globalThis.fetch = realFetch;
  }
}
// Run the streaming test suite sequentially; each test throws on failure.
await testDedicatedStreamingSuccess();
await testDedicatedStreamingFallsBackToNonStream();
await testDedicatedStreamingAbortDoesNotLeaveActiveState();
await testJsonRetryKeepsProfileCompletionTokens();
console.log("llm-streaming tests passed");