fix: preserve completion tokens on json retry

This commit is contained in:
Youzini-afk
2026-03-27 21:33:06 +08:00
parent 473a382557
commit b7b2dc445d
2 changed files with 79 additions and 5 deletions

6
llm.js
View File

@@ -12,7 +12,6 @@ const MODULE_NAME = "st_bme";
const LLM_REQUEST_TIMEOUT_MS = 300000; const LLM_REQUEST_TIMEOUT_MS = 300000;
const DEFAULT_TEXT_COMPLETION_TOKENS = 64000; const DEFAULT_TEXT_COMPLETION_TOKENS = 64000;
const DEFAULT_JSON_COMPLETION_TOKENS = 64000; const DEFAULT_JSON_COMPLETION_TOKENS = 64000;
const RETRY_JSON_COMPLETION_TOKENS = 3200;
const STREAM_DEBUG_PREVIEW_MAX_CHARS = 1200; const STREAM_DEBUG_PREVIEW_MAX_CHARS = 1200;
const STREAM_DEBUG_UPDATE_INTERVAL_MS = 120; const STREAM_DEBUG_UPDATE_INTERVAL_MS = 120;
const SENSITIVE_DEBUG_KEY_PATTERN = const SENSITIVE_DEBUG_KEY_PATTERN =
@@ -1389,10 +1388,7 @@ export async function callLLMForJSON({
taskType, taskType,
requestSource: privateRequestSource, requestSource: privateRequestSource,
onStreamProgress, onStreamProgress,
maxCompletionTokens: maxCompletionTokens: DEFAULT_JSON_COMPLETION_TOKENS,
attempt === 0
? DEFAULT_JSON_COMPLETION_TOKENS
: RETRY_JSON_COMPLETION_TOKENS,
}); });
const responseText = response?.content || ""; const responseText = response?.content || "";
const outputCleanup = applyTaskOutputRegexStages(taskType, responseText); const outputCleanup = applyTaskOutputRegexStages(taskType, responseText);

View File

@@ -335,8 +335,86 @@ async function testDedicatedStreamingAbortDoesNotLeaveActiveState() {
} }
} }
// Regression test: when the first JSON attempt returns unparseable content,
// the retry must still be issued with the profile's max_completion_tokens
// (7777 here) instead of a reduced retry-specific budget.
async function testJsonRetryKeepsProfileCompletionTokens() {
  const realFetch = globalThis.fetch;
  let callCount = 0;

  // Build a minimal OpenAI-style chat-completion HTTP response for `content`.
  const buildResponse = (content) =>
    new Response(
      JSON.stringify({
        choices: [
          {
            message: { content },
            finish_reason: "stop",
          },
        ],
      }),
      {
        status: 200,
        headers: {
          "Content-Type": "application/json",
        },
      },
    );

  globalThis.fetch = async () => {
    callCount += 1;
    // First attempt: malformed JSON to force a retry; second: valid payload.
    if (callCount === 1) {
      return buildResponse("not-json");
    }
    return buildResponse('{"ok":true}');
  };

  try {
    await withStreamingSettings(
      {
        stream: false,
        max_completion_tokens: 7777,
      },
      async () => {
        const result = await llm.callLLMForJSON({
          systemPrompt: "system",
          userPrompt: "user",
          maxRetries: 1,
          taskType: "extract",
          requestSource: "test:json-retry-keeps-profile-tokens",
        });
        assert.deepEqual(result, { ok: true });
        // Exactly two fetches: the failed first attempt plus one retry.
        assert.equal(callCount, 2);
        const snapshot = getSnapshot("extract");
        assert.ok(snapshot);
        // The retry request must carry the profile's token budget untouched.
        assert.equal(snapshot.requestBody?.max_completion_tokens, 7777);
        assert.equal(snapshot.filteredGeneration?.max_completion_tokens, 7777);
      },
    );
  } finally {
    // Always restore the real fetch, even if an assertion throws.
    globalThis.fetch = realFetch;
  }
}
// Run the streaming test suite sequentially; each test throws on failure.
await testDedicatedStreamingSuccess();
await testDedicatedStreamingFallsBackToNonStream();
await testDedicatedStreamingAbortDoesNotLeaveActiveState();
await testJsonRetryKeepsProfileCompletionTokens();
console.log("llm-streaming tests passed");