From 67e6e29bb29a7a8bd7bbd7f356d814aed94d249d Mon Sep 17 00:00:00 2001 From: Youzini-afk <13153778771cx@gmail.com> Date: Sat, 28 Mar 2026 20:38:57 +0800 Subject: [PATCH] Harden recall flow and JSON task prompts --- compressor.js | 7 +- consolidator.js | 8 +- extractor.js | 7 +- index.js | 553 ++++++++++++++++++++---------- llm.js | 45 ++- prompt-builder.js | 3 +- retriever.js | 53 ++- tests/graph-persistence.mjs | 2 +- tests/p0-regressions.mjs | 28 +- tests/prompt-builder-defaults.mjs | 2 + tests/prompt-builder-mvu.mjs | 1 + tests/retrieval-config.mjs | 109 +++++- 12 files changed, 618 insertions(+), 200 deletions(-) diff --git a/compressor.js b/compressor.js index d28086d..df950b9 100644 --- a/compressor.js +++ b/compressor.js @@ -302,9 +302,14 @@ async function summarizeBatch( compressPromptBuild, userPrompt, ); + const llmSystemPrompt = + Array.isArray(promptPayload.promptMessages) && + promptPayload.promptMessages.length > 0 + ? String(promptPayload.systemPrompt || "") + : String(promptPayload.systemPrompt || systemPrompt || ""); return await callLLMForJSON({ - systemPrompt: promptPayload.systemPrompt || systemPrompt, + systemPrompt: llmSystemPrompt, userPrompt: promptPayload.userPrompt, maxRetries: 1, signal, diff --git a/consolidator.js b/consolidator.js index 0c1444a..55f1a6d 100644 --- a/consolidator.js +++ b/consolidator.js @@ -341,10 +341,14 @@ export async function consolidateMemories({ consolidationPromptBuild, userPrompt, ); + const llmSystemPrompt = + Array.isArray(promptPayload.promptMessages) && + promptPayload.promptMessages.length > 0 + ? String(promptPayload.systemPrompt || "") + : String(promptPayload.systemPrompt || consolidationSystemPrompt || ""); try { decision = await callLLMForJSON({ - systemPrompt: - promptPayload.systemPrompt || consolidationSystemPrompt, + systemPrompt: llmSystemPrompt, userPrompt: promptPayload.userPrompt, maxRetries: 1, signal, diff --git a/extractor.js b/extractor.js index 93a167f..f471477 100644 --- a/extractor.js +++ b/extractor.js @@ -174,10 +174,15 @@ export async function extractMemories({ "请分析对话,按 JSON 格式输出操作列表。", ].join("\n"); const promptPayload = resolveTaskPromptPayload(promptBuild, userPrompt); + const llmSystemPrompt = + Array.isArray(promptPayload.promptMessages) && + promptPayload.promptMessages.length > 0 + ? String(promptPayload.systemPrompt || "") + : String(promptPayload.systemPrompt || systemPrompt || ""); // 调用 LLM const result = await callLLMForJSON({ - systemPrompt: promptPayload.systemPrompt || systemPrompt, + systemPrompt: llmSystemPrompt, userPrompt: promptPayload.userPrompt, maxRetries: 2, signal, diff --git a/index.js b/index.js index 8cfee38..aaa5181 100644 --- a/index.js +++ b/index.js @@ -292,6 +292,8 @@ const defaultSettings = { let currentGraph = null; let isExtracting = false; let isRecalling = false; +let activeRecallPromise = null; +let recallRunSequence = 0; let lastInjectionContent = ""; let lastExtractedItems = []; // 最近提取的节点(面板展示用) let lastRecalledItems = []; // 最近召回的节点(面板展示用) @@ -322,6 +324,9 @@ let pendingHistoryRecoveryTrigger = ""; let pendingHistoryMutationCheckTimers = []; let pendingGraphLoadRetryTimer = null; let pendingGraphLoadRetryChatId = ""; +let skipBeforeCombineRecallUntil = 0; +let lastPreGenerationRecallKey = ""; +let lastPreGenerationRecallAt = 0; const generationRecallTransactions = new Map(); const GENERATION_RECALL_TRANSACTION_TTL_MS = 15000; const stageNoticeHandles = { @@ -696,6 +701,38 @@ function abortStage(stage) { return true; } +function abortRecallStageWithReason(reason = "召回已终止") { + const controller = stageAbortControllers.recall; + if (!controller || controller.signal.aborted) return false; + controller.abort(createAbortError(reason)); + return true; +} + +async function waitForActiveRecallToSettle(timeoutMs = 1800) { + const pending = activeRecallPromise; + if (!pending) { + return { + settled: !isRecalling, + timedOut: false, + }; + } + + let settled = false; + await Promise.race([ + Promise.resolve(pending) + .catch(() => {}) + .then(() => { + settled = true; + }), + new Promise((resolve) => setTimeout(resolve, timeoutMs)), + ]); + + return { + settled: settled || !isRecalling, + timedOut: !settled && isRecalling, + }; +} + function buildAbortStageAction(stage) { const abortStageName = findAbortableStageForNotice(stage); if (!abortStageName) return undefined; @@ -1687,11 +1724,19 @@ function scheduleStartupGraphReconciliation() { } } -function clearInjectionState() { +function clearInjectionState(options = {}) { + const { + preserveRecallStatus = false, + preserveRuntimeStatus = preserveRecallStatus, + } = options; lastInjectionContent = ""; lastRecalledItems = []; - lastRecallStatus = createUiStatus("待命", "当前无有效注入内容", "idle"); - runtimeStatus = createUiStatus("待命", "当前无有效注入内容", "idle"); + if (!preserveRecallStatus) { + lastRecallStatus = createUiStatus("待命", "当前无有效注入内容", "idle"); + } + if (!preserveRuntimeStatus) { + runtimeStatus = createUiStatus("待命", "当前无有效注入内容", "idle"); + } recordInjectionSnapshot("recall", { injectionText: "", selectedNodeIds: [], @@ -1703,7 +1748,7 @@ function clearInjectionState() { mode: "cleared", }, }); - if (!isRecalling) { + if (!isRecalling && !preserveRecallStatus) { dismissStageNotice("recall"); } @@ -3155,22 +3200,105 @@ function markGenerationRecallTransactionHookState( return transaction; } +function clearGenerationRecallTransactionsForChat( + chatId = getCurrentChatId(), + { clearAll = false } = {}, +) { + let removed = 0; + const normalizedChatId = String(chatId || ""); + if (clearAll || !normalizedChatId) { + removed = generationRecallTransactions.size; + generationRecallTransactions.clear(); + return removed; + } + + for (const [transactionId, transaction] of generationRecallTransactions.entries()) { + if (String(transaction?.chatId || "") !== normalizedChatId) continue; + generationRecallTransactions.delete(transactionId); + removed += 1; + } + + return removed; +} + +function isTerminalGenerationRecallHookState(state = "") { + return ["completed", "failed", "aborted", "skipped"].includes( + String(state || ""), + ); +} + function shouldRunRecallForTransaction(transaction, hookName) { if (!hookName) return true; if (!transaction) return true; const hookStates = transaction.hookStates || {}; - if (hookStates[hookName] === "completed") { + if (isTerminalGenerationRecallHookState(hookStates[hookName])) { return false; } if ( hookName === "GENERATE_BEFORE_COMBINE_PROMPTS" && - hookStates.GENERATION_AFTER_COMMANDS === "completed" + isTerminalGenerationRecallHookState(hookStates.GENERATION_AFTER_COMMANDS) ) { return false; } return true; } +function createRecallRunResult(status = "completed", extra = {}) { + const normalizedStatus = String(status || "skipped").trim() || "skipped"; + return { + ok: normalizedStatus === "completed", + didRecall: normalizedStatus === "completed", + status: normalizedStatus, + ...extra, + }; +} + +function getGenerationRecallHookStateFromResult(result) { + const status = String(result?.status || "").trim(); + switch (status) { + case "completed": + return "completed"; + case "failed": + return "failed"; + case "aborted": + case "superseded": + return "aborted"; + default: + return "skipped"; + } +} + +function invalidateRecallAfterHistoryMutation(reason = "聊天记录已变更") { + const hadActiveRecall = Boolean( + isRecalling || + (stageAbortControllers.recall && + !stageAbortControllers.recall.signal?.aborted), + ); + if (hadActiveRecall) { + abortRecallStageWithReason(`${reason},当前召回已取消`); + } + + clearGenerationRecallTransactionsForChat(); + clearRecallInputTracking(); + clearInjectionState({ + preserveRecallStatus: hadActiveRecall, + preserveRuntimeStatus: hadActiveRecall, + }); + + if (hadActiveRecall) { + setLastRecallStatus( + "召回已取消", + `${reason},等待新的召回请求`, + "warning", + { + syncRuntime: true, + }, + ); + } + + return hadActiveRecall; +} + function createGenerationRecallContext({ hookName, generationType = "normal", @@ -4648,7 +4776,7 @@ function applyRecallInjection(settings, recallInput, recentMessages, result) { if (now - lastRecallFallbackNoticeAt > 15000) { lastRecallFallbackNoticeAt = now; toastr.warning( - llmMeta.reason || "LLM 精排未返回有效结果,已回退到评分排序", + llmMeta.reason || "LLM 精排未成功,已改用评分排序并继续注入记忆", "ST-BME 召回提示", { timeOut: 4500 }, ); @@ -4662,184 +4790,240 @@ function applyRecallInjection(settings, recallInput, recentMessages, result) { * 召回管线:检索并注入记忆 */ async function runRecall(options = {}) { - if (isRecalling || !currentGraph) return false; - - const settings = getSettings(); - if (!settings.enabled || !settings.recallEnabled) return false; - if (!isGraphReadable()) { - setLastRecallStatus( - "等待图谱加载", - getGraphMutationBlockReason("召回"), - "warning", - { syncRuntime: true }, - ); - return false; - } - if (isGraphMetadataWriteAllowed()) { - if (!(await recoverHistoryIfNeeded("pre-recall"))) return false; - } - - const context = getContext(); - const chat = context.chat; - if (!chat || chat.length === 0) return false; - - isRecalling = true; - const recallController = beginStageAbortController("recall"); - const recallSignal = recallController.signal; - if (options.signal) { - if (options.signal.aborted) { - recallController.abort( - options.signal.reason || createAbortError("宿主已终止生成"), - ); - } else { - options.signal.addEventListener( - "abort", - () => - recallController.abort( - options.signal.reason || createAbortError("宿主已终止生成"), - ), - { once: true }, - ); - } - } - - try { - await ensureVectorReadyIfNeeded("pre-recall", recallSignal); - const recentContextMessageLimit = clampInt( - settings.recallLlmContextMessages, - 4, - 0, - 20, - ); - const recallInput = resolveRecallInput( - chat, - recentContextMessageLimit, - options, - ); - const userMessage = recallInput.userMessage; - const recentMessages = recallInput.recentMessages; - - if (!userMessage) return false; - - recallInput.hookName = options.hookName || ""; - - console.log("[ST-BME] 开始召回", { - source: recallInput.source, - sourceLabel: recallInput.sourceLabel, - hookName: recallInput.hookName, - userMessageLength: userMessage.length, - recentMessages: recentMessages.length, - }); - setLastRecallStatus( - "召回中", - [ - getRecallHookLabel(recallInput.hookName), - `来源 ${recallInput.sourceLabel}`, - `上下文 ${recentMessages.length} 条`, - `当前用户消息长度 ${userMessage.length}`, - ] - .filter(Boolean) - .join(" · "), - "running", - { syncRuntime: true }, - ); - if (recallInput.source === "send-intent") { - pendingRecallSendIntent = createRecallInputRecord(); - } - - const result = await retrieve({ - graph: currentGraph, - userMessage, - recentMessages, - embeddingConfig: getEmbeddingConfig(), - schema: getSchema(), - signal: recallSignal, - settings, - onStreamProgress: ({ previewText, receivedChars }) => { - const preview = previewText?.length > 60 - ? "…" + previewText.slice(-60) - : previewText || ""; - setLastRecallStatus( - "AI 生成中", - `${preview} [${receivedChars}字]`, - "running", - { syncRuntime: true, noticeMarquee: true }, - ); - }, - options: { - topK: settings.recallTopK, - maxRecallNodes: settings.recallMaxNodes, - enableLLMRecall: settings.recallEnableLLM, - enableVectorPrefilter: settings.recallEnableVectorPrefilter, - enableGraphDiffusion: settings.recallEnableGraphDiffusion, - diffusionTopK: settings.recallDiffusionTopK, - llmCandidatePool: settings.recallLlmCandidatePool, - recallPrompt: undefined, - weights: { - graphWeight: settings.graphWeight, - vectorWeight: settings.vectorWeight, - importanceWeight: settings.importanceWeight, - }, - // v2 options - enableVisibility: settings.enableVisibility ?? false, - visibilityFilter: context.name2 || null, - enableCrossRecall: settings.enableCrossRecall ?? false, - enableProbRecall: settings.enableProbRecall ?? false, - probRecallChance: settings.probRecallChance ?? 0.15, - enableMultiIntent: settings.recallEnableMultiIntent ?? true, - multiIntentMaxSegments: settings.recallMultiIntentMaxSegments ?? 4, - teleportAlpha: settings.recallTeleportAlpha ?? 0.15, - enableTemporalLinks: settings.recallEnableTemporalLinks ?? true, - temporalLinkStrength: settings.recallTemporalLinkStrength ?? 0.2, - enableDiversitySampling: - settings.recallEnableDiversitySampling ?? true, - dppCandidateMultiplier: - settings.recallDppCandidateMultiplier ?? 3, - dppQualityWeight: settings.recallDppQualityWeight ?? 1.0, - enableCooccurrenceBoost: - settings.recallEnableCooccurrenceBoost ?? false, - cooccurrenceScale: settings.recallCooccurrenceScale ?? 0.1, - cooccurrenceMaxNeighbors: - settings.recallCooccurrenceMaxNeighbors ?? 10, - enableResidualRecall: - settings.recallEnableResidualRecall ?? false, - residualBasisMaxNodes: - settings.recallResidualBasisMaxNodes ?? 24, - residualNmfTopics: settings.recallNmfTopics ?? 15, - residualNmfNoveltyThreshold: - settings.recallNmfNoveltyThreshold ?? 0.4, - residualThreshold: settings.recallResidualThreshold ?? 0.3, - residualTopK: settings.recallResidualTopK ?? 5, - }, - }); - - applyRecallInjection(settings, recallInput, recentMessages, result); - return true; - } catch (e) { - if (isAbortError(e)) { + if (isRecalling) { + abortRecallStageWithReason("旧召回已取消,正在启动新的召回"); + const settle = await waitForActiveRecallToSettle(); + if (!settle.settled && isRecalling) { setLastRecallStatus( - "召回已终止", - e?.message || "已手动终止当前召回", + "召回忙", + "上一轮召回仍在清理,请稍后重试", "warning", { syncRuntime: true, }, ); - return false; + return createRecallRunResult("skipped", { + reason: "上一轮召回仍在清理", + }); } - console.error("[ST-BME] 召回失败:", e); - const message = e?.message || String(e); - setLastRecallStatus("召回失败", message, "error", { - syncRuntime: true, - toastKind: "", - }); - toastr.error(`召回失败: ${message}`); - return false; - } finally { - finishStageAbortController("recall", recallController); - isRecalling = false; - refreshPanelLiveState(); } + + if (!currentGraph) { + return createRecallRunResult("skipped", { + reason: "当前无图谱", + }); + } + + const settings = getSettings(); + if (!settings.enabled || !settings.recallEnabled) { + return createRecallRunResult("skipped", { + reason: "召回功能未启用", + }); + } + if (!isGraphReadable()) { + const reason = getGraphMutationBlockReason("召回"); + setLastRecallStatus("等待图谱加载", reason, "warning", { + syncRuntime: true, + }); + return createRecallRunResult("skipped", { + reason, + }); + } + if (isGraphMetadataWriteAllowed()) { + if (!(await recoverHistoryIfNeeded("pre-recall"))) { + return createRecallRunResult("skipped", { + reason: "历史恢复未就绪", + }); + } + } + + const context = getContext(); + const chat = context.chat; + if (!chat || chat.length === 0) { + return createRecallRunResult("skipped", { + reason: "当前聊天为空", + }); + } + + const runId = ++recallRunSequence; + let recallPromise = null; + recallPromise = (async () => { + isRecalling = true; + const recallController = beginStageAbortController("recall"); + const recallSignal = recallController.signal; + if (options.signal) { + if (options.signal.aborted) { + recallController.abort( + options.signal.reason || createAbortError("宿主已终止生成"), + ); + } else { + options.signal.addEventListener( + "abort", + () => + recallController.abort( + options.signal.reason || createAbortError("宿主已终止生成"), + ), + { once: true }, + ); + } + } + + try { + await ensureVectorReadyIfNeeded("pre-recall", recallSignal); + const recentContextMessageLimit = clampInt( + settings.recallLlmContextMessages, + 4, + 0, + 20, + ); + const recallInput = resolveRecallInput( + chat, + recentContextMessageLimit, + options, + ); + const userMessage = recallInput.userMessage; + const recentMessages = recallInput.recentMessages; + + if (!userMessage) { + return createRecallRunResult("skipped", { + reason: "当前没有可用于召回的用户输入", + }); + } + + recallInput.hookName = options.hookName || ""; + + console.log("[ST-BME] 开始召回", { + source: recallInput.source, + sourceLabel: recallInput.sourceLabel, + hookName: recallInput.hookName, + userMessageLength: userMessage.length, + recentMessages: recentMessages.length, + runId, + }); + setLastRecallStatus( + "召回中", + [ + getRecallHookLabel(recallInput.hookName), + `来源 ${recallInput.sourceLabel}`, + `上下文 ${recentMessages.length} 条`, + `当前用户消息长度 ${userMessage.length}`, + ] + .filter(Boolean) + .join(" · "), + "running", + { syncRuntime: true }, + ); + if (recallInput.source === "send-intent") { + pendingRecallSendIntent = createRecallInputRecord(); + } + + const result = await retrieve({ + graph: currentGraph, + userMessage, + recentMessages, + embeddingConfig: getEmbeddingConfig(), + schema: getSchema(), + signal: recallSignal, + settings, + onStreamProgress: ({ previewText, receivedChars }) => { + const preview = previewText?.length > 60 + ? "…" + previewText.slice(-60) + : previewText || ""; + setLastRecallStatus( + "AI 生成中", + `${preview} [${receivedChars}字]`, + "running", + { syncRuntime: true, noticeMarquee: true }, + ); + }, + options: { + topK: settings.recallTopK, + maxRecallNodes: settings.recallMaxNodes, + enableLLMRecall: settings.recallEnableLLM, + enableVectorPrefilter: settings.recallEnableVectorPrefilter, + enableGraphDiffusion: settings.recallEnableGraphDiffusion, + diffusionTopK: settings.recallDiffusionTopK, + llmCandidatePool: settings.recallLlmCandidatePool, + recallPrompt: undefined, + weights: { + graphWeight: settings.graphWeight, + vectorWeight: settings.vectorWeight, + importanceWeight: settings.importanceWeight, + }, + // v2 options + enableVisibility: settings.enableVisibility ?? false, + visibilityFilter: context.name2 || null, + enableCrossRecall: settings.enableCrossRecall ?? false, + enableProbRecall: settings.enableProbRecall ?? false, + probRecallChance: settings.probRecallChance ?? 0.15, + enableMultiIntent: settings.recallEnableMultiIntent ?? true, + multiIntentMaxSegments: settings.recallMultiIntentMaxSegments ?? 4, + teleportAlpha: settings.recallTeleportAlpha ?? 0.15, + enableTemporalLinks: settings.recallEnableTemporalLinks ?? true, + temporalLinkStrength: settings.recallTemporalLinkStrength ?? 0.2, + enableDiversitySampling: + settings.recallEnableDiversitySampling ?? true, + dppCandidateMultiplier: + settings.recallDppCandidateMultiplier ?? 3, + dppQualityWeight: settings.recallDppQualityWeight ?? 1.0, + enableCooccurrenceBoost: + settings.recallEnableCooccurrenceBoost ?? false, + cooccurrenceScale: settings.recallCooccurrenceScale ?? 0.1, + cooccurrenceMaxNeighbors: + settings.recallCooccurrenceMaxNeighbors ?? 10, + enableResidualRecall: + settings.recallEnableResidualRecall ?? false, + residualBasisMaxNodes: + settings.recallResidualBasisMaxNodes ?? 24, + residualNmfTopics: settings.recallNmfTopics ?? 15, + residualNmfNoveltyThreshold: + settings.recallNmfNoveltyThreshold ?? 0.4, + residualThreshold: settings.recallResidualThreshold ?? 0.3, + residualTopK: settings.recallResidualTopK ?? 5, + }, + }); + + applyRecallInjection(settings, recallInput, recentMessages, result); + return createRecallRunResult("completed", { + reason: "召回完成", + selectedNodeIds: result.selectedNodeIds || [], + }); + } catch (e) { + if (isAbortError(e)) { + setLastRecallStatus( + "召回已终止", + e?.message || "已手动终止当前召回", + "warning", + { + syncRuntime: true, + }, + ); + return createRecallRunResult("aborted", { + reason: e?.message || "召回已终止", + }); + } + console.error("[ST-BME] 召回失败:", e); + const message = e?.message || String(e); + setLastRecallStatus("召回失败", message, "error", { + syncRuntime: true, + toastKind: "", + }); + toastr.error(`召回失败: ${message}`); + return createRecallRunResult("failed", { + reason: message, + }); + } finally { + finishStageAbortController("recall", recallController); + isRecalling = false; + if (activeRecallPromise === recallPromise) { + activeRecallPromise = null; + } + refreshPanelLiveState(); + } + })(); + + activeRecallPromise = recallPromise; + return await recallPromise; } // ==================== 事件钩子 ==================== @@ -4853,6 +5037,7 @@ function onChatChanged() { skipBeforeCombineRecallUntil = 0; lastPreGenerationRecallKey = ""; lastPreGenerationRecallAt = 0; + clearGenerationRecallTransactionsForChat("", { clearAll: true }); abortAllRunningStages(); dismissAllStageNotices(); syncGraphLoadFromLiveContext({ @@ -4881,7 +5066,7 @@ function onMessageSent(messageId) { } function onMessageDeleted(chatLengthOrMessageId, meta = null) { - clearInjectionState(); + invalidateRecallAfterHistoryMutation("消息已删除"); scheduleHistoryMutationRecheck( "message-deleted", chatLengthOrMessageId, @@ -4890,12 +5075,12 @@ function onMessageDeleted(chatLengthOrMessageId, meta = null) { } function onMessageEdited(messageId, meta = null) { - clearInjectionState(); + invalidateRecallAfterHistoryMutation("消息已编辑"); scheduleHistoryMutationRecheck("message-edited", messageId, meta); } function onMessageSwiped(messageId, meta = null) { - clearInjectionState(); + invalidateRecallAfterHistoryMutation("已切换楼层 swipe"); scheduleHistoryMutationRecheck("message-swiped", messageId, meta); } @@ -4925,7 +5110,7 @@ async function onGenerationAfterCommands(type, params = {}, dryRun = false) { recallContext.hookName, "running", ); - const didRecall = await runRecall({ + const recallResult = await runRecall({ ...recallOptions, recallKey: recallContext.recallKey, hookName: recallContext.hookName, @@ -4935,7 +5120,7 @@ async function onGenerationAfterCommands(type, params = {}, dryRun = false) { markGenerationRecallTransactionHookState( recallContext.transaction, recallContext.hookName, - didRecall ? "completed" : "pending", + getGenerationRecallHookStateFromResult(recallResult), ); } @@ -4960,7 +5145,7 @@ async function onBeforeCombinePrompts() { recallContext.hookName, "running", ); - const didRecall = await runRecall({ + const recallResult = await runRecall({ ...recallOptions, recallKey: recallContext.recallKey, hookName: recallContext.hookName, @@ -4968,7 +5153,7 @@ async function onBeforeCombinePrompts() { markGenerationRecallTransactionHookState( recallContext.transaction, recallContext.hookName, - didRecall ? "completed" : "pending", + getGenerationRecallHookStateFromResult(recallResult), ); } diff --git a/llm.js b/llm.js index b586f24..6aee979 100644 --- a/llm.js +++ b/llm.js @@ -1446,6 +1446,7 @@ export async function callLLMForJSON({ promptMessages = [], debugContext = null, onStreamProgress = null, + returnFailureDetails = false, } = {}) { const override = getLlmTestOverride("callLLMForJSON"); if (override) { @@ -1459,6 +1460,8 @@ export async function callLLMForJSON({ additionalMessages, promptMessages, debugContext, + onStreamProgress, + returnFailureDetails, }); } @@ -1467,6 +1470,7 @@ export async function callLLMForJSON({ requestSource, ); let lastFailureReason = ""; + let lastFailureType = ""; const promptExecutionSummary = buildPromptExecutionSummary(debugContext); for (let attempt = 0; attempt <= maxRetries; attempt++) { @@ -1503,18 +1507,28 @@ export async function callLLMForJSON({ if (!responseText || typeof responseText !== "string") { console.warn(`[ST-BME] LLM 返回空响应 (尝试 ${attempt + 1})`); lastFailureReason = "返回空响应"; + lastFailureType = "empty-response"; continue; } // 尝试解析 JSON const parsed = extractJSON(outputCleanup.cleanedText); if (parsed !== null) { - return parsed; + return returnFailureDetails + ? { + ok: true, + data: parsed, + attempts: attempt + 1, + errorType: "", + failureReason: "", + } + : parsed; } const truncated = response.finishReason === "length" || looksLikeTruncatedJson(outputCleanup.cleanedText); + lastFailureType = truncated ? "truncated-json" : "invalid-json"; lastFailureReason = truncated ? "输出因长度限制被截断,请重新输出更紧凑的完整 JSON" : "输出不是有效 JSON,请严格返回紧凑 JSON 对象"; @@ -1524,13 +1538,40 @@ export async function callLLMForJSON({ ); } catch (e) { if (isAbortError(e)) { - throw e; + const abortMessage = e?.message || String(e) || "LLM 调用已终止"; + const isTimeoutAbort = + !signal?.aborted && /超时/i.test(String(abortMessage || "")); + if (!isTimeoutAbort) { + throw e; + } + console.error(`[ST-BME] LLM 调用超时 (尝试 ${attempt + 1}):`, e); + lastFailureReason = abortMessage; + lastFailureType = "timeout"; + continue; } console.error(`[ST-BME] LLM 调用失败 (尝试 ${attempt + 1}):`, e); lastFailureReason = e?.message || String(e) || "LLM 调用失败"; + lastFailureType = "provider-error"; } } + if (returnFailureDetails) { + const failureSnapshot = { + ok: false, + data: null, + attempts: maxRetries + 1, + errorType: lastFailureType || "unknown", + failureReason: lastFailureReason || "LLM 未返回可解析 JSON", + }; + recordTaskLlmRequest(taskType || privateRequestSource, { + jsonFailure: failureSnapshot, + promptExecution: promptExecutionSummary, + }, { + merge: true, + }); + return failureSnapshot; + } + return null; } diff --git a/prompt-builder.js b/prompt-builder.js index 5d84ad8..c691854 100644 --- a/prompt-builder.js +++ b/prompt-builder.js @@ -1093,7 +1093,8 @@ export function buildTaskLlmPayload(promptBuild = null, fallbackUserPrompt = "") ).text; return { - systemPrompt: String(promptBuild?.systemPrompt || ""), + systemPrompt: + executionMessages.length > 0 ? "" : String(promptBuild?.systemPrompt || ""), userPrompt: hasUserMessage ? "" : sanitizedFallbackUserPrompt, promptMessages: executionMessages, additionalMessages: diff --git a/retriever.js b/retriever.js index a28d340..55a169c 100644 --- a/retriever.js +++ b/retriever.js @@ -57,6 +57,37 @@ function resolveTaskPromptPayload(promptBuild, fallbackUserPrompt = "") { }; } +function resolveTaskLlmSystemPrompt(promptPayload, fallbackSystemPrompt = "") { + const hasPromptMessages = + Array.isArray(promptPayload?.promptMessages) && + promptPayload.promptMessages.length > 0; + if (hasPromptMessages) { + return String(promptPayload?.systemPrompt || ""); + } + return String(promptPayload?.systemPrompt || fallbackSystemPrompt || ""); +} + +function buildRecallFallbackReason(llmResult) { + const failureType = String(llmResult?.errorType || "").trim(); + const failureReason = String(llmResult?.failureReason || "").trim(); + switch (failureType) { + case "timeout": + return "LLM 精排请求超时,已回退到评分排序"; + case "empty-response": + return "LLM 精排返回空响应,已回退到评分排序"; + case "truncated-json": + return "LLM 精排输出被截断,已回退到评分排序"; + case "invalid-json": + return "LLM 精排未返回有效 JSON,已回退到评分排序"; + case "provider-error": + return failureReason + ? `LLM 精排调用失败(${failureReason}),已回退到评分排序` + : "LLM 精排调用失败,已回退到评分排序"; + default: + return failureReason || "LLM 精排未返回可用结果,已回退到评分排序"; + } +} + function isAbortError(error) { return error?.name === "AbortError"; } @@ -535,6 +566,7 @@ export async function retrieve({ enabled: true, status: llmResult.status, reason: llmResult.reason, + fallbackType: llmResult.fallbackType || "", candidatePool: llmCandidates.length, selectedSeedCount: llmResult.selectedNodeIds.length, }; @@ -562,7 +594,7 @@ export async function retrieve({ selectedNodeIds = reconstructSceneNodeIds( graph, selectedNodeIds, - normalizedTopK + 6, + normalizedMaxRecallNodes, ); // 访问强化 @@ -597,7 +629,10 @@ export async function retrieve({ } } - selectedNodeIds = uniqueNodeIds(selectedNodeIds); + selectedNodeIds = uniqueNodeIds(selectedNodeIds).slice( + 0, + normalizedMaxRecallNodes, + ); retrievalMeta.llm = llmMeta; retrievalMeta.timings.total = roundMs(nowMs() - startedAt); @@ -809,8 +844,8 @@ async function llmRecall( ].join("\n"); const promptPayload = resolveTaskPromptPayload(recallPromptBuild, userPrompt); - const result = await callLLMForJSON({ - systemPrompt: promptPayload.systemPrompt || systemPrompt, + const llmResult = await callLLMForJSON({ + systemPrompt: resolveTaskLlmSystemPrompt(promptPayload, systemPrompt), userPrompt: promptPayload.userPrompt, maxRetries: 1, signal, @@ -822,7 +857,9 @@ async function llmRecall( promptMessages: promptPayload.promptMessages, additionalMessages: promptPayload.additionalMessages, onStreamProgress, + returnFailureDetails: true, }); + const result = llmResult?.ok ? llmResult.data : null; if (result?.selected_ids && Array.isArray(result.selected_ids)) { // 校验 ID 有效性 @@ -845,10 +882,16 @@ async function llmRecall( } // LLM 失败时回退到纯评分排序 + const fallbackReason = llmResult?.ok + ? Array.isArray(result?.selected_ids) + ? "LLM 返回的候选 ID 无效,已回退到评分排序" + : "LLM 返回了无法识别的 JSON 结构,已回退到评分排序" + : buildRecallFallbackReason(llmResult); return { selectedNodeIds: candidates.slice(0, maxNodes).map((c) => c.nodeId), status: "fallback", - reason: "LLM 未返回有效 JSON 或有效候选,已回退到评分排序", + reason: fallbackReason, + fallbackType: llmResult?.ok ? "invalid-candidate" : llmResult?.errorType || "unknown", }; } diff --git a/tests/graph-persistence.mjs b/tests/graph-persistence.mjs index a04380d..ac4ab32 100644 --- a/tests/graph-persistence.mjs +++ b/tests/graph-persistence.mjs @@ -28,7 +28,7 @@ function extractSnippet(startMarker, endMarker) { const persistencePrelude = extractSnippet( 'const MODULE_NAME = "st_bme";', - "function clearInjectionState() {", + "function clearInjectionState(options = {}) {", ); const persistenceCore = extractSnippet( "function loadGraphFromChat(options = {}) {", diff --git a/tests/p0-regressions.mjs b/tests/p0-regressions.mjs index 68530c5..e1497a8 100644 --- a/tests/p0-regressions.mjs +++ b/tests/p0-regressions.mjs @@ -237,7 +237,7 @@ function createGenerationRecallHarness() { ); context.runRecall = async (options = {}) => { context.runRecallCalls.push({ ...options }); - return true; + return { status: "completed", didRecall: true, ok: true }; }; return context; }); @@ -1296,6 +1296,31 @@ async function testGenerationRecallDifferentKeyCanRunAgain() { ); } +async function testGenerationRecallSkippedStateDoesNotLoopToBeforeCombine() { + const harness = await createGenerationRecallHarness(); + harness.chat = [{ is_user: true, mes: "同一条但本次跳过" }]; + harness.runRecall = async (options = {}) => { + harness.runRecallCalls.push({ ...options }); + return { + status: "skipped", + didRecall: false, + ok: false, + reason: "测试跳过", + }; + }; + + await harness.result.onGenerationAfterCommands("normal", {}, false); + await harness.result.onBeforeCombinePrompts(); + + assert.equal(harness.runRecallCalls.length, 1); + assert.equal( + harness.result.generationRecallTransactions.size, + 1, + ); + const transaction = [...harness.result.generationRecallTransactions.values()][0]; + assert.equal(transaction.hookStates.GENERATION_AFTER_COMMANDS, "skipped"); +} + async function testRerollUsesBatchBoundaryRollbackAndPersistsState() { const harness = await createRerollHarness(); harness.chat = [ @@ -1644,6 +1669,7 @@ await testProcessedHistoryAdvanceRequiresCompleteStrongSuccess(); await testGenerationRecallTransactionDedupesDoubleHookBySameKey(); await testGenerationRecallBeforeCombineRunsStandalone(); await testGenerationRecallDifferentKeyCanRunAgain(); +await testGenerationRecallSkippedStateDoesNotLoopToBeforeCombine(); await testRerollUsesBatchBoundaryRollbackAndPersistsState(); await testRerollRejectsMissingRecoveryPoint(); await testRerollFallsBackToDirectExtractForUnprocessedFloor(); diff --git a/tests/prompt-builder-defaults.mjs b/tests/prompt-builder-defaults.mjs index 4c00f48..1f3a196 100644 --- a/tests/prompt-builder-defaults.mjs +++ b/tests/prompt-builder-defaults.mjs @@ -51,6 +51,7 @@ const extractPromptBuild = await buildTaskPrompt(settings, "extract", { currentRange: "1 ~ 2", }); const extractPayload = buildTaskLlmPayload(extractPromptBuild, "fallback-user"); +assert.equal(extractPayload.systemPrompt, ""); assert.equal(extractPayload.userPrompt, ""); assert.equal( extractPayload.promptMessages.filter((message) => message.role === "user").length, @@ -86,6 +87,7 @@ const recallPromptBuild = await buildTaskPrompt(settings, "recall", { graphStats: "candidate_count=2", }); const recallPayload = buildTaskLlmPayload(recallPromptBuild, "fallback-user"); +assert.equal(recallPayload.systemPrompt, ""); assert.equal(recallPayload.userPrompt, ""); assert.equal( recallPayload.promptMessages.filter((message) => message.role === "user").length, diff --git a/tests/prompt-builder-mvu.mjs b/tests/prompt-builder-mvu.mjs index 8a0abb5..51367d7 100644 --- a/tests/prompt-builder-mvu.mjs +++ b/tests/prompt-builder-mvu.mjs @@ -258,6 +258,7 @@ try { }; const payload = buildTaskLlmPayload(promptBuild, "unused fallback"); + assert.equal(payload.systemPrompt, ""); const result = await llm.callLLMForJSON({ systemPrompt: payload.systemPrompt, userPrompt: payload.userPrompt, diff --git a/tests/retrieval-config.mjs b/tests/retrieval-config.mjs index baecf2a..39b1d7d 100644 --- a/tests/retrieval-config.mjs +++ b/tests/retrieval-config.mjs @@ -84,6 +84,8 @@ const state = { diffusionCalls: [], llmCalls: [], llmCandidateCount: 0, + llmResponse: { selected_ids: ["rule-2", "rule-1"] }, + llmOptions: [], }; const graph = createGraph(); @@ -164,12 +166,26 @@ const retrieve = await loadRetrieve({ { nodeId: "rule-3", energy: 0.9 }, ]; }, - async callLLMForJSON({ userPrompt }) { + async callLLMForJSON(params = {}) { + const { userPrompt = "" } = params; + state.llmOptions.push({ ...params }); state.llmCalls.push(userPrompt); state.llmCandidateCount = userPrompt .split("\n") .filter((line) => line.trim().startsWith("[")).length; - return { selected_ids: ["rule-2", "rule-1"] }; + if (params.returnFailureDetails) { + if (state.llmResponse?.ok === false) { + return state.llmResponse; + } + return { + ok: true, + data: state.llmResponse, + errorType: "", + failureReason: "", + attempts: 1, + }; + } + return state.llmResponse; }, getSTContextForPrompt() { return {}; @@ -201,7 +217,9 @@ assert.deepEqual(Array.from(noStageResult.selectedNodeIds), ["rule-2", "rule-1"] state.vectorCalls.length = 0; state.diffusionCalls.length = 0; state.llmCalls.length = 0; +state.llmOptions.length = 0; state.llmCandidateCount = 0; +state.llmResponse = { selected_ids: ["rule-2", "rule-1"] }; const llmPoolResult = await retrieve({ graph, userMessage: "请根据规则给出结论", @@ -227,10 +245,12 @@ assert.equal(llmPoolResult.meta.retrieval.vectorMergedHits, 3); assert.equal(llmPoolResult.meta.retrieval.diversityApplied, true); assert.equal(llmPoolResult.meta.retrieval.candidatePoolBeforeDpp, 3); assert.equal(llmPoolResult.meta.retrieval.candidatePoolAfterDpp, 2); +assert.equal(state.llmOptions[0].returnFailureDetails, true); state.vectorCalls.length = 0; state.diffusionCalls.length = 0; state.llmCalls.length = 0; +state.llmOptions.length = 0; await retrieve({ graph, userMessage: "规则一和规则二有什么关联", @@ -261,4 +281,89 @@ assert.equal(state.diffusionCalls[0].options.topK, 7); assert.equal(state.diffusionCalls[0].options.teleportAlpha, 0.15); assert.equal(noStageResult.meta.retrieval.llm.status, "disabled"); +state.vectorCalls.length = 0; +state.diffusionCalls.length = 0; +state.llmCalls.length = 0; +state.llmOptions.length = 0; +state.llmResponse = { + ok: false, + errorType: "invalid-json", + failureReason: "输出不是有效 JSON,请严格返回紧凑 JSON 对象", +}; +const fallbackResult = await retrieve({ + graph, + userMessage: "LLM 这次会坏掉", + recentMessages: ["用户:请回忆相关规则"], + embeddingConfig: {}, + schema, + options: { + topK: 4, + maxRecallNodes: 2, + enableVectorPrefilter: true, + enableGraphDiffusion: false, + enableLLMRecall: true, + llmCandidatePool: 2, + }, +}); +assert.equal(fallbackResult.meta.retrieval.llm.status, "fallback"); +assert.match(fallbackResult.meta.retrieval.llm.reason, /有效 JSON|回退到评分排序/); +assert.equal(fallbackResult.meta.retrieval.llm.fallbackType, "invalid-json"); + +const sceneGraph = { + nodes: [ + { + id: "event-1", + type: "event", + importance: 10, + createdTime: 1, + archived: false, + fields: { title: "事件一" }, + seqRange: [1, 1], + }, + { + id: "character-1", + type: "character", + importance: 6, + createdTime: 2, + archived: false, + fields: { name: "Alice" }, + seqRange: [1, 1], + }, + { + id: "location-1", + type: "location", + importance: 5, + createdTime: 3, + archived: false, + fields: { title: "大厅" }, + seqRange: [1, 1], + }, + ], + edges: [ + { fromId: "event-1", toId: "character-1", relation: "mentions" }, + { fromId: "event-1", toId: "location-1", relation: "occurs_at" }, + ], +}; +const sceneSchema = [ + { id: "event", label: "事件", alwaysInject: false }, + { id: "character", label: "角色", alwaysInject: false }, + { id: "location", label: "地点", alwaysInject: false }, +]; +const cappedResult = await retrieve({ + graph: sceneGraph, + userMessage: "只看这一个场景", + recentMessages: [], + embeddingConfig: {}, + schema: sceneSchema, + options: { + topK: 3, + maxRecallNodes: 1, + enableVectorPrefilter: false, + enableGraphDiffusion: false, + enableLLMRecall: false, + enableProbRecall: false, + }, +}); +assert.equal(cappedResult.selectedNodeIds.length, 1); + console.log("retrieval-config tests passed");