Harden recall flow and JSON task prompts

This commit is contained in:
Youzini-afk
2026-03-28 20:38:57 +08:00
parent 30fdeaac1a
commit 67e6e29bb2
12 changed files with 618 additions and 200 deletions

View File

@@ -302,9 +302,14 @@ async function summarizeBatch(
compressPromptBuild,
userPrompt,
);
const llmSystemPrompt =
Array.isArray(promptPayload.promptMessages) &&
promptPayload.promptMessages.length > 0
? String(promptPayload.systemPrompt || "")
: String(promptPayload.systemPrompt || systemPrompt || "");
return await callLLMForJSON({
systemPrompt: promptPayload.systemPrompt || systemPrompt,
systemPrompt: llmSystemPrompt,
userPrompt: promptPayload.userPrompt,
maxRetries: 1,
signal,

View File

@@ -341,10 +341,14 @@ export async function consolidateMemories({
consolidationPromptBuild,
userPrompt,
);
const llmSystemPrompt =
Array.isArray(promptPayload.promptMessages) &&
promptPayload.promptMessages.length > 0
? String(promptPayload.systemPrompt || "")
: String(promptPayload.systemPrompt || consolidationSystemPrompt || "");
try {
decision = await callLLMForJSON({
systemPrompt:
promptPayload.systemPrompt || consolidationSystemPrompt,
systemPrompt: llmSystemPrompt,
userPrompt: promptPayload.userPrompt,
maxRetries: 1,
signal,

View File

@@ -174,10 +174,15 @@ export async function extractMemories({
"请分析对话,按 JSON 格式输出操作列表。",
].join("\n");
const promptPayload = resolveTaskPromptPayload(promptBuild, userPrompt);
const llmSystemPrompt =
Array.isArray(promptPayload.promptMessages) &&
promptPayload.promptMessages.length > 0
? String(promptPayload.systemPrompt || "")
: String(promptPayload.systemPrompt || systemPrompt || "");
// 调用 LLM
const result = await callLLMForJSON({
systemPrompt: promptPayload.systemPrompt || systemPrompt,
systemPrompt: llmSystemPrompt,
userPrompt: promptPayload.userPrompt,
maxRetries: 2,
signal,

553
index.js
View File

@@ -292,6 +292,8 @@ const defaultSettings = {
let currentGraph = null;
let isExtracting = false;
let isRecalling = false;
let activeRecallPromise = null;
let recallRunSequence = 0;
let lastInjectionContent = "";
let lastExtractedItems = []; // 最近提取的节点(面板展示用)
let lastRecalledItems = []; // 最近召回的节点(面板展示用)
@@ -322,6 +324,9 @@ let pendingHistoryRecoveryTrigger = "";
let pendingHistoryMutationCheckTimers = [];
let pendingGraphLoadRetryTimer = null;
let pendingGraphLoadRetryChatId = "";
let skipBeforeCombineRecallUntil = 0;
let lastPreGenerationRecallKey = "";
let lastPreGenerationRecallAt = 0;
const generationRecallTransactions = new Map();
const GENERATION_RECALL_TRANSACTION_TTL_MS = 15000;
const stageNoticeHandles = {
@@ -696,6 +701,38 @@ function abortStage(stage) {
return true;
}
/**
 * Abort the currently running recall stage, if one is active.
 * @param {string} [reason] - Human-readable reason used as the AbortError message.
 * @returns {boolean} true when an active (not-yet-aborted) recall controller was aborted.
 */
function abortRecallStageWithReason(reason = "召回已终止") {
  const controller = stageAbortControllers.recall;
  // Nothing to cancel when no recall is in flight or it was already aborted.
  if (!controller || controller.signal.aborted) {
    return false;
  }
  controller.abort(createAbortError(reason));
  return true;
}
/**
 * Wait (bounded by timeoutMs) for the in-flight recall promise to settle.
 * Rejections of the tracked promise are swallowed here; this helper only
 * reports whether the recall pipeline has finished cleaning up.
 * @param {number} [timeoutMs] - Maximum time to wait before giving up.
 * @returns {Promise<{settled: boolean, timedOut: boolean}>}
 */
async function waitForActiveRecallToSettle(timeoutMs = 1800) {
  const pending = activeRecallPromise;
  if (!pending) {
    // No tracked promise: report settled unless the recalling flag is still up.
    return { settled: !isRecalling, timedOut: false };
  }
  let finished = false;
  const watched = Promise.resolve(pending)
    .catch(() => {})
    .then(() => {
      finished = true;
    });
  const timer = new Promise((resolve) => setTimeout(resolve, timeoutMs));
  await Promise.race([watched, timer]);
  return {
    settled: finished || !isRecalling,
    timedOut: !finished && isRecalling,
  };
}
function buildAbortStageAction(stage) {
const abortStageName = findAbortableStageForNotice(stage);
if (!abortStageName) return undefined;
@@ -1687,11 +1724,19 @@ function scheduleStartupGraphReconciliation() {
}
}
function clearInjectionState() {
function clearInjectionState(options = {}) {
const {
preserveRecallStatus = false,
preserveRuntimeStatus = preserveRecallStatus,
} = options;
lastInjectionContent = "";
lastRecalledItems = [];
lastRecallStatus = createUiStatus("待命", "当前无有效注入内容", "idle");
runtimeStatus = createUiStatus("待命", "当前无有效注入内容", "idle");
if (!preserveRecallStatus) {
lastRecallStatus = createUiStatus("待命", "当前无有效注入内容", "idle");
}
if (!preserveRuntimeStatus) {
runtimeStatus = createUiStatus("待命", "当前无有效注入内容", "idle");
}
recordInjectionSnapshot("recall", {
injectionText: "",
selectedNodeIds: [],
@@ -1703,7 +1748,7 @@ function clearInjectionState() {
mode: "cleared",
},
});
if (!isRecalling) {
if (!isRecalling && !preserveRecallStatus) {
dismissStageNotice("recall");
}
@@ -3155,22 +3200,105 @@ function markGenerationRecallTransactionHookState(
return transaction;
}
/**
 * Remove generation-recall transactions belonging to a chat.
 * With `clearAll` (or when no usable chatId resolves) the whole map is wiped.
 * @param {string} [chatId] - Target chat id; defaults to the current chat.
 * @param {{clearAll?: boolean}} [options]
 * @returns {number} Count of transactions removed.
 */
function clearGenerationRecallTransactionsForChat(
  chatId = getCurrentChatId(),
  { clearAll = false } = {},
) {
  const targetChatId = String(chatId || "");
  if (clearAll || !targetChatId) {
    const removedCount = generationRecallTransactions.size;
    generationRecallTransactions.clear();
    return removedCount;
  }
  let removedCount = 0;
  for (const [transactionId, transaction] of generationRecallTransactions.entries()) {
    if (String(transaction?.chatId || "") === targetChatId) {
      generationRecallTransactions.delete(transactionId);
      removedCount += 1;
    }
  }
  return removedCount;
}
/**
 * Whether a per-hook recall state is terminal (no further recall should run
 * for that hook within the same generation transaction).
 * @param {string} [state] - Hook state string; falsy values normalize to "".
 * @returns {boolean}
 */
function isTerminalGenerationRecallHookState(state = "") {
  switch (String(state || "")) {
    case "completed":
    case "failed":
    case "aborted":
    case "skipped":
      return true;
    default:
      return false;
  }
}
function shouldRunRecallForTransaction(transaction, hookName) {
if (!hookName) return true;
if (!transaction) return true;
const hookStates = transaction.hookStates || {};
if (hookStates[hookName] === "completed") {
if (isTerminalGenerationRecallHookState(hookStates[hookName])) {
return false;
}
if (
hookName === "GENERATE_BEFORE_COMBINE_PROMPTS" &&
hookStates.GENERATION_AFTER_COMMANDS === "completed"
isTerminalGenerationRecallHookState(hookStates.GENERATION_AFTER_COMMANDS)
) {
return false;
}
return true;
}
/**
 * Build a normalized recall-run result object.
 * Blank/falsy statuses collapse to "skipped"; `ok`/`didRecall` are true only
 * for "completed". Extra fields are spread last and may override the defaults.
 * @param {string} [status]
 * @param {Object} [extra] - Additional fields merged into the result.
 * @returns {{ok: boolean, didRecall: boolean, status: string}}
 */
function createRecallRunResult(status = "completed", extra = {}) {
  let normalizedStatus = String(status || "skipped").trim();
  if (!normalizedStatus) {
    normalizedStatus = "skipped";
  }
  const completed = normalizedStatus === "completed";
  return {
    ok: completed,
    didRecall: completed,
    status: normalizedStatus,
    ...extra,
  };
}
/**
 * Map a recall-run result to the hook state recorded on the transaction.
 * "superseded" collapses into "aborted"; anything unrecognized is "skipped".
 * @param {{status?: string}|null|undefined} result
 * @returns {"completed"|"failed"|"aborted"|"skipped"}
 */
function getGenerationRecallHookStateFromResult(result) {
  const status = String(result?.status || "").trim();
  if (status === "completed") {
    return "completed";
  }
  if (status === "failed") {
    return "failed";
  }
  if (status === "aborted" || status === "superseded") {
    return "aborted";
  }
  return "skipped";
}
/**
 * Invalidate recall state after the chat history was mutated (delete/edit/swipe).
 * Aborts any in-flight recall, drops per-chat generation transactions and input
 * tracking, and clears the injection while preserving the recall/runtime status
 * UI when a recall had to be cancelled (so the "cancelled" notice can be shown).
 * @param {string} [reason] - Why the history changed; surfaced in status text.
 * @returns {boolean} true when an active recall was cancelled by this call.
 */
function invalidateRecallAfterHistoryMutation(reason = "聊天记录已变更") {
  const recallController = stageAbortControllers.recall;
  const hadActiveRecall = Boolean(
    isRecalling || (recallController && !recallController.signal?.aborted),
  );
  if (hadActiveRecall) {
    abortRecallStageWithReason(`${reason},当前召回已取消`);
  }
  clearGenerationRecallTransactionsForChat();
  clearRecallInputTracking();
  clearInjectionState({
    preserveRecallStatus: hadActiveRecall,
    preserveRuntimeStatus: hadActiveRecall,
  });
  if (hadActiveRecall) {
    setLastRecallStatus("召回已取消", `${reason},等待新的召回请求`, "warning", {
      syncRuntime: true,
    });
  }
  return hadActiveRecall;
}
function createGenerationRecallContext({
hookName,
generationType = "normal",
@@ -4648,7 +4776,7 @@ function applyRecallInjection(settings, recallInput, recentMessages, result) {
if (now - lastRecallFallbackNoticeAt > 15000) {
lastRecallFallbackNoticeAt = now;
toastr.warning(
llmMeta.reason || "LLM 精排未返回有效结果,已回退到评分排序",
llmMeta.reason || "LLM 精排未成功,已改用评分排序并继续注入记忆",
"ST-BME 召回提示",
{ timeOut: 4500 },
);
@@ -4662,184 +4790,240 @@ function applyRecallInjection(settings, recallInput, recentMessages, result) {
* 召回管线:检索并注入记忆
*/
async function runRecall(options = {}) {
if (isRecalling || !currentGraph) return false;
const settings = getSettings();
if (!settings.enabled || !settings.recallEnabled) return false;
if (!isGraphReadable()) {
setLastRecallStatus(
"等待图谱加载",
getGraphMutationBlockReason("召回"),
"warning",
{ syncRuntime: true },
);
return false;
}
if (isGraphMetadataWriteAllowed()) {
if (!(await recoverHistoryIfNeeded("pre-recall"))) return false;
}
const context = getContext();
const chat = context.chat;
if (!chat || chat.length === 0) return false;
isRecalling = true;
const recallController = beginStageAbortController("recall");
const recallSignal = recallController.signal;
if (options.signal) {
if (options.signal.aborted) {
recallController.abort(
options.signal.reason || createAbortError("宿主已终止生成"),
);
} else {
options.signal.addEventListener(
"abort",
() =>
recallController.abort(
options.signal.reason || createAbortError("宿主已终止生成"),
),
{ once: true },
);
}
}
try {
await ensureVectorReadyIfNeeded("pre-recall", recallSignal);
const recentContextMessageLimit = clampInt(
settings.recallLlmContextMessages,
4,
0,
20,
);
const recallInput = resolveRecallInput(
chat,
recentContextMessageLimit,
options,
);
const userMessage = recallInput.userMessage;
const recentMessages = recallInput.recentMessages;
if (!userMessage) return false;
recallInput.hookName = options.hookName || "";
console.log("[ST-BME] 开始召回", {
source: recallInput.source,
sourceLabel: recallInput.sourceLabel,
hookName: recallInput.hookName,
userMessageLength: userMessage.length,
recentMessages: recentMessages.length,
});
setLastRecallStatus(
"召回中",
[
getRecallHookLabel(recallInput.hookName),
`来源 ${recallInput.sourceLabel}`,
`上下文 ${recentMessages.length}`,
`当前用户消息长度 ${userMessage.length}`,
]
.filter(Boolean)
.join(" · "),
"running",
{ syncRuntime: true },
);
if (recallInput.source === "send-intent") {
pendingRecallSendIntent = createRecallInputRecord();
}
const result = await retrieve({
graph: currentGraph,
userMessage,
recentMessages,
embeddingConfig: getEmbeddingConfig(),
schema: getSchema(),
signal: recallSignal,
settings,
onStreamProgress: ({ previewText, receivedChars }) => {
const preview = previewText?.length > 60
? "…" + previewText.slice(-60)
: previewText || "";
setLastRecallStatus(
"AI 生成中",
`${preview} [${receivedChars}字]`,
"running",
{ syncRuntime: true, noticeMarquee: true },
);
},
options: {
topK: settings.recallTopK,
maxRecallNodes: settings.recallMaxNodes,
enableLLMRecall: settings.recallEnableLLM,
enableVectorPrefilter: settings.recallEnableVectorPrefilter,
enableGraphDiffusion: settings.recallEnableGraphDiffusion,
diffusionTopK: settings.recallDiffusionTopK,
llmCandidatePool: settings.recallLlmCandidatePool,
recallPrompt: undefined,
weights: {
graphWeight: settings.graphWeight,
vectorWeight: settings.vectorWeight,
importanceWeight: settings.importanceWeight,
},
// v2 options
enableVisibility: settings.enableVisibility ?? false,
visibilityFilter: context.name2 || null,
enableCrossRecall: settings.enableCrossRecall ?? false,
enableProbRecall: settings.enableProbRecall ?? false,
probRecallChance: settings.probRecallChance ?? 0.15,
enableMultiIntent: settings.recallEnableMultiIntent ?? true,
multiIntentMaxSegments: settings.recallMultiIntentMaxSegments ?? 4,
teleportAlpha: settings.recallTeleportAlpha ?? 0.15,
enableTemporalLinks: settings.recallEnableTemporalLinks ?? true,
temporalLinkStrength: settings.recallTemporalLinkStrength ?? 0.2,
enableDiversitySampling:
settings.recallEnableDiversitySampling ?? true,
dppCandidateMultiplier:
settings.recallDppCandidateMultiplier ?? 3,
dppQualityWeight: settings.recallDppQualityWeight ?? 1.0,
enableCooccurrenceBoost:
settings.recallEnableCooccurrenceBoost ?? false,
cooccurrenceScale: settings.recallCooccurrenceScale ?? 0.1,
cooccurrenceMaxNeighbors:
settings.recallCooccurrenceMaxNeighbors ?? 10,
enableResidualRecall:
settings.recallEnableResidualRecall ?? false,
residualBasisMaxNodes:
settings.recallResidualBasisMaxNodes ?? 24,
residualNmfTopics: settings.recallNmfTopics ?? 15,
residualNmfNoveltyThreshold:
settings.recallNmfNoveltyThreshold ?? 0.4,
residualThreshold: settings.recallResidualThreshold ?? 0.3,
residualTopK: settings.recallResidualTopK ?? 5,
},
});
applyRecallInjection(settings, recallInput, recentMessages, result);
return true;
} catch (e) {
if (isAbortError(e)) {
if (isRecalling) {
abortRecallStageWithReason("旧召回已取消,正在启动新的召回");
const settle = await waitForActiveRecallToSettle();
if (!settle.settled && isRecalling) {
setLastRecallStatus(
"召回已终止",
e?.message || "已手动终止当前召回",
"召回",
"上一轮召回仍在清理,请稍后重试",
"warning",
{
syncRuntime: true,
},
);
return false;
return createRecallRunResult("skipped", {
reason: "上一轮召回仍在清理",
});
}
console.error("[ST-BME] 召回失败:", e);
const message = e?.message || String(e);
setLastRecallStatus("召回失败", message, "error", {
syncRuntime: true,
toastKind: "",
});
toastr.error(`召回失败: ${message}`);
return false;
} finally {
finishStageAbortController("recall", recallController);
isRecalling = false;
refreshPanelLiveState();
}
if (!currentGraph) {
return createRecallRunResult("skipped", {
reason: "当前无图谱",
});
}
const settings = getSettings();
if (!settings.enabled || !settings.recallEnabled) {
return createRecallRunResult("skipped", {
reason: "召回功能未启用",
});
}
if (!isGraphReadable()) {
const reason = getGraphMutationBlockReason("召回");
setLastRecallStatus("等待图谱加载", reason, "warning", {
syncRuntime: true,
});
return createRecallRunResult("skipped", {
reason,
});
}
if (isGraphMetadataWriteAllowed()) {
if (!(await recoverHistoryIfNeeded("pre-recall"))) {
return createRecallRunResult("skipped", {
reason: "历史恢复未就绪",
});
}
}
const context = getContext();
const chat = context.chat;
if (!chat || chat.length === 0) {
return createRecallRunResult("skipped", {
reason: "当前聊天为空",
});
}
const runId = ++recallRunSequence;
let recallPromise = null;
recallPromise = (async () => {
isRecalling = true;
const recallController = beginStageAbortController("recall");
const recallSignal = recallController.signal;
if (options.signal) {
if (options.signal.aborted) {
recallController.abort(
options.signal.reason || createAbortError("宿主已终止生成"),
);
} else {
options.signal.addEventListener(
"abort",
() =>
recallController.abort(
options.signal.reason || createAbortError("宿主已终止生成"),
),
{ once: true },
);
}
}
try {
await ensureVectorReadyIfNeeded("pre-recall", recallSignal);
const recentContextMessageLimit = clampInt(
settings.recallLlmContextMessages,
4,
0,
20,
);
const recallInput = resolveRecallInput(
chat,
recentContextMessageLimit,
options,
);
const userMessage = recallInput.userMessage;
const recentMessages = recallInput.recentMessages;
if (!userMessage) {
return createRecallRunResult("skipped", {
reason: "当前没有可用于召回的用户输入",
});
}
recallInput.hookName = options.hookName || "";
console.log("[ST-BME] 开始召回", {
source: recallInput.source,
sourceLabel: recallInput.sourceLabel,
hookName: recallInput.hookName,
userMessageLength: userMessage.length,
recentMessages: recentMessages.length,
runId,
});
setLastRecallStatus(
"召回中",
[
getRecallHookLabel(recallInput.hookName),
`来源 ${recallInput.sourceLabel}`,
`上下文 ${recentMessages.length}`,
`当前用户消息长度 ${userMessage.length}`,
]
.filter(Boolean)
.join(" · "),
"running",
{ syncRuntime: true },
);
if (recallInput.source === "send-intent") {
pendingRecallSendIntent = createRecallInputRecord();
}
const result = await retrieve({
graph: currentGraph,
userMessage,
recentMessages,
embeddingConfig: getEmbeddingConfig(),
schema: getSchema(),
signal: recallSignal,
settings,
onStreamProgress: ({ previewText, receivedChars }) => {
const preview = previewText?.length > 60
? "…" + previewText.slice(-60)
: previewText || "";
setLastRecallStatus(
"AI 生成中",
`${preview} [${receivedChars}字]`,
"running",
{ syncRuntime: true, noticeMarquee: true },
);
},
options: {
topK: settings.recallTopK,
maxRecallNodes: settings.recallMaxNodes,
enableLLMRecall: settings.recallEnableLLM,
enableVectorPrefilter: settings.recallEnableVectorPrefilter,
enableGraphDiffusion: settings.recallEnableGraphDiffusion,
diffusionTopK: settings.recallDiffusionTopK,
llmCandidatePool: settings.recallLlmCandidatePool,
recallPrompt: undefined,
weights: {
graphWeight: settings.graphWeight,
vectorWeight: settings.vectorWeight,
importanceWeight: settings.importanceWeight,
},
// v2 options
enableVisibility: settings.enableVisibility ?? false,
visibilityFilter: context.name2 || null,
enableCrossRecall: settings.enableCrossRecall ?? false,
enableProbRecall: settings.enableProbRecall ?? false,
probRecallChance: settings.probRecallChance ?? 0.15,
enableMultiIntent: settings.recallEnableMultiIntent ?? true,
multiIntentMaxSegments: settings.recallMultiIntentMaxSegments ?? 4,
teleportAlpha: settings.recallTeleportAlpha ?? 0.15,
enableTemporalLinks: settings.recallEnableTemporalLinks ?? true,
temporalLinkStrength: settings.recallTemporalLinkStrength ?? 0.2,
enableDiversitySampling:
settings.recallEnableDiversitySampling ?? true,
dppCandidateMultiplier:
settings.recallDppCandidateMultiplier ?? 3,
dppQualityWeight: settings.recallDppQualityWeight ?? 1.0,
enableCooccurrenceBoost:
settings.recallEnableCooccurrenceBoost ?? false,
cooccurrenceScale: settings.recallCooccurrenceScale ?? 0.1,
cooccurrenceMaxNeighbors:
settings.recallCooccurrenceMaxNeighbors ?? 10,
enableResidualRecall:
settings.recallEnableResidualRecall ?? false,
residualBasisMaxNodes:
settings.recallResidualBasisMaxNodes ?? 24,
residualNmfTopics: settings.recallNmfTopics ?? 15,
residualNmfNoveltyThreshold:
settings.recallNmfNoveltyThreshold ?? 0.4,
residualThreshold: settings.recallResidualThreshold ?? 0.3,
residualTopK: settings.recallResidualTopK ?? 5,
},
});
applyRecallInjection(settings, recallInput, recentMessages, result);
return createRecallRunResult("completed", {
reason: "召回完成",
selectedNodeIds: result.selectedNodeIds || [],
});
} catch (e) {
if (isAbortError(e)) {
setLastRecallStatus(
"召回已终止",
e?.message || "已手动终止当前召回",
"warning",
{
syncRuntime: true,
},
);
return createRecallRunResult("aborted", {
reason: e?.message || "召回已终止",
});
}
console.error("[ST-BME] 召回失败:", e);
const message = e?.message || String(e);
setLastRecallStatus("召回失败", message, "error", {
syncRuntime: true,
toastKind: "",
});
toastr.error(`召回失败: ${message}`);
return createRecallRunResult("failed", {
reason: message,
});
} finally {
finishStageAbortController("recall", recallController);
isRecalling = false;
if (activeRecallPromise === recallPromise) {
activeRecallPromise = null;
}
refreshPanelLiveState();
}
})();
activeRecallPromise = recallPromise;
return await recallPromise;
}
// ==================== 事件钩子 ====================
@@ -4853,6 +5037,7 @@ function onChatChanged() {
skipBeforeCombineRecallUntil = 0;
lastPreGenerationRecallKey = "";
lastPreGenerationRecallAt = 0;
clearGenerationRecallTransactionsForChat("", { clearAll: true });
abortAllRunningStages();
dismissAllStageNotices();
syncGraphLoadFromLiveContext({
@@ -4881,7 +5066,7 @@ function onMessageSent(messageId) {
}
function onMessageDeleted(chatLengthOrMessageId, meta = null) {
clearInjectionState();
invalidateRecallAfterHistoryMutation("消息已删除");
scheduleHistoryMutationRecheck(
"message-deleted",
chatLengthOrMessageId,
@@ -4890,12 +5075,12 @@ function onMessageDeleted(chatLengthOrMessageId, meta = null) {
}
function onMessageEdited(messageId, meta = null) {
clearInjectionState();
invalidateRecallAfterHistoryMutation("消息已编辑");
scheduleHistoryMutationRecheck("message-edited", messageId, meta);
}
function onMessageSwiped(messageId, meta = null) {
clearInjectionState();
invalidateRecallAfterHistoryMutation("已切换楼层 swipe");
scheduleHistoryMutationRecheck("message-swiped", messageId, meta);
}
@@ -4925,7 +5110,7 @@ async function onGenerationAfterCommands(type, params = {}, dryRun = false) {
recallContext.hookName,
"running",
);
const didRecall = await runRecall({
const recallResult = await runRecall({
...recallOptions,
recallKey: recallContext.recallKey,
hookName: recallContext.hookName,
@@ -4935,7 +5120,7 @@ async function onGenerationAfterCommands(type, params = {}, dryRun = false) {
markGenerationRecallTransactionHookState(
recallContext.transaction,
recallContext.hookName,
didRecall ? "completed" : "pending",
getGenerationRecallHookStateFromResult(recallResult),
);
}
@@ -4960,7 +5145,7 @@ async function onBeforeCombinePrompts() {
recallContext.hookName,
"running",
);
const didRecall = await runRecall({
const recallResult = await runRecall({
...recallOptions,
recallKey: recallContext.recallKey,
hookName: recallContext.hookName,
@@ -4968,7 +5153,7 @@ async function onBeforeCombinePrompts() {
markGenerationRecallTransactionHookState(
recallContext.transaction,
recallContext.hookName,
didRecall ? "completed" : "pending",
getGenerationRecallHookStateFromResult(recallResult),
);
}

45
llm.js
View File

@@ -1446,6 +1446,7 @@ export async function callLLMForJSON({
promptMessages = [],
debugContext = null,
onStreamProgress = null,
returnFailureDetails = false,
} = {}) {
const override = getLlmTestOverride("callLLMForJSON");
if (override) {
@@ -1459,6 +1460,8 @@ export async function callLLMForJSON({
additionalMessages,
promptMessages,
debugContext,
onStreamProgress,
returnFailureDetails,
});
}
@@ -1467,6 +1470,7 @@ export async function callLLMForJSON({
requestSource,
);
let lastFailureReason = "";
let lastFailureType = "";
const promptExecutionSummary = buildPromptExecutionSummary(debugContext);
for (let attempt = 0; attempt <= maxRetries; attempt++) {
@@ -1503,18 +1507,28 @@ export async function callLLMForJSON({
if (!responseText || typeof responseText !== "string") {
console.warn(`[ST-BME] LLM 返回空响应 (尝试 ${attempt + 1})`);
lastFailureReason = "返回空响应";
lastFailureType = "empty-response";
continue;
}
// 尝试解析 JSON
const parsed = extractJSON(outputCleanup.cleanedText);
if (parsed !== null) {
return parsed;
return returnFailureDetails
? {
ok: true,
data: parsed,
attempts: attempt + 1,
errorType: "",
failureReason: "",
}
: parsed;
}
const truncated =
response.finishReason === "length" ||
looksLikeTruncatedJson(outputCleanup.cleanedText);
lastFailureType = truncated ? "truncated-json" : "invalid-json";
lastFailureReason = truncated
? "输出因长度限制被截断,请重新输出更紧凑的完整 JSON"
: "输出不是有效 JSON请严格返回紧凑 JSON 对象";
@@ -1524,13 +1538,40 @@ export async function callLLMForJSON({
);
} catch (e) {
if (isAbortError(e)) {
throw e;
const abortMessage = e?.message || String(e) || "LLM 调用已终止";
const isTimeoutAbort =
!signal?.aborted && /超时/i.test(String(abortMessage || ""));
if (!isTimeoutAbort) {
throw e;
}
console.error(`[ST-BME] LLM 调用超时 (尝试 ${attempt + 1}):`, e);
lastFailureReason = abortMessage;
lastFailureType = "timeout";
continue;
}
console.error(`[ST-BME] LLM 调用失败 (尝试 ${attempt + 1}):`, e);
lastFailureReason = e?.message || String(e) || "LLM 调用失败";
lastFailureType = "provider-error";
}
}
if (returnFailureDetails) {
const failureSnapshot = {
ok: false,
data: null,
attempts: maxRetries + 1,
errorType: lastFailureType || "unknown",
failureReason: lastFailureReason || "LLM 未返回可解析 JSON",
};
recordTaskLlmRequest(taskType || privateRequestSource, {
jsonFailure: failureSnapshot,
promptExecution: promptExecutionSummary,
}, {
merge: true,
});
return failureSnapshot;
}
return null;
}

View File

@@ -1093,7 +1093,8 @@ export function buildTaskLlmPayload(promptBuild = null, fallbackUserPrompt = "")
).text;
return {
systemPrompt: String(promptBuild?.systemPrompt || ""),
systemPrompt:
executionMessages.length > 0 ? "" : String(promptBuild?.systemPrompt || ""),
userPrompt: hasUserMessage ? "" : sanitizedFallbackUserPrompt,
promptMessages: executionMessages,
additionalMessages:

View File

@@ -57,6 +57,37 @@ function resolveTaskPromptPayload(promptBuild, fallbackUserPrompt = "") {
};
}
/**
 * Resolve the system prompt to send with a task LLM call.
 * When the payload carries structured prompt messages, those own the system
 * content, so the legacy fallback system prompt must NOT be substituted in.
 * @param {{systemPrompt?: string, promptMessages?: Array}|null|undefined} promptPayload
 * @param {string} [fallbackSystemPrompt] - Used only without prompt messages.
 * @returns {string}
 */
function resolveTaskLlmSystemPrompt(promptPayload, fallbackSystemPrompt = "") {
  const messages = promptPayload?.promptMessages;
  if (Array.isArray(messages) && messages.length > 0) {
    return String(promptPayload?.systemPrompt || "");
  }
  return String(promptPayload?.systemPrompt || fallbackSystemPrompt || "");
}
/**
 * Build a user-facing reason string explaining why LLM re-ranking fell back
 * to score-based ordering, keyed on the structured failure type returned by
 * callLLMForJSON's returnFailureDetails mode.
 * @param {{errorType?: string, failureReason?: string}|null|undefined} llmResult
 * @returns {string}
 */
function buildRecallFallbackReason(llmResult) {
  const failureType = String(llmResult?.errorType || "").trim();
  const failureReason = String(llmResult?.failureReason || "").trim();
  const fixedReasons = {
    timeout: "LLM 精排请求超时,已回退到评分排序",
    "empty-response": "LLM 精排返回空响应,已回退到评分排序",
    "truncated-json": "LLM 精排输出被截断,已回退到评分排序",
    "invalid-json": "LLM 精排未返回有效 JSON已回退到评分排序",
  };
  if (Object.hasOwn(fixedReasons, failureType)) {
    return fixedReasons[failureType];
  }
  if (failureType === "provider-error") {
    return failureReason
      ? `LLM 精排调用失败(${failureReason}),已回退到评分排序`
      : "LLM 精排调用失败,已回退到评分排序";
  }
  return failureReason || "LLM 精排未返回可用结果,已回退到评分排序";
}
/**
 * Whether an error represents an aborted operation (AbortError by name).
 * @param {unknown} error
 * @returns {boolean}
 */
function isAbortError(error) {
  const errorName = error?.name;
  return errorName === "AbortError";
}
@@ -535,6 +566,7 @@ export async function retrieve({
enabled: true,
status: llmResult.status,
reason: llmResult.reason,
fallbackType: llmResult.fallbackType || "",
candidatePool: llmCandidates.length,
selectedSeedCount: llmResult.selectedNodeIds.length,
};
@@ -562,7 +594,7 @@ export async function retrieve({
selectedNodeIds = reconstructSceneNodeIds(
graph,
selectedNodeIds,
normalizedTopK + 6,
normalizedMaxRecallNodes,
);
// 访问强化
@@ -597,7 +629,10 @@ export async function retrieve({
}
}
selectedNodeIds = uniqueNodeIds(selectedNodeIds);
selectedNodeIds = uniqueNodeIds(selectedNodeIds).slice(
0,
normalizedMaxRecallNodes,
);
retrievalMeta.llm = llmMeta;
retrievalMeta.timings.total = roundMs(nowMs() - startedAt);
@@ -809,8 +844,8 @@ async function llmRecall(
].join("\n");
const promptPayload = resolveTaskPromptPayload(recallPromptBuild, userPrompt);
const result = await callLLMForJSON({
systemPrompt: promptPayload.systemPrompt || systemPrompt,
const llmResult = await callLLMForJSON({
systemPrompt: resolveTaskLlmSystemPrompt(promptPayload, systemPrompt),
userPrompt: promptPayload.userPrompt,
maxRetries: 1,
signal,
@@ -822,7 +857,9 @@ async function llmRecall(
promptMessages: promptPayload.promptMessages,
additionalMessages: promptPayload.additionalMessages,
onStreamProgress,
returnFailureDetails: true,
});
const result = llmResult?.ok ? llmResult.data : null;
if (result?.selected_ids && Array.isArray(result.selected_ids)) {
// 校验 ID 有效性
@@ -845,10 +882,16 @@ async function llmRecall(
}
// LLM 失败时回退到纯评分排序
const fallbackReason = llmResult?.ok
? Array.isArray(result?.selected_ids)
? "LLM 返回的候选 ID 无效,已回退到评分排序"
: "LLM 返回了无法识别的 JSON 结构,已回退到评分排序"
: buildRecallFallbackReason(llmResult);
return {
selectedNodeIds: candidates.slice(0, maxNodes).map((c) => c.nodeId),
status: "fallback",
reason: "LLM 未返回有效 JSON 或有效候选,已回退到评分排序",
reason: fallbackReason,
fallbackType: llmResult?.ok ? "invalid-candidate" : llmResult?.errorType || "unknown",
};
}

View File

@@ -28,7 +28,7 @@ function extractSnippet(startMarker, endMarker) {
const persistencePrelude = extractSnippet(
'const MODULE_NAME = "st_bme";',
"function clearInjectionState() {",
"function clearInjectionState(options = {}) {",
);
const persistenceCore = extractSnippet(
"function loadGraphFromChat(options = {}) {",

View File

@@ -237,7 +237,7 @@ function createGenerationRecallHarness() {
);
context.runRecall = async (options = {}) => {
context.runRecallCalls.push({ ...options });
return true;
return { status: "completed", didRecall: true, ok: true };
};
return context;
});
@@ -1296,6 +1296,31 @@ async function testGenerationRecallDifferentKeyCanRunAgain() {
);
}
/**
 * Regression test: a "skipped" recall result recorded on
 * GENERATION_AFTER_COMMANDS is terminal, so the follow-up
 * GENERATE_BEFORE_COMBINE_PROMPTS hook must not re-run recall.
 */
async function testGenerationRecallSkippedStateDoesNotLoopToBeforeCombine() {
  const harness = await createGenerationRecallHarness();
  harness.chat = [{ is_user: true, mes: "同一条但本次跳过" }];
  harness.runRecall = async (options = {}) => {
    harness.runRecallCalls.push({ ...options });
    return {
      status: "skipped",
      didRecall: false,
      ok: false,
      reason: "测试跳过",
    };
  };
  await harness.result.onGenerationAfterCommands("normal", {}, false);
  await harness.result.onBeforeCombinePrompts();
  // Only the first hook should have triggered recall.
  assert.equal(harness.runRecallCalls.length, 1);
  assert.equal(harness.result.generationRecallTransactions.size, 1);
  const [transaction] = harness.result.generationRecallTransactions.values();
  assert.equal(transaction.hookStates.GENERATION_AFTER_COMMANDS, "skipped");
}
async function testRerollUsesBatchBoundaryRollbackAndPersistsState() {
const harness = await createRerollHarness();
harness.chat = [
@@ -1644,6 +1669,7 @@ await testProcessedHistoryAdvanceRequiresCompleteStrongSuccess();
await testGenerationRecallTransactionDedupesDoubleHookBySameKey();
await testGenerationRecallBeforeCombineRunsStandalone();
await testGenerationRecallDifferentKeyCanRunAgain();
await testGenerationRecallSkippedStateDoesNotLoopToBeforeCombine();
await testRerollUsesBatchBoundaryRollbackAndPersistsState();
await testRerollRejectsMissingRecoveryPoint();
await testRerollFallsBackToDirectExtractForUnprocessedFloor();

View File

@@ -51,6 +51,7 @@ const extractPromptBuild = await buildTaskPrompt(settings, "extract", {
currentRange: "1 ~ 2",
});
const extractPayload = buildTaskLlmPayload(extractPromptBuild, "fallback-user");
assert.equal(extractPayload.systemPrompt, "");
assert.equal(extractPayload.userPrompt, "");
assert.equal(
extractPayload.promptMessages.filter((message) => message.role === "user").length,
@@ -86,6 +87,7 @@ const recallPromptBuild = await buildTaskPrompt(settings, "recall", {
graphStats: "candidate_count=2",
});
const recallPayload = buildTaskLlmPayload(recallPromptBuild, "fallback-user");
assert.equal(recallPayload.systemPrompt, "");
assert.equal(recallPayload.userPrompt, "");
assert.equal(
recallPayload.promptMessages.filter((message) => message.role === "user").length,

View File

@@ -258,6 +258,7 @@ try {
};
const payload = buildTaskLlmPayload(promptBuild, "unused fallback");
assert.equal(payload.systemPrompt, "");
const result = await llm.callLLMForJSON({
systemPrompt: payload.systemPrompt,
userPrompt: payload.userPrompt,

View File

@@ -84,6 +84,8 @@ const state = {
diffusionCalls: [],
llmCalls: [],
llmCandidateCount: 0,
llmResponse: { selected_ids: ["rule-2", "rule-1"] },
llmOptions: [],
};
const graph = createGraph();
@@ -164,12 +166,26 @@ const retrieve = await loadRetrieve({
{ nodeId: "rule-3", energy: 0.9 },
];
},
async callLLMForJSON({ userPrompt }) {
async callLLMForJSON(params = {}) {
const { userPrompt = "" } = params;
state.llmOptions.push({ ...params });
state.llmCalls.push(userPrompt);
state.llmCandidateCount = userPrompt
.split("\n")
.filter((line) => line.trim().startsWith("[")).length;
return { selected_ids: ["rule-2", "rule-1"] };
if (params.returnFailureDetails) {
if (state.llmResponse?.ok === false) {
return state.llmResponse;
}
return {
ok: true,
data: state.llmResponse,
errorType: "",
failureReason: "",
attempts: 1,
};
}
return state.llmResponse;
},
getSTContextForPrompt() {
return {};
@@ -201,7 +217,9 @@ assert.deepEqual(Array.from(noStageResult.selectedNodeIds), ["rule-2", "rule-1"]
state.vectorCalls.length = 0;
state.diffusionCalls.length = 0;
state.llmCalls.length = 0;
state.llmOptions.length = 0;
state.llmCandidateCount = 0;
state.llmResponse = { selected_ids: ["rule-2", "rule-1"] };
const llmPoolResult = await retrieve({
graph,
userMessage: "请根据规则给出结论",
@@ -227,10 +245,12 @@ assert.equal(llmPoolResult.meta.retrieval.vectorMergedHits, 3);
assert.equal(llmPoolResult.meta.retrieval.diversityApplied, true);
assert.equal(llmPoolResult.meta.retrieval.candidatePoolBeforeDpp, 3);
assert.equal(llmPoolResult.meta.retrieval.candidatePoolAfterDpp, 2);
assert.equal(state.llmOptions[0].returnFailureDetails, true);
state.vectorCalls.length = 0;
state.diffusionCalls.length = 0;
state.llmCalls.length = 0;
state.llmOptions.length = 0;
await retrieve({
graph,
userMessage: "规则一和规则二有什么关联",
@@ -261,4 +281,89 @@ assert.equal(state.diffusionCalls[0].options.topK, 7);
assert.equal(state.diffusionCalls[0].options.teleportAlpha, 0.15);
assert.equal(noStageResult.meta.retrieval.llm.status, "disabled");
state.vectorCalls.length = 0;
state.diffusionCalls.length = 0;
state.llmCalls.length = 0;
state.llmOptions.length = 0;
state.llmResponse = {
ok: false,
errorType: "invalid-json",
failureReason: "输出不是有效 JSON请严格返回紧凑 JSON 对象",
};
const fallbackResult = await retrieve({
graph,
userMessage: "LLM 这次会坏掉",
recentMessages: ["用户:请回忆相关规则"],
embeddingConfig: {},
schema,
options: {
topK: 4,
maxRecallNodes: 2,
enableVectorPrefilter: true,
enableGraphDiffusion: false,
enableLLMRecall: true,
llmCandidatePool: 2,
},
});
assert.equal(fallbackResult.meta.retrieval.llm.status, "fallback");
assert.match(fallbackResult.meta.retrieval.llm.reason, /有效 JSON|回退到评分排序/);
assert.equal(fallbackResult.meta.retrieval.llm.fallbackType, "invalid-json");
const sceneGraph = {
nodes: [
{
id: "event-1",
type: "event",
importance: 10,
createdTime: 1,
archived: false,
fields: { title: "事件一" },
seqRange: [1, 1],
},
{
id: "character-1",
type: "character",
importance: 6,
createdTime: 2,
archived: false,
fields: { name: "Alice" },
seqRange: [1, 1],
},
{
id: "location-1",
type: "location",
importance: 5,
createdTime: 3,
archived: false,
fields: { title: "大厅" },
seqRange: [1, 1],
},
],
edges: [
{ fromId: "event-1", toId: "character-1", relation: "mentions" },
{ fromId: "event-1", toId: "location-1", relation: "occurs_at" },
],
};
const sceneSchema = [
{ id: "event", label: "事件", alwaysInject: false },
{ id: "character", label: "角色", alwaysInject: false },
{ id: "location", label: "地点", alwaysInject: false },
];
const cappedResult = await retrieve({
graph: sceneGraph,
userMessage: "只看这一个场景",
recentMessages: [],
embeddingConfig: {},
schema: sceneSchema,
options: {
topK: 3,
maxRecallNodes: 1,
enableVectorPrefilter: false,
enableGraphDiffusion: false,
enableLLMRecall: false,
enableProbRecall: false,
},
});
assert.equal(cappedResult.selectedNodeIds.length, 1);
console.log("retrieval-config tests passed");