Harden recall flow and JSON task prompts

This commit is contained in:
Youzini-afk
2026-03-28 20:38:57 +08:00
parent 30fdeaac1a
commit 67e6e29bb2
12 changed files with 618 additions and 200 deletions

View File

@@ -302,9 +302,14 @@ async function summarizeBatch(
compressPromptBuild,
userPrompt,
);
const llmSystemPrompt =
Array.isArray(promptPayload.promptMessages) &&
promptPayload.promptMessages.length > 0
? String(promptPayload.systemPrompt || "")
: String(promptPayload.systemPrompt || systemPrompt || "");
return await callLLMForJSON({
systemPrompt: promptPayload.systemPrompt || systemPrompt,
systemPrompt: llmSystemPrompt,
userPrompt: promptPayload.userPrompt,
maxRetries: 1,
signal,

View File

@@ -341,10 +341,14 @@ export async function consolidateMemories({
consolidationPromptBuild,
userPrompt,
);
const llmSystemPrompt =
Array.isArray(promptPayload.promptMessages) &&
promptPayload.promptMessages.length > 0
? String(promptPayload.systemPrompt || "")
: String(promptPayload.systemPrompt || consolidationSystemPrompt || "");
try {
decision = await callLLMForJSON({
systemPrompt:
promptPayload.systemPrompt || consolidationSystemPrompt,
systemPrompt: llmSystemPrompt,
userPrompt: promptPayload.userPrompt,
maxRetries: 1,
signal,

View File

@@ -174,10 +174,15 @@ export async function extractMemories({
"请分析对话,按 JSON 格式输出操作列表。",
].join("\n");
const promptPayload = resolveTaskPromptPayload(promptBuild, userPrompt);
const llmSystemPrompt =
Array.isArray(promptPayload.promptMessages) &&
promptPayload.promptMessages.length > 0
? String(promptPayload.systemPrompt || "")
: String(promptPayload.systemPrompt || systemPrompt || "");
// 调用 LLM
const result = await callLLMForJSON({
systemPrompt: promptPayload.systemPrompt || systemPrompt,
systemPrompt: llmSystemPrompt,
userPrompt: promptPayload.userPrompt,
maxRetries: 2,
signal,

553
index.js
View File

@@ -292,6 +292,8 @@ const defaultSettings = {
let currentGraph = null;
let isExtracting = false;
let isRecalling = false;
let activeRecallPromise = null;
let recallRunSequence = 0;
let lastInjectionContent = "";
let lastExtractedItems = []; // 最近提取的节点(面板展示用)
let lastRecalledItems = []; // 最近召回的节点(面板展示用)
@@ -322,6 +324,9 @@ let pendingHistoryRecoveryTrigger = "";
let pendingHistoryMutationCheckTimers = [];
let pendingGraphLoadRetryTimer = null;
let pendingGraphLoadRetryChatId = "";
let skipBeforeCombineRecallUntil = 0;
let lastPreGenerationRecallKey = "";
let lastPreGenerationRecallAt = 0;
const generationRecallTransactions = new Map();
const GENERATION_RECALL_TRANSACTION_TTL_MS = 15000;
const stageNoticeHandles = {
@@ -696,6 +701,38 @@ function abortStage(stage) {
return true;
}
/**
 * Abort the currently running recall stage, if one is active.
 * @param {string} [reason] - Human-readable reason used as the AbortError message.
 * @returns {boolean} true when an active (not-yet-aborted) recall controller was aborted.
 */
function abortRecallStageWithReason(reason = "召回已终止") {
  const controller = stageAbortControllers.recall;
  // Nothing to cancel when no recall is in flight or it was already aborted.
  if (!controller || controller.signal.aborted) {
    return false;
  }
  controller.abort(createAbortError(reason));
  return true;
}
/**
 * Wait (bounded by timeoutMs) for the in-flight recall promise to settle.
 * Rejections of the tracked promise are swallowed here; this helper only
 * reports whether the recall pipeline has finished cleaning up.
 * @param {number} [timeoutMs] - Maximum time to wait before giving up.
 * @returns {Promise<{settled: boolean, timedOut: boolean}>}
 */
async function waitForActiveRecallToSettle(timeoutMs = 1800) {
  const pending = activeRecallPromise;
  if (!pending) {
    // No tracked promise: report settled unless the recalling flag is still up.
    return { settled: !isRecalling, timedOut: false };
  }
  let finished = false;
  const watched = Promise.resolve(pending)
    .catch(() => {})
    .then(() => {
      finished = true;
    });
  const timer = new Promise((resolve) => setTimeout(resolve, timeoutMs));
  await Promise.race([watched, timer]);
  return {
    settled: finished || !isRecalling,
    timedOut: !finished && isRecalling,
  };
}
function buildAbortStageAction(stage) {
const abortStageName = findAbortableStageForNotice(stage);
if (!abortStageName) return undefined;
@@ -1687,11 +1724,19 @@ function scheduleStartupGraphReconciliation() {
}
}
function clearInjectionState() {
function clearInjectionState(options = {}) {
const {
preserveRecallStatus = false,
preserveRuntimeStatus = preserveRecallStatus,
} = options;
lastInjectionContent = "";
lastRecalledItems = [];
lastRecallStatus = createUiStatus("待命", "当前无有效注入内容", "idle");
runtimeStatus = createUiStatus("待命", "当前无有效注入内容", "idle");
if (!preserveRecallStatus) {
lastRecallStatus = createUiStatus("待命", "当前无有效注入内容", "idle");
}
if (!preserveRuntimeStatus) {
runtimeStatus = createUiStatus("待命", "当前无有效注入内容", "idle");
}
recordInjectionSnapshot("recall", {
injectionText: "",
selectedNodeIds: [],
@@ -1703,7 +1748,7 @@ function clearInjectionState() {
mode: "cleared",
},
});
if (!isRecalling) {
if (!isRecalling && !preserveRecallStatus) {
dismissStageNotice("recall");
}
@@ -3155,22 +3200,105 @@ function markGenerationRecallTransactionHookState(
return transaction;
}
/**
 * Remove generation-recall transactions belonging to a chat.
 * With `clearAll` (or when no usable chatId resolves) the whole map is wiped.
 * @param {string} [chatId] - Target chat id; defaults to the current chat.
 * @param {{clearAll?: boolean}} [options]
 * @returns {number} Count of transactions removed.
 */
function clearGenerationRecallTransactionsForChat(
  chatId = getCurrentChatId(),
  { clearAll = false } = {},
) {
  const targetChatId = String(chatId || "");
  if (clearAll || !targetChatId) {
    const removedCount = generationRecallTransactions.size;
    generationRecallTransactions.clear();
    return removedCount;
  }
  let removedCount = 0;
  for (const [transactionId, transaction] of generationRecallTransactions.entries()) {
    if (String(transaction?.chatId || "") === targetChatId) {
      generationRecallTransactions.delete(transactionId);
      removedCount += 1;
    }
  }
  return removedCount;
}
/**
 * Whether a per-hook recall state is terminal (no further recall should run
 * for that hook within the same generation transaction).
 * @param {string} [state] - Hook state string; falsy values normalize to "".
 * @returns {boolean}
 */
function isTerminalGenerationRecallHookState(state = "") {
  switch (String(state || "")) {
    case "completed":
    case "failed":
    case "aborted":
    case "skipped":
      return true;
    default:
      return false;
  }
}
function shouldRunRecallForTransaction(transaction, hookName) {
if (!hookName) return true;
if (!transaction) return true;
const hookStates = transaction.hookStates || {};
if (hookStates[hookName] === "completed") {
if (isTerminalGenerationRecallHookState(hookStates[hookName])) {
return false;
}
if (
hookName === "GENERATE_BEFORE_COMBINE_PROMPTS" &&
hookStates.GENERATION_AFTER_COMMANDS === "completed"
isTerminalGenerationRecallHookState(hookStates.GENERATION_AFTER_COMMANDS)
) {
return false;
}
return true;
}
/**
 * Build a normalized recall-run result object.
 * Blank/falsy statuses collapse to "skipped"; `ok`/`didRecall` are true only
 * for "completed". Extra fields are spread last and may override the defaults.
 * @param {string} [status]
 * @param {Object} [extra] - Additional fields merged into the result.
 * @returns {{ok: boolean, didRecall: boolean, status: string}}
 */
function createRecallRunResult(status = "completed", extra = {}) {
  let normalizedStatus = String(status || "skipped").trim();
  if (!normalizedStatus) {
    normalizedStatus = "skipped";
  }
  const completed = normalizedStatus === "completed";
  return {
    ok: completed,
    didRecall: completed,
    status: normalizedStatus,
    ...extra,
  };
}
/**
 * Map a recall-run result to the hook state recorded on the transaction.
 * "superseded" collapses into "aborted"; anything unrecognized is "skipped".
 * @param {{status?: string}|null|undefined} result
 * @returns {"completed"|"failed"|"aborted"|"skipped"}
 */
function getGenerationRecallHookStateFromResult(result) {
  const status = String(result?.status || "").trim();
  if (status === "completed") {
    return "completed";
  }
  if (status === "failed") {
    return "failed";
  }
  if (status === "aborted" || status === "superseded") {
    return "aborted";
  }
  return "skipped";
}
/**
 * Invalidate recall state after the chat history was mutated (delete/edit/swipe).
 * Aborts any in-flight recall, drops per-chat generation transactions and input
 * tracking, and clears the injection while preserving the recall/runtime status
 * UI when a recall had to be cancelled (so the "cancelled" notice can be shown).
 * @param {string} [reason] - Why the history changed; surfaced in status text.
 * @returns {boolean} true when an active recall was cancelled by this call.
 */
function invalidateRecallAfterHistoryMutation(reason = "聊天记录已变更") {
  const recallController = stageAbortControllers.recall;
  const hadActiveRecall = Boolean(
    isRecalling || (recallController && !recallController.signal?.aborted),
  );
  if (hadActiveRecall) {
    abortRecallStageWithReason(`${reason},当前召回已取消`);
  }
  clearGenerationRecallTransactionsForChat();
  clearRecallInputTracking();
  clearInjectionState({
    preserveRecallStatus: hadActiveRecall,
    preserveRuntimeStatus: hadActiveRecall,
  });
  if (hadActiveRecall) {
    setLastRecallStatus("召回已取消", `${reason},等待新的召回请求`, "warning", {
      syncRuntime: true,
    });
  }
  return hadActiveRecall;
}
function createGenerationRecallContext({
hookName,
generationType = "normal",
@@ -4648,7 +4776,7 @@ function applyRecallInjection(settings, recallInput, recentMessages, result) {
if (now - lastRecallFallbackNoticeAt > 15000) {
lastRecallFallbackNoticeAt = now;
toastr.warning(
llmMeta.reason || "LLM 精排未返回有效结果,已回退到评分排序",
llmMeta.reason || "LLM 精排未成功,已改用评分排序并继续注入记忆",
"ST-BME 召回提示",
{ timeOut: 4500 },
);
@@ -4662,184 +4790,240 @@ function applyRecallInjection(settings, recallInput, recentMessages, result) {
* 召回管线:检索并注入记忆
*/
async function runRecall(options = {}) {
if (isRecalling || !currentGraph) return false;
const settings = getSettings();
if (!settings.enabled || !settings.recallEnabled) return false;
if (!isGraphReadable()) {
setLastRecallStatus(
"等待图谱加载",
getGraphMutationBlockReason("召回"),
"warning",
{ syncRuntime: true },
);
return false;
}
if (isGraphMetadataWriteAllowed()) {
if (!(await recoverHistoryIfNeeded("pre-recall"))) return false;
}
const context = getContext();
const chat = context.chat;
if (!chat || chat.length === 0) return false;
isRecalling = true;
const recallController = beginStageAbortController("recall");
const recallSignal = recallController.signal;
if (options.signal) {
if (options.signal.aborted) {
recallController.abort(
options.signal.reason || createAbortError("宿主已终止生成"),
);
} else {
options.signal.addEventListener(
"abort",
() =>
recallController.abort(
options.signal.reason || createAbortError("宿主已终止生成"),
),
{ once: true },
);
}
}
try {
await ensureVectorReadyIfNeeded("pre-recall", recallSignal);
const recentContextMessageLimit = clampInt(
settings.recallLlmContextMessages,
4,
0,
20,
);
const recallInput = resolveRecallInput(
chat,
recentContextMessageLimit,
options,
);
const userMessage = recallInput.userMessage;
const recentMessages = recallInput.recentMessages;
if (!userMessage) return false;
recallInput.hookName = options.hookName || "";
console.log("[ST-BME] 开始召回", {
source: recallInput.source,
sourceLabel: recallInput.sourceLabel,
hookName: recallInput.hookName,
userMessageLength: userMessage.length,
recentMessages: recentMessages.length,
});
setLastRecallStatus(
"召回中",
[
getRecallHookLabel(recallInput.hookName),
`来源 ${recallInput.sourceLabel}`,
`上下文 ${recentMessages.length}`,
`当前用户消息长度 ${userMessage.length}`,
]
.filter(Boolean)
.join(" · "),
"running",
{ syncRuntime: true },
);
if (recallInput.source === "send-intent") {
pendingRecallSendIntent = createRecallInputRecord();
}
const result = await retrieve({
graph: currentGraph,
userMessage,
recentMessages,
embeddingConfig: getEmbeddingConfig(),
schema: getSchema(),
signal: recallSignal,
settings,
onStreamProgress: ({ previewText, receivedChars }) => {
const preview = previewText?.length > 60
? "…" + previewText.slice(-60)
: previewText || "";
setLastRecallStatus(
"AI 生成中",
`${preview} [${receivedChars}字]`,
"running",
{ syncRuntime: true, noticeMarquee: true },
);
},
options: {
topK: settings.recallTopK,
maxRecallNodes: settings.recallMaxNodes,
enableLLMRecall: settings.recallEnableLLM,
enableVectorPrefilter: settings.recallEnableVectorPrefilter,
enableGraphDiffusion: settings.recallEnableGraphDiffusion,
diffusionTopK: settings.recallDiffusionTopK,
llmCandidatePool: settings.recallLlmCandidatePool,
recallPrompt: undefined,
weights: {
graphWeight: settings.graphWeight,
vectorWeight: settings.vectorWeight,
importanceWeight: settings.importanceWeight,
},
// v2 options
enableVisibility: settings.enableVisibility ?? false,
visibilityFilter: context.name2 || null,
enableCrossRecall: settings.enableCrossRecall ?? false,
enableProbRecall: settings.enableProbRecall ?? false,
probRecallChance: settings.probRecallChance ?? 0.15,
enableMultiIntent: settings.recallEnableMultiIntent ?? true,
multiIntentMaxSegments: settings.recallMultiIntentMaxSegments ?? 4,
teleportAlpha: settings.recallTeleportAlpha ?? 0.15,
enableTemporalLinks: settings.recallEnableTemporalLinks ?? true,
temporalLinkStrength: settings.recallTemporalLinkStrength ?? 0.2,
enableDiversitySampling:
settings.recallEnableDiversitySampling ?? true,
dppCandidateMultiplier:
settings.recallDppCandidateMultiplier ?? 3,
dppQualityWeight: settings.recallDppQualityWeight ?? 1.0,
enableCooccurrenceBoost:
settings.recallEnableCooccurrenceBoost ?? false,
cooccurrenceScale: settings.recallCooccurrenceScale ?? 0.1,
cooccurrenceMaxNeighbors:
settings.recallCooccurrenceMaxNeighbors ?? 10,
enableResidualRecall:
settings.recallEnableResidualRecall ?? false,
residualBasisMaxNodes:
settings.recallResidualBasisMaxNodes ?? 24,
residualNmfTopics: settings.recallNmfTopics ?? 15,
residualNmfNoveltyThreshold:
settings.recallNmfNoveltyThreshold ?? 0.4,
residualThreshold: settings.recallResidualThreshold ?? 0.3,
residualTopK: settings.recallResidualTopK ?? 5,
},
});
applyRecallInjection(settings, recallInput, recentMessages, result);
return true;
} catch (e) {
if (isAbortError(e)) {
if (isRecalling) {
abortRecallStageWithReason("旧召回已取消,正在启动新的召回");
const settle = await waitForActiveRecallToSettle();
if (!settle.settled && isRecalling) {
setLastRecallStatus(
"召回已终止",
e?.message || "已手动终止当前召回",
"召回",
"上一轮召回仍在清理,请稍后重试",
"warning",
{
syncRuntime: true,
},
);
return false;
return createRecallRunResult("skipped", {
reason: "上一轮召回仍在清理",
});
}
console.error("[ST-BME] 召回失败:", e);
const message = e?.message || String(e);
setLastRecallStatus("召回失败", message, "error", {
syncRuntime: true,
toastKind: "",
});
toastr.error(`召回失败: ${message}`);
return false;
} finally {
finishStageAbortController("recall", recallController);
isRecalling = false;
refreshPanelLiveState();
}
if (!currentGraph) {
return createRecallRunResult("skipped", {
reason: "当前无图谱",
});
}
const settings = getSettings();
if (!settings.enabled || !settings.recallEnabled) {
return createRecallRunResult("skipped", {
reason: "召回功能未启用",
});
}
if (!isGraphReadable()) {
const reason = getGraphMutationBlockReason("召回");
setLastRecallStatus("等待图谱加载", reason, "warning", {
syncRuntime: true,
});
return createRecallRunResult("skipped", {
reason,
});
}
if (isGraphMetadataWriteAllowed()) {
if (!(await recoverHistoryIfNeeded("pre-recall"))) {
return createRecallRunResult("skipped", {
reason: "历史恢复未就绪",
});
}
}
const context = getContext();
const chat = context.chat;
if (!chat || chat.length === 0) {
return createRecallRunResult("skipped", {
reason: "当前聊天为空",
});
}
const runId = ++recallRunSequence;
let recallPromise = null;
recallPromise = (async () => {
isRecalling = true;
const recallController = beginStageAbortController("recall");
const recallSignal = recallController.signal;
if (options.signal) {
if (options.signal.aborted) {
recallController.abort(
options.signal.reason || createAbortError("宿主已终止生成"),
);
} else {
options.signal.addEventListener(
"abort",
() =>
recallController.abort(
options.signal.reason || createAbortError("宿主已终止生成"),
),
{ once: true },
);
}
}
try {
await ensureVectorReadyIfNeeded("pre-recall", recallSignal);
const recentContextMessageLimit = clampInt(
settings.recallLlmContextMessages,
4,
0,
20,
);
const recallInput = resolveRecallInput(
chat,
recentContextMessageLimit,
options,
);
const userMessage = recallInput.userMessage;
const recentMessages = recallInput.recentMessages;
if (!userMessage) {
return createRecallRunResult("skipped", {
reason: "当前没有可用于召回的用户输入",
});
}
recallInput.hookName = options.hookName || "";
console.log("[ST-BME] 开始召回", {
source: recallInput.source,
sourceLabel: recallInput.sourceLabel,
hookName: recallInput.hookName,
userMessageLength: userMessage.length,
recentMessages: recentMessages.length,
runId,
});
setLastRecallStatus(
"召回中",
[
getRecallHookLabel(recallInput.hookName),
`来源 ${recallInput.sourceLabel}`,
`上下文 ${recentMessages.length}`,
`当前用户消息长度 ${userMessage.length}`,
]
.filter(Boolean)
.join(" · "),
"running",
{ syncRuntime: true },
);
if (recallInput.source === "send-intent") {
pendingRecallSendIntent = createRecallInputRecord();
}
const result = await retrieve({
graph: currentGraph,
userMessage,
recentMessages,
embeddingConfig: getEmbeddingConfig(),
schema: getSchema(),
signal: recallSignal,
settings,
onStreamProgress: ({ previewText, receivedChars }) => {
const preview = previewText?.length > 60
? "…" + previewText.slice(-60)
: previewText || "";
setLastRecallStatus(
"AI 生成中",
`${preview} [${receivedChars}字]`,
"running",
{ syncRuntime: true, noticeMarquee: true },
);
},
options: {
topK: settings.recallTopK,
maxRecallNodes: settings.recallMaxNodes,
enableLLMRecall: settings.recallEnableLLM,
enableVectorPrefilter: settings.recallEnableVectorPrefilter,
enableGraphDiffusion: settings.recallEnableGraphDiffusion,
diffusionTopK: settings.recallDiffusionTopK,
llmCandidatePool: settings.recallLlmCandidatePool,
recallPrompt: undefined,
weights: {
graphWeight: settings.graphWeight,
vectorWeight: settings.vectorWeight,
importanceWeight: settings.importanceWeight,
},
// v2 options
enableVisibility: settings.enableVisibility ?? false,
visibilityFilter: context.name2 || null,
enableCrossRecall: settings.enableCrossRecall ?? false,
enableProbRecall: settings.enableProbRecall ?? false,
probRecallChance: settings.probRecallChance ?? 0.15,
enableMultiIntent: settings.recallEnableMultiIntent ?? true,
multiIntentMaxSegments: settings.recallMultiIntentMaxSegments ?? 4,
teleportAlpha: settings.recallTeleportAlpha ?? 0.15,
enableTemporalLinks: settings.recallEnableTemporalLinks ?? true,
temporalLinkStrength: settings.recallTemporalLinkStrength ?? 0.2,
enableDiversitySampling:
settings.recallEnableDiversitySampling ?? true,
dppCandidateMultiplier:
settings.recallDppCandidateMultiplier ?? 3,
dppQualityWeight: settings.recallDppQualityWeight ?? 1.0,
enableCooccurrenceBoost:
settings.recallEnableCooccurrenceBoost ?? false,
cooccurrenceScale: settings.recallCooccurrenceScale ?? 0.1,
cooccurrenceMaxNeighbors:
settings.recallCooccurrenceMaxNeighbors ?? 10,
enableResidualRecall:
settings.recallEnableResidualRecall ?? false,
residualBasisMaxNodes:
settings.recallResidualBasisMaxNodes ?? 24,
residualNmfTopics: settings.recallNmfTopics ?? 15,
residualNmfNoveltyThreshold:
settings.recallNmfNoveltyThreshold ?? 0.4,
residualThreshold: settings.recallResidualThreshold ?? 0.3,
residualTopK: settings.recallResidualTopK ?? 5,
},
});
applyRecallInjection(settings, recallInput, recentMessages, result);
return createRecallRunResult("completed", {
reason: "召回完成",
selectedNodeIds: result.selectedNodeIds || [],
});
} catch (e) {
if (isAbortError(e)) {
setLastRecallStatus(
"召回已终止",
e?.message || "已手动终止当前召回",
"warning",
{
syncRuntime: true,
},
);
return createRecallRunResult("aborted", {
reason: e?.message || "召回已终止",
});
}
console.error("[ST-BME] 召回失败:", e);
const message = e?.message || String(e);
setLastRecallStatus("召回失败", message, "error", {
syncRuntime: true,
toastKind: "",
});
toastr.error(`召回失败: ${message}`);
return createRecallRunResult("failed", {
reason: message,
});
} finally {
finishStageAbortController("recall", recallController);
isRecalling = false;
if (activeRecallPromise === recallPromise) {
activeRecallPromise = null;
}
refreshPanelLiveState();
}
})();
activeRecallPromise = recallPromise;
return await recallPromise;
}
// ==================== 事件钩子 ====================
@@ -4853,6 +5037,7 @@ function onChatChanged() {
skipBeforeCombineRecallUntil = 0;
lastPreGenerationRecallKey = "";
lastPreGenerationRecallAt = 0;
clearGenerationRecallTransactionsForChat("", { clearAll: true });
abortAllRunningStages();
dismissAllStageNotices();
syncGraphLoadFromLiveContext({
@@ -4881,7 +5066,7 @@ function onMessageSent(messageId) {
}
function onMessageDeleted(chatLengthOrMessageId, meta = null) {
clearInjectionState();
invalidateRecallAfterHistoryMutation("消息已删除");
scheduleHistoryMutationRecheck(
"message-deleted",
chatLengthOrMessageId,
@@ -4890,12 +5075,12 @@ function onMessageDeleted(chatLengthOrMessageId, meta = null) {
}
function onMessageEdited(messageId, meta = null) {
clearInjectionState();
invalidateRecallAfterHistoryMutation("消息已编辑");
scheduleHistoryMutationRecheck("message-edited", messageId, meta);
}
function onMessageSwiped(messageId, meta = null) {
clearInjectionState();
invalidateRecallAfterHistoryMutation("已切换楼层 swipe");
scheduleHistoryMutationRecheck("message-swiped", messageId, meta);
}
@@ -4925,7 +5110,7 @@ async function onGenerationAfterCommands(type, params = {}, dryRun = false) {
recallContext.hookName,
"running",
);
const didRecall = await runRecall({
const recallResult = await runRecall({
...recallOptions,
recallKey: recallContext.recallKey,
hookName: recallContext.hookName,
@@ -4935,7 +5120,7 @@ async function onGenerationAfterCommands(type, params = {}, dryRun = false) {
markGenerationRecallTransactionHookState(
recallContext.transaction,
recallContext.hookName,
didRecall ? "completed" : "pending",
getGenerationRecallHookStateFromResult(recallResult),
);
}
@@ -4960,7 +5145,7 @@ async function onBeforeCombinePrompts() {
recallContext.hookName,
"running",
);
const didRecall = await runRecall({
const recallResult = await runRecall({
...recallOptions,
recallKey: recallContext.recallKey,
hookName: recallContext.hookName,
@@ -4968,7 +5153,7 @@ async function onBeforeCombinePrompts() {
markGenerationRecallTransactionHookState(
recallContext.transaction,
recallContext.hookName,
didRecall ? "completed" : "pending",
getGenerationRecallHookStateFromResult(recallResult),
);
}

45
llm.js
View File

@@ -1446,6 +1446,7 @@ export async function callLLMForJSON({
promptMessages = [],
debugContext = null,
onStreamProgress = null,
returnFailureDetails = false,
} = {}) {
const override = getLlmTestOverride("callLLMForJSON");
if (override) {
@@ -1459,6 +1460,8 @@ export async function callLLMForJSON({
additionalMessages,
promptMessages,
debugContext,
onStreamProgress,
returnFailureDetails,
});
}
@@ -1467,6 +1470,7 @@ export async function callLLMForJSON({
requestSource,
);
let lastFailureReason = "";
let lastFailureType = "";
const promptExecutionSummary = buildPromptExecutionSummary(debugContext);
for (let attempt = 0; attempt <= maxRetries; attempt++) {
@@ -1503,18 +1507,28 @@ export async function callLLMForJSON({
if (!responseText || typeof responseText !== "string") {
console.warn(`[ST-BME] LLM 返回空响应 (尝试 ${attempt + 1})`);
lastFailureReason = "返回空响应";
lastFailureType = "empty-response";
continue;
}
// 尝试解析 JSON
const parsed = extractJSON(outputCleanup.cleanedText);
if (parsed !== null) {
return parsed;
return returnFailureDetails
? {
ok: true,
data: parsed,
attempts: attempt + 1,
errorType: "",
failureReason: "",
}
: parsed;
}
const truncated =
response.finishReason === "length" ||
looksLikeTruncatedJson(outputCleanup.cleanedText);
lastFailureType = truncated ? "truncated-json" : "invalid-json";
lastFailureReason = truncated
? "输出因长度限制被截断,请重新输出更紧凑的完整 JSON"
: "输出不是有效 JSON请严格返回紧凑 JSON 对象";
@@ -1524,13 +1538,40 @@ export async function callLLMForJSON({
);
} catch (e) {
if (isAbortError(e)) {
throw e;
const abortMessage = e?.message || String(e) || "LLM 调用已终止";
const isTimeoutAbort =
!signal?.aborted && /超时/i.test(String(abortMessage || ""));
if (!isTimeoutAbort) {
throw e;
}
console.error(`[ST-BME] LLM 调用超时 (尝试 ${attempt + 1}):`, e);
lastFailureReason = abortMessage;
lastFailureType = "timeout";
continue;
}
console.error(`[ST-BME] LLM 调用失败 (尝试 ${attempt + 1}):`, e);
lastFailureReason = e?.message || String(e) || "LLM 调用失败";
lastFailureType = "provider-error";
}
}
if (returnFailureDetails) {
const failureSnapshot = {
ok: false,
data: null,
attempts: maxRetries + 1,
errorType: lastFailureType || "unknown",
failureReason: lastFailureReason || "LLM 未返回可解析 JSON",
};
recordTaskLlmRequest(taskType || privateRequestSource, {
jsonFailure: failureSnapshot,
promptExecution: promptExecutionSummary,
}, {
merge: true,
});
return failureSnapshot;
}
return null;
}

View File

@@ -1093,7 +1093,8 @@ export function buildTaskLlmPayload(promptBuild = null, fallbackUserPrompt = "")
).text;
return {
systemPrompt: String(promptBuild?.systemPrompt || ""),
systemPrompt:
executionMessages.length > 0 ? "" : String(promptBuild?.systemPrompt || ""),
userPrompt: hasUserMessage ? "" : sanitizedFallbackUserPrompt,
promptMessages: executionMessages,
additionalMessages:

View File

@@ -57,6 +57,37 @@ function resolveTaskPromptPayload(promptBuild, fallbackUserPrompt = "") {
};
}
/**
 * Resolve the system prompt to send with a task LLM call.
 * When the payload carries structured prompt messages, those own the system
 * content, so the legacy fallback system prompt must NOT be substituted in.
 * @param {{systemPrompt?: string, promptMessages?: Array}|null|undefined} promptPayload
 * @param {string} [fallbackSystemPrompt] - Used only without prompt messages.
 * @returns {string}
 */
function resolveTaskLlmSystemPrompt(promptPayload, fallbackSystemPrompt = "") {
  const messages = promptPayload?.promptMessages;
  if (Array.isArray(messages) && messages.length > 0) {
    return String(promptPayload?.systemPrompt || "");
  }
  return String(promptPayload?.systemPrompt || fallbackSystemPrompt || "");
}
/**
 * Build a user-facing reason string explaining why LLM re-ranking fell back
 * to score-based ordering, keyed on the structured failure type returned by
 * callLLMForJSON's returnFailureDetails mode.
 * @param {{errorType?: string, failureReason?: string}|null|undefined} llmResult
 * @returns {string}
 */
function buildRecallFallbackReason(llmResult) {
  const failureType = String(llmResult?.errorType || "").trim();
  const failureReason = String(llmResult?.failureReason || "").trim();
  const fixedReasons = {
    timeout: "LLM 精排请求超时,已回退到评分排序",
    "empty-response": "LLM 精排返回空响应,已回退到评分排序",
    "truncated-json": "LLM 精排输出被截断,已回退到评分排序",
    "invalid-json": "LLM 精排未返回有效 JSON已回退到评分排序",
  };
  if (Object.hasOwn(fixedReasons, failureType)) {
    return fixedReasons[failureType];
  }
  if (failureType === "provider-error") {
    return failureReason
      ? `LLM 精排调用失败(${failureReason}),已回退到评分排序`
      : "LLM 精排调用失败,已回退到评分排序";
  }
  return failureReason || "LLM 精排未返回可用结果,已回退到评分排序";
}
/**
 * Whether an error represents an aborted operation (AbortError by name).
 * @param {unknown} error
 * @returns {boolean}
 */
function isAbortError(error) {
  const errorName = error?.name;
  return errorName === "AbortError";
}
@@ -535,6 +566,7 @@ export async function retrieve({
enabled: true,
status: llmResult.status,
reason: llmResult.reason,
fallbackType: llmResult.fallbackType || "",
candidatePool: llmCandidates.length,
selectedSeedCount: llmResult.selectedNodeIds.length,
};
@@ -562,7 +594,7 @@ export async function retrieve({
selectedNodeIds = reconstructSceneNodeIds(
graph,
selectedNodeIds,
normalizedTopK + 6,
normalizedMaxRecallNodes,
);
// 访问强化
@@ -597,7 +629,10 @@ export async function retrieve({
}
}
selectedNodeIds = uniqueNodeIds(selectedNodeIds);
selectedNodeIds = uniqueNodeIds(selectedNodeIds).slice(
0,
normalizedMaxRecallNodes,
);
retrievalMeta.llm = llmMeta;
retrievalMeta.timings.total = roundMs(nowMs() - startedAt);
@@ -809,8 +844,8 @@ async function llmRecall(
].join("\n");
const promptPayload = resolveTaskPromptPayload(recallPromptBuild, userPrompt);
const result = await callLLMForJSON({
systemPrompt: promptPayload.systemPrompt || systemPrompt,
const llmResult = await callLLMForJSON({
systemPrompt: resolveTaskLlmSystemPrompt(promptPayload, systemPrompt),
userPrompt: promptPayload.userPrompt,
maxRetries: 1,
signal,
@@ -822,7 +857,9 @@ async function llmRecall(
promptMessages: promptPayload.promptMessages,
additionalMessages: promptPayload.additionalMessages,
onStreamProgress,
returnFailureDetails: true,
});
const result = llmResult?.ok ? llmResult.data : null;
if (result?.selected_ids && Array.isArray(result.selected_ids)) {
// 校验 ID 有效性
@@ -845,10 +882,16 @@ async function llmRecall(
}
// LLM 失败时回退到纯评分排序
const fallbackReason = llmResult?.ok
? Array.isArray(result?.selected_ids)
? "LLM 返回的候选 ID 无效,已回退到评分排序"
: "LLM 返回了无法识别的 JSON 结构,已回退到评分排序"
: buildRecallFallbackReason(llmResult);
return {
selectedNodeIds: candidates.slice(0, maxNodes).map((c) => c.nodeId),
status: "fallback",
reason: "LLM 未返回有效 JSON 或有效候选,已回退到评分排序",
reason: fallbackReason,
fallbackType: llmResult?.ok ? "invalid-candidate" : llmResult?.errorType || "unknown",
};
}

View File

@@ -28,7 +28,7 @@ function extractSnippet(startMarker, endMarker) {
const persistencePrelude = extractSnippet(
'const MODULE_NAME = "st_bme";',
"function clearInjectionState() {",
"function clearInjectionState(options = {}) {",
);
const persistenceCore = extractSnippet(
"function loadGraphFromChat(options = {}) {",

View File

@@ -237,7 +237,7 @@ function createGenerationRecallHarness() {
);
context.runRecall = async (options = {}) => {
context.runRecallCalls.push({ ...options });
return true;
return { status: "completed", didRecall: true, ok: true };
};
return context;
});
@@ -1296,6 +1296,31 @@ async function testGenerationRecallDifferentKeyCanRunAgain() {
);
}
/**
 * Regression test: a "skipped" recall result recorded on
 * GENERATION_AFTER_COMMANDS is terminal, so the follow-up
 * GENERATE_BEFORE_COMBINE_PROMPTS hook must not re-run recall.
 */
async function testGenerationRecallSkippedStateDoesNotLoopToBeforeCombine() {
  const harness = await createGenerationRecallHarness();
  harness.chat = [{ is_user: true, mes: "同一条但本次跳过" }];
  harness.runRecall = async (options = {}) => {
    harness.runRecallCalls.push({ ...options });
    return {
      status: "skipped",
      didRecall: false,
      ok: false,
      reason: "测试跳过",
    };
  };
  await harness.result.onGenerationAfterCommands("normal", {}, false);
  await harness.result.onBeforeCombinePrompts();
  // Only the first hook should have triggered recall.
  assert.equal(harness.runRecallCalls.length, 1);
  assert.equal(harness.result.generationRecallTransactions.size, 1);
  const [transaction] = harness.result.generationRecallTransactions.values();
  assert.equal(transaction.hookStates.GENERATION_AFTER_COMMANDS, "skipped");
}
async function testRerollUsesBatchBoundaryRollbackAndPersistsState() {
const harness = await createRerollHarness();
harness.chat = [
@@ -1644,6 +1669,7 @@ await testProcessedHistoryAdvanceRequiresCompleteStrongSuccess();
await testGenerationRecallTransactionDedupesDoubleHookBySameKey();
await testGenerationRecallBeforeCombineRunsStandalone();
await testGenerationRecallDifferentKeyCanRunAgain();
await testGenerationRecallSkippedStateDoesNotLoopToBeforeCombine();
await testRerollUsesBatchBoundaryRollbackAndPersistsState();
await testRerollRejectsMissingRecoveryPoint();
await testRerollFallsBackToDirectExtractForUnprocessedFloor();

View File

@@ -51,6 +51,7 @@ const extractPromptBuild = await buildTaskPrompt(settings, "extract", {
currentRange: "1 ~ 2",
});
const extractPayload = buildTaskLlmPayload(extractPromptBuild, "fallback-user");
assert.equal(extractPayload.systemPrompt, "");
assert.equal(extractPayload.userPrompt, "");
assert.equal(
extractPayload.promptMessages.filter((message) => message.role === "user").length,
@@ -86,6 +87,7 @@ const recallPromptBuild = await buildTaskPrompt(settings, "recall", {
graphStats: "candidate_count=2",
});
const recallPayload = buildTaskLlmPayload(recallPromptBuild, "fallback-user");
assert.equal(recallPayload.systemPrompt, "");
assert.equal(recallPayload.userPrompt, "");
assert.equal(
recallPayload.promptMessages.filter((message) => message.role === "user").length,

View File

@@ -258,6 +258,7 @@ try {
};
const payload = buildTaskLlmPayload(promptBuild, "unused fallback");
assert.equal(payload.systemPrompt, "");
const result = await llm.callLLMForJSON({
systemPrompt: payload.systemPrompt,
userPrompt: payload.userPrompt,

View File

@@ -84,6 +84,8 @@ const state = {
diffusionCalls: [],
llmCalls: [],
llmCandidateCount: 0,
llmResponse: { selected_ids: ["rule-2", "rule-1"] },
llmOptions: [],
};
const graph = createGraph();
@@ -164,12 +166,26 @@ const retrieve = await loadRetrieve({
{ nodeId: "rule-3", energy: 0.9 },
];
},
async callLLMForJSON({ userPrompt }) {
async callLLMForJSON(params = {}) {
const { userPrompt = "" } = params;
state.llmOptions.push({ ...params });
state.llmCalls.push(userPrompt);
state.llmCandidateCount = userPrompt
.split("\n")
.filter((line) => line.trim().startsWith("[")).length;
return { selected_ids: ["rule-2", "rule-1"] };
if (params.returnFailureDetails) {
if (state.llmResponse?.ok === false) {
return state.llmResponse;
}
return {
ok: true,
data: state.llmResponse,
errorType: "",
failureReason: "",
attempts: 1,
};
}
return state.llmResponse;
},
getSTContextForPrompt() {
return {};
@@ -201,7 +217,9 @@ assert.deepEqual(Array.from(noStageResult.selectedNodeIds), ["rule-2", "rule-1"]
state.vectorCalls.length = 0;
state.diffusionCalls.length = 0;
state.llmCalls.length = 0;
state.llmOptions.length = 0;
state.llmCandidateCount = 0;
state.llmResponse = { selected_ids: ["rule-2", "rule-1"] };
const llmPoolResult = await retrieve({
graph,
userMessage: "请根据规则给出结论",
@@ -227,10 +245,12 @@ assert.equal(llmPoolResult.meta.retrieval.vectorMergedHits, 3);
assert.equal(llmPoolResult.meta.retrieval.diversityApplied, true);
assert.equal(llmPoolResult.meta.retrieval.candidatePoolBeforeDpp, 3);
assert.equal(llmPoolResult.meta.retrieval.candidatePoolAfterDpp, 2);
assert.equal(state.llmOptions[0].returnFailureDetails, true);
state.vectorCalls.length = 0;
state.diffusionCalls.length = 0;
state.llmCalls.length = 0;
state.llmOptions.length = 0;
await retrieve({
graph,
userMessage: "规则一和规则二有什么关联",
@@ -261,4 +281,89 @@ assert.equal(state.diffusionCalls[0].options.topK, 7);
assert.equal(state.diffusionCalls[0].options.teleportAlpha, 0.15);
assert.equal(noStageResult.meta.retrieval.llm.status, "disabled");
state.vectorCalls.length = 0;
state.diffusionCalls.length = 0;
state.llmCalls.length = 0;
state.llmOptions.length = 0;
state.llmResponse = {
ok: false,
errorType: "invalid-json",
failureReason: "输出不是有效 JSON请严格返回紧凑 JSON 对象",
};
const fallbackResult = await retrieve({
graph,
userMessage: "LLM 这次会坏掉",
recentMessages: ["用户:请回忆相关规则"],
embeddingConfig: {},
schema,
options: {
topK: 4,
maxRecallNodes: 2,
enableVectorPrefilter: true,
enableGraphDiffusion: false,
enableLLMRecall: true,
llmCandidatePool: 2,
},
});
assert.equal(fallbackResult.meta.retrieval.llm.status, "fallback");
assert.match(fallbackResult.meta.retrieval.llm.reason, /有效 JSON|回退到评分排序/);
assert.equal(fallbackResult.meta.retrieval.llm.fallbackType, "invalid-json");
const sceneGraph = {
nodes: [
{
id: "event-1",
type: "event",
importance: 10,
createdTime: 1,
archived: false,
fields: { title: "事件一" },
seqRange: [1, 1],
},
{
id: "character-1",
type: "character",
importance: 6,
createdTime: 2,
archived: false,
fields: { name: "Alice" },
seqRange: [1, 1],
},
{
id: "location-1",
type: "location",
importance: 5,
createdTime: 3,
archived: false,
fields: { title: "大厅" },
seqRange: [1, 1],
},
],
edges: [
{ fromId: "event-1", toId: "character-1", relation: "mentions" },
{ fromId: "event-1", toId: "location-1", relation: "occurs_at" },
],
};
const sceneSchema = [
{ id: "event", label: "事件", alwaysInject: false },
{ id: "character", label: "角色", alwaysInject: false },
{ id: "location", label: "地点", alwaysInject: false },
];
const cappedResult = await retrieve({
graph: sceneGraph,
userMessage: "只看这一个场景",
recentMessages: [],
embeddingConfig: {},
schema: sceneSchema,
options: {
topK: 3,
maxRecallNodes: 1,
enableVectorPrefilter: false,
enableGraphDiffusion: false,
enableLLMRecall: false,
enableProbRecall: false,
},
});
assert.equal(cappedResult.selectedNodeIds.length, 1);
console.log("retrieval-config tests passed");