From 322752bb43f388623d0bda2adf9234552c62e107 Mon Sep 17 00:00:00 2001 From: Youzini-afk <13153778771cx@gmail.com> Date: Sat, 11 Apr 2026 16:05:06 +0800 Subject: [PATCH] feat: improve shujuku-compatible extraction and recall input --- index.js | 64 ++- llm/llm.js | 4 + maintenance/chat-history.js | 3 + maintenance/extraction-context.js | 453 ++++++++++++++++++ maintenance/extractor.js | 196 +++++++- prompting/default-task-profile-templates.js | 28 +- prompting/prompt-builder.js | 59 ++- prompting/prompt-profiles.js | 26 + retrieval/recall-controller.js | 12 + runtime/settings-defaults.js | 10 + tests/default-settings.mjs | 1 + tests/extractor-input-context.mjs | 151 ++++++ tests/extractor-phase3-layered-context.mjs | 393 +++++++++++++++ tests/prompt-builder-mixed-transcript.mjs | 148 ++++++ .../recall-authoritative-generation-input.mjs | 129 +++++ 15 files changed, 1642 insertions(+), 35 deletions(-) create mode 100644 maintenance/extraction-context.js create mode 100644 tests/extractor-input-context.mjs create mode 100644 tests/extractor-phase3-layered-context.mjs create mode 100644 tests/prompt-builder-mixed-transcript.mjs create mode 100644 tests/recall-authoritative-generation-input.mjs diff --git a/index.js b/index.js index 758c535..62daf7b 100644 --- a/index.js +++ b/index.js @@ -9861,6 +9861,41 @@ function resolveGenerationRecallDeliveryMode( return "immediate"; } +function shouldUseAuthoritativeGenerationRecallInput(recallOptions = {}) { + const normalizedGenerationType = normalizeGenerationRecallTransactionType( + recallOptions?.generationType || "normal", + ); + if (normalizedGenerationType !== "normal") { + return false; + } + return Boolean(getSettings()?.recallUseAuthoritativeGenerationInput); +} + +function shouldPreserveAuthoritativeGenerationRecallText( + source, + overrideUserMessage, + targetUserMessageText, + recallOptions = {}, +) { + if (!shouldUseAuthoritativeGenerationRecallInput(recallOptions)) { + return false; + } + const 
normalizedOverride = normalizeRecallInputText(overrideUserMessage); + const normalizedTarget = normalizeRecallInputText(targetUserMessageText); + if (!normalizedOverride || !normalizedTarget || normalizedOverride === normalizedTarget) { + return false; + } + const normalizedSource = String(source || "").trim(); + return [ + "send-intent", + "generation-started-send-intent", + "generation-started-textarea", + "host-generation-lifecycle", + "textarea-live", + "planner-handoff", + ].includes(normalizedSource); +} + function freezeGenerationRecallOptionsForTransaction( chat, generationType = "normal", @@ -9935,6 +9970,8 @@ function freezeGenerationRecallOptionsForTransaction( lockedSource: source, lockedSourceLabel: sourceLabel, lockedReason: sourceReason, + authoritativeInputUsed: false, + boundUserFloorText: "", includeSyntheticUserMessage: Boolean( recallOptions?.includeSyntheticUserMessage, ), @@ -9949,12 +9986,21 @@ function freezeGenerationRecallOptionsForTransaction( return null; } - const frozenUserMessage = normalizeRecallInputText( - targetUserMessage?.mes || - recallOptions?.overrideUserMessage || - recallOptions?.userMessage || - "", + const targetUserMessageText = normalizeRecallInputText(targetUserMessage?.mes || ""); + const preserveAuthoritativeText = shouldPreserveAuthoritativeGenerationRecallText( + source, + overrideUserMessage, + targetUserMessageText, + recallOptions, ); + const frozenUserMessage = preserveAuthoritativeText + ? normalizeRecallInputText(overrideUserMessage) + : normalizeRecallInputText( + targetUserMessage?.mes || + recallOptions?.overrideUserMessage || + recallOptions?.userMessage || + "", + ); if (!frozenUserMessage) { return null; } @@ -9978,7 +10024,9 @@ function freezeGenerationRecallOptionsForTransaction( (frozenUserMessage === overrideUserMessage ? 
"transaction-source-frozen" : "transaction-bound-to-chat-user-floor"), - includeSyntheticUserMessage: false, + authoritativeInputUsed: preserveAuthoritativeText, + boundUserFloorText: targetUserMessageText, + includeSyntheticUserMessage: preserveAuthoritativeText, }; } @@ -13271,12 +13319,12 @@ async function onRestoreCurrentChatFromCloud() { async () => { const chatId = getCurrentChatId(); if (!chatId) { - toastr.warning("当前没有聊天上下鏂?"); + toastr.warning("当前没有聊天上下文"); return { handledToast: true }; } const confirmed = globalThis.confirm?.( - "这会用云端备份完整覆盖当前聊天的本地记忆,并先保留一份本地安全快照。确定继续吗锛?, + "这会用云端备份完整覆盖当前聊天的本地记忆,并先保留一份本地安全快照。确定继续吗?", ); if (!confirmed) { return { cancelled: true }; diff --git a/llm/llm.js b/llm/llm.js index e54c844..0383eeb 100644 --- a/llm/llm.js +++ b/llm/llm.js @@ -457,6 +457,10 @@ function buildPromptExecutionSummary(debugContext = null) { debugContext.mvu && typeof debugContext.mvu === "object" ? cloneRuntimeDebugValue(debugContext.mvu, {}) : null, + inputContext: + debugContext.inputContext && typeof debugContext.inputContext === "object" + ? cloneRuntimeDebugValue(debugContext.inputContext, {}) + : null, regexInput: normalizeRegexDebugEntries(debugContext.regexInput), }; } diff --git a/maintenance/chat-history.js b/maintenance/chat-history.js index 22a96b4..4595e9c 100644 --- a/maintenance/chat-history.js +++ b/maintenance/chat-history.js @@ -283,6 +283,9 @@ export function buildExtractionMessages(chat, startIdx, endIdx, settings) { seq: index, role: msg.is_user ? "user" : "assistant", content, + rawContent: String(msg?.mes ?? ""), + name: String(msg?.name ?? "").trim(), + speaker: String(msg?.name ?? 
"").trim(), }); } diff --git a/maintenance/extraction-context.js b/maintenance/extraction-context.js new file mode 100644 index 0000000..201f05d --- /dev/null +++ b/maintenance/extraction-context.js @@ -0,0 +1,453 @@ +function splitConfigText(value = "") { + return String(value || "") + .split(/[\r\n,]+/) + .map((item) => String(item || "").trim()) + .filter(Boolean); +} + +function escapeRegex(value = "") { + return String(value || "").replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); +} + +function normalizeBoundaryRule(rawRule, mode = "exclude", index = 0) { + if (typeof rawRule === "string") { + const tag = String(rawRule || "").trim(); + if (!tag) return null; + return { + id: `${mode}:tag:${index}:${tag}`, + mode, + kind: "tag", + label: tag, + tag, + }; + } + + if (!rawRule || typeof rawRule !== "object" || Array.isArray(rawRule)) { + return null; + } + + const tag = String(rawRule.tag || rawRule.name || "").trim(); + if (tag) { + return { + id: `${mode}:tag:${index}:${tag}`, + mode, + kind: "tag", + label: String(rawRule.label || tag).trim() || tag, + tag, + }; + } + + const start = String(rawRule.start ?? rawRule.open ?? rawRule.begin ?? "").trim(); + const end = String(rawRule.end ?? rawRule.close ?? rawRule.finish ?? 
"").trim(); + if (!start || !end) { + return null; + } + + return { + id: `${mode}:boundary:${index}`, + mode, + kind: "boundary", + label: String(rawRule.label || `${start} … ${end}`).trim() || `${start} … ${end}`, + start, + end, + caseSensitive: rawRule.caseSensitive === true, + }; +} + +function normalizeBoundaryRules(rawRules = null, rawTags = "", mode = "exclude") { + const values = []; + if (Array.isArray(rawRules)) { + values.push(...rawRules); + } else if (rawRules !== null && rawRules !== undefined && rawRules !== "") { + values.push(rawRules); + } + values.push(...splitConfigText(rawTags)); + + return values + .map((item, index) => normalizeBoundaryRule(item, mode, index)) + .filter(Boolean); +} + +function applyTagBoundaryRule(text, rule) { + const input = String(text || ""); + const escapedTag = escapeRegex(rule?.tag || ""); + if (!escapedTag) { + return { + changed: false, + output: input, + ruleLabel: String(rule?.label || ""), + matchedText: "", + }; + } + + const regex = new RegExp( + `<${escapedTag}\\b[^>]*>([\\s\\S]*?)<\\/${escapedTag}>`, + "gi", + ); + let match = null; + for (const candidate of input.matchAll(regex)) { + match = candidate; + } + if (!match) { + return { + changed: false, + output: input, + ruleLabel: String(rule?.label || ""), + matchedText: "", + }; + } + + const matchedText = String(match[0] || ""); + if (rule?.mode === "extract") { + return { + changed: true, + output: String(match[1] || "").trim(), + ruleLabel: String(rule?.label || rule?.tag || ""), + matchedText, + }; + } + + const matchIndex = Number(match.index); + if (!Number.isFinite(matchIndex) || matchIndex < 0) { + return { + changed: false, + output: input, + ruleLabel: String(rule?.label || rule?.tag || ""), + matchedText: "", + }; + } + + return { + changed: true, + output: `${input.slice(0, matchIndex)}${input.slice(matchIndex + matchedText.length)}`.trim(), + ruleLabel: String(rule?.label || rule?.tag || ""), + matchedText, + }; +} + +function 
applyLiteralBoundaryRule(text, rule) { + const input = String(text || ""); + const start = String(rule?.start || ""); + const end = String(rule?.end || ""); + if (!start || !end) { + return { + changed: false, + output: input, + ruleLabel: String(rule?.label || ""), + matchedText: "", + }; + } + + const sourceText = rule?.caseSensitive === true ? input : input.toLowerCase(); + const startNeedle = rule?.caseSensitive === true ? start : start.toLowerCase(); + const endNeedle = rule?.caseSensitive === true ? end : end.toLowerCase(); + const startIndex = sourceText.lastIndexOf(startNeedle); + if (startIndex < 0) { + return { + changed: false, + output: input, + ruleLabel: String(rule?.label || ""), + matchedText: "", + }; + } + + const endIndex = sourceText.indexOf(endNeedle, startIndex + startNeedle.length); + if (endIndex < 0) { + return { + changed: false, + output: input, + ruleLabel: String(rule?.label || ""), + matchedText: "", + }; + } + + const matchedText = input.slice(startIndex, endIndex + end.length); + if (rule?.mode === "extract") { + return { + changed: true, + output: input.slice(startIndex + start.length, endIndex).trim(), + ruleLabel: String(rule?.label || ""), + matchedText, + }; + } + + return { + changed: true, + output: `${input.slice(0, startIndex)}${input.slice(endIndex + end.length)}`.trim(), + ruleLabel: String(rule?.label || ""), + matchedText, + }; +} + +function applyBoundaryRule(text, rule) { + if (rule?.kind === "tag") { + return applyTagBoundaryRule(text, rule); + } + if (rule?.kind === "boundary") { + return applyLiteralBoundaryRule(text, rule); + } + return { + changed: false, + output: String(text || ""), + ruleLabel: String(rule?.label || ""), + matchedText: "", + }; +} + +function applyFirstExtractRule(text, rules = []) { + const input = String(text || ""); + for (const rule of Array.isArray(rules) ? 
rules : []) { + const result = applyBoundaryRule(input, rule); + if (result.changed) { + return { + changed: true, + output: result.output, + operation: { + mode: "extract", + rule: result.ruleLabel, + matchedLength: String(result.matchedText || "").length, + }, + }; + } + } + return { + changed: false, + output: input, + operation: null, + }; +} + +function applyExcludeRules(text, rules = []) { + const input = String(text || ""); + let output = input; + const operations = []; + + for (const rule of Array.isArray(rules) ? rules : []) { + const result = applyBoundaryRule(output, rule); + if (!result.changed) { + continue; + } + output = result.output; + operations.push({ + mode: "exclude", + rule: result.ruleLabel, + matchedLength: String(result.matchedText || "").length, + }); + } + + return { + changed: output !== input, + output, + operations, + }; +} + +function normalizeRole(value = "") { + const role = String(value || "assistant").trim().toLowerCase(); + if (["user", "assistant", "system"].includes(role)) { + return role; + } + return role === "ai" ? "assistant" : "assistant"; +} + +function resolveMessageContent(message = {}) { + if (typeof message?.content === "string") { + return message.content; + } + if (typeof message?.mes === "string") { + return message.mes; + } + return ""; +} + +function resolveMessageRawContent(message = {}) { + if (typeof message?.rawContent === "string") { + return message.rawContent; + } + if (typeof message?.mes === "string") { + return message.mes; + } + if (typeof message?.content === "string") { + return message.content; + } + return ""; +} + +function resolveSpeakerName(message = {}, role = "assistant", names = {}) { + const explicitSpeaker = String( + message?.speaker ?? message?.name ?? message?.displayName ?? 
"", + ).trim(); + if (explicitSpeaker) { + return explicitSpeaker; + } + if (role === "user") { + return String(names?.userName || "用户").trim() || "用户"; + } + if (role === "assistant") { + return String(names?.charName || "角色").trim() || "角色"; + } + return role || "assistant"; +} + +function normalizeExtractionMessage(message = {}, index = 0, names = {}) { + const role = normalizeRole( + message?.role ?? (message?.is_user === true ? "user" : "assistant"), + ); + const content = String(resolveMessageContent(message) || "").trim(); + const rawContent = String(resolveMessageRawContent(message) || content).trim(); + const speaker = resolveSpeakerName(message, role, names); + const seq = Number.isFinite(Number(message?.seq)) ? Number(message.seq) : null; + + return { + index, + seq, + role, + speaker, + name: speaker, + content, + rawContent, + sourceType: role === "user" ? "user_input" : "ai_output", + }; +} + +function countRoles(messages = []) { + return (Array.isArray(messages) ? messages : []).reduce( + (acc, message) => { + const role = normalizeRole(message?.role || "assistant"); + acc[role] = Number(acc[role] || 0) + 1; + return acc; + }, + { user: 0, assistant: 0, system: 0 }, + ); +} + +export function formatExtractionTranscript(messages = []) { + return (Array.isArray(messages) ? messages : []) + .map((message, index) => { + const seqLabel = Number.isFinite(Number(message?.seq)) + ? `#${Number(message.seq)}` + : `#${index + 1}`; + const role = normalizeRole(message?.role || "assistant"); + const speaker = String(message?.speaker || message?.name || "").trim(); + const speakerLabel = speaker ? `|${speaker}` : ""; + return `${seqLabel} [${role}${speakerLabel}]: ${String(message?.content || "")}`; + }) + .filter((item) => String(item || "").trim()) + .join("\n\n"); +} + +export function buildExtractionInputContext( + messages = [], + { settings = {}, userName = "", charName = "" } = {}, +) { + const normalizedMessages = (Array.isArray(messages) ? 
messages : []) + .map((message, index) => normalizeExtractionMessage(message, index, { + userName, + charName, + })) + .filter( + (message) => + String(message?.content || "").trim().length > 0 || + String(message?.rawContent || "").trim().length > 0, + ); + + const extractRules = normalizeBoundaryRules( + settings?.extractAssistantExtractRules, + settings?.extractAssistantExtractTags, + "extract", + ); + const excludeRules = normalizeBoundaryRules( + settings?.extractAssistantExcludeRules, + settings?.extractAssistantExcludeTags, + "exclude", + ); + + const filteredMessages = []; + const messageOperations = []; + let changedAssistantMessageCount = 0; + let droppedAssistantMessageCount = 0; + let extractedAssistantMessageCount = 0; + let excludedAssistantMessageCount = 0; + + for (const message of normalizedMessages) { + const operations = []; + let nextContent = String(message.content || ""); + + if (message.role === "assistant") { + const extractResult = applyFirstExtractRule(nextContent, extractRules); + if (extractResult.changed) { + nextContent = extractResult.output; + extractedAssistantMessageCount += 1; + operations.push(extractResult.operation); + } + + const excludeResult = applyExcludeRules(nextContent, excludeRules); + if (excludeResult.changed) { + nextContent = excludeResult.output; + excludedAssistantMessageCount += 1; + operations.push(...excludeResult.operations); + } + } + + const normalizedContent = String(nextContent || "").trim(); + if (operations.length > 0 || normalizedContent !== String(message.content || "").trim()) { + if (message.role === "assistant") { + changedAssistantMessageCount += 1; + } + messageOperations.push({ + seq: message.seq, + role: message.role, + speaker: message.speaker, + beforeLength: String(message.content || "").length, + afterLength: normalizedContent.length, + operations, + }); + } + + if (!normalizedContent) { + if (message.role === "assistant" && String(message.content || "").trim()) { + 
droppedAssistantMessageCount += 1; + } + continue; + } + + filteredMessages.push({ + ...message, + content: normalizedContent, + extractionFilterOperations: operations, + }); + } + + const rawTranscript = formatExtractionTranscript( + normalizedMessages.filter((message) => String(message.content || "").trim()), + ); + const filteredTranscript = formatExtractionTranscript(filteredMessages); + + return { + rawMessages: normalizedMessages, + filteredMessages, + rawTranscript, + filteredTranscript, + debug: { + rawMessageCount: normalizedMessages.length, + filteredMessageCount: filteredMessages.length, + rawRoleCounts: countRoles(normalizedMessages), + filteredRoleCounts: countRoles(filteredMessages), + rawTranscriptLength: rawTranscript.length, + filteredTranscriptLength: filteredTranscript.length, + changedAssistantMessageCount, + droppedAssistantMessageCount, + extractedAssistantMessageCount, + excludedAssistantMessageCount, + assistantBoundaryConfig: { + extractRuleCount: extractRules.length, + excludeRuleCount: excludeRules.length, + extractRules: extractRules.map((rule) => rule.label), + excludeRules: excludeRules.map((rule) => rule.label), + }, + rawMessages: normalizedMessages, + filteredMessages, + messageOperations, + }, + }; +} diff --git a/maintenance/extractor.js b/maintenance/extractor.js index 40ad690..efafd7b 100644 --- a/maintenance/extractor.js +++ b/maintenance/extractor.js @@ -32,8 +32,10 @@ import { deriveStoryTimeSpanFromNodes, describeNodeStoryTime, normalizeStoryTime, + resolveActiveStoryContext, upsertTimelineSegment, } from "../graph/story-timeline.js"; +import { getActiveSummaryEntries } from "../graph/summary-state.js"; import { buildTaskExecutionDebugContext, buildTaskLlmPayload, @@ -42,6 +44,7 @@ import { import { RELATION_TYPES } from "../graph/schema.js"; import { applyTaskRegex } from "../prompting/task-regex.js"; import { getSTContextForPrompt, getSTContextSnapshot } from "../host/st-context.js"; +import { buildExtractionInputContext } 
from "./extraction-context.js"; import { aliasSetMatchesValue, buildUserPovAliasNormalizedSet, @@ -61,6 +64,17 @@ function createTaskLlmDebugContext(promptBuild, regexInput) { : null; } +function createExtractTaskLlmDebugContext(promptBuild, regexInput, inputContext = null) { + const debugContext = createTaskLlmDebugContext(promptBuild, regexInput); + if (!inputContext || typeof inputContext !== "object") { + return debugContext; + } + return { + ...debugContext, + inputContext, + }; +} + function resolveTaskPromptPayload(promptBuild, fallbackUserPrompt = "") { if (typeof buildTaskLlmPayload === "function") { return buildTaskLlmPayload(promptBuild, fallbackUserPrompt); @@ -86,6 +100,54 @@ function resolveTaskLlmSystemPrompt(promptPayload, fallbackSystemPrompt = "") { return String(promptPayload?.systemPrompt || fallbackSystemPrompt || ""); } +function buildActiveSummariesText(graph) { + const entries = getActiveSummaryEntries(graph); + if (!Array.isArray(entries) || entries.length === 0) return ""; + return entries + .map((entry, index) => { + const rangeLabel = Array.isArray(entry.messageRange) && entry.messageRange.length >= 2 + && entry.messageRange[0] >= 0 && entry.messageRange[1] >= 0 + ? `楼${entry.messageRange[0]}~${entry.messageRange[1]}` + : ""; + const levelLabel = entry.level ? `L${entry.level}` : ""; + const prefix = [rangeLabel, levelLabel].filter(Boolean).join(" "); + return `[${index + 1}]${prefix ? 
` (${prefix})` : ""} ${String(entry.text || entry.summary || "").trim()}`; + }) + .filter((line) => line.trim()) + .join("\n"); +} + +function buildStoryTimeContextText(graph) { + const storyCtx = resolveActiveStoryContext(graph); + if (!storyCtx?.resolved) return ""; + const parts = []; + if (storyCtx.activeStoryTimeLabel) { + parts.push(`当前活跃剧情时间:${storyCtx.activeStoryTimeLabel}`); + } + if (storyCtx.source) { + parts.push(`来源:${storyCtx.source}`); + } + const seg = storyCtx.segment; + if (seg?.tense && seg.tense !== "unknown") { + parts.push(`时态:${seg.tense}`); + } + return parts.join(" | "); +} + +function applyRecentMessageCap(messages, cap = 0) { + if (!Array.isArray(messages) || messages.length === 0) return messages; + const numericCap = Number(cap); + if (!Number.isFinite(numericCap) || numericCap <= 0) return messages; + if (messages.length <= numericCap) return messages; + return messages.slice(-numericCap); +} + +function resolveExtractPromptStructuredMode(settings) { + const mode = String(settings?.extractPromptStructuredMode || "both").trim().toLowerCase(); + if (["transcript", "structured", "both"].includes(mode)) return mode; + return "both"; +} + function isAbortError(error) { return error?.name === "AbortError"; } @@ -799,13 +861,42 @@ export async function extractMemories({ `[ST-BME] 提取开始: chat[${effectiveStartSeq}..${effectiveEndSeq}], ${messages.length} 条消息`, ); - // 构建对话文本 - const dialogueText = messages - .map((m) => { - const seqLabel = Number.isFinite(m.seq) ? `#${m.seq}` : "#?"; - return `${seqLabel} [${m.role}]: ${m.content}`; - }) - .join("\n\n"); + const extractionInput = buildExtractionInputContext(messages, { + settings, + userName: stContext?.prompt?.userName || "", + charName: stContext?.prompt?.charName || "", + }); + const allStructuredMessages = Array.isArray(extractionInput?.filteredMessages) + ? 
extractionInput.filteredMessages.map((message) => ({ + seq: message?.seq, + role: message?.role, + content: message?.content, + speaker: message?.speaker, + name: message?.name, + })) + : []; + + // Phase 3: apply recent message cap + const structuredMessages = applyRecentMessageCap( + allStructuredMessages, + settings?.extractRecentMessageCap, + ); + const cappedMessageCount = allStructuredMessages.length - structuredMessages.length; + if (cappedMessageCount > 0) { + debugLog( + `[ST-BME][extract-p3] extractRecentMessageCap=${settings?.extractRecentMessageCap}, ` + + `capped ${cappedMessageCount} messages (${allStructuredMessages.length} -> ${structuredMessages.length})`, + ); + } + + // Phase 3: structured mode determines what goes into recentMessages/dialogueText + const structuredMode = resolveExtractPromptStructuredMode(settings); + const dialogueText = structuredMode === "structured" + ? "" + : String(extractionInput?.filteredTranscript || ""); + const promptRecentMessages = structuredMode === "transcript" + ? dialogueText + : structuredMessages; // 构建当前图概览(让 LLM 知道已有哪些节点,避免重复) const graphOverview = buildGraphOverview(graph, schema); @@ -817,16 +908,36 @@ export async function extractMemories({ ? `${messages[0]?.seq ?? "?"} ~ ${messages[messages.length - 1]?.seq ?? "?"}` : ""; + // Phase 3: layered context — active summaries and story time + const activeSummaries = settings?.extractIncludeSummaries !== false + ? buildActiveSummariesText(graph) + : ""; + const storyTimeContext = settings?.extractIncludeStoryTime !== false + ? buildStoryTimeContextText(graph) + : ""; + + debugLog( + `[ST-BME][extract-p3] structuredMode=${structuredMode}, ` + + `activeSummaries=${activeSummaries ? activeSummaries.split("\n").length + " entries" : "none"}, ` + + `storyTimeContext=${storyTimeContext ? 
"present" : "none"}, ` + + `worldbookMode=${String(settings?.extractWorldbookMode || "active")}`, + ); + + const extractWorldbookMode = String(settings?.extractWorldbookMode || "active").trim().toLowerCase(); const promptBuild = await buildTaskPrompt(settings, "extract", { taskName: "extract", schema: schemaDescription, schemaDescription, - recentMessages: dialogueText, - chatMessages: messages, + recentMessages: promptRecentMessages, + chatMessages: structuredMessages, dialogueText, graphStats: graphOverview, graphOverview, currentRange, + activeSummaries, + storyTimeContext, + taskInputDebug: extractionInput?.debug || null, + __skipWorldInfo: extractWorldbookMode === "none", ...getSTContextForPrompt(), }); @@ -843,19 +954,50 @@ export async function extractMemories({ "system", ); - // 用户提示词 - const userPrompt = [ - "## 当前对话内容(需提取记忆)", - dialogueText, - "", + // 用户提示词 — Phase 3 分层信息结构 + const userPromptSections = []; + + // Layer 1: 当前对话切片 + if (dialogueText) { + userPromptSections.push("## 当前对话内容(需提取记忆)", dialogueText, ""); + } else if (structuredMode === "structured" && structuredMessages.length > 0) { + userPromptSections.push( + "## 当前对话内容(结构化消息,需提取记忆)", + "(结构化消息已通过 profile blocks 注入,请参考上方 recentMessages 块。)", + "", + ); + } + + // Layer 2: 当前图谱状态 + userPromptSections.push( "## 当前图谱状态", graphOverview || "(空图谱,尚无节点)", "", - "## 节点类型定义", - schemaDescription, - "", - "请分析对话,按 JSON 格式输出操作列表。", - ].join("\n"); + ); + + // Layer 3: 已有总结快照(帮助避免重复提取) + if (activeSummaries) { + userPromptSections.push( + "## 近期局面总结(已有覆盖,避免重复)", + activeSummaries, + "", + ); + } + + // Layer 4: 故事时间线位置 + if (storyTimeContext) { + userPromptSections.push( + "## 当前故事时间", + storyTimeContext, + "", + ); + } + + // Layer 5: 节点类型定义 + userPromptSections.push("## 节点类型定义", schemaDescription, ""); + + userPromptSections.push("请分析对话,按 JSON 格式输出操作列表。"); + const userPrompt = userPromptSections.join("\n"); const promptPayload = resolveTaskPromptPayload(promptBuild, userPrompt); const 
extractionAugmentPrompt = buildCognitiveExtractAugmentPrompt(); const promptPayloadAdditionalMessages = Array.isArray( @@ -904,6 +1046,16 @@ export async function extractMemories({ `[ST-BME][prompt-diag] NO user messages in promptMessages! Fallback userPrompt will be used.`, ); } + if (extractionInput?.debug) { + debugLog( + `[ST-BME][extract-input] raw=${Number(extractionInput.debug.rawMessageCount || 0)}, ` + + `filtered=${Number(extractionInput.debug.filteredMessageCount || 0)}, ` + + `assistantChanged=${Number(extractionInput.debug.changedAssistantMessageCount || 0)}, ` + + `assistantDropped=${Number(extractionInput.debug.droppedAssistantMessageCount || 0)}, ` + + `extractRules=${Number(extractionInput.debug.assistantBoundaryConfig?.extractRuleCount || 0)}, ` + + `excludeRules=${Number(extractionInput.debug.assistantBoundaryConfig?.excludeRuleCount || 0)}`, + ); + } } // 调用 LLM @@ -913,7 +1065,11 @@ export async function extractMemories({ maxRetries: 2, signal, taskType: "extract", - debugContext: createTaskLlmDebugContext(promptBuild, extractRegexInput), + debugContext: createExtractTaskLlmDebugContext( + promptBuild, + extractRegexInput, + extractionInput?.debug || null, + ), promptMessages: promptPayload.promptMessages, additionalMessages: promptPayloadAdditionalMessages, onStreamProgress, diff --git a/prompting/default-task-profile-templates.js b/prompting/default-task-profile-templates.js index 1758366..03c0927 100644 --- a/prompting/default-task-profile-templates.js +++ b/prompting/default-task-profile-templates.js @@ -133,6 +133,30 @@ export const DEFAULT_TASK_PROFILE_TEMPLATES = { "injectionMode": "relative", "order": 9 }, + { + "id": "default-active-summaries", + "name": "活跃总结", + "type": "builtin", + "enabled": true, + "role": "system", + "sourceKey": "activeSummaries", + "sourceField": "", + "content": "", + "injectionMode": "relative", + "order": 10 + }, + { + "id": "default-story-time-context", + "name": "故事时间", + "type": "builtin", + "enabled": 
true, + "role": "system", + "sourceKey": "storyTimeContext", + "sourceField": "", + "content": "", + "injectionMode": "relative", + "order": 11 + }, { "id": "default-format", "name": "输出格式", @@ -143,7 +167,7 @@ export const DEFAULT_TASK_PROFILE_TEMPLATES = { "sourceField": "", "content": "请只输出一个合法 JSON 对象:\n{\n \"thought\": \"简要分析这批对话里真正值得入图的变化\",\n \"batchStoryTime\": {\n \"label\": \"第二天清晨\",\n \"tense\": \"ongoing\",\n \"relation\": \"after\",\n \"anchorLabel\": \"昨夜冲突之后\",\n \"confidence\": \"high\",\n \"advancesActiveTimeline\": true\n },\n \"operations\": [\n {\n \"action\": \"create\",\n \"type\": \"event\",\n \"fields\": {\"title\": \"简短事件名\", \"summary\": \"...\", \"participants\": \"...\", \"status\": \"ongoing\"},\n \"scope\": {\"layer\": \"objective\", \"regionPrimary\": \"主地区\", \"regionPath\": [\"上级地区\", \"主地区\"], \"regionSecondary\": [\"次级地区\"]},\n \"storyTime\": {\"label\": \"第二天清晨\", \"tense\": \"ongoing\", \"relation\": \"same\", \"confidence\": \"high\"},\n \"importance\": 6,\n \"ref\": \"evt1\"\n },\n {\n \"action\": \"create\",\n \"type\": \"pov_memory\",\n \"fields\": {\"summary\": \"这个角色会怎么记住这件事\", \"belief\": \"她认为发生了什么\", \"emotion\": \"情绪\", \"attitude\": \"态度\", \"certainty\": \"unsure\", \"about\": \"evt1\"},\n \"scope\": {\"layer\": \"pov\", \"ownerType\": \"character\", \"ownerId\": \"角色名\", \"ownerName\": \"角色名\", \"regionPrimary\": \"主地区\", \"regionPath\": [\"上级地区\", \"主地区\"]},\n \"storyTime\": {\"label\": \"第二天清晨\", \"tense\": \"ongoing\", \"relation\": \"same\", \"confidence\": \"high\"}\n }\n ],\n \"cognitionUpdates\": [\n {\n \"ownerType\": \"character\",\n \"ownerName\": \"艾琳\",\n \"ownerNodeId\": \"char-1\",\n \"knownRefs\": [\"evt1\", \"char2\"],\n \"mistakenRefs\": [\"evt2\"],\n \"visibility\": [\n {\"ref\": \"evt1\", \"score\": 1.0, \"reason\": \"direct witness\"},\n {\"ref\": \"thread-1\", \"score\": 0.55, \"reason\": \"heard nearby\"}\n ]\n }\n ],\n \"regionUpdates\": {\n \"activeRegionHint\": \"钟楼\",\n \"adjacency\": [\n 
{\"region\": \"钟楼\", \"adjacent\": [\"旧城区\", \"内廷\"]}\n ]\n }\n}\n如果要更新已有节点,可使用 {\"action\":\"update\",\"nodeId\":\"existing-node-id\",\"fields\":{...},\"scope\":{...}}。\nknownRefs / mistakenRefs / visibility.ref 优先引用同批 ref,没有 ref 再引用已有 nodeId。\n如果这一批主叙事时间能判断,尽量填写 batchStoryTime;operations[].storyTime 可以单独覆盖,不写时视为继承本批主时间。\n如果这批对话没有值得入图的新信息,返回 {\"thought\":\"...\", \"operations\": [], \"cognitionUpdates\": [], \"regionUpdates\": {}}。", "injectionMode": "relative", - "order": 10 + "order": 12 }, { "id": "default-rules", @@ -155,7 +179,7 @@ export const DEFAULT_TASK_PROFILE_TEMPLATES = { "sourceField": "", "content": "我对你的执行标准是这样的——\n- 先帮我做事件分级,再决定要不要建节点:\n · A级(转折点):关系质变、告白、背叛、决裂、不可逆改变、重大选择 -> importance 8-10,必记\n · B级(推进点):新信息、新联系、阶段性完成、有意义的位置移动 -> importance 5-7,按信息量建节点\n · C级(填充):日常对话、重复行为、无后续影响的闲聊 -> 通常不单独建节点\n- 每批帮我收敛成少量高价值操作就好;通常 1 个 event,加上必要的 update、必要的 POV 和记忆认知更新就够了。\n- 客观事实帮我优先用 event / character / location / thread / rule / synopsis / reflection。\n- 主观记忆统一使用 type = pov_memory,不要拿 character / location / event 去伪装第一视角记忆。\n- 客观节点 scope.layer 必须是 objective;POV 节点 scope.layer 必须是 pov,并且必须写 ownerType / ownerId / ownerName。\n- 涉及到的角色都尽量尝试补 cognitionUpdates,不只限当前角色和用户。\n- cognitionUpdates 只表达谁明确知道、谁误解、谁低置信可见;不要帮我写成第二份事实节点。\n- 多角色场景里,pov_memory 和 cognitionUpdates 必须写清具体人物;不要把角色卡名当作 POV owner。\n- 用户 POV 不等于角色已知事实;它是我作为用户/玩家侧的感受、承诺、偏见和长期互动背景。\n- batchStoryTime 表示这批主叙事所处的剧情时间;只有明确推进主叙事时才把 advancesActiveTimeline 设为 true。\n- operations[].storyTime 写节点自己的剧情时间;帮我区分\"故事里什么时候发生\"和\"聊天里什么时候被提到\"。\n- flashback / future / hypothetical 可以写时间,但通常不要推进当前活动时间轴。\n- 地区能判断才写 scope.regionPrimary / regionPath / regionSecondary;判断不出来就帮我留空。\n- 角色、地点等 latestOnly 节点如果图里已有同名同作用域节点,优先帮我 update,不要重复 create。\n\n客观层字段方面我的要求是——\n- event.title 只写简短事件名,6-18 字。\n- event.summary 用白描复述事实,150 字以内,不抒情不评价。\n- participants 用逗号分隔参与者。\n- character / location 的字段也用白描,不写主观评价。\n\nPOV 记忆字段方面我的要求是——\npov_memory 要像角色真的会留下的记忆痕迹,不是客观事件的换个说法。\n\n- **summary**:帮我写\"这个角色会怎么记住这件事\"\n · 不是客观事件摘要,是主观记忆痕迹\n · 
用角色的人格语气(温柔?冷淡?戏谑?怯懦?警觉?)\n · 可以是碎念、独白、关系定位、感官片段——看角色性格\n · 只包含角色真实看到、听到、感受到的内容(非全知)\n · 示例:\n × \"角色A和用户在咖啡馆聊天,谈到了工作\"(客观复述,我不要这种)\n √ \"他今天一直在揉太阳穴。我问他要不要换个话题,他说没事。他说没事的时候眼睛没看我。\"(主观记忆,我要这种)\n\n- **belief**:角色认为发生了什么\n · 可能与客观事实不同——这正是 POV 价值所在\n · 如果角色误解了真相,belief 要帮我反映这个误解\n\n- **emotion**:当时最强烈的情感\n · 帮我写具体感受,不写\"开心\"\"难过\"这种标签\n · 示例:\n × \"开心\"\n √ \"胸口像被什么顶着,想说点什么又说不出来\"\n\n- **attitude**:角色对这件事或相关人的态度(可能发生了变化)\n\n- **certainty**:\n · certain = 亲历确认,非常肯定\n · unsure = 间接得知或只看到片段\n · mistaken = 明确误解了事实\n\n- **about**:关联的事件或实体,优先引用同批 ref,没有 ref 再用简短标签\n\nvisibility.score 取 0..1;1 表示亲历或明确得知,0.5 左右表示间接听闻,0.2 左右表示远远瞥见。\n时间推不出来就留空,不允许为了补全格式硬编剧情时间标签。\n\n以下是我特别不想看到的——\n- 编造对话里没有的事件、地区、想法、认知状态或邻接关系。\n- 把角色 POV、用户 POV、客观事实混成同一个节点。\n- 让 POV 记忆拥有该视角不可能知道的信息(全知错误)。\n- 所有角色的 POV 都用同一种语气写(应该各有各的人格印记)。\n- POV summary 写成客观事件的换皮复述。\n- emotion 只写标签词,不写具体感受。\n- 只为显得全面就给所有角色都硬写 POV 或 cognitionUpdates。\n- 把 cognitionUpdates 当硬白名单或第二份世界事实表。\n- 把后面才说到的事情误判成后面才发生,或把未来计划当成已经发生的当前事实。\n- 把角色卡名、群像统称或旁白身份当成具体 POV owner。\n- 地区不确定却硬写一个像地区的词。\n- 为了显得全面而生成很多低价值碎节点。\n- 直接复制原文,或写成文学化修辞。", "injectionMode": "relative", - "order": 11 + "order": 13 } ], "generation": { diff --git a/prompting/prompt-builder.js b/prompting/prompt-builder.js index 1b9c3cd..0dc5fb1 100644 --- a/prompting/prompt-builder.js +++ b/prompting/prompt-builder.js @@ -181,6 +181,10 @@ export function buildTaskExecutionDebugContext( promptDebug.mvu && typeof promptDebug.mvu === "object" ? cloneRuntimeDebugValue(promptDebug.mvu, {}) : null, + inputContext: + promptDebug.inputContext && typeof promptDebug.inputContext === "object" + ? 
cloneRuntimeDebugValue(promptDebug.inputContext, {}) + : null, regexInput: (() => { const merged = mergeRegexCollectors( @@ -284,18 +288,26 @@ function getPromptMessageLikeDescriptor(value) { if (typeof value.content === "string") { const role = String(value.role || "assistant").trim().toLowerCase(); + const speaker = String( + value.speaker || value.name || value.displayName || "", + ).trim(); return { content: String(value.content || ""), role: role === "user" ? "user" : "assistant", seq: getOptionalFiniteNumber(value.seq), + speaker, }; } if (typeof value.mes === "string") { + const speaker = String( + value.speaker || value.name || value.displayName || "", + ).trim(); return { content: String(value.mes || ""), role: value.is_user === true ? "user" : "assistant", seq: getOptionalFiniteNumber(value.seq), + speaker, }; } @@ -320,7 +332,10 @@ function formatPromptMessageTranscript(value) { } const seqLabel = descriptor.seq != null ? `#${descriptor.seq}` : `#${index + 1}`; - return `${seqLabel} [${descriptor.role}]: ${descriptor.content}`; + const speakerLabel = descriptor.speaker + ? `|${descriptor.speaker}` + : ""; + return `${seqLabel} [${descriptor.role}${speakerLabel}]: ${descriptor.content}`; }) .filter(Boolean) .join("\n\n"); @@ -766,6 +781,32 @@ function sanitizePromptContextInputs( return value; } seen.add(value); + const messageDescriptor = getPromptMessageLikeDescriptor(value); + if (messageDescriptor) { + const contentKey = typeof value.content === "string" + ? "content" + : typeof value.mes === "string" + ? "mes" + : ""; + const messageRole = messageDescriptor.role === "user" + ? "user" + : messageDescriptor.role === "assistant" + ? "assistant" + : regexRole; + return Object.fromEntries( + Object.entries(value).map(([key, entryValue]) => [ + key, + key === contentKey + ? 
applyLocalRegexToStructuredValue( + entryValue, + regexStage, + messageRole, + seen, + ) + : entryValue, + ]), + ); + } return Object.fromEntries( Object.entries(value).map(([key, entryValue]) => [ key, @@ -821,14 +862,14 @@ function sanitizePromptContextInputs( ? "" : null : sanitized.value; - if (structuredSanitizerInput.renderAsTranscript) { - sanitizedValue = stringifyInterpolatedValue(sanitizedValue); - } sanitizedValue = applyLocalRegexToStructuredValue( sanitizedValue, regexStage, regexRole, ); + if (structuredSanitizerInput.renderAsTranscript) { + sanitizedValue = stringifyInterpolatedValue(sanitizedValue); + } sanitizedContext[fieldName] = sanitizedValue; } @@ -1407,6 +1448,10 @@ export async function buildTaskPrompt(settings = {}, taskType, context = {}) { const legacyPrompt = getLegacyPromptForTask(settings, taskType); const promptRegexInput = { entries: [] }; const mvuPromptDebug = createEmptyMvuPromptDebug(); + const taskInputDebug = + context?.taskInputDebug && typeof context.taskInputDebug === "object" + ? cloneRuntimeDebugValue(context.taskInputDebug, {}) + : null; const worldInfoInputContext = { ...context, }; @@ -1430,7 +1475,9 @@ export async function buildTaskPrompt(settings = {}, taskType, context = {}) { return orderA - orderB; }); - const worldInfoRequested = profileRequiresWorldInfo(profile); + const worldInfoRequested = context?.__skipWorldInfo === true + ? 
false + : profileRequiresWorldInfo(profile); const emptyWorldInfo = buildEmptyWorldInfoContext(); let resolvedWorldInfo = emptyWorldInfo; let worldInfoRuntimeBlockedContents = []; @@ -1771,6 +1818,7 @@ export async function buildTaskPrompt(settings = {}, taskType, context = {}) { ), fallbackReason: String(mvuPromptDebug.fallbackReason || ""), }, + inputContext: taskInputDebug, effectivePath: { promptAssembly: "ordered-private-messages", hostInjectionPlan: "diagnostic-plan-only", @@ -1809,6 +1857,7 @@ export async function buildTaskPrompt(settings = {}, taskType, context = {}) { hostInjectionPlan, worldInfoResolution, mvu: result.debug.mvu, + inputContext: taskInputDebug, regexInput: result.regexInput, debug: result.debug, }); diff --git a/prompting/prompt-profiles.js b/prompting/prompt-profiles.js index ddd7ba4..a19e2ed 100644 --- a/prompting/prompt-profiles.js +++ b/prompting/prompt-profiles.js @@ -158,6 +158,18 @@ const BUILTIN_BLOCK_DEFINITIONS = [ role: "system", description: "注入近期检测到的记忆矛盾或冲突信息。reflection 任务专用,触发基于矛盾的深度反思。", }, + { + sourceKey: "activeSummaries", + name: "活跃总结", + role: "system", + description: "注入当前活跃的分层总结快照。extract 任务使用,帮助 LLM 了解近期已总结的局面,避免重复提取已覆盖内容。", + }, + { + sourceKey: "storyTimeContext", + name: "故事时间", + role: "system", + description: "注入当前活跃的故事时间线标签与来源。extract 任务使用,帮助 LLM 定位本批对话在剧情时间轴上的位置。", + }, ]; const DEFAULT_TASK_PROFILE_VERSION = 3; @@ -489,6 +501,20 @@ const TASK_CONTEXT_BLOCK_BLUEPRINTS = { role: "system", sourceKey: "currentRange", }, + { + id: "default-active-summaries", + name: "活跃总结", + type: "builtin", + role: "system", + sourceKey: "activeSummaries", + }, + { + id: "default-story-time-context", + name: "故事时间", + type: "builtin", + role: "system", + sourceKey: "storyTimeContext", + }, ], recall: [ { diff --git a/retrieval/recall-controller.js b/retrieval/recall-controller.js index 03c41d4..ebb1a7d 100644 --- a/retrieval/recall-controller.js +++ b/retrieval/recall-controller.js @@ -86,6 +86,10 @@ export function 
resolveRecallInputController( override?.overrideReason || "override-bound", ), + authoritativeInputUsed: Boolean(override?.authoritativeInputUsed), + boundUserFloorText: runtime.normalizeRecallInputText( + override?.boundUserFloorText || "", + ), sourceCandidates: Array.isArray(override?.sourceCandidates) ? override.sourceCandidates.map((candidate) => ({ ...candidate })) : [], @@ -145,6 +149,8 @@ export function resolveRecallInputController( source, sourceLabel: runtime.getRecallUserMessageSourceLabel(source), reason: userMessage ? `${source || "unknown"}-selected` : "no-recall-input", + authoritativeInputUsed: false, + boundUserFloorText: tailUserText || latestUserText || "", sourceCandidates: [], recentMessages: runtime.buildRecallRecentMessages( chat, @@ -212,6 +218,8 @@ export function applyRecallInjectionController( source: recallInput.source, sourceLabel: recallInput.sourceLabel, reason: recallInput.reason || "", + authoritativeInputUsed: Boolean(recallInput.authoritativeInputUsed), + boundUserFloorText: String(recallInput.boundUserFloorText || ""), sourceCandidates: Array.isArray(recallInput.sourceCandidates) ? recallInput.sourceCandidates.map((candidate) => ({ ...candidate })) : [], @@ -475,6 +483,8 @@ export async function runRecallController(runtime, options = {}) { source: recallInput?.source || cachedRecallPayload.source || "", sourceLabel: recallInput?.sourceLabel || cachedRecallPayload.sourceLabel || "", + authoritativeInputUsed: Boolean(recallInput?.authoritativeInputUsed), + boundUserFloorText: String(recallInput?.boundUserFloorText || ""), hookName: recallInput?.hookName || "", sourceCandidates: Array.isArray(recallInput?.sourceCandidates) ? 
recallInput.sourceCandidates.map((candidate) => ({ @@ -531,6 +541,8 @@ export async function runRecallController(runtime, options = {}) { "immediate", source: recallInput?.source || "", sourceLabel: recallInput?.sourceLabel || "", + authoritativeInputUsed: Boolean(recallInput?.authoritativeInputUsed), + boundUserFloorText: String(recallInput?.boundUserFloorText || ""), hookName: recallInput?.hookName || "", sourceCandidates: Array.isArray(recallInput?.sourceCandidates) ? recallInput.sourceCandidates.map((candidate) => ({ ...candidate })) diff --git a/runtime/settings-defaults.js b/runtime/settings-defaults.js index e5d2c62..2ba320a 100644 --- a/runtime/settings-defaults.js +++ b/runtime/settings-defaults.js @@ -20,6 +20,15 @@ export const defaultSettings = { extractEvery: 1, extractContextTurns: 2, extractAutoDelayLatestAssistant: false, + extractAssistantExtractTags: "", + extractAssistantExcludeTags: "think,analysis,reasoning", + extractAssistantExtractRules: [], + extractAssistantExcludeRules: [], + extractRecentMessageCap: 0, + extractPromptStructuredMode: "both", + extractWorldbookMode: "active", + extractIncludeStoryTime: true, + extractIncludeSummaries: true, // 召回设置 recallEnabled: true, @@ -34,6 +43,7 @@ export const defaultSettings = { recallDiffusionTopK: 100, recallLlmCandidatePool: 30, recallLlmContextMessages: 4, + recallUseAuthoritativeGenerationInput: false, recallEnableMultiIntent: true, recallMultiIntentMaxSegments: 4, recallEnableContextQueryBlend: true, diff --git a/tests/default-settings.mjs b/tests/default-settings.mjs index efc451f..4feaec5 100644 --- a/tests/default-settings.mjs +++ b/tests/default-settings.mjs @@ -14,6 +14,7 @@ assert.equal(defaultSettings.recallEnableGraphDiffusion, true); assert.equal(defaultSettings.recallDiffusionTopK, 100); assert.equal(defaultSettings.recallLlmCandidatePool, 30); assert.equal(defaultSettings.recallLlmContextMessages, 4); +assert.equal(defaultSettings.recallUseAuthoritativeGenerationInput, false); 
assert.equal(defaultSettings.recallEnableMultiIntent, true); assert.equal(defaultSettings.recallMultiIntentMaxSegments, 4); assert.equal(defaultSettings.recallEnableContextQueryBlend, true); diff --git a/tests/extractor-input-context.mjs b/tests/extractor-input-context.mjs new file mode 100644 index 0000000..683e3ee --- /dev/null +++ b/tests/extractor-input-context.mjs @@ -0,0 +1,151 @@ +import assert from "node:assert/strict"; +import { + installResolveHooks, + toDataModuleUrl, +} from "./helpers/register-hooks-compat.mjs"; + +const extensionsShimSource = [ + "export const extension_settings = {};", + "export function getContext() {", + " return globalThis.__stBmeTestContext || {", + " chat: [],", + " chatMetadata: {},", + " extensionSettings: {},", + " powerUserSettings: {},", + " characters: {},", + " characterId: null,", + " name1: '玩家',", + " name2: '艾琳',", + " chatId: 'test-chat',", + " };", + "}", +].join("\n"); + +const scriptShimSource = [ + "export function getRequestHeaders() {", + " return {};", + "}", + "export function substituteParamsExtended(value) {", + " return String(value ?? 
'');",
+  "}",
+].join("\n");
+
+const openAiShimSource = [
+  "export const chat_completion_sources = {};",
+  "export async function sendOpenAIRequest() {",
+  "  throw new Error('sendOpenAIRequest should not be called in extractor-input-context test');",
+  "}",
+].join("\n");
+
+installResolveHooks([
+  {
+    specifiers: [
+      "../../../extensions.js",
+      "../../../../extensions.js",
+      "../../../../../extensions.js",
+    ],
+    url: toDataModuleUrl(extensionsShimSource),
+  },
+  {
+    specifiers: [
+      "../../../../script.js",
+      "../../../../../script.js",
+    ],
+    url: toDataModuleUrl(scriptShimSource),
+  },
+  {
+    specifiers: [
+      "../../../../openai.js",
+      "../../../../../openai.js",
+    ],
+    url: toDataModuleUrl(openAiShimSource),
+  },
+]);
+
+const { createEmptyGraph } = await import("../graph/graph.js");
+const { DEFAULT_NODE_SCHEMA } = await import("../graph/schema.js");
+const { extractMemories } = await import("../maintenance/extractor.js");
+
+function setTestOverrides(overrides = {}) {
+  globalThis.__stBmeTestOverrides = overrides;
+  return () => {
+    delete globalThis.__stBmeTestOverrides;
+  };
+}
+
+globalThis.__stBmeTestContext = {
+  chat: [],
+  chatMetadata: {},
+  extensionSettings: {},
+  powerUserSettings: {},
+  characters: {},
+  characterId: null,
+  name1: "玩家",
+  name2: "艾琳",
+  chatId: "test-chat",
+};
+
+const graph = createEmptyGraph();
+let captured = null;
+const restore = setTestOverrides({
+  llm: {
+    async callLLMForJSON(payload) {
+      captured = payload;
+      return {
+        operations: [],
+        cognitionUpdates: [],
+        regionUpdates: {},
+      };
+    },
+  },
+});
+
+try {
+  const result = await extractMemories({
+    graph,
+    messages: [
+      {
+        seq: 10,
+        role: "assistant",
+        content: "<think>隐式思维</think>继续说明",
+        name: "艾琳",
+        speaker: "艾琳",
+      },
+      {
+        seq: 11,
+        role: "user",
+        content: "用户输入",
+        name: "玩家",
+        speaker: "玩家",
+      },
+    ],
+    startSeq: 10,
+    endSeq: 11,
+    schema: DEFAULT_NODE_SCHEMA,
+    embeddingConfig: null,
+    settings: {
+      extractAssistantExcludeTags: "think",
+    },
+  });
+
+  assert.equal(result.success, true);
+  assert.ok(captured);
+  assert.ok(captured.debugContext);
+  assert.ok(captured.debugContext.inputContext);
+  assert.equal(captured.debugContext.inputContext.rawMessageCount, 2);
+  assert.equal(captured.debugContext.inputContext.filteredMessageCount, 2);
+  assert.equal(captured.debugContext.inputContext.changedAssistantMessageCount, 1);
+  assert.equal(captured.debugContext.inputContext.excludedAssistantMessageCount, 1);
+
+  const recentBlock = (Array.isArray(captured.promptMessages) ? captured.promptMessages : []).find(
+    (message) => message.sourceKey === "recentMessages",
+  );
+  assert.ok(recentBlock);
+  assert.match(String(recentBlock?.content || ""), /#10 \[assistant\|艾琳\]: 继续说明/);
+  assert.match(String(recentBlock?.content || ""), /#11 \[user\|玩家\]: 用户输入/);
+  assert.doesNotMatch(String(recentBlock?.content || ""), /隐式思维|<think>/);
+} finally {
+  restore();
+}
+
+console.log("extractor-input-context tests passed");
diff --git a/tests/extractor-phase3-layered-context.mjs b/tests/extractor-phase3-layered-context.mjs
new file mode 100644
index 0000000..f83a305
--- /dev/null
+++ b/tests/extractor-phase3-layered-context.mjs
@@ -0,0 +1,393 @@
+import assert from "node:assert/strict";
+import {
+  installResolveHooks,
+  toDataModuleUrl,
+} from "./helpers/register-hooks-compat.mjs";
+
+const extensionsShimSource = [
+  "export const extension_settings = {};",
+  "export function getContext() {",
+  "  return globalThis.__stBmeTestContext || {",
+  "    chat: [],",
+  "    chatMetadata: {},",
+  "    extensionSettings: {},",
+  "    powerUserSettings: {},",
+  "    characters: {},",
+  "    characterId: null,",
+  "    name1: '玩家',",
+  "    name2: '艾琳',",
+  "    chatId: 'test-chat',",
+  "  };",
+  "}",
+].join("\n");
+
+const scriptShimSource = [
+  "export function getRequestHeaders() {",
+  "  return {};",
+  "}",
+  "export function substituteParamsExtended(value) {",
+  "  return String(value ?? 
'');", + "}", +].join("\n"); + +const openAiShimSource = [ + "export const chat_completion_sources = {};", + "export async function sendOpenAIRequest() {", + " throw new Error('sendOpenAIRequest should not be called in p3 test');", + "}", +].join("\n"); + +installResolveHooks([ + { + specifiers: [ + "../../../extensions.js", + "../../../../extensions.js", + "../../../../../extensions.js", + ], + url: toDataModuleUrl(extensionsShimSource), + }, + { + specifiers: [ + "../../../../script.js", + "../../../../../script.js", + ], + url: toDataModuleUrl(scriptShimSource), + }, + { + specifiers: [ + "../../../../openai.js", + "../../../../../openai.js", + ], + url: toDataModuleUrl(openAiShimSource), + }, +]); + +const { createEmptyGraph, addNode, createNode } = await import("../graph/graph.js"); +const { DEFAULT_NODE_SCHEMA } = await import("../graph/schema.js"); +const { extractMemories } = await import("../maintenance/extractor.js"); +const { appendSummaryEntry } = await import("../graph/summary-state.js"); +const { normalizeGraphSummaryState } = await import("../graph/summary-state.js"); +const { applyBatchStoryTime } = await import("../graph/story-timeline.js"); +const { defaultSettings } = await import("../runtime/settings-defaults.js"); + +function setTestOverrides(overrides = {}) { + globalThis.__stBmeTestOverrides = overrides; + return () => { + delete globalThis.__stBmeTestOverrides; + }; +} + +globalThis.__stBmeTestContext = { + chat: [], + chatMetadata: {}, + extensionSettings: {}, + powerUserSettings: {}, + characters: {}, + characterId: null, + name1: "玩家", + name2: "艾琳", + chatId: "test-chat", +}; + +const baseMessages = [ + { seq: 10, role: "user", content: "第一轮消息", name: "玩家", speaker: "玩家" }, + { seq: 11, role: "assistant", content: "第一轮回复", name: "艾琳", speaker: "艾琳" }, + { seq: 12, role: "user", content: "第二轮消息", name: "玩家", speaker: "玩家" }, + { seq: 13, role: "assistant", content: "第二轮回复", name: "艾琳", speaker: "艾琳" }, + { seq: 14, role: "user", content: 
"第三轮消息", name: "玩家", speaker: "玩家" }, + { seq: 15, role: "assistant", content: "第三轮回复", name: "艾琳", speaker: "艾琳" }, +]; + +function collectAllPromptContent(captured) { + return [ + String(captured.systemPrompt || ""), + String(captured.userPrompt || ""), + ...(Array.isArray(captured.promptMessages) ? captured.promptMessages : []).map( + (m) => String(m.content || ""), + ), + ...(Array.isArray(captured.additionalMessages) ? captured.additionalMessages : []).map( + (m) => String(m.content || ""), + ), + ].join("\n"); +} + +// ── Test 1: default settings — activeSummaries and storyTimeContext passed ── +{ + const graph = createEmptyGraph(); + normalizeGraphSummaryState(graph); + const entry = appendSummaryEntry(graph, { + text: "最近的局面总结测试文本", + messageRange: [5, 9], + level: 1, + }); + applyBatchStoryTime(graph, { label: "第二天清晨", tense: "ongoing" }, "extract"); + + let captured = null; + const restore = setTestOverrides({ + llm: { + async callLLMForJSON(payload) { + captured = payload; + return { operations: [], cognitionUpdates: [], regionUpdates: {} }; + }, + }, + }); + + try { + const result = await extractMemories({ + graph, + messages: baseMessages.slice(0, 2), + startSeq: 10, + endSeq: 11, + schema: DEFAULT_NODE_SCHEMA, + embeddingConfig: null, + settings: { ...defaultSettings }, + }); + + assert.equal(result.success, true); + assert.ok(captured, "LLM should be called"); + + const allContent = collectAllPromptContent(captured); + + // activeSummaries should be somewhere in prompt content + assert.match(allContent, /最近的局面总结测试文本/, "active summaries text should appear in prompt"); + + // storyTimeContext should be somewhere in prompt content + assert.match(allContent, /第二天清晨/, "story time label should appear in prompt"); + + // recentMessages block should contain the dialogue + const recentBlock = (Array.isArray(captured.promptMessages) ? 
captured.promptMessages : []).find( + (m) => m.sourceKey === "recentMessages", + ); + assert.ok(recentBlock, "recentMessages block should exist"); + assert.match(String(recentBlock.content || ""), /第一轮/, "recentMessages should contain dialogue content"); + } finally { + restore(); + } +} + +// ── Test 2: extractRecentMessageCap limits messages ── +{ + const graph = createEmptyGraph(); + let captured = null; + const restore = setTestOverrides({ + llm: { + async callLLMForJSON(payload) { + captured = payload; + return { operations: [], cognitionUpdates: [], regionUpdates: {} }; + }, + }, + }); + + try { + const result = await extractMemories({ + graph, + messages: baseMessages, + startSeq: 10, + endSeq: 15, + schema: DEFAULT_NODE_SCHEMA, + embeddingConfig: null, + settings: { + ...defaultSettings, + extractRecentMessageCap: 2, + }, + }); + + assert.equal(result.success, true); + assert.ok(captured); + + // With cap=2, only the last 2 messages (seq 14, 15) should be in the recentMessages block + const recentBlock = (Array.isArray(captured.promptMessages) ? 
captured.promptMessages : []).find( + (m) => m.sourceKey === "recentMessages", + ); + assert.ok(recentBlock, "recentMessages block should exist"); + const recentContent = String(recentBlock.content || ""); + assert.match(recentContent, /第三轮/, "capped messages should contain the last messages"); + assert.doesNotMatch(recentContent, /第一轮/, "capped messages should not contain early messages"); + } finally { + restore(); + } +} + +// ── Test 3: extractPromptStructuredMode = "structured" omits dialogueText ── +{ + const graph = createEmptyGraph(); + let captured = null; + const restore = setTestOverrides({ + llm: { + async callLLMForJSON(payload) { + captured = payload; + return { operations: [], cognitionUpdates: [], regionUpdates: {} }; + }, + }, + }); + + try { + const result = await extractMemories({ + graph, + messages: baseMessages.slice(0, 2), + startSeq: 10, + endSeq: 11, + schema: DEFAULT_NODE_SCHEMA, + embeddingConfig: null, + settings: { + ...defaultSettings, + extractPromptStructuredMode: "structured", + }, + }); + + assert.equal(result.success, true); + assert.ok(captured); + + // In structured mode, recentMessages block should still have structured content + const recentBlock = (Array.isArray(captured.promptMessages) ? 
captured.promptMessages : []).find( + (m) => m.sourceKey === "recentMessages", + ); + assert.ok(recentBlock, "recentMessages block should exist"); + const recentContent = String(recentBlock?.content || ""); + assert.ok(recentContent.length > 0, "recentMessages block should have content"); + // The full transcript should NOT appear in prompt content + // (structured mode excludes dialogueText) + const allContent = collectAllPromptContent(captured); + // In "structured" mode, the user prompt fallback or blocks may reference structured messages + assert.match(recentContent, /第一轮/, "structured messages should contain dialogue"); + } finally { + restore(); + } +} + +// ── Test 4: extractPromptStructuredMode = "transcript" passes string ── +{ + const graph = createEmptyGraph(); + let captured = null; + const restore = setTestOverrides({ + llm: { + async callLLMForJSON(payload) { + captured = payload; + return { operations: [], cognitionUpdates: [], regionUpdates: {} }; + }, + }, + }); + + try { + const result = await extractMemories({ + graph, + messages: baseMessages.slice(0, 2), + startSeq: 10, + endSeq: 11, + schema: DEFAULT_NODE_SCHEMA, + embeddingConfig: null, + settings: { + ...defaultSettings, + extractPromptStructuredMode: "transcript", + }, + }); + + assert.equal(result.success, true); + assert.ok(captured); + + // In transcript mode, the content should still be present in some form + const allContent = collectAllPromptContent(captured); + assert.match(allContent, /第一轮/, "transcript mode should have dialogue content"); + // recentMessages block should exist and have transcript content + const recentBlock = (Array.isArray(captured.promptMessages) ? 
captured.promptMessages : []).find( + (m) => m.sourceKey === "recentMessages", + ); + assert.ok(recentBlock, "recentMessages block should exist in transcript mode"); + } finally { + restore(); + } +} + +// ── Test 5: extractIncludeSummaries = false omits summaries ── +{ + const graph = createEmptyGraph(); + normalizeGraphSummaryState(graph); + appendSummaryEntry(graph, { + text: "这条总结不应出现", + messageRange: [5, 9], + level: 1, + }); + + let captured = null; + const restore = setTestOverrides({ + llm: { + async callLLMForJSON(payload) { + captured = payload; + return { operations: [], cognitionUpdates: [], regionUpdates: {} }; + }, + }, + }); + + try { + const result = await extractMemories({ + graph, + messages: baseMessages.slice(0, 2), + startSeq: 10, + endSeq: 11, + schema: DEFAULT_NODE_SCHEMA, + embeddingConfig: null, + settings: { + ...defaultSettings, + extractIncludeSummaries: false, + }, + }); + + assert.equal(result.success, true); + assert.ok(captured); + + const allContent = collectAllPromptContent(captured); + assert.doesNotMatch(allContent, /这条总结不应出现/, "summaries should be excluded when disabled"); + } finally { + restore(); + } +} + +// ── Test 6: extractIncludeStoryTime = false omits story time ── +{ + const graph = createEmptyGraph(); + applyBatchStoryTime(graph, { label: "隐藏的时间标签", tense: "ongoing" }, "extract"); + + let captured = null; + const restore = setTestOverrides({ + llm: { + async callLLMForJSON(payload) { + captured = payload; + return { operations: [], cognitionUpdates: [], regionUpdates: {} }; + }, + }, + }); + + try { + const result = await extractMemories({ + graph, + messages: baseMessages.slice(0, 2), + startSeq: 10, + endSeq: 11, + schema: DEFAULT_NODE_SCHEMA, + embeddingConfig: null, + settings: { + ...defaultSettings, + extractIncludeStoryTime: false, + }, + }); + + assert.equal(result.success, true); + assert.ok(captured); + + const allContent = collectAllPromptContent(captured); + assert.doesNotMatch(allContent, /隐藏的时间标签/, 
"story time should be excluded when disabled"); + } finally { + restore(); + } +} + +// ── Test 7: new settings exist in defaults ── +{ + assert.equal(defaultSettings.extractRecentMessageCap, 0); + assert.equal(defaultSettings.extractPromptStructuredMode, "both"); + assert.equal(defaultSettings.extractWorldbookMode, "active"); + assert.equal(defaultSettings.extractIncludeStoryTime, true); + assert.equal(defaultSettings.extractIncludeSummaries, true); +} + +console.log("extractor-phase3-layered-context tests passed"); diff --git a/tests/prompt-builder-mixed-transcript.mjs b/tests/prompt-builder-mixed-transcript.mjs new file mode 100644 index 0000000..fa2bd83 --- /dev/null +++ b/tests/prompt-builder-mixed-transcript.mjs @@ -0,0 +1,148 @@ +import assert from "node:assert/strict"; +import { + installResolveHooks, + toDataModuleUrl, +} from "./helpers/register-hooks-compat.mjs"; + +const extensionsShimSource = [ + "export const extension_settings = {};", + "export function getContext() {", + " return {", + " chat: [],", + " chatMetadata: {},", + " extensionSettings: {},", + " powerUserSettings: {},", + " characters: {},", + " characterId: null,", + " name1: '',", + " name2: '',", + " chatId: 'test-chat',", + " };", + "}", +].join("\n"); + +const scriptShimSource = [ + "export function substituteParamsExtended(value) {", + " return String(value ?? 
'');", + "}", +].join("\n"); + +installResolveHooks([ + { + specifiers: [ + "../../../extensions.js", + "../../../../extensions.js", + "../../../../../extensions.js", + ], + url: toDataModuleUrl(extensionsShimSource), + }, + { + specifiers: [ + "../../../../script.js", + "../../../../../script.js", + ], + url: toDataModuleUrl(scriptShimSource), + }, +]); + +const { buildTaskLlmPayload, buildTaskPrompt } = await import("../prompting/prompt-builder.js"); +const { createDefaultTaskProfiles } = await import("../prompting/prompt-profiles.js"); +const { initializeHostAdapter } = await import("../host/adapter/index.js"); + +initializeHostAdapter({}); + +const settings = { + taskProfilesVersion: 3, + taskProfiles: createDefaultTaskProfiles(), +}; +const extractProfile = settings.taskProfiles.extract.profiles[0]; +extractProfile.regex = { + ...(extractProfile.regex || {}), + enabled: true, + inheritStRegex: false, + sources: { + global: false, + preset: false, + character: false, + }, + stages: { + ...(extractProfile.regex?.stages || {}), + input: true, + "input.recentMessages": true, + "input.finalPrompt": false, + }, + localRules: [ + { + id: "assistant-local-role-aware", + script_name: "assistant-local-role-aware", + enabled: true, + find_regex: "/继续说明/g", + replace_string: "助手已净化", + source: { + user_input: false, + ai_output: true, + }, + destination: { + prompt: true, + display: false, + }, + }, + { + id: "user-local-role-aware", + script_name: "user-local-role-aware", + enabled: true, + find_regex: "/用户输入/g", + replace_string: "用户已净化", + source: { + user_input: true, + ai_output: false, + }, + destination: { + prompt: true, + display: false, + }, + }, + ], +}; + +const promptBuild = await buildTaskPrompt(settings, "extract", { + taskName: "extract", + charDescription: "", + userPersona: "", + recentMessages: "这里会被 chatMessages 回填", + chatMessages: [ + { + seq: 41, + role: "assistant", + content: "继续说明", + name: "艾琳", + speaker: "艾琳", + }, + { + seq: 42, + role: 
"user", + content: "用户输入", + name: "玩家", + speaker: "玩家", + }, + ], + graphStats: "node_count=1", + schema: "event(title, summary)", + currentRange: "41 ~ 42", +}); +const payload = buildTaskLlmPayload(promptBuild, "fallback-user"); +const recentBlock = payload.promptMessages.find( + (message) => message.sourceKey === "recentMessages", +); +assert.match(String(recentBlock?.content || ""), /#41 \[assistant\|艾琳\]: 助手已净化/); +assert.match(String(recentBlock?.content || ""), /#42 \[user\|玩家\]: 用户已净化/); +assert.doesNotMatch( + String(recentBlock?.content || ""), + /#41 \[assistant\|艾琳\]: 用户已净化/, +); +assert.doesNotMatch( + String(recentBlock?.content || ""), + /#42 \[user\|玩家\]: 助手已净化/, +); + +console.log("prompt-builder-mixed-transcript tests passed"); diff --git a/tests/recall-authoritative-generation-input.mjs b/tests/recall-authoritative-generation-input.mjs new file mode 100644 index 0000000..912d49f --- /dev/null +++ b/tests/recall-authoritative-generation-input.mjs @@ -0,0 +1,129 @@ +import assert from "node:assert/strict"; + +import { MODULE_NAME } from "../graph/graph-persistence.js"; +import { + buildRecallRecentMessagesController, + resolveRecallInputController, +} from "../retrieval/recall-controller.js"; +import { createGenerationRecallHarness } from "./helpers/generation-recall-harness.mjs"; + +async function testSendIntentCanRemainAuthoritativeQueryWhenFlagEnabled() { + const harness = await createGenerationRecallHarness(); + harness.extension_settings[MODULE_NAME] = { + recallUseAuthoritativeGenerationInput: true, + }; + harness.chat = [{ is_user: true, mes: "旧的 chat tail" }]; + harness.pendingRecallSendIntent = { + text: "刚触发发送的新输入", + hash: "hash-phase4-send-intent", + at: Date.now(), + source: "dom-intent", + }; + + await harness.result.onGenerationAfterCommands("normal", {}, false); + + assert.equal(harness.runRecallCalls.length, 1); + assert.equal(harness.runRecallCalls[0].overrideUserMessage, "刚触发发送的新输入"); + 
assert.equal(harness.runRecallCalls[0].overrideSource, "send-intent"); + assert.equal(harness.runRecallCalls[0].targetUserMessageIndex, 0); + assert.equal(harness.runRecallCalls[0].includeSyntheticUserMessage, true); + + const transaction = [...harness.result.generationRecallTransactions.values()][0]; + assert.ok(transaction); + assert.equal( + transaction.frozenRecallOptions.overrideUserMessage, + "刚触发发送的新输入", + ); + assert.equal(transaction.frozenRecallOptions.lockedSource, "send-intent"); + assert.equal(transaction.frozenRecallOptions.targetUserMessageIndex, 0); + assert.equal(transaction.frozenRecallOptions.authoritativeInputUsed, true); + assert.equal(transaction.frozenRecallOptions.boundUserFloorText, "旧的 chat tail"); + assert.equal(transaction.frozenRecallOptions.includeSyntheticUserMessage, true); +} + +async function testHostSnapshotCanRemainAuthoritativeQueryWhenFlagEnabled() { + const harness = await createGenerationRecallHarness(); + harness.extension_settings[MODULE_NAME] = { + recallUseAuthoritativeGenerationInput: true, + }; + harness.chat = [{ is_user: true, mes: "旧的 chat tail" }]; + const frozenSnapshot = harness.result.freezeHostGenerationInputSnapshot( + "宿主快照输入", + ); + + await harness.result.onGenerationAfterCommands( + "normal", + { frozenInputSnapshot: frozenSnapshot }, + false, + ); + + assert.equal(harness.runRecallCalls.length, 1); + assert.equal(harness.runRecallCalls[0].overrideUserMessage, "宿主快照输入"); + assert.equal( + harness.runRecallCalls[0].overrideSource, + "host-generation-lifecycle", + ); + assert.equal(harness.runRecallCalls[0].targetUserMessageIndex, 0); + assert.equal(harness.runRecallCalls[0].includeSyntheticUserMessage, true); + assert.equal( + JSON.stringify( + harness.runRecallCalls[0].sourceCandidates.map((candidate) => candidate.source), + ), + JSON.stringify(["host-generation-lifecycle", "chat-tail-user"]), + ); + + const transaction = [...harness.result.generationRecallTransactions.values()][0]; + 
assert.ok(transaction); + assert.equal(transaction.frozenRecallOptions.overrideUserMessage, "宿主快照输入"); + assert.equal( + transaction.frozenRecallOptions.lockedSource, + "host-generation-lifecycle", + ); + assert.equal(transaction.frozenRecallOptions.targetUserMessageIndex, 0); + assert.equal(transaction.frozenRecallOptions.authoritativeInputUsed, true); + assert.equal(transaction.frozenRecallOptions.boundUserFloorText, "旧的 chat tail"); + assert.equal(transaction.frozenRecallOptions.includeSyntheticUserMessage, true); +} + +function testResolveRecallInputControllerAppendsSyntheticAuthoritativeUserMessage() { + const runtime = { + normalizeRecallInputText(value = "") { + return String(value || "").trim(); + }, + buildRecallRecentMessages(chat, limit, syntheticUserMessage = "") { + return buildRecallRecentMessagesController(chat, limit, syntheticUserMessage, { + formatRecallContextLine(message) { + return `[${message?.is_user ? "user" : "assistant"}]: ${String(message?.mes || "")}`; + }, + normalizeRecallInputText(value = "") { + return String(value || "").trim(); + }, + }); + }, + }; + const result = resolveRecallInputController( + [{ is_user: true, mes: "旧的 chat tail" }], + 4, + { + overrideUserMessage: "权威输入", + overrideSource: "send-intent", + includeSyntheticUserMessage: true, + }, + runtime, + ); + + assert.equal(result.userMessage, "权威输入"); + assert.equal(result.source, "send-intent"); + assert.equal(result.authoritativeInputUsed, false); + assert.equal(result.boundUserFloorText, ""); + assert.deepEqual(result.recentMessages, [ + "[user]: 旧的 chat tail", + "[user]: 权威输入", + ]); +} + +await testSendIntentCanRemainAuthoritativeQueryWhenFlagEnabled(); +await testHostSnapshotCanRemainAuthoritativeQueryWhenFlagEnabled(); +testResolveRecallInputControllerAppendsSyntheticAuthoritativeUserMessage(); + +console.log("recall-authoritative-generation-input tests passed");