diff --git a/maintenance/chat-history.js b/maintenance/chat-history.js index 4595e9c..a0e2a14 100644 --- a/maintenance/chat-history.js +++ b/maintenance/chat-history.js @@ -286,6 +286,7 @@ export function buildExtractionMessages(chat, startIdx, endIdx, settings) { rawContent: String(msg?.mes ?? ""), name: String(msg?.name ?? "").trim(), speaker: String(msg?.name ?? "").trim(), + isContextOnly: index < startIdx, }); } diff --git a/maintenance/extraction-context.js b/maintenance/extraction-context.js index 201f05d..0658fe9 100644 --- a/maintenance/extraction-context.js +++ b/maintenance/extraction-context.js @@ -307,6 +307,7 @@ function normalizeExtractionMessage(message = {}, index = 0, names = {}) { content, rawContent, sourceType: role === "user" ? "user_input" : "ai_output", + isContextOnly: message?.isContextOnly === true, }; } @@ -322,18 +323,38 @@ function countRoles(messages = []) { } export function formatExtractionTranscript(messages = []) { - return (Array.isArray(messages) ? messages : []) - .map((message, index) => { - const seqLabel = Number.isFinite(Number(message?.seq)) - ? `#${Number(message.seq)}` - : `#${index + 1}`; - const role = normalizeRole(message?.role || "assistant"); - const speaker = String(message?.speaker || message?.name || "").trim(); - const speakerLabel = speaker ? `|${speaker}` : ""; - return `${seqLabel} [${role}${speakerLabel}]: ${String(message?.content || "")}`; - }) - .filter((item) => String(item || "").trim()) - .join("\n\n"); + const safeMessages = Array.isArray(messages) ? messages : []; + const hasContextMessages = safeMessages.some((m) => m?.isContextOnly === true); + const hasTargetMessages = safeMessages.some((m) => m?.isContextOnly !== true); + const lines = []; + let inContext = null; + + for (let index = 0; index < safeMessages.length; index += 1) { + const message = safeMessages[index]; + const isContext = message?.isContextOnly === true; + + if (hasContextMessages && hasTargetMessages && isContext !== inContext) { + if (isContext) { + lines.push("--- 以下是上下文回顾(已提取过),仅供理解剧情 ---"); + } else { + lines.push("--- 以下是本次需要提取记忆的新对话内容 ---"); + } + inContext = isContext; + } + + const seqLabel = Number.isFinite(Number(message?.seq)) + ? `#${Number(message.seq)}` + : `#${index + 1}`; + const role = normalizeRole(message?.role || "assistant"); + const speaker = String(message?.speaker || message?.name || "").trim(); + const speakerLabel = speaker ? `|${speaker}` : ""; + const line = `${seqLabel} [${role}${speakerLabel}]: ${String(message?.content || "")}`; + if (String(line || "").trim()) { + lines.push(line); + } + } + + return lines.join("\n\n"); } export function buildExtractionInputContext( diff --git a/maintenance/extractor.js b/maintenance/extractor.js index efafd7b..97c82ee 100644 --- a/maintenance/extractor.js +++ b/maintenance/extractor.js @@ -873,6 +873,7 @@ export async function extractMemories({ content: message?.content, speaker: message?.speaker, name: message?.name, + isContextOnly: message?.isContextOnly === true, })) : []; @@ -957,15 +958,39 @@ export async function extractMemories({ // 用户提示词 — Phase 3 分层信息结构 const userPromptSections = []; - // Layer 1: 当前对话切片 - if (dialogueText) { - userPromptSections.push("## 当前对话内容(需提取记忆)", dialogueText, ""); - } else if (structuredMode === "structured" && structuredMessages.length > 0) { - userPromptSections.push( - "## 当前对话内容(结构化消息,需提取记忆)", - "(结构化消息已通过 profile blocks 注入,请参考上方 recentMessages 块。)", - "", - ); + // Layer 1: 当前对话切片(区分上下文回顾 vs 提取目标) + { + const hasContextMessages = structuredMessages.some((m) => m?.isContextOnly === true); + const hasTargetMessages = structuredMessages.some((m) => m?.isContextOnly !== true); + if (dialogueText) { + if (hasContextMessages && hasTargetMessages) { + userPromptSections.push( + "## 对话内容", + "以下对话包含两部分:已提取过的上下文回顾(仅供理解前情)和本次需要提取记忆的新内容。" + + "请**只从新内容中提取记忆**,不要重复提取上下文回顾中已有的信息。", + dialogueText, + "", + ); + } else { + userPromptSections.push("## 当前对话内容(需提取记忆)", dialogueText, ""); + } + } else if (structuredMode === "structured" && structuredMessages.length > 0) { + if (hasContextMessages && hasTargetMessages) { + userPromptSections.push( + "## 对话内容(结构化消息)", + "以下结构化消息包含两部分:标记为 isContextOnly 的是已提取过的上下文回顾(仅供理解前情)," + + "其余是本次需要提取记忆的新内容。请**只从 isContextOnly 为 false 的消息中提取记忆**。" + + "(结构化消息已通过 profile blocks 注入,请参考上方 recentMessages 块。)", + "", + ); + } else { + userPromptSections.push( + "## 当前对话内容(结构化消息,需提取记忆)", + "(结构化消息已通过 profile blocks 注入,请参考上方 recentMessages 块。)", + "", + ); + } + } } // Layer 2: 当前图谱状态 diff --git a/tests/extraction-context-only-flag.mjs b/tests/extraction-context-only-flag.mjs new file mode 100644 index 0000000..8bf703a --- /dev/null +++ b/tests/extraction-context-only-flag.mjs @@ -0,0 +1,140 @@ +import assert from "node:assert/strict"; +import { + buildExtractionMessages, +} from "../maintenance/chat-history.js"; +import { + buildExtractionInputContext, + formatExtractionTranscript, +} from "../maintenance/extraction-context.js"; + +// ─── buildExtractionMessages: isContextOnly flag ─── + +const chat = [ + { is_user: false, is_system: true, mes: "greeting" }, + { is_user: true, is_system: false, mes: "user-1" }, + { is_user: false, is_system: false, mes: "assistant-1" }, + { is_user: true, is_system: false, mes: "user-2" }, + { is_user: false, is_system: false, mes: "assistant-2" }, + { is_user: true, is_system: false, mes: "user-3" }, + { is_user: false, is_system: false, mes: "assistant-3" }, +]; + +{ + const messages = buildExtractionMessages(chat, 4, 6, { + extractContextTurns: 2, + }); + const contextOnly = messages.filter((m) => m.isContextOnly); + const target = messages.filter((m) => !m.isContextOnly); + + assert.ok( + contextOnly.length > 0, + "should have context-only messages when extractContextTurns > 0", + ); + assert.ok( + target.length > 0, + "should have extraction target messages", + ); + assert.ok( + contextOnly.every((m) => m.seq < 4), + "context-only messages should have seq < startIdx", + ); + assert.ok( + target.every((m) => m.seq >= 4), + "target messages should have seq >= startIdx", + ); + console.log(" ✓ buildExtractionMessages: isContextOnly flag marks context vs target"); +} + +{ + const messages = buildExtractionMessages(chat, 2, 6, { + extractContextTurns: 0, + }); + const contextOnly = messages.filter((m) => m.isContextOnly); + assert.equal( + contextOnly.length, + 0, + "no context-only messages when extractContextTurns=0 and startIdx=2", + ); + console.log(" ✓ buildExtractionMessages: no context-only when contextTurns=0"); +} + +{ + const messages = buildExtractionMessages(chat, 1, 6, { + extractContextTurns: 2, + }); + const contextOnly = messages.filter((m) => m.isContextOnly); + assert.equal( + contextOnly.length, + 0, + "no context-only when startIdx is already at the beginning", + ); + console.log(" ✓ buildExtractionMessages: no context-only when startIdx at beginning"); +} + +// ─── formatExtractionTranscript: section dividers ─── + +{ + const mixed = [ + { seq: 1, role: "user", content: "context user", speaker: "A", isContextOnly: true }, + { seq: 2, role: "assistant", content: "context ai", speaker: "B", isContextOnly: true }, + { seq: 3, role: "user", content: "target user", speaker: "A", isContextOnly: false }, + { seq: 4, role: "assistant", content: "target ai", speaker: "B", isContextOnly: false }, + ]; + const transcript = formatExtractionTranscript(mixed); + assert.match(transcript, /已提取过/, "transcript should contain context review header"); + assert.match(transcript, /本次需要提取/, "transcript should contain extraction target header"); + assert.ok( + transcript.indexOf("已提取过") < transcript.indexOf("本次需要提取"), + "context header should appear before target header", + ); + assert.match(transcript, /#1.*context user/, "context message should appear"); + assert.match(transcript, /#3.*target user/, "target message should appear"); + console.log(" ✓ formatExtractionTranscript: section dividers for mixed context/target"); +} + +{ + const allTarget = [ + { seq: 3, role: "user", content: "user msg", speaker: "A", isContextOnly: false }, + { seq: 4, role: "assistant", content: "ai msg", speaker: "B", isContextOnly: false }, + ]; + const transcript = formatExtractionTranscript(allTarget); + assert.doesNotMatch(transcript, /已提取过/, "no context header when all are target"); + assert.doesNotMatch(transcript, /本次需要提取/, "no target header when all are target"); + console.log(" ✓ formatExtractionTranscript: no dividers when all messages are targets"); +} + +{ + const allContext = [ + { seq: 1, role: "user", content: "user msg", speaker: "A", isContextOnly: true }, + { seq: 2, role: "assistant", content: "ai msg", speaker: "B", isContextOnly: true }, + ]; + const transcript = formatExtractionTranscript(allContext); + assert.doesNotMatch(transcript, /已提取过/, "no dividers when all are context-only"); + assert.doesNotMatch(transcript, /本次需要提取/, "no dividers when all are context-only"); + console.log(" ✓ formatExtractionTranscript: no dividers when all messages are context-only"); +} + +// ─── buildExtractionInputContext: isContextOnly propagation ─── + +{ + const inputMessages = [ + { seq: 1, role: "user", content: "old question", name: "A", speaker: "A", isContextOnly: true }, + { seq: 2, role: "assistant", content: "old answer", name: "B", speaker: "B", isContextOnly: true }, + { seq: 3, role: "user", content: "new question", name: "A", speaker: "A", isContextOnly: false }, + { seq: 4, role: "assistant", content: "new answer", name: "B", speaker: "B", isContextOnly: false }, + ]; + const result = buildExtractionInputContext(inputMessages, { + settings: {}, + userName: "A", + charName: "B", + }); + const contextFiltered = result.filteredMessages.filter((m) => m.isContextOnly); + const targetFiltered = result.filteredMessages.filter((m) => !m.isContextOnly); + assert.equal(contextFiltered.length, 2, "context messages propagated through filtering"); + assert.equal(targetFiltered.length, 2, "target messages propagated through filtering"); + assert.match(result.filteredTranscript, /已提取过/, "transcript includes context header"); + assert.match(result.filteredTranscript, /本次需要提取/, "transcript includes target header"); + console.log(" ✓ buildExtractionInputContext: isContextOnly propagated to filteredMessages and transcript"); +} + +console.log("extraction-context-only-flag tests passed");