feat: add isContextOnly flag to extraction messages to prevent repeated extraction

- buildExtractionMessages marks messages that precede startIdx with isContextOnly (index < startIdx)
- formatExtractionTranscript inserts section dividers for mixed context/target
- extractor.js Layer 1 prompt splits context review vs extraction target with guidance
- Add tests/extraction-context-only-flag.mjs (7 test cases)
This commit is contained in:
Youzini-afk
2026-04-12 12:45:55 +08:00
parent 84eaecbecc
commit a8e3169002
4 changed files with 208 additions and 21 deletions

View File

@@ -286,6 +286,7 @@ export function buildExtractionMessages(chat, startIdx, endIdx, settings) {
rawContent: String(msg?.mes ?? ""),
name: String(msg?.name ?? "").trim(),
speaker: String(msg?.name ?? "").trim(),
isContextOnly: index < startIdx,
});
}

View File

@@ -307,6 +307,7 @@ function normalizeExtractionMessage(message = {}, index = 0, names = {}) {
content,
rawContent,
sourceType: role === "user" ? "user_input" : "ai_output",
isContextOnly: message?.isContextOnly === true,
};
}
@@ -322,18 +323,38 @@ function countRoles(messages = []) {
}
/**
 * Renders extraction messages as a plain-text transcript. Each message becomes
 * one entry of the form `#<seq> [<role>|<speaker>]: <content>`, and entries are
 * joined with a blank line ("\n\n").
 *
 * NOTE(review): this body contains two implementations back to back. The first
 * `return` (the map/filter/join chain) looks like the removed side of a diff
 * and the loop that follows looks like the added side. As the text stands, the
 * first `return` always exits, so everything after it is unreachable — confirm
 * against the real file which version is live.
 *
 * Fields read from each message: `seq`, `role`, `speaker`, `name`, `content`,
 * and (second implementation only) `isContextOnly`.
 *
 * @param {Array<object>} [messages=[]] - Messages to render; any non-array
 *   value is treated as an empty list.
 * @returns {string} The transcript entries joined by "\n\n".
 */
export function formatExtractionTranscript(messages = []) {
// --- First implementation: no context/target dividers. ---
return (Array.isArray(messages) ? messages : [])
.map((message, index) => {
// Use the message's own seq when it coerces to a finite number; otherwise
// fall back to the 1-based position in the list.
const seqLabel = Number.isFinite(Number(message?.seq))
? `#${Number(message.seq)}`
: `#${index + 1}`;
// A missing/falsy role is passed to normalizeRole as "assistant".
// normalizeRole is defined outside this block — presumably it
// canonicalizes role names; verify.
const role = normalizeRole(message?.role || "assistant");
// Prefer `speaker`, fall back to `name`; omit the "|speaker" suffix when blank.
const speaker = String(message?.speaker || message?.name || "").trim();
const speakerLabel = speaker ? `|${speaker}` : "";
return `${seqLabel} [${role}${speakerLabel}]: ${String(message?.content || "")}`;
})
// Defensive: drop blank entries. Every entry starts with "#", so this looks
// like it never removes anything.
.filter((item) => String(item || "").trim())
.join("\n\n");
// --- Second implementation: same entry format, plus section dividers. ---
const safeMessages = Array.isArray(messages) ? messages : [];
// Dividers are only emitted when the list mixes both kinds of message.
// Only a strict `true` counts as context-only; anything else is a target.
const hasContextMessages = safeMessages.some((m) => m?.isContextOnly === true);
const hasTargetMessages = safeMessages.some((m) => m?.isContextOnly !== true);
const lines = [];
// Section of the previously emitted divider. Starts as null so the first
// message of a mixed list always triggers a divider.
let inContext = null;
for (let index = 0; index < safeMessages.length; index += 1) {
const message = safeMessages[index];
const isContext = message?.isContextOnly === true;
// Emit a divider whenever the section changes (context <-> target).
if (hasContextMessages && hasTargetMessages && isContext !== inContext) {
if (isContext) {
// Divider text, roughly: "The following is a context recap (already
// extracted), only for understanding the plot".
lines.push("--- 以下是上下文回顾(已提取过),仅供理解剧情 ---");
} else {
// Divider text, roughly: "The following is the new dialogue content
// from which memories need to be extracted this time".
lines.push("--- 以下是本次需要提取记忆的新对话内容 ---");
}
inContext = isContext;
}
// Same seq / role / speaker labelling as the first implementation.
const seqLabel = Number.isFinite(Number(message?.seq))
? `#${Number(message.seq)}`
: `#${index + 1}`;
const role = normalizeRole(message?.role || "assistant");
const speaker = String(message?.speaker || message?.name || "").trim();
const speakerLabel = speaker ? `|${speaker}` : "";
const line = `${seqLabel} [${role}${speakerLabel}]: ${String(message?.content || "")}`;
// Defensive blank check, mirroring the filter in the first implementation.
if (String(line || "").trim()) {
lines.push(line);
}
}
// Dividers and entries are all separated by a blank line.
return lines.join("\n\n");
}
export function buildExtractionInputContext(

View File

@@ -873,6 +873,7 @@ export async function extractMemories({
content: message?.content,
speaker: message?.speaker,
name: message?.name,
isContextOnly: message?.isContextOnly === true,
}))
: [];
@@ -957,15 +958,39 @@ export async function extractMemories({
// 用户提示词 — Phase 3 分层信息结构
const userPromptSections = [];
// Layer 1: 当前对话切片
if (dialogueText) {
userPromptSections.push("## 当前对话内容(需提取记忆)", dialogueText, "");
} else if (structuredMode === "structured" && structuredMessages.length > 0) {
userPromptSections.push(
"## 当前对话内容(结构化消息,需提取记忆)",
"(结构化消息已通过 profile blocks 注入,请参考上方 recentMessages 块。)",
"",
);
// Layer 1: 当前对话切片(区分上下文回顾 vs 提取目标)
{
const hasContextMessages = structuredMessages.some((m) => m?.isContextOnly === true);
const hasTargetMessages = structuredMessages.some((m) => m?.isContextOnly !== true);
if (dialogueText) {
if (hasContextMessages && hasTargetMessages) {
userPromptSections.push(
"## 对话内容",
"以下对话包含两部分:已提取过的上下文回顾(仅供理解前情)和本次需要提取记忆的新内容。" +
"请**只从新内容中提取记忆**,不要重复提取上下文回顾中已有的信息。",
dialogueText,
"",
);
} else {
userPromptSections.push("## 当前对话内容(需提取记忆)", dialogueText, "");
}
} else if (structuredMode === "structured" && structuredMessages.length > 0) {
if (hasContextMessages && hasTargetMessages) {
userPromptSections.push(
"## 对话内容(结构化消息)",
"以下结构化消息包含两部分:标记为 isContextOnly 的是已提取过的上下文回顾(仅供理解前情)," +
"其余是本次需要提取记忆的新内容。请**只从 isContextOnly 为 false 的消息中提取记忆**。" +
"(结构化消息已通过 profile blocks 注入,请参考上方 recentMessages 块。)",
"",
);
} else {
userPromptSections.push(
"## 当前对话内容(结构化消息,需提取记忆)",
"(结构化消息已通过 profile blocks 注入,请参考上方 recentMessages 块。)",
"",
);
}
}
}
// Layer 2: 当前图谱状态