mirror of
https://github.com/Youzini-afk/ST-Bionic-Memory-Ecology.git
synced 2026-06-13 18:31:16 +08:00
feat: add isContextOnly flag to extraction messages to prevent repeated extraction
- buildExtractionMessages marks messages with isContextOnly (seq < startIdx)
- formatExtractionTranscript inserts section dividers for mixed context/target
- extractor.js Layer 1 prompt splits context review vs extraction target with guidance
- Add tests/extraction-context-only-flag.mjs (7 test cases)
This commit is contained in:
@@ -286,6 +286,7 @@ export function buildExtractionMessages(chat, startIdx, endIdx, settings) {
|
||||
rawContent: String(msg?.mes ?? ""),
|
||||
name: String(msg?.name ?? "").trim(),
|
||||
speaker: String(msg?.name ?? "").trim(),
|
||||
isContextOnly: index < startIdx,
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
@@ -307,6 +307,7 @@ function normalizeExtractionMessage(message = {}, index = 0, names = {}) {
|
||||
content,
|
||||
rawContent,
|
||||
sourceType: role === "user" ? "user_input" : "ai_output",
|
||||
isContextOnly: message?.isContextOnly === true,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -322,18 +323,38 @@ function countRoles(messages = []) {
|
||||
}
|
||||
|
||||
/**
 * Render extraction messages as a plain-text transcript.
 *
 * Each message becomes one line of the form `#<seq> [<role>|<speaker>]: <content>`;
 * the `|<speaker>` part is omitted when neither `speaker` nor `name` is set, and
 * `#<index + 1>` is used when `seq` is not a finite number.
 *
 * When the input mixes context-only messages (`isContextOnly === true`) with
 * extraction targets, a divider line is inserted every time the transcript
 * switches between the two kinds. When all messages are of one kind, no
 * divider is emitted at all.
 *
 * @param {Array<object>} [messages=[]] Messages to render; a non-array value is
 *   treated as an empty list.
 * @returns {string} Transcript entries joined by blank lines ("" for no messages).
 */
export function formatExtractionTranscript(messages = []) {
  const safeMessages = Array.isArray(messages) ? messages : [];
  const hasContextMessages = safeMessages.some((m) => m?.isContextOnly === true);
  const hasTargetMessages = safeMessages.some((m) => m?.isContextOnly !== true);
  // Dividers are only meaningful when both kinds are present; computed once
  // here instead of on every loop iteration.
  const needsDividers = hasContextMessages && hasTargetMessages;
  const lines = [];
  // `null` means "no section opened yet", so the first message always opens one.
  let inContext = null;

  for (let index = 0; index < safeMessages.length; index += 1) {
    const message = safeMessages[index];
    const isContext = message?.isContextOnly === true;

    if (needsDividers && isContext !== inContext) {
      if (isContext) {
        lines.push("--- 以下是上下文回顾(已提取过),仅供理解剧情 ---");
      } else {
        lines.push("--- 以下是本次需要提取记忆的新对话内容 ---");
      }
      inContext = isContext;
    }

    const seqLabel = Number.isFinite(Number(message?.seq))
      ? `#${Number(message.seq)}`
      : `#${index + 1}`;
    const role = normalizeRole(message?.role || "assistant");
    const speaker = String(message?.speaker || message?.name || "").trim();
    const speakerLabel = speaker ? `|${speaker}` : "";

    // The rendered line always starts with the `#…` label, so it can never be
    // blank; no emptiness filter is needed before pushing it.
    lines.push(`${seqLabel} [${role}${speakerLabel}]: ${String(message?.content || "")}`);
  }

  return lines.join("\n\n");
}
|
||||
|
||||
export function buildExtractionInputContext(
|
||||
|
||||
@@ -873,6 +873,7 @@ export async function extractMemories({
|
||||
content: message?.content,
|
||||
speaker: message?.speaker,
|
||||
name: message?.name,
|
||||
isContextOnly: message?.isContextOnly === true,
|
||||
}))
|
||||
: [];
|
||||
|
||||
@@ -957,15 +958,39 @@ export async function extractMemories({
|
||||
// 用户提示词 — Phase 3 分层信息结构
|
||||
const userPromptSections = [];
|
||||
|
||||
// Layer 1: 当前对话切片
|
||||
if (dialogueText) {
|
||||
userPromptSections.push("## 当前对话内容(需提取记忆)", dialogueText, "");
|
||||
} else if (structuredMode === "structured" && structuredMessages.length > 0) {
|
||||
userPromptSections.push(
|
||||
"## 当前对话内容(结构化消息,需提取记忆)",
|
||||
"(结构化消息已通过 profile blocks 注入,请参考上方 recentMessages 块。)",
|
||||
"",
|
||||
);
|
||||
// Layer 1: 当前对话切片(区分上下文回顾 vs 提取目标)
|
||||
{
|
||||
const hasContextMessages = structuredMessages.some((m) => m?.isContextOnly === true);
|
||||
const hasTargetMessages = structuredMessages.some((m) => m?.isContextOnly !== true);
|
||||
if (dialogueText) {
|
||||
if (hasContextMessages && hasTargetMessages) {
|
||||
userPromptSections.push(
|
||||
"## 对话内容",
|
||||
"以下对话包含两部分:已提取过的上下文回顾(仅供理解前情)和本次需要提取记忆的新内容。" +
|
||||
"请**只从新内容中提取记忆**,不要重复提取上下文回顾中已有的信息。",
|
||||
dialogueText,
|
||||
"",
|
||||
);
|
||||
} else {
|
||||
userPromptSections.push("## 当前对话内容(需提取记忆)", dialogueText, "");
|
||||
}
|
||||
} else if (structuredMode === "structured" && structuredMessages.length > 0) {
|
||||
if (hasContextMessages && hasTargetMessages) {
|
||||
userPromptSections.push(
|
||||
"## 对话内容(结构化消息)",
|
||||
"以下结构化消息包含两部分:标记为 isContextOnly 的是已提取过的上下文回顾(仅供理解前情)," +
|
||||
"其余是本次需要提取记忆的新内容。请**只从 isContextOnly 为 false 的消息中提取记忆**。" +
|
||||
"(结构化消息已通过 profile blocks 注入,请参考上方 recentMessages 块。)",
|
||||
"",
|
||||
);
|
||||
} else {
|
||||
userPromptSections.push(
|
||||
"## 当前对话内容(结构化消息,需提取记忆)",
|
||||
"(结构化消息已通过 profile blocks 注入,请参考上方 recentMessages 块。)",
|
||||
"",
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Layer 2: 当前图谱状态
|
||||
|
||||
140
tests/extraction-context-only-flag.mjs
Normal file
140
tests/extraction-context-only-flag.mjs
Normal file
@@ -0,0 +1,140 @@
|
||||
import assert from "node:assert/strict";
|
||||
import {
|
||||
buildExtractionMessages,
|
||||
} from "../maintenance/chat-history.js";
|
||||
import {
|
||||
buildExtractionInputContext,
|
||||
formatExtractionTranscript,
|
||||
} from "../maintenance/extraction-context.js";
|
||||
|
||||
// ─── buildExtractionMessages: isContextOnly flag ───

// Shared chat fixture: a system greeting at index 0 followed by three
// user/assistant exchanges (indices 1-6).
const chat = [
  { is_user: false, is_system: true, mes: "greeting" },
  { is_user: true, is_system: false, mes: "user-1" },
  { is_user: false, is_system: false, mes: "assistant-1" },
  { is_user: true, is_system: false, mes: "user-2" },
  { is_user: false, is_system: false, mes: "assistant-2" },
  { is_user: true, is_system: false, mes: "user-3" },
  { is_user: false, is_system: false, mes: "assistant-3" },
];

// Case 1: a look-back window before startIdx yields both context-only and
// target messages, split exactly at seq 4.
{
  const messages = buildExtractionMessages(chat, 4, 6, {
    extractContextTurns: 2,
  });
  const contextOnly = messages.filter((m) => m.isContextOnly);
  const target = messages.filter((m) => !m.isContextOnly);

  assert.ok(
    contextOnly.length > 0,
    "should have context-only messages when extractContextTurns > 0",
  );
  assert.ok(
    target.length > 0,
    "should have extraction target messages",
  );
  assert.ok(
    contextOnly.every((m) => m.seq < 4),
    "context-only messages should have seq < startIdx",
  );
  assert.ok(
    target.every((m) => m.seq >= 4),
    "target messages should have seq >= startIdx",
  );
  console.log(" ✓ buildExtractionMessages: isContextOnly flag marks context vs target");
}

// Case 2: with no context turns requested, nothing is flagged as context-only.
{
  const messages = buildExtractionMessages(chat, 2, 6, {
    extractContextTurns: 0,
  });
  const contextOnly = messages.filter((m) => m.isContextOnly);
  assert.equal(
    contextOnly.length,
    0,
    "no context-only messages when extractContextTurns=0 and startIdx=2",
  );
  console.log(" ✓ buildExtractionMessages: no context-only when contextTurns=0");
}

// Case 3: context turns are requested, but startIdx is already the first
// non-system message, so nothing is expected to be flagged as context-only.
{
  const messages = buildExtractionMessages(chat, 1, 6, {
    extractContextTurns: 2,
  });
  const contextOnly = messages.filter((m) => m.isContextOnly);
  assert.equal(
    contextOnly.length,
    0,
    "no context-only when startIdx is already at the beginning",
  );
  console.log(" ✓ buildExtractionMessages: no context-only when startIdx at beginning");
}
|
||||
|
||||
// ─── formatExtractionTranscript: section dividers ───

// Mixed input: both divider headers must appear, context header first.
{
  const mixed = [
    { seq: 1, role: "user", content: "context user", speaker: "A", isContextOnly: true },
    { seq: 2, role: "assistant", content: "context ai", speaker: "B", isContextOnly: true },
    { seq: 3, role: "user", content: "target user", speaker: "A", isContextOnly: false },
    { seq: 4, role: "assistant", content: "target ai", speaker: "B", isContextOnly: false },
  ];
  const transcript = formatExtractionTranscript(mixed);
  assert.match(transcript, /已提取过/, "transcript should contain context review header");
  assert.match(transcript, /本次需要提取/, "transcript should contain extraction target header");
  assert.ok(
    transcript.indexOf("已提取过") < transcript.indexOf("本次需要提取"),
    "context header should appear before target header",
  );
  assert.match(transcript, /#1.*context user/, "context message should appear");
  assert.match(transcript, /#3.*target user/, "target message should appear");
  console.log(" ✓ formatExtractionTranscript: section dividers for mixed context/target");
}

// Target-only input: no divider of either kind.
{
  const allTarget = [
    { seq: 3, role: "user", content: "user msg", speaker: "A", isContextOnly: false },
    { seq: 4, role: "assistant", content: "ai msg", speaker: "B", isContextOnly: false },
  ];
  const transcript = formatExtractionTranscript(allTarget);
  assert.doesNotMatch(transcript, /已提取过/, "no context header when all are target");
  assert.doesNotMatch(transcript, /本次需要提取/, "no target header when all are target");
  console.log(" ✓ formatExtractionTranscript: no dividers when all messages are targets");
}

// Context-only input: no divider of either kind.
{
  const allContext = [
    { seq: 1, role: "user", content: "user msg", speaker: "A", isContextOnly: true },
    { seq: 2, role: "assistant", content: "ai msg", speaker: "B", isContextOnly: true },
  ];
  const transcript = formatExtractionTranscript(allContext);
  assert.doesNotMatch(transcript, /已提取过/, "no dividers when all are context-only");
  assert.doesNotMatch(transcript, /本次需要提取/, "no dividers when all are context-only");
  console.log(" ✓ formatExtractionTranscript: no dividers when all messages are context-only");
}
|
||||
|
||||
// ─── buildExtractionInputContext: isContextOnly propagation ───

// The flag must survive into `filteredMessages`, and the resulting
// `filteredTranscript` must carry both divider headers.
{
  const inputMessages = [
    { seq: 1, role: "user", content: "old question", name: "A", speaker: "A", isContextOnly: true },
    { seq: 2, role: "assistant", content: "old answer", name: "B", speaker: "B", isContextOnly: true },
    { seq: 3, role: "user", content: "new question", name: "A", speaker: "A", isContextOnly: false },
    { seq: 4, role: "assistant", content: "new answer", name: "B", speaker: "B", isContextOnly: false },
  ];
  const result = buildExtractionInputContext(inputMessages, {
    settings: {},
    userName: "A",
    charName: "B",
  });
  const contextFiltered = result.filteredMessages.filter((m) => m.isContextOnly);
  const targetFiltered = result.filteredMessages.filter((m) => !m.isContextOnly);
  assert.equal(contextFiltered.length, 2, "context messages propagated through filtering");
  assert.equal(targetFiltered.length, 2, "target messages propagated through filtering");
  assert.match(result.filteredTranscript, /已提取过/, "transcript includes context header");
  assert.match(result.filteredTranscript, /本次需要提取/, "transcript includes target header");
  console.log(" ✓ buildExtractionInputContext: isContextOnly propagated to filteredMessages and transcript");
}

console.log("extraction-context-only-flag tests passed");
|
||||
Reference in New Issue
Block a user