mirror of
https://github.com/Youzini-afk/ST-Bionic-Memory-Ecology.git
synced 2026-05-15 22:30:38 +08:00
fix: 多项修复与优化
This commit is contained in:
469
compressor.js
469
compressor.js
@@ -1,23 +1,30 @@
|
||||
// ST-BME: 层级压缩引擎
|
||||
// 超过阈值的节点被 LLM 总结为更高层级的压缩节点
|
||||
|
||||
import { createNode, addNode, createEdge, addEdge, getActiveNodes, getNode } from './graph.js';
|
||||
import { callLLMForJSON } from './llm.js';
|
||||
import { embedText } from './embedding.js';
|
||||
import { buildTaskPrompt } from './prompt-builder.js';
|
||||
import { applyTaskRegex } from './task-regex.js';
|
||||
import { isDirectVectorConfig } from './vector-index.js';
|
||||
import { embedText } from "./embedding.js";
|
||||
import {
|
||||
addEdge,
|
||||
addNode,
|
||||
createEdge,
|
||||
createNode,
|
||||
getActiveNodes,
|
||||
getNode,
|
||||
} from "./graph.js";
|
||||
import { callLLMForJSON } from "./llm.js";
|
||||
import { buildTaskPrompt } from "./prompt-builder.js";
|
||||
import { applyTaskRegex } from "./task-regex.js";
|
||||
import { isDirectVectorConfig } from "./vector-index.js";
|
||||
|
||||
function createAbortError(message = '操作已终止') {
|
||||
const error = new Error(message);
|
||||
error.name = 'AbortError';
|
||||
return error;
|
||||
function createAbortError(message = "操作已终止") {
|
||||
const error = new Error(message);
|
||||
error.name = "AbortError";
|
||||
return error;
|
||||
}
|
||||
|
||||
function throwIfAborted(signal) {
|
||||
if (signal?.aborted) {
|
||||
throw signal.reason instanceof Error ? signal.reason : createAbortError();
|
||||
}
|
||||
if (signal?.aborted) {
|
||||
throw signal.reason instanceof Error ? signal.reason : createAbortError();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -30,193 +37,234 @@ function throwIfAborted(signal) {
|
||||
* @param {boolean} [params.force=false] - 忽略阈值强制压缩
|
||||
* @returns {Promise<{created: number, archived: number}>}
|
||||
*/
|
||||
export async function compressType({ graph, typeDef, embeddingConfig, force = false, customPrompt, signal, settings = {} }) {
|
||||
const compression = typeDef.compression;
|
||||
if (!compression || compression.mode !== 'hierarchical') {
|
||||
return { created: 0, archived: 0 };
|
||||
}
|
||||
export async function compressType({
|
||||
graph,
|
||||
typeDef,
|
||||
embeddingConfig,
|
||||
force = false,
|
||||
customPrompt,
|
||||
signal,
|
||||
settings = {},
|
||||
}) {
|
||||
const compression = typeDef.compression;
|
||||
if (!compression || compression.mode !== "hierarchical") {
|
||||
return { created: 0, archived: 0 };
|
||||
}
|
||||
|
||||
let totalCreated = 0;
|
||||
let totalArchived = 0;
|
||||
let totalCreated = 0;
|
||||
let totalArchived = 0;
|
||||
|
||||
// 从最低层级开始逐层压缩
|
||||
for (let level = 0; level < compression.maxDepth; level++) {
|
||||
throwIfAborted(signal);
|
||||
const result = await compressLevel({
|
||||
graph,
|
||||
typeDef,
|
||||
level,
|
||||
embeddingConfig,
|
||||
force,
|
||||
customPrompt,
|
||||
signal,
|
||||
settings,
|
||||
});
|
||||
// 从最低层级开始逐层压缩
|
||||
for (let level = 0; level < compression.maxDepth; level++) {
|
||||
throwIfAborted(signal);
|
||||
const result = await compressLevel({
|
||||
graph,
|
||||
typeDef,
|
||||
level,
|
||||
embeddingConfig,
|
||||
force,
|
||||
customPrompt,
|
||||
signal,
|
||||
settings,
|
||||
});
|
||||
|
||||
totalCreated += result.created;
|
||||
totalArchived += result.archived;
|
||||
totalCreated += result.created;
|
||||
totalArchived += result.archived;
|
||||
|
||||
// 如果这一层没有压缩发生,停止
|
||||
if (result.created === 0) break;
|
||||
}
|
||||
// 如果这一层没有压缩发生,停止
|
||||
if (result.created === 0) break;
|
||||
}
|
||||
|
||||
return { created: totalCreated, archived: totalArchived };
|
||||
return { created: totalCreated, archived: totalArchived };
|
||||
}
|
||||
|
||||
/**
|
||||
* 压缩特定层级的节点
|
||||
*/
|
||||
async function compressLevel({ graph, typeDef, level, embeddingConfig, force, customPrompt, signal, settings = {} }) {
|
||||
const compression = typeDef.compression;
|
||||
throwIfAborted(signal);
|
||||
async function compressLevel({
|
||||
graph,
|
||||
typeDef,
|
||||
level,
|
||||
embeddingConfig,
|
||||
force,
|
||||
customPrompt,
|
||||
signal,
|
||||
settings = {},
|
||||
}) {
|
||||
const compression = typeDef.compression;
|
||||
throwIfAborted(signal);
|
||||
|
||||
// 获取该层级的活跃叶子节点
|
||||
const levelNodes = getActiveNodes(graph, typeDef.id)
|
||||
.filter(n => n.level === level)
|
||||
.sort((a, b) => a.seq - b.seq);
|
||||
// 获取该层级的活跃叶子节点
|
||||
const levelNodes = getActiveNodes(graph, typeDef.id)
|
||||
.filter((n) => n.level === level)
|
||||
.sort((a, b) => a.seq - b.seq);
|
||||
|
||||
const threshold = force ? Math.max(2, compression.fanIn) : compression.threshold;
|
||||
const keepRecent = force ? 0 : compression.keepRecentLeaves;
|
||||
const threshold = force
|
||||
? Math.max(2, compression.fanIn)
|
||||
: compression.threshold;
|
||||
const keepRecent = force ? 0 : compression.keepRecentLeaves;
|
||||
|
||||
// 不够阈值,无需压缩
|
||||
if (levelNodes.length <= threshold) {
|
||||
return { created: 0, archived: 0 };
|
||||
// 不够阈值,无需压缩
|
||||
if (levelNodes.length <= threshold) {
|
||||
return { created: 0, archived: 0 };
|
||||
}
|
||||
|
||||
// 排除最近的节点
|
||||
const compressible = levelNodes.slice(0, levelNodes.length - keepRecent);
|
||||
if (compressible.length < compression.fanIn) {
|
||||
return { created: 0, archived: 0 };
|
||||
}
|
||||
|
||||
let created = 0;
|
||||
let archived = 0;
|
||||
|
||||
// 按 fanIn 分组压缩
|
||||
for (let i = 0; i < compressible.length; i += compression.fanIn) {
|
||||
const batch = compressible.slice(i, i + compression.fanIn);
|
||||
if (batch.length < 2) break; // 至少 2 个才压缩
|
||||
|
||||
// 调用 LLM 总结
|
||||
const summaryResult = await summarizeBatch(
|
||||
batch,
|
||||
typeDef,
|
||||
customPrompt,
|
||||
signal,
|
||||
settings,
|
||||
);
|
||||
if (!summaryResult) continue;
|
||||
|
||||
// 创建压缩节点
|
||||
const compressedNode = createNode({
|
||||
type: typeDef.id,
|
||||
fields: summaryResult.fields,
|
||||
seq: batch[batch.length - 1].seq,
|
||||
seqRange: [
|
||||
batch[0].seqRange?.[0] ?? batch[0].seq,
|
||||
batch[batch.length - 1].seqRange?.[1] ?? batch[batch.length - 1].seq,
|
||||
],
|
||||
importance: Math.max(...batch.map((n) => n.importance)),
|
||||
});
|
||||
|
||||
compressedNode.level = level + 1;
|
||||
compressedNode.childIds = batch.map((n) => n.id);
|
||||
|
||||
// 生成 embedding
|
||||
if (isDirectVectorConfig(embeddingConfig) && summaryResult.fields.summary) {
|
||||
const vec = await embedText(
|
||||
summaryResult.fields.summary,
|
||||
embeddingConfig,
|
||||
{ signal },
|
||||
);
|
||||
if (vec) compressedNode.embedding = Array.from(vec);
|
||||
}
|
||||
|
||||
// 排除最近的节点
|
||||
const compressible = levelNodes.slice(0, levelNodes.length - keepRecent);
|
||||
if (compressible.length < compression.fanIn) {
|
||||
return { created: 0, archived: 0 };
|
||||
addNode(graph, compressedNode);
|
||||
migrateBatchEdges(graph, batch, compressedNode);
|
||||
created++;
|
||||
|
||||
// 归档子节点
|
||||
for (const child of batch) {
|
||||
child.archived = true;
|
||||
child.parentId = compressedNode.id;
|
||||
archived++;
|
||||
}
|
||||
}
|
||||
|
||||
let created = 0;
|
||||
let archived = 0;
|
||||
|
||||
// 按 fanIn 分组压缩
|
||||
for (let i = 0; i < compressible.length; i += compression.fanIn) {
|
||||
const batch = compressible.slice(i, i + compression.fanIn);
|
||||
if (batch.length < 2) break; // 至少 2 个才压缩
|
||||
|
||||
// 调用 LLM 总结
|
||||
const summaryResult = await summarizeBatch(batch, typeDef, customPrompt, signal, settings);
|
||||
if (!summaryResult) continue;
|
||||
|
||||
// 创建压缩节点
|
||||
const compressedNode = createNode({
|
||||
type: typeDef.id,
|
||||
fields: summaryResult.fields,
|
||||
seq: batch[batch.length - 1].seq,
|
||||
seqRange: [batch[0].seqRange?.[0] ?? batch[0].seq, batch[batch.length - 1].seqRange?.[1] ?? batch[batch.length - 1].seq],
|
||||
importance: Math.max(...batch.map(n => n.importance)),
|
||||
});
|
||||
|
||||
compressedNode.level = level + 1;
|
||||
compressedNode.childIds = batch.map(n => n.id);
|
||||
|
||||
// 生成 embedding
|
||||
if (isDirectVectorConfig(embeddingConfig) && summaryResult.fields.summary) {
|
||||
const vec = await embedText(summaryResult.fields.summary, embeddingConfig, { signal });
|
||||
if (vec) compressedNode.embedding = Array.from(vec);
|
||||
}
|
||||
|
||||
addNode(graph, compressedNode);
|
||||
migrateBatchEdges(graph, batch, compressedNode);
|
||||
created++;
|
||||
|
||||
// 归档子节点
|
||||
for (const child of batch) {
|
||||
child.archived = true;
|
||||
child.parentId = compressedNode.id;
|
||||
archived++;
|
||||
}
|
||||
}
|
||||
|
||||
return { created, archived };
|
||||
return { created, archived };
|
||||
}
|
||||
|
||||
function migrateBatchEdges(graph, batch, compressedNode) {
|
||||
const batchIds = new Set(batch.map(node => node.id));
|
||||
const activeNodeIds = new Set(getActiveNodes(graph).map(node => node.id));
|
||||
const batchIds = new Set(batch.map((node) => node.id));
|
||||
|
||||
for (const edge of graph.edges) {
|
||||
if (edge.invalidAt || edge.expiredAt) continue;
|
||||
for (const edge of graph.edges) {
|
||||
if (edge.invalidAt || edge.expiredAt) continue;
|
||||
|
||||
const fromInside = batchIds.has(edge.fromId);
|
||||
const toInside = batchIds.has(edge.toId);
|
||||
if (!fromInside && !toInside) continue;
|
||||
if (fromInside && toInside) continue;
|
||||
const fromInside = batchIds.has(edge.fromId);
|
||||
const toInside = batchIds.has(edge.toId);
|
||||
if (!fromInside && !toInside) continue;
|
||||
if (fromInside && toInside) continue;
|
||||
|
||||
const newFromId = fromInside ? compressedNode.id : edge.fromId;
|
||||
const newToId = toInside ? compressedNode.id : edge.toId;
|
||||
const newFromId = fromInside ? compressedNode.id : edge.fromId;
|
||||
const newToId = toInside ? compressedNode.id : edge.toId;
|
||||
|
||||
if (newFromId === newToId) continue;
|
||||
if (!activeNodeIds.has(newFromId) || !activeNodeIds.has(newToId)) continue;
|
||||
if (!getNode(graph, newFromId) || !getNode(graph, newToId)) continue;
|
||||
if (newFromId === newToId) continue;
|
||||
if (!getNode(graph, newFromId) || !getNode(graph, newToId)) continue;
|
||||
|
||||
const migratedEdge = createEdge({
|
||||
fromId: newFromId,
|
||||
toId: newToId,
|
||||
relation: edge.relation,
|
||||
strength: edge.strength,
|
||||
edgeType: edge.edgeType,
|
||||
});
|
||||
migratedEdge.validAt = edge.validAt ?? migratedEdge.validAt;
|
||||
migratedEdge.invalidAt = edge.invalidAt ?? migratedEdge.invalidAt;
|
||||
migratedEdge.expiredAt = edge.expiredAt ?? migratedEdge.expiredAt;
|
||||
const migratedEdge = createEdge({
|
||||
fromId: newFromId,
|
||||
toId: newToId,
|
||||
relation: edge.relation,
|
||||
strength: edge.strength,
|
||||
edgeType: edge.edgeType,
|
||||
});
|
||||
migratedEdge.validAt = edge.validAt ?? migratedEdge.validAt;
|
||||
migratedEdge.invalidAt = edge.invalidAt ?? migratedEdge.invalidAt;
|
||||
migratedEdge.expiredAt = edge.expiredAt ?? migratedEdge.expiredAt;
|
||||
|
||||
addEdge(graph, migratedEdge);
|
||||
}
|
||||
addEdge(graph, migratedEdge);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 调用 LLM 总结一批节点
|
||||
*/
|
||||
async function summarizeBatch(nodes, typeDef, customPrompt, signal, settings = {}) {
|
||||
const nodeDescriptions = nodes.map((n, i) => {
|
||||
const fieldsStr = Object.entries(n.fields)
|
||||
.filter(([_, v]) => v)
|
||||
.map(([k, v]) => `${k}: ${v}`)
|
||||
.join('\n ');
|
||||
return `节点 ${i + 1} [楼层 ${n.seq}]:\n ${fieldsStr}`;
|
||||
}).join('\n\n');
|
||||
async function summarizeBatch(
|
||||
nodes,
|
||||
typeDef,
|
||||
customPrompt,
|
||||
signal,
|
||||
settings = {},
|
||||
) {
|
||||
const nodeDescriptions = nodes
|
||||
.map((n, i) => {
|
||||
const fieldsStr = Object.entries(n.fields)
|
||||
.filter(([_, v]) => v)
|
||||
.map(([k, v]) => `${k}: ${v}`)
|
||||
.join("\n ");
|
||||
return `节点 ${i + 1} [楼层 ${n.seq}]:\n ${fieldsStr}`;
|
||||
})
|
||||
.join("\n\n");
|
||||
|
||||
const instruction = typeDef.compression.instruction || '将以下节点压缩总结为一条精炼记录。';
|
||||
const instruction =
|
||||
typeDef.compression.instruction || "将以下节点压缩总结为一条精炼记录。";
|
||||
|
||||
const compressPromptBuild = buildTaskPrompt(settings, 'compress', {
|
||||
taskName: 'compress',
|
||||
nodeContent: nodeDescriptions,
|
||||
candidateNodes: nodeDescriptions,
|
||||
currentRange: `${nodes[0]?.seq ?? '?'} ~ ${nodes[nodes.length - 1]?.seq ?? '?'}`,
|
||||
graphStats: `node_count=${nodes.length}, node_type=${typeDef.id}`,
|
||||
});
|
||||
const systemPrompt = applyTaskRegex(
|
||||
settings,
|
||||
'compress',
|
||||
'finalPrompt',
|
||||
compressPromptBuild.systemPrompt || customPrompt || [
|
||||
'你是一个记忆压缩器。将多个同类型节点总结为一条更高层级的压缩节点。',
|
||||
const compressPromptBuild = buildTaskPrompt(settings, "compress", {
|
||||
taskName: "compress",
|
||||
nodeContent: nodeDescriptions,
|
||||
candidateNodes: nodeDescriptions,
|
||||
currentRange: `${nodes[0]?.seq ?? "?"} ~ ${nodes[nodes.length - 1]?.seq ?? "?"}`,
|
||||
graphStats: `node_count=${nodes.length}, node_type=${typeDef.id}`,
|
||||
});
|
||||
const systemPrompt = applyTaskRegex(
|
||||
settings,
|
||||
"compress",
|
||||
"finalPrompt",
|
||||
compressPromptBuild.systemPrompt ||
|
||||
customPrompt ||
|
||||
[
|
||||
"你是一个记忆压缩器。将多个同类型节点总结为一条更高层级的压缩节点。",
|
||||
instruction,
|
||||
'',
|
||||
'输出格式为严格 JSON:',
|
||||
`{"fields": {${typeDef.columns.map(c => `"${c.name}": "..."`).join(', ')}}}`,
|
||||
'',
|
||||
'规则:',
|
||||
'- 保留关键信息:因果关系、不可逆结果、未解决伏笔',
|
||||
'- 去除重复和低信息密度内容',
|
||||
'- 压缩后文本应精炼,目标 150 字左右',
|
||||
].join('\n'),
|
||||
);
|
||||
"",
|
||||
"输出格式为严格 JSON:",
|
||||
`{"fields": {${typeDef.columns.map((c) => `"${c.name}": "..."`).join(", ")}}}`,
|
||||
"",
|
||||
"规则:",
|
||||
"- 保留关键信息:因果关系、不可逆结果、未解决伏笔",
|
||||
"- 去除重复和低信息密度内容",
|
||||
"- 压缩后文本应精炼,目标 150 字左右",
|
||||
].join("\n"),
|
||||
);
|
||||
|
||||
const userPrompt = `请压缩以下 ${nodes.length} 个 "${typeDef.label}" 节点:\n\n${nodeDescriptions}`;
|
||||
const userPrompt = `请压缩以下 ${nodes.length} 个 "${typeDef.label}" 节点:\n\n${nodeDescriptions}`;
|
||||
|
||||
return await callLLMForJSON({
|
||||
systemPrompt,
|
||||
userPrompt,
|
||||
maxRetries: 1,
|
||||
signal,
|
||||
taskType: 'compress',
|
||||
additionalMessages: compressPromptBuild.customMessages || [],
|
||||
});
|
||||
return await callLLMForJSON({
|
||||
systemPrompt,
|
||||
userPrompt,
|
||||
maxRetries: 1,
|
||||
signal,
|
||||
taskType: "compress",
|
||||
additionalMessages: compressPromptBuild.customMessages || [],
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -228,20 +276,36 @@ async function summarizeBatch(nodes, typeDef, customPrompt, signal, settings = {
|
||||
* @param {boolean} [force=false]
|
||||
* @returns {Promise<{created: number, archived: number}>}
|
||||
*/
|
||||
export async function compressAll(graph, schema, embeddingConfig, force = false, customPrompt, signal, settings = {}) {
|
||||
let totalCreated = 0;
|
||||
let totalArchived = 0;
|
||||
export async function compressAll(
|
||||
graph,
|
||||
schema,
|
||||
embeddingConfig,
|
||||
force = false,
|
||||
customPrompt,
|
||||
signal,
|
||||
settings = {},
|
||||
) {
|
||||
let totalCreated = 0;
|
||||
let totalArchived = 0;
|
||||
|
||||
for (const typeDef of schema) {
|
||||
throwIfAborted(signal);
|
||||
if (typeDef.compression?.mode === 'hierarchical') {
|
||||
const result = await compressType({ graph, typeDef, embeddingConfig, force, customPrompt, signal, settings });
|
||||
totalCreated += result.created;
|
||||
totalArchived += result.archived;
|
||||
}
|
||||
for (const typeDef of schema) {
|
||||
throwIfAborted(signal);
|
||||
if (typeDef.compression?.mode === "hierarchical") {
|
||||
const result = await compressType({
|
||||
graph,
|
||||
typeDef,
|
||||
embeddingConfig,
|
||||
force,
|
||||
customPrompt,
|
||||
signal,
|
||||
settings,
|
||||
});
|
||||
totalCreated += result.created;
|
||||
totalArchived += result.archived;
|
||||
}
|
||||
}
|
||||
|
||||
return { created: totalCreated, archived: totalArchived };
|
||||
return { created: totalCreated, archived: totalArchived };
|
||||
}
|
||||
|
||||
// ==================== v2: 主动遗忘(SleepGate 启发) ====================
|
||||
@@ -249,40 +313,45 @@ export async function compressAll(graph, schema, embeddingConfig, force = false,
|
||||
/**
|
||||
* 睡眠清理周期
|
||||
* 评估每个节点的保留价值,低于阈值的归档(遗忘)
|
||||
*
|
||||
*
|
||||
* @param {object} graph - 图状态
|
||||
* @param {object} settings - 包含 forgetThreshold 的设置
|
||||
* @returns {{forgotten: number}} 本次遗忘的节点数
|
||||
*/
|
||||
export function sleepCycle(graph, settings) {
|
||||
const threshold = settings.forgetThreshold ?? 0.5;
|
||||
const nodes = getActiveNodes(graph);
|
||||
const now = Date.now();
|
||||
let forgotten = 0;
|
||||
const threshold = settings.forgetThreshold ?? 0.5;
|
||||
const nodes = getActiveNodes(graph);
|
||||
const now = Date.now();
|
||||
let forgotten = 0;
|
||||
|
||||
for (const node of nodes) {
|
||||
// 跳过常驻类型(synopsis, rule 等重要节点不应被遗忘)
|
||||
if (node.type === 'synopsis' || node.type === 'rule' || node.type === 'thread') continue;
|
||||
// 跳过高重要性节点
|
||||
if (node.importance >= 8) continue;
|
||||
// 跳过最近创建的节点(< 1 小时)
|
||||
if (now - node.createdTime < 3600000) continue;
|
||||
for (const node of nodes) {
|
||||
// 跳过常驻类型(synopsis, rule 等重要节点不应被遗忘)
|
||||
if (
|
||||
node.type === "synopsis" ||
|
||||
node.type === "rule" ||
|
||||
node.type === "thread"
|
||||
)
|
||||
continue;
|
||||
// 跳过高重要性节点
|
||||
if (node.importance >= 8) continue;
|
||||
// 跳过最近创建的节点(< 1 小时)
|
||||
if (now - node.createdTime < 3600000) continue;
|
||||
|
||||
// 计算保留价值 = importance × recency × (1 + accessFreq)
|
||||
const ageHours = (now - node.createdTime) / 3600000;
|
||||
const recency = 1 / (1 + Math.log10(1 + ageHours));
|
||||
const accessFreq = node.accessCount / Math.max(1, ageHours / 24);
|
||||
const retentionValue = (node.importance / 10) * recency * (1 + accessFreq);
|
||||
// 计算保留价值 = importance × recency × (1 + accessFreq)
|
||||
const ageHours = (now - node.createdTime) / 3600000;
|
||||
const recency = 1 / (1 + Math.log10(1 + ageHours));
|
||||
const accessFreq = node.accessCount / Math.max(1, ageHours / 24);
|
||||
const retentionValue = (node.importance / 10) * recency * (1 + accessFreq);
|
||||
|
||||
if (retentionValue < threshold) {
|
||||
node.archived = true;
|
||||
forgotten++;
|
||||
}
|
||||
if (retentionValue < threshold) {
|
||||
node.archived = true;
|
||||
forgotten++;
|
||||
}
|
||||
}
|
||||
|
||||
if (forgotten > 0) {
|
||||
console.log(`[ST-BME] 主动遗忘: ${forgotten} 个低价值节点已归档`);
|
||||
}
|
||||
if (forgotten > 0) {
|
||||
console.log(`[ST-BME] 主动遗忘: ${forgotten} 个低价值节点已归档`);
|
||||
}
|
||||
|
||||
return { forgotten };
|
||||
return { forgotten };
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user