Reorganize modules into layered directories

This commit is contained in:
Youzini-afk
2026-04-08 01:17:47 +08:00
parent 59942541ea
commit feec17f3e3
90 changed files with 284 additions and 219 deletions

629
maintenance/compressor.js Normal file
View File

@@ -0,0 +1,629 @@
// ST-BME: 层级压缩引擎
// 超过阈值的节点被 LLM 总结为更高层级的压缩节点
import { debugLog } from "../runtime/debug-logging.js";
import { embedText } from "../vector/embedding.js";
import {
addEdge,
addNode,
createEdge,
createNode,
getActiveNodes,
getNode,
} from "../graph/graph.js";
import { callLLMForJSON } from "../llm/llm.js";
import {
getScopeOwnerKey,
getScopeRegionKey,
normalizeMemoryScope,
} from "../graph/memory-scope.js";
import { ensureEventTitle, getNodeDisplayName } from "../graph/node-labels.js";
import {
buildTaskExecutionDebugContext,
buildTaskLlmPayload,
buildTaskPrompt,
} from "../prompting/prompt-builder.js";
import { getSTContextForPrompt } from "../host/st-context.js";
import { applyTaskRegex } from "../prompting/task-regex.js";
import { isDirectVectorConfig } from "../vector/vector-index.js";
/**
 * Build an Error that is marked as an abort (its `name` is "AbortError").
 *
 * @param {string} [message="操作已终止"] - Error message.
 * @returns {Error} Error instance with name set to "AbortError".
 */
function createAbortError(message = "操作已终止") {
  const abortError = new Error(message);
  abortError.name = "AbortError";
  return abortError;
}
/**
 * Build the debug context attached to a task LLM call.
 * Returns null when the optional `buildTaskExecutionDebugContext`
 * helper is not available as a function.
 */
function createTaskLlmDebugContext(promptBuild, regexInput) {
  if (typeof buildTaskExecutionDebugContext !== "function") {
    return null;
  }
  return buildTaskExecutionDebugContext(promptBuild, { regexInput });
}
/**
 * Resolve the final prompt payload for a task LLM call.
 * Delegates to `buildTaskLlmPayload` when available; otherwise builds a
 * minimal payload from the prompt build object and the fallback user prompt.
 *
 * @param {object} promptBuild - Result of buildTaskPrompt (may be null).
 * @param {string} [fallbackUserPrompt=""] - User prompt used when no payload builder exists.
 * @returns {{systemPrompt: string, userPrompt: string, promptMessages: Array, additionalMessages: Array}}
 */
function resolveTaskPromptPayload(promptBuild, fallbackUserPrompt = "") {
  if (typeof buildTaskLlmPayload !== "function") {
    const privateMessages = promptBuild?.privateTaskMessages;
    return {
      systemPrompt: String(promptBuild?.systemPrompt || ""),
      userPrompt: String(fallbackUserPrompt || ""),
      promptMessages: [],
      additionalMessages: Array.isArray(privateMessages) ? privateMessages : [],
    };
  }
  return buildTaskLlmPayload(promptBuild, fallbackUserPrompt);
}
/**
 * Throw if the given AbortSignal has already been aborted.
 * Rethrows `signal.reason` when it is an Error; otherwise throws a
 * freshly created AbortError. No-op for a missing/unaborted signal.
 */
function throwIfAborted(signal) {
  if (!signal?.aborted) return;
  const reason = signal.reason;
  if (reason instanceof Error) {
    throw reason;
  }
  throw createAbortError();
}
/**
 * Derive the effective compression window from a type's compression config.
 *
 * @param {object} [compression={}] - Config with optional fanIn, threshold, keepRecentLeaves.
 * @param {boolean} [force=false] - When true, threshold collapses to fanIn and no recent leaves are kept.
 * @returns {{fanIn: number, threshold: number, keepRecent: number}}
 */
function resolveCompressionWindow(compression = {}, force = false) {
  const rawFanIn = Number(compression?.fanIn);
  // fanIn is clamped to a minimum of 2 (compressing one node is meaningless).
  const fanIn = Number.isFinite(rawFanIn) ? Math.max(2, rawFanIn) : 2;

  let threshold = fanIn;
  if (!force) {
    const rawThreshold = Number(compression?.threshold);
    if (Number.isFinite(rawThreshold)) {
      threshold = Math.max(2, rawThreshold);
    }
  }

  let keepRecent = 0;
  if (!force) {
    const rawKeep = Number(compression?.keepRecentLeaves);
    if (Number.isFinite(rawKeep)) {
      keepRecent = Math.max(0, rawKeep);
    }
  }

  return { fanIn, threshold, keepRecent };
}
/**
 * Coerce an arbitrary field value into a trimmed string.
 * Strings are trimmed; numbers/booleans stringified; arrays normalized
 * element-wise with empty entries dropped and the rest concatenated;
 * plain objects JSON-stringified (empty string on failure);
 * null/undefined become "".
 */
function normalizeCompressionFieldValue(value) {
  if (value == null) return "";
  switch (typeof value) {
    case "string":
      return value.trim();
    case "number":
    case "boolean":
      return `${value}`;
    default:
      break;
  }
  if (Array.isArray(value)) {
    const parts = [];
    for (const entry of value) {
      const normalized = normalizeCompressionFieldValue(entry);
      if (normalized) parts.push(normalized);
    }
    return parts.join("");
  }
  if (typeof value === "object") {
    try {
      return JSON.stringify(value);
    } catch {
      // Circular structures etc. — treat as unrepresentable.
      return "";
    }
  }
  return String(value).trim();
}
/**
 * Build a fallback summary by concatenating the best available text of
 * each node (summary > title > name > insight > display name), keeping
 * at most the first six non-empty entries.
 * NOTE(review): entries are joined with no separator — presumably fine
 * for CJK prose; confirm a delimiter is not expected here.
 */
function buildCompressionFallbackSummary(batch = []) {
  const pieces = [];
  for (const node of batch) {
    const fields = node?.fields;
    const source =
      fields?.summary ||
      fields?.title ||
      fields?.name ||
      fields?.insight ||
      getNodeDisplayName(node);
    const text = normalizeCompressionFieldValue(source);
    if (text) pieces.push(text);
  }
  return pieces.slice(0, 6).join("");
}
/**
 * Map a raw LLM compression result onto the columns declared by the
 * node type, then backfill missing summary/insight/title/name fields
 * with fallbacks derived from the batch being compressed.
 *
 * @param {object} summaryResult - LLM output; fields may live under `.fields` or at the top level.
 * @param {object} typeDef - Node type definition providing `columns`.
 * @param {object[]} [batch=[]] - Nodes being compressed (fallback source).
 * @returns {object} Normalized field map keyed by column name.
 */
function normalizeCompressedFields(summaryResult, typeDef, batch = []) {
  const isPlainObject = (v) => v && typeof v === "object" && !Array.isArray(v);
  let rawFields = {};
  if (isPlainObject(summaryResult?.fields)) {
    rawFields = summaryResult.fields;
  } else if (isPlainObject(summaryResult)) {
    rawFields = summaryResult;
  }

  const columns = Array.isArray(typeDef?.columns) ? typeDef.columns : [];
  const hasColumn = (name) => columns.some((column) => column?.name === name);

  const normalized = {};
  for (const column of columns) {
    const key = String(column?.name || "").trim();
    if (!key) continue;
    const value = normalizeCompressionFieldValue(rawFields[key]);
    if (value) normalized[key] = value;
  }

  const fallbackSummary = buildCompressionFallbackSummary(batch);
  const lastNode = batch[batch.length - 1];

  if (!normalized.summary && hasColumn("summary")) {
    normalized.summary = fallbackSummary || "压缩批次摘要缺失";
  }
  if (!normalized.insight && hasColumn("insight")) {
    normalized.insight = fallbackSummary || "压缩批次洞察缺失";
  }
  if (!normalized.title && hasColumn("title")) {
    // Let ensureEventTitle derive a title from the summary when possible.
    const titled = ensureEventTitle({
      title: rawFields?.title,
      summary: normalized.summary,
    });
    normalized.title =
      normalizeCompressionFieldValue(titled?.title) ||
      normalizeCompressionFieldValue(rawFields?.name) ||
      normalizeCompressionFieldValue(lastNode?.fields?.title) ||
      normalizeCompressionFieldValue(lastNode?.fields?.name) ||
      "压缩节点";
  }
  if (!normalized.name && hasColumn("name")) {
    normalized.name =
      normalizeCompressionFieldValue(rawFields?.title) ||
      normalizeCompressionFieldValue(rawFields?.name) ||
      normalizeCompressionFieldValue(lastNode?.fields?.name) ||
      "压缩节点";
  }
  return normalized;
}
/**
 * Run hierarchical compression for a single node type, bottom-up.
 *
 * @param {object} params
 * @param {object} params.graph - Current graph state.
 * @param {object} params.typeDef - Type definition to compress.
 * @param {object} params.embeddingConfig - Embedding API configuration.
 * @param {boolean} [params.force=false] - Ignore thresholds and force compression.
 * @param {string} [params.customPrompt] - Optional override prompt.
 * @param {AbortSignal} [params.signal] - Cancellation signal.
 * @param {object} [params.settings={}] - Extension settings for prompt building.
 * @returns {Promise<{created: number, archived: number}>}
 */
export async function compressType({
  graph,
  typeDef,
  embeddingConfig,
  force = false,
  customPrompt,
  signal,
  settings = {},
}) {
  const compression = typeDef.compression;
  if (compression?.mode !== "hierarchical") {
    return { created: 0, archived: 0 };
  }
  const rawMaxDepth = Number(compression.maxDepth);
  const maxDepth = Number.isFinite(rawMaxDepth) ? Math.max(1, rawMaxDepth) : 1;

  const totals = { created: 0, archived: 0 };
  // Compress one level at a time, starting from the leaves; stop at the
  // first level where nothing got compressed.
  for (let level = 0; level < maxDepth; level += 1) {
    throwIfAborted(signal);
    const { created, archived } = await compressLevel({
      graph,
      typeDef,
      level,
      embeddingConfig,
      force,
      customPrompt,
      signal,
      settings,
    });
    totals.created += created;
    totals.archived += archived;
    if (created === 0) break;
  }
  return totals;
}
/**
 * Compress the active nodes of one hierarchy level for a single type.
 *
 * Nodes are grouped by memory scope, batched by fanIn, summarized via the
 * LLM, and replaced by level+1 compressed nodes; children are archived
 * and linked to their new parent, and boundary edges are migrated.
 *
 * @param {object} params - graph, typeDef, level, embeddingConfig, force,
 *   customPrompt, signal, settings (same meanings as in compressType).
 * @returns {Promise<{created: number, archived: number}>}
 * @throws {Error} When the LLM result yields no usable fields, or on abort.
 */
async function compressLevel({
  graph,
  typeDef,
  level,
  embeddingConfig,
  force,
  customPrompt,
  signal,
  settings = {},
}) {
  const compression = typeDef.compression;
  const { fanIn, threshold, keepRecent } = resolveCompressionWindow(
    compression,
    force,
  );
  throwIfAborted(signal);
  // Active (non-archived) nodes at this level, oldest first by seq.
  const levelNodes = getActiveNodes(graph, typeDef.id)
    .filter((n) => n.level === level)
    .sort((a, b) => a.seq - b.seq);
  let created = 0;
  let archived = 0;
  for (const group of groupCompressionCandidates(levelNodes)) {
    // Trigger check: forced runs need at least fanIn nodes; normal runs
    // need strictly more than the threshold.
    if (force ? group.length < fanIn : group.length <= threshold) {
      continue;
    }
    // Keep the newest `keepRecent` leaves out of compression.
    const compressible = group.slice(0, Math.max(0, group.length - keepRecent));
    if (compressible.length < fanIn) {
      continue;
    }
    for (let i = 0; i < compressible.length; i += fanIn) {
      const batch = compressible.slice(i, i + fanIn);
      // A trailing single-node batch is not worth compressing.
      if (batch.length < 2) break;
      const summaryResult = await summarizeBatch(
        batch,
        typeDef,
        customPrompt,
        signal,
        settings,
      );
      // Null result (LLM failure) skips this batch rather than aborting the level.
      if (!summaryResult) continue;
      const normalizedFields = normalizeCompressedFields(
        summaryResult,
        typeDef,
        batch,
      );
      if (Object.keys(normalizedFields).length === 0) {
        throw new Error(
          `压缩结果缺少可用 fields无法创建 ${typeDef?.label || typeDef?.id || "压缩"} 节点`,
        );
      }
      // The compressed node inherits the batch's seq range, the max child
      // importance, and the scope of the first child.
      const compressedNode = createNode({
        type: typeDef.id,
        fields: normalizedFields,
        seq: batch[batch.length - 1].seq,
        seqRange: [
          batch[0].seqRange?.[0] ?? batch[0].seq,
          batch[batch.length - 1].seqRange?.[1] ?? batch[batch.length - 1].seq,
        ],
        importance: Math.max(...batch.map((n) => n.importance)),
        scope: normalizeMemoryScope(batch[0]?.scope),
      });
      compressedNode.level = level + 1;
      compressedNode.childIds = batch.map((n) => n.id);
      // Embed the best available text field so the new node stays retrievable.
      const embeddingText =
        normalizeCompressionFieldValue(
          normalizedFields.summary ||
            normalizedFields.insight ||
            normalizedFields.title ||
            normalizedFields.name,
        ) || "";
      if (isDirectVectorConfig(embeddingConfig) && embeddingText) {
        const vec = await embedText(
          embeddingText,
          embeddingConfig,
          { signal },
        );
        if (vec) compressedNode.embedding = Array.from(vec);
      }
      addNode(graph, compressedNode);
      migrateBatchEdges(graph, batch, compressedNode);
      created++;
      // Archive the children and link them to their new parent.
      for (const child of batch) {
        child.archived = true;
        child.parentId = compressedNode.id;
        archived++;
      }
    }
  }
  return { created, archived };
}
/**
 * Bucket nodes into compression groups keyed by memory scope and type:
 * POV-layer nodes group by scope owner, everything else by scope region.
 * Each returned group is sorted by ascending seq.
 *
 * @param {object[]} [nodes=[]] - Candidate nodes.
 * @returns {object[][]} Groups of nodes, each sorted oldest-first.
 */
function groupCompressionCandidates(nodes = []) {
  const buckets = new Map();
  for (const node of nodes) {
    const scope = normalizeMemoryScope(node?.scope);
    let key;
    if (scope.layer === "pov") {
      key = ["pov", getScopeOwnerKey(scope) || "owner:none", node.type || ""].join("::");
    } else {
      key = ["objective", getScopeRegionKey(scope) || "region:global", node.type || ""].join("::");
    }
    const bucket = buckets.get(key);
    if (bucket) {
      bucket.push(node);
    } else {
      buckets.set(key, [node]);
    }
  }
  return [...buckets.values()].map((bucket) => bucket.sort((a, b) => a.seq - b.seq));
}
/**
 * Check whether a same-scope group currently qualifies for compression.
 *
 * @param {object[]} [group=[]] - Nodes in one scope/type bucket.
 * @param {object} [compression={}] - The type's compression config.
 * @param {boolean} [force=false] - Forced-compression mode.
 * @returns {{candidateCount: number, fanIn: number, threshold: number, keepRecent: number}|null}
 *   Window stats when compressible, otherwise null.
 */
function inspectCompressibleGroup(group = [], compression = {}, force = false) {
  const { fanIn, threshold, keepRecent } = resolveCompressionWindow(
    compression,
    force,
  );
  // Same trigger rule as compressLevel: forced runs need >= fanIn,
  // normal runs need strictly more than the threshold.
  const belowCutoff = force ? group.length < fanIn : group.length <= threshold;
  if (belowCutoff) return null;
  const candidates = group.slice(0, Math.max(0, group.length - keepRecent));
  if (candidates.length < fanIn) return null;
  return {
    candidateCount: candidates.length,
    fanIn,
    threshold,
    keepRecent,
  };
}
/**
 * Scan the schema for the first hierarchical type/level that has a group
 * ready to compress. Returns candidate stats when found; otherwise a
 * "no candidates" report with an explanatory reason.
 *
 * @param {object} graph - Graph state.
 * @param {object[]} [schema=[]] - Type definitions to inspect.
 * @param {boolean} [force=false] - Forced-compression mode.
 * @returns {object} Inspection summary (hasCandidates, typeId, level, …).
 */
export function inspectAutoCompressionCandidates(
  graph,
  schema = [],
  force = false,
) {
  const typeDefs = Array.isArray(schema) ? schema : [];
  for (const typeDef of typeDefs) {
    if (typeDef?.compression?.mode !== "hierarchical") continue;
    const rawDepth = Number(typeDef?.compression?.maxDepth);
    const maxDepth = Number.isFinite(rawDepth) ? Math.max(1, rawDepth) : 1;
    for (let level = 0; level < maxDepth; level += 1) {
      const nodesAtLevel = getActiveNodes(graph, typeDef.id)
        .filter((node) => Number(node?.level || 0) === level)
        .sort((a, b) => a.seq - b.seq);
      for (const group of groupCompressionCandidates(nodesAtLevel)) {
        const summary = inspectCompressibleGroup(
          group,
          typeDef.compression,
          force,
        );
        if (!summary) continue;
        // First qualifying group wins — report it immediately.
        return {
          hasCandidates: true,
          typeId: String(typeDef.id || ""),
          level,
          candidateCount: summary.candidateCount,
          threshold: summary.threshold,
          fanIn: summary.fanIn,
          keepRecent: summary.keepRecent,
          reason: "",
        };
      }
    }
  }
  return {
    hasCandidates: false,
    typeId: "",
    level: null,
    candidateCount: 0,
    threshold: 0,
    fanIn: 0,
    keepRecent: 0,
    reason: "已到自动压缩周期,但当前没有达到内部压缩阈值的候选组",
  };
}
/**
 * Re-point live edges that cross the batch boundary onto the new
 * compressed node. Edges fully inside the batch are dropped, edges fully
 * outside are untouched, and the original validity timestamps carry over
 * to each migrated edge.
 *
 * @param {object} graph - Graph whose `edges` are scanned.
 * @param {object[]} batch - Nodes being replaced by the compressed node.
 * @param {object} compressedNode - Replacement node.
 */
function migrateBatchEdges(graph, batch, compressedNode) {
  const batchIds = new Set(batch.map((node) => node.id));
  for (const edge of graph.edges) {
    // Skip edges that are no longer live.
    if (edge.invalidAt || edge.expiredAt) continue;
    const fromInside = batchIds.has(edge.fromId);
    const toInside = batchIds.has(edge.toId);
    // Only edges crossing the boundary need migration (both-in and
    // both-out cases are equal here).
    if (fromInside === toInside) continue;
    const fromId = fromInside ? compressedNode.id : edge.fromId;
    const toId = toInside ? compressedNode.id : edge.toId;
    if (fromId === toId) continue;
    if (!getNode(graph, fromId) || !getNode(graph, toId)) continue;
    const migrated = createEdge({
      fromId,
      toId,
      relation: edge.relation,
      strength: edge.strength,
      edgeType: edge.edgeType,
      scope: edge.scope,
    });
    migrated.validAt = edge.validAt ?? migrated.validAt;
    migrated.invalidAt = edge.invalidAt ?? migrated.invalidAt;
    migrated.expiredAt = edge.expiredAt ?? migrated.expiredAt;
    addEdge(graph, migrated);
  }
}
/**
 * Ask the LLM to summarize a batch of nodes into one compressed record.
 *
 * Builds node descriptions, assembles the task prompt (user-configured
 * prompt build > customPrompt > built-in default), applies task regex
 * transforms, and calls the LLM expecting strict JSON back.
 *
 * @param {object[]} nodes - Batch of nodes to summarize.
 * @param {object} typeDef - Node type definition (columns, label, compression.instruction).
 * @param {string} [customPrompt] - Optional override system prompt.
 * @param {AbortSignal} [signal] - Cancellation signal.
 * @param {object} [settings={}] - Extension settings for prompt building.
 * @returns {Promise<object|null>} Parsed JSON result from the LLM.
 */
async function summarizeBatch(
  nodes,
  typeDef,
  customPrompt,
  signal,
  settings = {},
) {
  // Render each node as a numbered entry with its non-empty fields.
  const nodeDescriptions = nodes
    .map((n, i) => {
      const fieldsStr = Object.entries(n.fields)
        .filter(([_, v]) => v)
        .map(([k, v]) => `${k}: ${v}`)
        .join("\n ");
      return `节点 ${i + 1} [楼层 ${n.seq}]:\n ${fieldsStr}`;
    })
    .join("\n\n");
  // Type-specific instruction, falling back to a generic one.
  const instruction =
    typeDef.compression.instruction || "将以下节点压缩总结为一条精炼记录。";
  const compressPromptBuild = await buildTaskPrompt(settings, "compress", {
    taskName: "compress",
    nodeContent: nodeDescriptions,
    candidateNodes: nodeDescriptions,
    currentRange: `${nodes[0]?.seq ?? "?"} ~ ${nodes[nodes.length - 1]?.seq ?? "?"}`,
    graphStats: `node_count=${nodes.length}, node_type=${typeDef.id}`,
    ...getSTContextForPrompt(),
  });
  const compressRegexInput = { entries: [] };
  // Apply user-configured regex transforms to the final system prompt;
  // precedence: prompt build result > customPrompt > built-in default.
  const systemPrompt = applyTaskRegex(
    settings,
    "compress",
    "finalPrompt",
    compressPromptBuild.systemPrompt ||
      customPrompt ||
      [
        "你是一个记忆压缩器。将多个同类型节点总结为一条更高层级的压缩节点。",
        instruction,
        "",
        "输出格式为严格 JSON",
        `{"fields": {${typeDef.columns.map((c) => `"${c.name}": "..."`).join(", ")}}}`,
        "",
        "规则:",
        "- 保留关键信息:因果关系、不可逆结果、未解决伏笔",
        "- 去除重复和低信息密度内容",
        "- 压缩后文本应精炼,目标 150 字左右",
      ].join("\n"),
    compressRegexInput,
    "system",
  );
  const userPrompt = `请压缩以下 ${nodes.length} 个 "${typeDef.label}" 节点:\n\n${nodeDescriptions}`;
  const promptPayload = resolveTaskPromptPayload(
    compressPromptBuild,
    userPrompt,
  );
  // When structured prompt messages exist, trust the payload's system
  // prompt alone; otherwise fall back to the locally built one.
  const llmSystemPrompt =
    Array.isArray(promptPayload.promptMessages) &&
    promptPayload.promptMessages.length > 0
      ? String(promptPayload.systemPrompt || "")
      : String(promptPayload.systemPrompt || systemPrompt || "");
  return await callLLMForJSON({
    systemPrompt: llmSystemPrompt,
    userPrompt: promptPayload.userPrompt,
    maxRetries: 1,
    signal,
    taskType: "compress",
    debugContext: createTaskLlmDebugContext(
      compressPromptBuild,
      compressRegexInput,
    ),
    promptMessages: promptPayload.promptMessages,
    additionalMessages: promptPayload.additionalMessages,
  });
}
/**
 * Run hierarchical compression across every type in the schema that
 * supports it, accumulating totals.
 *
 * @param {object} graph - Graph state.
 * @param {object[]} schema - Type definitions.
 * @param {object} embeddingConfig - Embedding API configuration.
 * @param {boolean} [force=false] - Ignore thresholds and force compression.
 * @param {string} [customPrompt] - Optional override prompt.
 * @param {AbortSignal} [signal] - Cancellation signal.
 * @param {object} [settings={}] - Extension settings.
 * @returns {Promise<{created: number, archived: number}>}
 */
export async function compressAll(
  graph,
  schema,
  embeddingConfig,
  force = false,
  customPrompt,
  signal,
  settings = {},
) {
  const totals = { created: 0, archived: 0 };
  for (const typeDef of schema) {
    throwIfAborted(signal);
    if (typeDef.compression?.mode !== "hierarchical") continue;
    const result = await compressType({
      graph,
      typeDef,
      embeddingConfig,
      force,
      customPrompt,
      signal,
      settings,
    });
    totals.created += result.created;
    totals.archived += result.archived;
  }
  return totals;
}
// ==================== v2: Active forgetting (SleepGate-inspired) ====================
/**
 * Sleep-phase cleanup cycle: score every active node's retention value
 * and archive ("forget") the ones that fall below the threshold.
 *
 * Retention value = (importance / 10) × recency × (1 + accessFreq),
 * where recency decays logarithmically with age in hours and accessFreq
 * is accesses per day of age.
 *
 * @param {object} graph - Graph state.
 * @param {object} settings - Settings; `forgetThreshold` defaults to 0.5.
 * @returns {{forgotten: number}} Number of nodes archived this cycle.
 */
export function sleepCycle(graph, settings) {
  const threshold = settings.forgetThreshold ?? 0.5;
  const nodes = getActiveNodes(graph);
  const now = Date.now();
  let forgotten = 0;
  for (const node of nodes) {
    // Resident types (synopsis, rule, thread) must never be forgotten.
    if (
      node.type === "synopsis" ||
      node.type === "rule" ||
      node.type === "thread"
    )
      continue;
    // High-importance nodes are kept unconditionally.
    if (node.importance >= 8) continue;
    // Nodes without a usable timestamp cannot be scored; keep them.
    // (Previously NaN silently protected them — now the skip is explicit.)
    const createdTime = Number(node.createdTime);
    if (!Number.isFinite(createdTime)) continue;
    // Recently created nodes (< 1 hour) get a grace period.
    if (now - createdTime < 3600000) continue;
    const ageHours = (now - createdTime) / 3600000;
    const recency = 1 / (1 + Math.log10(1 + ageHours));
    // Coerce missing counters to 0: previously an undefined accessCount
    // or importance made retentionValue NaN, and `NaN < threshold` is
    // false, so such nodes could never be forgotten.
    const accessFreq =
      (Number(node.accessCount) || 0) / Math.max(1, ageHours / 24);
    const retentionValue =
      ((Number(node.importance) || 0) / 10) * recency * (1 + accessFreq);
    if (retentionValue < threshold) {
      node.archived = true;
      forgotten++;
    }
  }
  if (forgotten > 0) {
    debugLog(`[ST-BME] 主动遗忘: ${forgotten} 个低价值节点已归档`);
  }
  return { forgotten };
}