// ST-BME: 统一记忆整合引擎(批量化版) // 合并 Mem0 精确对照 + A-MEM 记忆进化为单一阶段 // 批量 embed + 批量查近邻 + 单次 LLM 调用 import { embedBatch, searchSimilar } from './embedding.js'; import { addEdge, createEdge, getActiveNodes, getNode } from './graph.js'; import { callLLMForJSON } from './llm.js'; import { buildNodeVectorText, findSimilarNodesByText, isDirectVectorConfig, validateVectorConfig, } from './vector-index.js'; function createAbortError(message = '操作已终止') { const error = new Error(message); error.name = 'AbortError'; return error; } function isAbortError(error) { return error?.name === 'AbortError'; } function throwIfAborted(signal) { if (signal?.aborted) { throw signal.reason instanceof Error ? signal.reason : createAbortError(); } } /** * 统一记忆整合系统提示词(支持批量输出) */ const CONSOLIDATION_SYSTEM_PROMPT = `你是一个记忆整合分析器。当新记忆加入知识图谱时,你需要同时完成两项任务: **任务一:冲突检测** 判断新记忆与最近邻的已有记忆是否冲突或重复: - skip: 新记忆与已有记忆完全重复,应丢弃 - merge: 新记忆是对旧记忆的修正或补充,应合并 - keep: 新记忆是全新信息,应保留 **任务二:进化分析**(仅当 action=keep 时需要) 分析新记忆是否揭示了关于旧记忆的新信息: - 建立有意义的关联连接 - 反向更新旧记忆的描述或分类 输出严格 JSON: { "results": [ { "node_id": "新记忆的节点 ID", "action": "keep" | "merge" | "skip", "merge_target_id": "仅 action=merge 时必填:要合并到的旧节点 ID", "merged_fields": { "仅 action=merge 时可选:合并后的字段更新" }, "reason": "判定理由(简述)", "evolution": { "should_evolve": true/false, "connections": ["需要建立链接的旧记忆 ID 列表"], "neighbor_updates": [ { "nodeId": "需更新的旧节点 ID", "newContext": "基于新信息修正后的描述(不需修改则为 null)", "newTags": ["更新后的分类标签,不需修改则为 null"] } ] } } ] } 整合规则: - 必须对每条新记忆都给出一个 result 条目 - 当 action=skip 时,evolution 可省略或设 should_evolve=false - 当 action=merge 时,evolution 可省略或设 should_evolve=false - 仅当 action=keep 且新信息确实改变了对旧记忆的理解时,才设 should_evolve=true - 例如:揭露卧底身份 → 修正该角色之前事件中的动机描述 - 例如:发现地点的隐藏特性 → 更新地点节点的描述 - 不要对无关记忆强行建立联系 - neighbor_updates 中每条必须有实际意义的修改`; /** * 统一记忆整合主函数(批量化版) * * 4 阶段架构: * Phase 0: 收集有效新节点 * Phase 1: 批量 Embed(直连 1 次 embedBatch / 后端逐次) * Phase 2: 各节点查近邻(直连本地 cosine / 后端逐次 query) * Phase 3: 单次 LLM 批量判定 * Phase 4: 逐个处理结果 * * @param {object} params * @param {object} params.graph - 当前图状态 * @param {string[]} params.newNodeIds - 本次新创建的节点 ID 列表 * @param {object} params.embeddingConfig - Embedding API 配置 * @param {object} [params.options] * @param {number} [params.options.neighborCount=5] * @param {number} [params.options.conflictThreshold=0.85] * @param {string} [params.customPrompt] * @param {AbortSignal} [params.signal] * @returns {Promise<{merged: number, skipped: number, kept: number, evolved: number, connections: number, updates: number}>} */ export async function consolidateMemories({ graph, newNodeIds, embeddingConfig, options = {}, customPrompt, signal, }) { const neighborCount = options.neighborCount ?? 5; const conflictThreshold = options.conflictThreshold ?? 0.85; const stats = { merged: 0, skipped: 0, kept: 0, evolved: 0, connections: 0, updates: 0, }; if (!newNodeIds || newNodeIds.length === 0) return stats; if (!validateVectorConfig(embeddingConfig).valid) { console.log('[ST-BME] 记忆整合跳过:向量配置不可用'); return stats; } // ══════════════════════════════════════════════ // Phase 0: 收集有效新节点 // ══════════════════════════════════════════════ const newEntries = []; for (const id of newNodeIds) { const node = getNode(graph, id); if (!node || node.archived) continue; const text = buildNodeVectorText(node); if (!text) continue; newEntries.push({ id, node, text }); } if (newEntries.length === 0) return stats; const activeNodes = getActiveNodes(graph).filter(n => { const text = buildNodeVectorText(n); return typeof text === 'string' && text.length > 0; }); if (activeNodes.length < 2) { // 图中节点不够,全部 keep stats.kept = newEntries.length; return stats; } throwIfAborted(signal); console.log(`[ST-BME] 记忆整合开始: ${newEntries.length} 个新节点`); // ══════════════════════════════════════════════ // Phase 1 + 2: 批量 Embed + 查近邻 // ══════════════════════════════════════════════ /** @type {Map>} */ const neighborsMap = new Map(); if (isDirectVectorConfig(embeddingConfig)) { // ── 直连模式: 1 次 embedBatch + N 次本地 cosine ── const texts = newEntries.map(e => e.text); let queryVectors; try { queryVectors = await embedBatch(texts, embeddingConfig, { signal }); } catch (e) { if (isAbortError(e)) throw e; console.warn('[ST-BME] 批量 embed 失败,回退到逐条:', e.message); queryVectors = null; } // 构建候选池(含 embedding 的活跃节点) const candidatePool = activeNodes .filter(n => Array.isArray(n.embedding) && n.embedding.length > 0) .map(n => ({ nodeId: n.id, embedding: n.embedding })); for (let i = 0; i < newEntries.length; i++) { throwIfAborted(signal); const entry = newEntries[i]; const candidates = candidatePool.filter(c => c.nodeId !== entry.id); if (queryVectors?.[i] && candidates.length > 0) { // 本地 cosine 搜索(0 API 调用) const neighbors = searchSimilar(queryVectors[i], candidates, neighborCount); neighborsMap.set(entry.id, neighbors); } else { // fallback: 逐条 embed try { const neighbors = await findSimilarNodesByText( graph, entry.text, embeddingConfig, neighborCount, activeNodes.filter(n => n.id !== entry.id), signal, ); neighborsMap.set(entry.id, neighbors); } catch (e) { if (isAbortError(e)) throw e; console.warn(`[ST-BME] 近邻查询失败 (${entry.id}):`, e.message); neighborsMap.set(entry.id, []); } } } } else { // ── 后端模式: 逐条 /api/vector/query ── for (let i = 0; i < newEntries.length; i++) { throwIfAborted(signal); const entry = newEntries[i]; try { const neighbors = await findSimilarNodesByText( graph, entry.text, embeddingConfig, neighborCount, activeNodes.filter(n => n.id !== entry.id), signal, ); neighborsMap.set(entry.id, neighbors); } catch (e) { if (isAbortError(e)) throw e; console.warn(`[ST-BME] 近邻查询失败 (${entry.id}):`, e.message); neighborsMap.set(entry.id, []); } } } // ══════════════════════════════════════════════ // Phase 3: 单次 LLM 批量判定 // ══════════════════════════════════════════════ throwIfAborted(signal); const userPromptSections = []; userPromptSections.push(`本轮共新增 ${newEntries.length} 条记忆,请逐条分析:\n`); for (let i = 0; i < newEntries.length; i++) { const entry = newEntries[i]; const neighbors = neighborsMap.get(entry.id) || []; const newNodeFieldsStr = Object.entries(entry.node.fields) .map(([k, v]) => `${k}: ${v}`) .join(', '); // 构建近邻描述 let neighborText; if (neighbors.length === 0) { neighborText = ' (无近邻命中)'; } else { neighborText = neighbors.map(n => { const node = getNode(graph, n.nodeId); if (!node) return null; const fieldsStr = Object.entries(node.fields) .map(([k, v]) => `${k}: ${v}`) .join(', '); return ` - [${node.id}] 类型=${node.type}, ${fieldsStr} (相似度=${n.score.toFixed(3)})`; }).filter(Boolean).join('\n'); } // 检查高相似度 const hasHighSimilarity = neighbors.length > 0 && neighbors[0].score > conflictThreshold; const hint = hasHighSimilarity ? ` ⚠ 最高相似度 ${neighbors[0].score.toFixed(3)} 超过阈值 ${conflictThreshold}` : ''; userPromptSections.push([ `### 新记忆 #${i + 1}`, `[${entry.id}] 类型=${entry.node.type}, ${newNodeFieldsStr}`, '近邻记忆:', neighborText, hint, ].filter(Boolean).join('\n')); } const userPrompt = userPromptSections.join('\n\n'); let decision; try { decision = await callLLMForJSON({ systemPrompt: customPrompt || CONSOLIDATION_SYSTEM_PROMPT, userPrompt, maxRetries: 1, signal, }); } catch (e) { if (isAbortError(e)) throw e; console.error('[ST-BME] 记忆整合 LLM 调用失败:', e); stats.kept = newEntries.length; return stats; } // ══════════════════════════════════════════════ // Phase 4: 逐个处理结果 // ══════════════════════════════════════════════ // 解析 LLM 返回——兼容单条和批量格式 let results; if (Array.isArray(decision?.results)) { results = decision.results; } else if (decision?.action) { // 单条返回格式(LLM 可能忽略 results 包装) results = [{ ...decision, node_id: newEntries[0]?.id }]; } else { console.warn('[ST-BME] 记忆整合: LLM 返回格式异常,全部 keep'); stats.kept = newEntries.length; return stats; } // 建立 node_id → result 的映射 const resultMap = new Map(); for (const r of results) { if (r.node_id) resultMap.set(r.node_id, r); } // 处理每个新节点 for (const entry of newEntries) { const result = resultMap.get(entry.id); if (!result) { // LLM 未返回此节点的结果,fallback 为 keep stats.kept++; continue; } processOneResult(graph, entry, result, stats); } // 日志 const actionSummary = []; if (stats.merged > 0) actionSummary.push(`合并 ${stats.merged}`); if (stats.skipped > 0) actionSummary.push(`跳过 ${stats.skipped}`); if (stats.kept > 0) actionSummary.push(`保留 ${stats.kept}`); if (stats.evolved > 0) actionSummary.push(`进化 ${stats.evolved}`); if (stats.connections > 0) actionSummary.push(`新链接 ${stats.connections}`); if (stats.updates > 0) actionSummary.push(`回溯更新 ${stats.updates}`); if (actionSummary.length > 0) { console.log(`[ST-BME] 记忆整合完成: ${actionSummary.join(', ')}`); } return stats; } /** * 处理单个节点的整合结果 */ function processOneResult(graph, entry, result, stats) { const { id: newId, node: newNode } = entry; // ── 处理 action ── switch (result.action) { case 'skip': { console.log(`[ST-BME] 记忆整合: skip (重复) — ${newId}`); newNode.archived = true; stats.skipped++; break; } case 'merge': { const targetId = result.merge_target_id; const targetNode = targetId ? getNode(graph, targetId) : null; if (targetNode && !targetNode.archived) { console.log(`[ST-BME] 记忆整合: merge ${newId} → ${targetId}`); if (result.merged_fields && typeof result.merged_fields === 'object') { for (const [key, value] of Object.entries(result.merged_fields)) { if (value != null && value !== '') { targetNode.fields[key] = value; } } } else { for (const [key, value] of Object.entries(newNode.fields)) { if (value != null && value !== '' && !targetNode.fields[key]) { targetNode.fields[key] = value; } } } if (Number.isFinite(newNode.seq) && newNode.seq > (targetNode.seq || 0)) { targetNode.seq = newNode.seq; } targetNode.embedding = null; newNode.archived = true; stats.merged++; } else { console.warn(`[ST-BME] 记忆整合: merge target ${targetId} 不存在,回退为 keep`); stats.kept++; } break; } case 'keep': default: { stats.kept++; break; } } // ── 处理 evolution ── const evolution = result.evolution; if (evolution?.should_evolve && !newNode.archived) { stats.evolved++; console.log(`[ST-BME] 记忆整合/进化触发: ${result.reason || '(无理由)'}`); if (Array.isArray(evolution.connections)) { for (const targetId of evolution.connections) { if (!getNode(graph, targetId)) continue; const edge = createEdge({ fromId: newId, toId: targetId, relation: 'related', strength: 0.7, }); if (addEdge(graph, edge)) { stats.connections++; } } } if (Array.isArray(evolution.neighbor_updates)) { for (const update of evolution.neighbor_updates) { if (!update.nodeId) continue; const oldNode = getNode(graph, update.nodeId); if (!oldNode || oldNode.archived) continue; let changed = false; if (update.newContext && typeof update.newContext === 'string') { if (oldNode.fields.state !== undefined) { oldNode.fields.state = update.newContext; changed = true; } else if (oldNode.fields.summary !== undefined) { oldNode.fields.summary = update.newContext; changed = true; } else if (oldNode.fields.core_note !== undefined) { oldNode.fields.core_note = update.newContext; changed = true; } } if (update.newTags && Array.isArray(update.newTags)) { oldNode.clusters = update.newTags; changed = true; } if (changed) { oldNode.embedding = null; if (!oldNode._evolutionHistory) oldNode._evolutionHistory = []; oldNode._evolutionHistory.push({ triggeredBy: newId, timestamp: Date.now(), reason: result.reason || '', }); stats.updates++; } } } } }