ST-Bionic-Memory-Ecology/consolidator.js

// ST-BME: 统一记忆整合引擎（批量化版）
// 合并 Mem0 精确对照 + A-MEM 记忆进化为单一阶段
// 批量 embed + 批量查近邻 + 单次 LLM 调用

import { embedBatch, searchSimilar } from './embedding.js';
import { addEdge, createEdge, getActiveNodes, getNode } from './graph.js';
import { callLLMForJSON } from './llm.js';
import {
    buildNodeVectorText,
    findSimilarNodesByText,
    isDirectVectorConfig,
    validateVectorConfig,
} from './vector-index.js';

function createAbortError(message = '操作已终止') {
    const error = new Error(message);
    error.name = 'AbortError';
    return error;
}

function isAbortError(error) {
    return error?.name === 'AbortError';
}

function throwIfAborted(signal) {
    if (signal?.aborted) {
        throw signal.reason instanceof Error ? signal.reason : createAbortError();
    }
}

/**
 * 统一记忆整合系统提示词（支持批量输出）
 */
const CONSOLIDATION_SYSTEM_PROMPT = `你是一个记忆整合分析器。当新记忆加入知识图谱时，你需要同时完成两项任务：

**任务一：冲突检测**
判断新记忆与最近邻的已有记忆是否冲突或重复：
- skip: 新记忆与已有记忆完全重复，应丢弃
- merge: 新记忆是对旧记忆的修正或补充，应合并
- keep: 新记忆是全新信息，应保留

**任务二：进化分析**（仅当 action=keep 时需要）
分析新记忆是否揭示了关于旧记忆的新信息：
- 建立有意义的关联连接
- 反向更新旧记忆的描述或分类

输出严格 JSON：
{
  "results": [
    {
      "node_id": "新记忆的节点 ID",
      "action": "keep" | "merge" | "skip",
      "merge_target_id": "仅 action=merge 时必填：要合并到的旧节点 ID",
      "merged_fields": { "仅 action=merge 时可选：合并后的字段更新" },
      "reason": "判定理由（简述）",
      "evolution": {
        "should_evolve": true/false,
        "connections": ["需要建立链接的旧记忆 ID 列表"],
        "neighbor_updates": [
          {
            "nodeId": "需更新的旧节点 ID",
            "newContext": "基于新信息修正后的描述（不需修改则为 null）",
            "newTags": ["更新后的分类标签，不需修改则为 null"]
          }
        ]
      }
    }
  ]
}

整合规则：
- 必须对每条新记忆都给出一个 result 条目
- 当 action=skip 时，evolution 可省略或设 should_evolve=false
- 当 action=merge 时，evolution 可省略或设 should_evolve=false
- 仅当 action=keep 且新信息确实改变了对旧记忆的理解时，才设 should_evolve=true
- 例如：揭露卧底身份 → 修正该角色之前事件中的动机描述
- 例如：发现地点的隐藏特性 → 更新地点节点的描述
- 不要对无关记忆强行建立联系
- neighbor_updates 中每条必须有实际意义的修改`;

/**
 * 统一记忆整合主函数（批量化版）
 *
 * 4 阶段架构：
 *   Phase 0: 收集有效新节点
 *   Phase 1: 批量 Embed（直连 1 次 embedBatch / 后端逐次）
 *   Phase 2: 各节点查近邻（直连本地 cosine / 后端逐次 query）
 *   Phase 3: 单次 LLM 批量判定
 *   Phase 4: 逐个处理结果
 *
 * @param {object} params
 * @param {object} params.graph - 当前图状态
 * @param {string[]} params.newNodeIds - 本次新创建的节点 ID 列表
 * @param {object} params.embeddingConfig - Embedding API 配置
 * @param {object} [params.options]
 * @param {number} [params.options.neighborCount=5]
 * @param {number} [params.options.conflictThreshold=0.85]
 * @param {string} [params.customPrompt]
 * @param {AbortSignal} [params.signal]
 * @returns {Promise<{merged: number, skipped: number, kept: number, evolved: number, connections: number, updates: number}>}
 */
export async function consolidateMemories({
    graph,
    newNodeIds,
    embeddingConfig,
    options = {},
    customPrompt,
    signal,
}) {
    const neighborCount = options.neighborCount ?? 5;
    const conflictThreshold = options.conflictThreshold ?? 0.85;
    const stats = {
        merged: 0,
        skipped: 0,
        kept: 0,
        evolved: 0,
        connections: 0,
        updates: 0,
    };

    if (!newNodeIds || newNodeIds.length === 0) return stats;
    if (!validateVectorConfig(embeddingConfig).valid) {
        console.log('[ST-BME] 记忆整合跳过：向量配置不可用');
        return stats;
    }

    // ══════════════════════════════════════════════
    // Phase 0: 收集有效新节点
    // ══════════════════════════════════════════════
    const newEntries = [];
    for (const id of newNodeIds) {
        const node = getNode(graph, id);
        if (!node || node.archived) continue;
        const text = buildNodeVectorText(node);
        if (!text) continue;
        newEntries.push({ id, node, text });
    }

    if (newEntries.length === 0) return stats;

    const activeNodes = getActiveNodes(graph).filter(n => {
        const text = buildNodeVectorText(n);
        return typeof text === 'string' && text.length > 0;
    });

    if (activeNodes.length < 2) {
        // 图中节点不够，全部 keep
        stats.kept = newEntries.length;
        return stats;
    }

    throwIfAborted(signal);
    console.log(`[ST-BME] 记忆整合开始: ${newEntries.length} 个新节点`);

    // ══════════════════════════════════════════════
    // Phase 1 + 2: 批量 Embed + 查近邻
    // ══════════════════════════════════════════════
    /** @type {Map<string, Array<{nodeId: string, score: number}>>} */
    const neighborsMap = new Map();

    if (isDirectVectorConfig(embeddingConfig)) {
        // ── 直连模式: 1 次 embedBatch + N 次本地 cosine ──
        const texts = newEntries.map(e => e.text);
        let queryVectors;

        try {
            queryVectors = await embedBatch(texts, embeddingConfig, { signal });
        } catch (e) {
            if (isAbortError(e)) throw e;
            console.warn('[ST-BME] 批量 embed 失败，回退到逐条:', e.message);
            queryVectors = null;
        }

        // 构建候选池（含 embedding 的活跃节点）
        const candidatePool = activeNodes
            .filter(n => Array.isArray(n.embedding) && n.embedding.length > 0)
            .map(n => ({ nodeId: n.id, embedding: n.embedding }));

        for (let i = 0; i < newEntries.length; i++) {
            throwIfAborted(signal);
            const entry = newEntries[i];
            const candidates = candidatePool.filter(c => c.nodeId !== entry.id);

            if (queryVectors?.[i] && candidates.length > 0) {
                // 本地 cosine 搜索（0 API 调用）
                const neighbors = searchSimilar(queryVectors[i], candidates, neighborCount);
                neighborsMap.set(entry.id, neighbors);
            } else {
                // fallback: 逐条 embed
                try {
                    const neighbors = await findSimilarNodesByText(
                        graph, entry.text, embeddingConfig, neighborCount,
                        activeNodes.filter(n => n.id !== entry.id), signal,
                    );
                    neighborsMap.set(entry.id, neighbors);
                } catch (e) {
                    if (isAbortError(e)) throw e;
                    console.warn(`[ST-BME] 近邻查询失败 (${entry.id}):`, e.message);
                    neighborsMap.set(entry.id, []);
                }
            }
        }
    } else {
        // ── 后端模式: 逐条 /api/vector/query ──
        for (let i = 0; i < newEntries.length; i++) {
            throwIfAborted(signal);
            const entry = newEntries[i];
            try {
                const neighbors = await findSimilarNodesByText(
                    graph, entry.text, embeddingConfig, neighborCount,
                    activeNodes.filter(n => n.id !== entry.id), signal,
                );
                neighborsMap.set(entry.id, neighbors);
            } catch (e) {
                if (isAbortError(e)) throw e;
                console.warn(`[ST-BME] 近邻查询失败 (${entry.id}):`, e.message);
                neighborsMap.set(entry.id, []);
            }
        }
    }

    // ══════════════════════════════════════════════
    // Phase 3: 单次 LLM 批量判定
    // ══════════════════════════════════════════════
    throwIfAborted(signal);

    const userPromptSections = [];
    userPromptSections.push(`本轮共新增 ${newEntries.length} 条记忆，请逐条分析：\n`);

    for (let i = 0; i < newEntries.length; i++) {
        const entry = newEntries[i];
        const neighbors = neighborsMap.get(entry.id) || [];

        const newNodeFieldsStr = Object.entries(entry.node.fields)
            .map(([k, v]) => `${k}: ${v}`)
            .join(', ');

        // 构建近邻描述
        let neighborText;
        if (neighbors.length === 0) {
            neighborText = '  (无近邻命中)';
        } else {
            neighborText = neighbors.map(n => {
                const node = getNode(graph, n.nodeId);
                if (!node) return null;
                const fieldsStr = Object.entries(node.fields)
                    .map(([k, v]) => `${k}: ${v}`)
                    .join(', ');
                return `  - [${node.id}] 类型=${node.type}, ${fieldsStr} (相似度=${n.score.toFixed(3)})`;
            }).filter(Boolean).join('\n');
        }

        // 检查高相似度
        const hasHighSimilarity = neighbors.length > 0 && neighbors[0].score > conflictThreshold;
        const hint = hasHighSimilarity
            ? `  ⚠ 最高相似度 ${neighbors[0].score.toFixed(3)} 超过阈值 ${conflictThreshold}`
            : '';

        userPromptSections.push([
            `### 新记忆 #${i + 1}`,
            `[${entry.id}] 类型=${entry.node.type}, ${newNodeFieldsStr}`,
            '近邻记忆:',
            neighborText,
            hint,
        ].filter(Boolean).join('\n'));
    }

    const userPrompt = userPromptSections.join('\n\n');

    let decision;
    try {
        decision = await callLLMForJSON({
            systemPrompt: customPrompt || CONSOLIDATION_SYSTEM_PROMPT,
            userPrompt,
            maxRetries: 1,
            signal,
        });
    } catch (e) {
        if (isAbortError(e)) throw e;
        console.error('[ST-BME] 记忆整合 LLM 调用失败:', e);
        stats.kept = newEntries.length;
        return stats;
    }

    // ══════════════════════════════════════════════
    // Phase 4: 逐个处理结果
    // ══════════════════════════════════════════════

    // 解析 LLM 返回——兼容单条和批量格式
    let results;
    if (Array.isArray(decision?.results)) {
        results = decision.results;
    } else if (decision?.action) {
        // 单条返回格式（LLM 可能忽略 results 包装）
        results = [{ ...decision, node_id: newEntries[0]?.id }];
    } else {
        console.warn('[ST-BME] 记忆整合: LLM 返回格式异常，全部 keep');
        stats.kept = newEntries.length;
        return stats;
    }

    // 建立 node_id → result 的映射
    const resultMap = new Map();
    for (const r of results) {
        if (r.node_id) resultMap.set(r.node_id, r);
    }

    // 处理每个新节点
    for (const entry of newEntries) {
        const result = resultMap.get(entry.id);
        if (!result) {
            // LLM 未返回此节点的结果，fallback 为 keep
            stats.kept++;
            continue;
        }

        processOneResult(graph, entry, result, stats);
    }

    // 日志
    const actionSummary = [];
    if (stats.merged > 0) actionSummary.push(`合并 ${stats.merged}`);
    if (stats.skipped > 0) actionSummary.push(`跳过 ${stats.skipped}`);
    if (stats.kept > 0) actionSummary.push(`保留 ${stats.kept}`);
    if (stats.evolved > 0) actionSummary.push(`进化 ${stats.evolved}`);
    if (stats.connections > 0) actionSummary.push(`新链接 ${stats.connections}`);
    if (stats.updates > 0) actionSummary.push(`回溯更新 ${stats.updates}`);

    if (actionSummary.length > 0) {
        console.log(`[ST-BME] 记忆整合完成: ${actionSummary.join(', ')}`);
    }

    return stats;
}

/**
 * 处理单个节点的整合结果
 */
function processOneResult(graph, entry, result, stats) {
    const { id: newId, node: newNode } = entry;

    // ── 处理 action ──
    switch (result.action) {
        case 'skip': {
            console.log(`[ST-BME] 记忆整合: skip (重复) — ${newId}`);
            newNode.archived = true;
            stats.skipped++;
            break;
        }

        case 'merge': {
            const targetId = result.merge_target_id;
            const targetNode = targetId ? getNode(graph, targetId) : null;

            if (targetNode && !targetNode.archived) {
                console.log(`[ST-BME] 记忆整合: merge ${newId} → ${targetId}`);

                if (result.merged_fields && typeof result.merged_fields === 'object') {
                    for (const [key, value] of Object.entries(result.merged_fields)) {
                        if (value != null && value !== '') {
                            targetNode.fields[key] = value;
                        }
                    }
                } else {
                    for (const [key, value] of Object.entries(newNode.fields)) {
                        if (value != null && value !== '' && !targetNode.fields[key]) {
                            targetNode.fields[key] = value;
                        }
                    }
                }

                if (Number.isFinite(newNode.seq) && newNode.seq > (targetNode.seq || 0)) {
                    targetNode.seq = newNode.seq;
                }

                targetNode.embedding = null;
                newNode.archived = true;
                stats.merged++;
            } else {
                console.warn(`[ST-BME] 记忆整合: merge target ${targetId} 不存在，回退为 keep`);
                stats.kept++;
            }
            break;
        }

        case 'keep':
        default: {
            stats.kept++;
            break;
        }
    }

    // ── 处理 evolution ──
    const evolution = result.evolution;
    if (evolution?.should_evolve && !newNode.archived) {
        stats.evolved++;
        console.log(`[ST-BME] 记忆整合/进化触发: ${result.reason || '(无理由)'}`);

        if (Array.isArray(evolution.connections)) {
            for (const targetId of evolution.connections) {
                if (!getNode(graph, targetId)) continue;
                const edge = createEdge({
                    fromId: newId,
                    toId: targetId,
                    relation: 'related',
                    strength: 0.7,
                });
                if (addEdge(graph, edge)) {
                    stats.connections++;
                }
            }
        }

        if (Array.isArray(evolution.neighbor_updates)) {
            for (const update of evolution.neighbor_updates) {
                if (!update.nodeId) continue;
                const oldNode = getNode(graph, update.nodeId);
                if (!oldNode || oldNode.archived) continue;

                let changed = false;

                if (update.newContext && typeof update.newContext === 'string') {
                    if (oldNode.fields.state !== undefined) {
                        oldNode.fields.state = update.newContext;
                        changed = true;
                    } else if (oldNode.fields.summary !== undefined) {
                        oldNode.fields.summary = update.newContext;
                        changed = true;
                    } else if (oldNode.fields.core_note !== undefined) {
                        oldNode.fields.core_note = update.newContext;
                        changed = true;
                    }
                }

                if (update.newTags && Array.isArray(update.newTags)) {
                    oldNode.clusters = update.newTags;
                    changed = true;
                }

                if (changed) {
                    oldNode.embedding = null;
                    if (!oldNode._evolutionHistory) oldNode._evolutionHistory = [];
                    oldNode._evolutionHistory.push({
                        triggeredBy: newId,
                        timestamp: Date.now(),
                        reason: result.reason || '',
                    });
                    stats.updates++;
                }
            }
        }
    }
}