docs: add project README and initial ST-BME files

2026-05-15 22:30:38 +08:00 · 2026-03-23 03:57:59 +08:00
commit 436715216e
17 changed files with 4145 additions and 0 deletions
--- a/extractor.js
+++ b/extractor.js
@@ -0,0 +1,500 @@
+// ST-BME: LLM 记忆提取管线（写入路径）
+// 分析对话 → 提取节点和关系 → 更新图谱
+// v2: 融合 Mem0 精确对照 + Graphiti 时序边 + MemoRAG 全局概要
+
+import { createNode, addNode, updateNode, findLatestNode, createEdge, addEdge, getActiveNodes, invalidateEdge } from './graph.js';
+import { embedText, embedBatch, searchSimilar } from './embedding.js';
+import { callLLMForJSON } from './llm.js';
+import { RELATION_TYPES } from './schema.js';
+
+/**
+ * Runs the memory-extraction pipeline over a batch of unprocessed dialogue messages.
+ *
+ * Steps: build the prompts (dialogue text, graph overview, schema description),
+ * ask the LLM for a JSON operation list, optionally run the Mem0-style conflict
+ * check, apply every operation to the graph, embed nodes that have no vector yet,
+ * and finally advance `graph.lastProcessedSeq`.
+ *
+ * @param {object} params
+ * @param {object} params.graph - Current graph state (mutated in place)
+ * @param {Array<{role: string, content: string}>} params.messages - Dialogue messages to process
+ * @param {number} params.startSeq - Sequence (floor) number the batch starts at
+ * @param {object[]} params.schema - Node type schema
+ * @param {object} params.embeddingConfig - Embedding API configuration
+ * @param {string} [params.extractPrompt] - Custom system prompt for extraction
+ * @param {object} [params.v2Options] - v2 enhancement options
+ * @param {boolean} [params.v2Options.enablePreciseConflict=true] - Run the conflict check before applying operations
+ * @param {number} [params.v2Options.conflictThreshold=0.85] - Similarity score above which the conflict check consults the LLM
+ * @returns {Promise<{success: boolean, newNodes: number, updatedNodes: number, newEdges: number, newNodeIds: string[]}>}
+ */
+export async function extractMemories({
+    graph,
+    messages,
+    startSeq,
+    schema,
+    embeddingConfig,
+    extractPrompt,
+    v2Options = {},
+}) {
+    if (!messages || messages.length === 0) {
+        return { success: true, newNodes: 0, updatedNodes: 0, newEdges: 0, newNodeIds: [] };
+    }
+
+    const enablePreciseConflict = v2Options.enablePreciseConflict ?? true;
+    const conflictThreshold = v2Options.conflictThreshold ?? 0.85;
+
+    console.log(`[ST-BME] 提取开始: 楼层 ${startSeq}, ${messages.length} 条消息`);
+
+    // Flatten the dialogue into a single text block
+    const dialogueText = messages
+        .map(m => `[${m.role}]: ${m.content}`)
+        .join('\n\n');
+
+    // Overview of the current graph so the LLM can see existing nodes and avoid duplicates
+    const graphOverview = buildGraphOverview(graph, schema);
+
+    // Human-readable schema description
+    const schemaDescription = buildSchemaDescription(schema);
+
+    // System prompt: caller-supplied or the built-in default
+    const systemPrompt = extractPrompt || buildDefaultExtractPrompt(schema);
+
+    // User prompt
+    const userPrompt = [
+        '## 当前对话内容（需提取记忆）',
+        dialogueText,
+        '',
+        '## 当前图谱状态',
+        graphOverview || '(空图谱，尚无节点)',
+        '',
+        '## 节点类型定义',
+        schemaDescription,
+        '',
+        '请分析对话，按 JSON 格式输出操作列表。',
+    ].join('\n');
+
+    // Call the LLM
+    const result = await callLLMForJSON({ systemPrompt, userPrompt, maxRetries: 2 });
+
+    // The operation list comes from the LLM: anything but an array cannot be iterated safely
+    if (!Array.isArray(result?.operations)) {
+        console.warn('[ST-BME] 提取 LLM 未返回有效操作');
+        return { success: false, newNodes: 0, updatedNodes: 0, newEdges: 0, newNodeIds: [] };
+    }
+
+    // ========== v2: Mem0-style precise conflict check ==========
+    if (enablePreciseConflict && embeddingConfig?.apiUrl) {
+        await mem0ConflictCheck(graph, result.operations, embeddingConfig, conflictThreshold);
+    }
+
+    // Apply the operations
+    const stats = { newNodes: 0, updatedNodes: 0, newEdges: 0 };
+    const newNodeIds = [];   // v2: IDs of newly created nodes (consumed by the evolution engine)
+    const refMap = new Map();
+
+    for (const op of result.operations) {
+        try {
+            switch (op.action) {
+                case 'create': {
+                    const createdId = handleCreate(graph, op, startSeq, schema, refMap, stats);
+                    if (createdId) newNodeIds.push(createdId);
+                    break;
+                }
+                case 'update': {
+                    handleUpdate(graph, op, stats);
+                    // A create that the conflict check rewrote into an update still carries
+                    // `ref`/`links`. Register the ref and attach the links to the updated node,
+                    // otherwise later `targetRef` lookups miss and the edges are silently lost.
+                    const target = op.nodeId ? graph.nodes.find(n => n.id === op.nodeId) : null;
+                    if (target) {
+                        if (op.ref) refMap.set(op.ref, target.id);
+                        if (Array.isArray(op.links)) {
+                            handleLinks(graph, target.id, op.links, refMap, stats);
+                        }
+                    }
+                    break;
+                }
+                case 'delete':
+                    handleDelete(graph, op, stats);
+                    break;
+                case '_skip':
+                    // Conflict check judged this a duplicate; nothing to do
+                    break;
+                default:
+                    console.warn(`[ST-BME] 未知操作类型: ${op.action}`);
+            }
+        } catch (e) {
+            console.error(`[ST-BME] 操作执行失败:`, op, e);
+        }
+    }
+
+    // Embed nodes that have no vector yet. The graph has already been mutated, so an
+    // embedding failure must not reject the whole call: that would leave
+    // `lastProcessedSeq` untouched and the same batch would be extracted again.
+    // Nodes still lacking a vector are picked up by the next run.
+    try {
+        await generateNodeEmbeddings(graph, embeddingConfig);
+    } catch (e) {
+        console.warn('[ST-BME] 节点 embedding 生成失败，将在下次提取时重试:', e);
+    }
+
+    // Advance the processing progress
+    graph.lastProcessedSeq = startSeq + messages.filter(m => m.role === 'assistant').length;
+
+    console.log(`[ST-BME] 提取完成: 新建 ${stats.newNodes}, 更新 ${stats.updatedNodes}, 新边 ${stats.newEdges}`);
+
+    return { success: true, ...stats, newNodeIds };
+}
+
+/**
+ * Applies a `create` operation to the graph.
+ *
+ * For a `latestOnly` type with a `name` field, an existing node found by
+ * `findLatestNode` is updated instead of creating a duplicate.
+ *
+ * @param {object} graph - Graph state (mutated in place)
+ * @param {object} op - Operation produced by the LLM
+ * @param {number} seq - Sequence number stamped on the created/updated node
+ * @param {object[]} schema - Node type schema
+ * @param {Map<string, string>} refMap - Batch-local map from `op.ref` to node ID
+ * @param {{newNodes: number, updatedNodes: number, newEdges: number}} stats - Counters, incremented in place
+ * @returns {string|null} ID of the newly created node, or null when the type is
+ *   unknown or the operation was folded into an update of an existing node
+ */
+function handleCreate(graph, op, seq, schema, refMap, stats) {
+    const typeDef = schema.find(s => s.id === op.type);
+    if (!typeDef) {
+        console.warn(`[ST-BME] 未知节点类型: ${op.type}`);
+        return null;
+    }
+
+    // latestOnly types: reuse the existing node with the same name, if any
+    if (typeDef.latestOnly && op.fields?.name) {
+        const existing = findLatestNode(graph, op.type, op.fields.name);
+        if (existing) {
+            // Turn the create into an update of the existing node
+            updateNode(graph, existing.id, { fields: op.fields, seq });
+            stats.updatedNodes++;
+
+            // The fields changed, so the stored vector no longer matches them. Clear it
+            // (as handleUpdate does) so the next embedding pass regenerates it.
+            const stored = graph.nodes.find(n => n.id === existing.id);
+            if (stored) stored.embedding = null;
+
+            if (op.ref) refMap.set(op.ref, existing.id);
+
+            // Attach the operation's links to the existing node
+            if (op.links) {
+                handleLinks(graph, existing.id, op.links, refMap, stats);
+            }
+            return null;
+        }
+    }
+
+    // Create a new node
+    const node = createNode({
+        type: op.type,
+        fields: op.fields || {},
+        seq,
+        importance: op.importance ?? 5.0,
+        clusters: op.clusters || [],
+    });
+
+    addNode(graph, node);
+    stats.newNodes++;
+
+    // Remember the ref so later operations in the same batch can link to this node
+    if (op.ref) {
+        refMap.set(op.ref, node.id);
+    }
+
+    // Attach the operation's links to the new node
+    if (op.links) {
+        handleLinks(graph, node.id, op.links, refMap, stats);
+    }
+
+    return node.id;
+}
+
+/**
+ * Applies an `update` operation: rewrites the target node's fields, drops its
+ * cached embedding and invalidates its still-valid `updates` /
+ * `temporal_update` edges (v2, Graphiti-style temporal edges).
+ *
+ * @param {object} graph - Graph state (mutated in place)
+ * @param {object} op - Operation carrying `nodeId` and optional `fields`
+ * @param {{updatedNodes: number}} stats - Counters, incremented in place
+ */
+function handleUpdate(graph, op, stats) {
+    const { nodeId } = op;
+    if (!nodeId) {
+        console.warn('[ST-BME] update 操作缺少 nodeId');
+        return;
+    }
+
+    const wasUpdated = updateNode(graph, nodeId, { fields: op.fields || {} });
+    if (!wasUpdated) return;
+
+    stats.updatedNodes++;
+
+    // The fields changed, so the stored vector has to be regenerated
+    const target = graph.nodes.find(candidate => candidate.id === nodeId);
+    if (target) target.embedding = null;
+
+    // Collect first, then invalidate: every active temporal edge touching the node
+    const temporalRelations = ['updates', 'temporal_update'];
+    const staleEdges = graph.edges.filter(edge => {
+        const touchesNode = edge.fromId === nodeId || edge.toId === nodeId;
+        return touchesNode && temporalRelations.includes(edge.relation) && !edge.invalidAt;
+    });
+    staleEdges.forEach(edge => {
+        invalidateEdge(edge);
+    });
+}
+
+/**
+ * Applies a `delete` operation as a soft delete: the node stays in the graph
+ * and is only flagged `archived`.
+ *
+ * @param {object} graph - Graph state (mutated in place)
+ * @param {object} op - Operation carrying `nodeId`
+ * @param {object} stats - Counters (not modified by this handler)
+ */
+function handleDelete(graph, op, stats) {
+    const { nodeId } = op;
+    if (!nodeId) return;
+
+    const target = graph.nodes.find(candidate => candidate.id === nodeId);
+    if (!target) return;
+
+    target.archived = true; // soft delete
+}
+
+/**
+ * Creates edges from `sourceId` to every resolvable link target.
+ *
+ * A target is given either directly (`targetNodeId`) or as a batch-local
+ * reference (`targetRef`) resolved through `refMap`. Links whose target cannot
+ * be resolved are skipped. Relations not listed in RELATION_TYPES fall back to
+ * `'related'`.
+ *
+ * @param {object} graph - Graph state (mutated in place)
+ * @param {string} sourceId - ID of the node the edges start from
+ * @param {object[]} links - Link descriptors produced by the LLM
+ * @param {Map<string, string>} refMap - Batch-local map from ref to node ID
+ * @param {{newEdges: number}} stats - Counters, incremented in place
+ */
+function handleLinks(graph, sourceId, links, refMap, stats) {
+    // `links` is LLM output: a non-array value must not throw and abort the caller
+    if (!Array.isArray(links)) return;
+
+    for (const link of links) {
+        // Skip malformed entries instead of losing every link that follows them
+        if (!link || typeof link !== 'object') continue;
+
+        let targetId = link.targetNodeId || null;
+
+        // Resolve the target through its batch-local ref
+        if (!targetId && link.targetRef) {
+            targetId = refMap.get(link.targetRef);
+        }
+
+        if (!targetId) continue;
+
+        // Validate the relation type
+        const relation = RELATION_TYPES.includes(link.relation)
+            ? link.relation
+            : 'related';
+
+        // `contradicts` edges are created with edgeType 255, every other relation with 0
+        const edgeType = relation === 'contradicts' ? 255 : 0;
+
+        const edge = createEdge({
+            fromId: sourceId,
+            toId: targetId,
+            relation,
+            strength: link.strength ?? 0.8,
+            edgeType,
+        });
+
+        if (addEdge(graph, edge)) {
+            stats.newEdges++;
+        }
+    }
+}
+
+/**
+ * Generates vectors for every non-archived node that has no embedding yet.
+ * Does nothing when no embedding API URL is configured.
+ *
+ * @param {object} graph - Graph state; matching nodes get their `embedding` set
+ * @param {object} embeddingConfig - Embedding API configuration
+ * @returns {Promise<void>}
+ */
+async function generateNodeEmbeddings(graph, embeddingConfig) {
+    if (!embeddingConfig?.apiUrl) return;
+
+    const pending = graph.nodes.filter(node => !node.embedding && !node.archived);
+    if (pending.length === 0) return;
+
+    // Fields that make up the text to embed, in output order
+    const textFields = ['summary', 'name', 'title', 'traits', 'state', 'constraint'];
+    const texts = pending.map(node => {
+        const parts = textFields
+            .map(key => node.fields[key])
+            .filter(Boolean);
+        // Fall back to the node type when none of the fields is filled in
+        return parts.join(' | ') || node.type;
+    });
+
+    console.log(`[ST-BME] 为 ${texts.length} 个节点生成 embedding`);
+
+    const vectors = await embedBatch(texts, embeddingConfig);
+
+    pending.forEach((node, index) => {
+        const vector = vectors[index];
+        if (vector) {
+            node.embedding = Array.from(vector);
+        }
+    });
+}
+
+/**
+ * Builds the textual graph overview shown to the LLM.
+ *
+ * Nodes are grouped by schema type in schema order. Each group lists the total
+ * node count but only its last 10 nodes. Archived nodes and types without any
+ * active node are left out.
+ *
+ * @param {object} graph - Graph state
+ * @param {object[]} schema - Node type schema
+ * @returns {string} Overview text, or '' when the graph has no active node
+ */
+function buildGraphOverview(graph, schema) {
+    const visible = graph.nodes.filter(node => !node.archived);
+    if (visible.length === 0) return '';
+
+    const sections = schema.flatMap(typeDef => {
+        const members = visible.filter(node => node.type === typeDef.id);
+        if (members.length === 0) return [];
+
+        const heading = `### ${typeDef.label} (${members.length} 个节点)`;
+        // Only the 10 most recent nodes of each type are listed
+        const entries = members.slice(-10).map(node => {
+            const { fields } = node;
+            const caption = fields.summary || fields.name || fields.title || '(无)';
+            return `  - [${node.id}] ${caption}`;
+        });
+        return [heading, ...entries];
+    });
+
+    return sections.join('\n');
+}
+
+/**
+ * Renders the node type schema as text: one paragraph per type, with one
+ * indented line per column (name, required marker, hint).
+ *
+ * @param {object[]} schema - Node type schema; each entry has `id`, `label` and `columns`
+ * @returns {string} Schema description, with types separated by a blank line
+ */
+function buildSchemaDescription(schema) {
+    const describeColumn = column => {
+        const requiredMark = column.required ? '(必填)' : '';
+        return `${column.name}${requiredMark}: ${column.hint}`;
+    };
+
+    const paragraphs = [];
+    for (const typeDef of schema) {
+        const columnLines = typeDef.columns.map(describeColumn).join('\n    ');
+        paragraphs.push(`类型 "${typeDef.id}" (${typeDef.label}):\n    ${columnLines}`);
+    }
+
+    return paragraphs.join('\n\n');
+}
+
+/**
+ * Builds the default system prompt for extraction: role statement, supported
+ * node types, the expected JSON output format (with an example) and the
+ * extraction rules.
+ *
+ * @param {object[]} schema - Node type schema; each entry contributes `id(label)`
+ * @returns {string} Prompt text, one entry per line
+ */
+function buildDefaultExtractPrompt(schema) {
+    const supportedTypes = schema
+        .map(typeDef => `${typeDef.id}(${typeDef.label})`)
+        .join(', ');
+
+    // Role statement and the list of node types
+    const intro = [
+        '你是一个记忆提取分析器。从对话中提取结构化记忆节点并存入知识图谱。',
+        '',
+        `支持的节点类型：${supportedTypes}`,
+        '',
+    ];
+
+    // Expected output format, shown as a literal JSON example
+    const outputFormat = [
+        '输出格式为严格 JSON：',
+        '{',
+        '  "thought": "你对本段对话的分析（事件/角色变化/新信息）",',
+        '  "operations": [',
+        '    {',
+        '      "action": "create",',
+        '      "type": "event",',
+        '      "fields": {"summary": "...", "participants": "...", "status": "ongoing"},',
+        '      "importance": 6,',
+        '      "ref": "evt1",',
+        '      "links": [',
+        '        {"targetNodeId": "existing-id", "relation": "involved_in", "strength": 0.9},',
+        '        {"targetRef": "char1", "relation": "occurred_at", "strength": 0.8}',
+        '      ]',
+        '    },',
+        '    {',
+        '      "action": "update",',
+        '      "nodeId": "existing-node-id",',
+        '      "fields": {"state": "新的状态"}',
+        '    }',
+        '  ]',
+        '}',
+        '',
+    ];
+
+    // Extraction rules, including the allowed relation types
+    const rules = [
+        '规则：',
+        '- 每批对话最多创建 1 个事件节点，多个子事件合并为一条',
+        '- 角色/地点节点：如果图中已有同名节点，用 update 而非 create',
+        `- 关系类型限定：${RELATION_TYPES.join(', ')}`,
+        '- contradicts 关系用于矛盾/冲突信息',
+        '- evolves 关系用于新信息揭示旧记忆需修正的情况',
+        '- temporal_update 关系用于实体状态的时序变化',
+        '- 不要虚构内容，只提取对话中有证据支持的信息',
+        '- importance 范围 1-10，普通事件 5，关键转折 8+',
+        '- summary 应该是摘要抽象，不要复制原文',
+    ];
+
+    return intro.concat(outputFormat, rules).join('\n');
+}
+
+// ==================== v2 增强功能 ====================
+
+/**
+ * Mem0-inspired precise conflict check.
+ *
+ * For every `create` operation, the fact text is embedded and compared with the
+ * active nodes that already have a vector. When the best match scores above
+ * `threshold`, the LLM decides whether to add, update or skip, and the
+ * operation is rewritten in place: `update` turns it into an update of the
+ * matched node, `skip` marks it `_skip`, and anything else leaves it unchanged.
+ * A failure while checking one operation leaves that operation as it was.
+ *
+ * @param {object} graph - Graph state
+ * @param {object[]} operations - Operation list from the LLM (rewritten in place)
+ * @param {object} embeddingConfig - Embedding API configuration
+ * @param {number} threshold - Similarity score above which the LLM is consulted
+ * @returns {Promise<void>}
+ */
+async function mem0ConflictCheck(graph, operations, embeddingConfig, threshold) {
+    const activeNodes = getActiveNodes(graph).filter(n => n.embedding);
+    if (activeNodes.length === 0) return;
+
+    // The candidate list does not depend on the operation, so build it once
+    const candidates = activeNodes.map(n => ({ nodeId: n.id, embedding: n.embedding }));
+
+    for (const op of operations) {
+        // `op` is LLM output; a null entry must not abort the whole check
+        if (op?.action !== 'create') continue;
+
+        const factText = op.fields?.summary || op.fields?.name || op.fields?.title || '';
+        if (!factText) continue;
+
+        try {
+            const factVec = await embedText(factText, embeddingConfig);
+            if (!factVec) continue;
+
+            const similar = searchSimilar(factVec, candidates, 3);
+            if (similar.length === 0 || !(similar[0].score > threshold)) continue;
+
+            const topMatch = graph.nodes.find(n => n.id === similar[0].nodeId);
+            if (!topMatch) continue;
+
+            const topFields = Object.entries(topMatch.fields)
+                .map(([k, v]) => `${k}: ${v}`)
+                .join(', ');
+
+            const decision = await callLLMForJSON({
+                systemPrompt: [
+                    '判断新信息与已有记忆的关系。输出严格 JSON：',
+                    '{"action": "add"|"update"|"skip", "targetId": "旧节点ID", "mergedFields": {}}',
+                    '- add: 新信息完全不同，应新建',
+                    '- update: 新信息是对旧记忆的修正/补充',
+                    '- skip: 与旧记忆完全重复',
+                ].join('\n'),
+                userPrompt: [
+                    `新信息: [${op.type}] ${factText}`,
+                    `最相似旧记忆: [${topMatch.id}] 类型=${topMatch.type}, ${topFields}`,
+                    `相似度: ${similar[0].score.toFixed(3)}`,
+                ].join('\n'),
+                maxRetries: 1,
+            });
+
+            if (decision?.action === 'update') {
+                // The LLM was only shown `topMatch`. A missing or unknown targetId would turn
+                // the create into an update of a non-existent node and lose the new fact,
+                // so fall back to the node that was actually compared.
+                const targetExists = graph.nodes.some(n => n.id === decision.targetId);
+                const targetId = targetExists ? decision.targetId : topMatch.id;
+
+                console.log(`[ST-BME] Mem0对照: create->update (${targetId})`);
+                op.action = 'update';
+                op.nodeId = targetId;
+
+                // Only merge a real object; spreading a string or array would add junk keys
+                const merged = decision.mergedFields;
+                if (merged && typeof merged === 'object' && !Array.isArray(merged)) {
+                    op.fields = { ...op.fields, ...merged };
+                }
+            } else if (decision?.action === 'skip') {
+                console.log('[ST-BME] Mem0对照: create->skip (重复)');
+                op.action = '_skip';
+            }
+        } catch (e) {
+            console.warn('[ST-BME] Mem0对照失败，保持原操作:', e?.message ?? e);
+        }
+    }
+}
+
+/**
+ * Generates or refreshes the global story synopsis (MemoRAG-inspired).
+ *
+ * Summarises the active event, character and thread nodes through the LLM and
+ * stores the result in the single active `synopsis` node, creating it when it
+ * does not exist yet. Does nothing with fewer than 3 active events or when the
+ * LLM returns no summary.
+ *
+ * @param {object} params
+ * @param {object} params.graph - Graph state (mutated in place)
+ * @param {object[]} params.schema - Node type schema (not read by this function)
+ * @param {number} params.currentSeq - Current sequence number, used for the scope label and new-node seq
+ * @returns {Promise<void>}
+ */
+export async function generateSynopsis({ graph, schema, currentSeq }) {
+    const events = getActiveNodes(graph, 'event');
+    events.sort((left, right) => left.seq - right.seq);
+
+    // Too little material for a meaningful synopsis
+    if (events.length < 3) return;
+
+    const timeline = events
+        .map(node => `[楼${node.seq}] ${node.fields.summary || '(无)'}`)
+        .join('\n');
+
+    const characterStates = getActiveNodes(graph, 'character')
+        .map(node => `${node.fields.name}: ${node.fields.state || '(无状态)'}`)
+        .join('; ');
+
+    const threadStates = getActiveNodes(graph, 'thread')
+        .map(node => `${node.fields.title}: ${node.fields.status || 'active'}`)
+        .join('; ');
+
+    const systemPrompt = [
+        '你是故事概要生成器。根据事件线、角色和主线生成简洁的前情提要。',
+        '输出 JSON：{"summary": "前情提要文本（200字以内）"}',
+        '要求：涵盖核心冲突、关键转折、主要角色当前状态。',
+    ].join('\n');
+
+    const userPrompt = [
+        '## 事件时间线',
+        timeline,
+        '',
+        '## 角色状态',
+        characterStates || '(无)',
+        '',
+        '## 活跃主线',
+        threadStates || '(无)',
+    ].join('\n');
+
+    const result = await callLLMForJSON({ systemPrompt, userPrompt, maxRetries: 1 });
+    if (!result?.summary) return;
+
+    const synopsisFields = { summary: result.summary, scope: `楼 1 ~ ${currentSeq}` };
+    const current = graph.nodes.find(node => node.type === 'synopsis' && !node.archived);
+
+    if (!current) {
+        const created = createNode({
+            type: 'synopsis',
+            fields: synopsisFields,
+            seq: currentSeq,
+            importance: 9.0,
+        });
+        addNode(graph, created);
+        console.log('[ST-BME] 全局概要已创建');
+        return;
+    }
+
+    updateNode(graph, current.id, { fields: synopsisFields });
+    // The text changed, so the stored vector has to be regenerated
+    current.embedding = null;
+    console.log('[ST-BME] 全局概要已更新');
+}