Implement vector recovery and refresh docs

This commit is contained in:
Youzini-afk
2026-03-24 12:38:46 +08:00
parent 7b68eebb9e
commit 50ee8cc8ed
13 changed files with 2668 additions and 805 deletions

1191
README.md

File diff suppressed because it is too large Load Diff

View File

@@ -4,6 +4,7 @@
import { createNode, addNode, createEdge, addEdge, getActiveNodes, getNode } from './graph.js'; import { createNode, addNode, createEdge, addEdge, getActiveNodes, getNode } from './graph.js';
import { callLLMForJSON } from './llm.js'; import { callLLMForJSON } from './llm.js';
import { embedText } from './embedding.js'; import { embedText } from './embedding.js';
import { isDirectVectorConfig } from './vector-index.js';
/** /**
* 对指定类型执行层级压缩 * 对指定类型执行层级压缩
@@ -94,7 +95,7 @@ async function compressLevel({ graph, typeDef, level, embeddingConfig, force })
compressedNode.childIds = batch.map(n => n.id); compressedNode.childIds = batch.map(n => n.id);
// 生成 embedding // 生成 embedding
if (embeddingConfig?.apiUrl && summaryResult.fields.summary) { if (isDirectVectorConfig(embeddingConfig) && summaryResult.fields.summary) {
const vec = await embedText(summaryResult.fields.summary, embeddingConfig); const vec = await embedText(summaryResult.fields.summary, embeddingConfig);
if (vec) compressedNode.embedding = Array.from(vec); if (vec) compressedNode.embedding = Array.from(vec);
} }

View File

@@ -2,8 +2,12 @@
// 新节点写入后触发,回溯更新相关旧节点的 context/tags/links // 新节点写入后触发,回溯更新相关旧节点的 context/tags/links
import { getActiveNodes, getNode, createEdge, addEdge } from './graph.js'; import { getActiveNodes, getNode, createEdge, addEdge } from './graph.js';
import { searchSimilar } from './embedding.js';
import { callLLMForJSON } from './llm.js'; import { callLLMForJSON } from './llm.js';
import {
buildNodeVectorText,
findSimilarNodesByText,
validateVectorConfig,
} from './vector-index.js';
/** /**
* 进化系统提示词 * 进化系统提示词
@@ -57,8 +61,8 @@ export async function evolveMemories({
const stats = { evolved: 0, connections: 0, updates: 0 }; const stats = { evolved: 0, connections: 0, updates: 0 };
if (!newNodeIds || newNodeIds.length === 0) return stats; if (!newNodeIds || newNodeIds.length === 0) return stats;
if (!embeddingConfig?.apiUrl) { if (!validateVectorConfig(embeddingConfig).valid) {
console.log('[ST-BME] 记忆进化跳过:未配置 Embedding API'); console.log('[ST-BME] 记忆进化跳过:向量配置不可用');
return stats; return stats;
} }
@@ -67,16 +71,21 @@ export async function evolveMemories({
for (const newId of newNodeIds) { for (const newId of newNodeIds) {
const newNode = getNode(graph, newId); const newNode = getNode(graph, newId);
if (!newNode || !newNode.embedding) continue; if (!newNode) continue;
// 找最近邻(排除自身) const queryText = buildNodeVectorText(newNode);
const candidates = activeNodes if (!queryText) continue;
.filter(n => n.id !== newId && n.embedding)
.map(n => ({ nodeId: n.id, embedding: n.embedding }));
const candidates = activeNodes.filter(n => n.id !== newId);
if (candidates.length === 0) continue; if (candidates.length === 0) continue;
const neighbors = searchSimilar(newNode.embedding, candidates, neighborCount); const neighbors = await findSimilarNodesByText(
graph,
queryText,
embeddingConfig,
neighborCount,
candidates,
);
if (neighbors.length === 0) continue; if (neighbors.length === 0) continue;
// 构建 LLM 上下文 // 构建 LLM 上下文

View File

@@ -2,7 +2,7 @@
// 分析对话 → 提取节点和关系 → 更新图谱 // 分析对话 → 提取节点和关系 → 更新图谱
// v2: 融合 Mem0 精确对照 + Graphiti 时序边 + MemoRAG 全局概要 // v2: 融合 Mem0 精确对照 + Graphiti 时序边 + MemoRAG 全局概要
import { embedBatch, embedText, searchSimilar } from "./embedding.js"; import { embedBatch } from "./embedding.js";
import { import {
addEdge, addEdge,
addNode, addNode,
@@ -16,6 +16,12 @@ import {
} from "./graph.js"; } from "./graph.js";
import { callLLMForJSON } from "./llm.js"; import { callLLMForJSON } from "./llm.js";
import { RELATION_TYPES } from "./schema.js"; import { RELATION_TYPES } from "./schema.js";
import {
buildNodeVectorText,
findSimilarNodesByText,
isDirectVectorConfig,
validateVectorConfig,
} from "./vector-index.js";
/** /**
* 对未处理的对话楼层执行记忆提取 * 对未处理的对话楼层执行记忆提取
@@ -122,7 +128,7 @@ export async function extractMemories({
} }
// ========== v2: Mem0 精确对照阶段 ========== // ========== v2: Mem0 精确对照阶段 ==========
if (enablePreciseConflict && embeddingConfig?.apiUrl) { if (enablePreciseConflict && validateVectorConfig(embeddingConfig).valid) {
await mem0ConflictCheck( await mem0ConflictCheck(
graph, graph,
result.operations, result.operations,
@@ -411,7 +417,7 @@ function handleLinks(graph, sourceId, links, refMap, stats) {
* 为缺少 embedding 的节点生成向量 * 为缺少 embedding 的节点生成向量
*/ */
async function generateNodeEmbeddings(graph, embeddingConfig) { async function generateNodeEmbeddings(graph, embeddingConfig) {
if (!embeddingConfig?.apiUrl) return; if (!isDirectVectorConfig(embeddingConfig)) return;
const needsEmbedding = graph.nodes.filter( const needsEmbedding = graph.nodes.filter(
(n) => (n) =>
@@ -420,17 +426,7 @@ async function generateNodeEmbeddings(graph, embeddingConfig) {
if (needsEmbedding.length === 0) return; if (needsEmbedding.length === 0) return;
const texts = needsEmbedding.map((n) => { const texts = needsEmbedding.map((node) => buildNodeVectorText(node) || node.type);
// 用主要字段拼文本
const parts = [];
if (n.fields.summary) parts.push(n.fields.summary);
if (n.fields.name) parts.push(n.fields.name);
if (n.fields.title) parts.push(n.fields.title);
if (n.fields.traits) parts.push(n.fields.traits);
if (n.fields.state) parts.push(n.fields.state);
if (n.fields.constraint) parts.push(n.fields.constraint);
return parts.join(" | ") || n.type;
});
console.log(`[ST-BME] 为 ${texts.length} 个节点生成 embedding`); console.log(`[ST-BME] 为 ${texts.length} 个节点生成 embedding`);
@@ -543,9 +539,10 @@ async function mem0ConflictCheck(
threshold, threshold,
fallbackSeq, fallbackSeq,
) { ) {
const activeNodes = getActiveNodes(graph).filter( const activeNodes = getActiveNodes(graph).filter((node) => {
(n) => Array.isArray(n.embedding) && n.embedding.length > 0, const text = buildNodeVectorText(node);
); return typeof text === "string" && text.length > 0;
});
if (activeNodes.length === 0) return; if (activeNodes.length === 0) return;
for (const op of operations) { for (const op of operations) {
@@ -556,14 +553,13 @@ async function mem0ConflictCheck(
if (!factText) continue; if (!factText) continue;
try { try {
const factVec = await embedText(factText, embeddingConfig); const similar = await findSimilarNodesByText(
if (!factVec) continue; graph,
factText,
const candidates = activeNodes.map((n) => ({ embeddingConfig,
nodeId: n.id, 3,
embedding: n.embedding, activeNodes,
})); );
const similar = searchSimilar(factVec, candidates, 3);
if (similar.length > 0 && similar[0].score > threshold) { if (similar.length > 0 && similar[0].score > threshold) {
const topMatch = graph.nodes.find((n) => n.id === similar[0].nodeId); const topMatch = graph.nodes.find((n) => n.id === similar[0].nodeId);

View File

@@ -1,10 +1,17 @@
// ST-BME: 图数据模型 // ST-BME: 图数据模型
// 管理节点、边的 CRUD 操作,以及序列化到 chat_metadata // 管理节点、边的 CRUD 操作,以及序列化到 chat_metadata
import {
createDefaultBatchJournal,
createDefaultHistoryState,
createDefaultVectorIndexState,
normalizeGraphRuntimeState,
} from "./runtime-state.js";
/** /**
* 图状态版本号 * 图状态版本号
*/ */
const GRAPH_VERSION = 3; const GRAPH_VERSION = 4;
/** /**
* 生成 UUID v4 * 生成 UUID v4
@@ -22,13 +29,16 @@ function uuid() {
* @returns {GraphState} * @returns {GraphState}
*/ */
export function createEmptyGraph() { export function createEmptyGraph() {
return { return normalizeGraphRuntimeState({
version: GRAPH_VERSION, version: GRAPH_VERSION,
lastProcessedSeq: -1, lastProcessedSeq: -1,
nodes: [], nodes: [],
edges: [], edges: [],
lastRecallResult: null, lastRecallResult: null,
}; historyState: createDefaultHistoryState(),
vectorIndexState: createDefaultVectorIndexState(),
batchJournal: createDefaultBatchJournal(),
});
} }
// ==================== 节点操作 ==================== // ==================== 节点操作 ====================
@@ -481,6 +491,25 @@ export function deserializeGraph(json) {
} }
} }
if (data.version < 4) {
data.historyState = {
...createDefaultHistoryState(),
...(data.historyState || {}),
lastProcessedAssistantFloor: Number.isFinite(data.lastProcessedSeq)
? data.lastProcessedSeq
: -1,
};
data.vectorIndexState = {
...createDefaultVectorIndexState(),
...(data.vectorIndexState || {}),
dirty: true,
lastWarning: "旧版本图谱已迁移,需要重建向量运行时状态",
};
data.batchJournal = Array.isArray(data.batchJournal)
? data.batchJournal
: createDefaultBatchJournal();
}
data.version = GRAPH_VERSION; data.version = GRAPH_VERSION;
} }
@@ -513,8 +542,24 @@ export function deserializeGraph(json) {
data.lastRecallResult = Array.isArray(data.lastRecallResult) data.lastRecallResult = Array.isArray(data.lastRecallResult)
? data.lastRecallResult ? data.lastRecallResult
: null; : null;
data.historyState = {
...createDefaultHistoryState(),
...(data.historyState || {}),
lastProcessedAssistantFloor: Number.isFinite(
data?.historyState?.lastProcessedAssistantFloor,
)
? data.historyState.lastProcessedAssistantFloor
: data.lastProcessedSeq,
};
data.vectorIndexState = {
...createDefaultVectorIndexState(data?.historyState?.chatId || ""),
...(data.vectorIndexState || {}),
};
data.batchJournal = Array.isArray(data.batchJournal)
? data.batchJournal
: createDefaultBatchJournal();
return data; return normalizeGraphRuntimeState(data, data?.historyState?.chatId || "");
} catch (e) { } catch (e) {
console.error("[ST-BME] 图反序列化失败:", e); console.error("[ST-BME] 图反序列化失败:", e);
return createEmptyGraph(); return createEmptyGraph();
@@ -529,6 +574,17 @@ export function deserializeGraph(json) {
export function exportGraph(graph) { export function exportGraph(graph) {
const exportData = { const exportData = {
...graph, ...graph,
historyState: {
...createDefaultHistoryState(graph?.historyState?.chatId || ""),
lastProcessedAssistantFloor:
graph?.historyState?.lastProcessedAssistantFloor ?? graph?.lastProcessedSeq ?? -1,
},
vectorIndexState: {
...createDefaultVectorIndexState(graph?.historyState?.chatId || ""),
dirty: true,
lastWarning: "导出图谱不包含运行时向量索引",
},
batchJournal: createDefaultBatchJournal(),
nodes: graph.nodes.map((n) => ({ ...n, embedding: null })), nodes: graph.nodes.map((n) => ({ ...n, embedding: null })),
}; };
return JSON.stringify(exportData, null, 2); return JSON.stringify(exportData, null, 2);
@@ -540,10 +596,17 @@ export function exportGraph(graph) {
* @returns {GraphState} * @returns {GraphState}
*/ */
export function importGraph(json) { export function importGraph(json) {
const graph = deserializeGraph(json); const graph = normalizeGraphRuntimeState(deserializeGraph(json));
// 导入的节点需要重新生成 embedding // 导入的节点需要重新生成 embedding
for (const node of graph.nodes) { for (const node of graph.nodes) {
node.embedding = null; node.embedding = null;
} }
graph.batchJournal = createDefaultBatchJournal();
graph.historyState.processedMessageHashes = {};
graph.historyState.historyDirtyFrom = null;
graph.vectorIndexState.hashToNodeId = {};
graph.vectorIndexState.nodeToHash = {};
graph.vectorIndexState.dirty = true;
graph.vectorIndexState.lastWarning = "导入图谱后需要重建向量索引";
return graph; return graph;
} }

790
index.js

File diff suppressed because it is too large Load Diff

View File

@@ -61,6 +61,26 @@
</div> </div>
</div> </div>
<div class="bme-config-card">
<div class="bme-section-header">运行状态</div>
<div class="bme-config-row">
<label>当前聊天</label>
<div class="bme-recent-meta" id="bme-status-chat-id"></div>
</div>
<div class="bme-config-row">
<label>历史状态</label>
<div class="bme-recent-meta" id="bme-status-history"></div>
</div>
<div class="bme-config-row">
<label>向量状态</label>
<div class="bme-recent-meta" id="bme-status-vector"></div>
</div>
<div class="bme-config-row">
<label>最近恢复</label>
<div class="bme-recent-meta" id="bme-status-recovery"></div>
</div>
</div>
<!-- 移动端图谱预览(仅手机端可见) --> <!-- 移动端图谱预览(仅手机端可见) -->
<div class="bme-mobile-graph-preview" id="bme-mobile-graph-area"> <div class="bme-mobile-graph-preview" id="bme-mobile-graph-area">
<canvas id="bme-mobile-graph-canvas"></canvas> <canvas id="bme-mobile-graph-canvas"></canvas>
@@ -137,6 +157,32 @@
<i class="fa-solid fa-dna"></i> <i class="fa-solid fa-dna"></i>
<span>强制进化</span> <span>强制进化</span>
</button> </button>
<button class="bme-action-btn" id="bme-act-vector-rebuild">
<i class="fa-solid fa-database"></i>
<span>重建向量</span>
</button>
<button class="bme-action-btn" id="bme-act-vector-range">
<i class="fa-solid fa-layer-group"></i>
<span>范围重建</span>
</button>
<button class="bme-action-btn" id="bme-act-vector-reembed">
<i class="fa-solid fa-wand-magic-sparkles"></i>
<span>直连重嵌</span>
</button>
</div>
<div class="bme-config-card" style="margin-top:16px">
<div class="bme-section-header">范围重建</div>
<div class="bme-config-help">
仅重建与指定楼层范围相交的节点向量。留空时默认按整段聊天处理。
</div>
<div class="bme-config-row">
<label for="bme-range-start">起始楼层</label>
<input id="bme-range-start" class="bme-config-input" type="number" min="0" max="999999" />
</div>
<div class="bme-config-row">
<label for="bme-range-end">结束楼层</label>
<input id="bme-range-end" class="bme-config-input" type="number" min="0" max="999999" />
</div>
</div> </div>
</div> </div>
@@ -207,7 +253,49 @@
<div class="bme-config-card"> <div class="bme-config-card">
<div class="bme-section-header">Embedding</div> <div class="bme-section-header">Embedding</div>
<div class="bme-config-help"> <div class="bme-config-help">
图谱向量仍使用 OpenAI 兼容的 <code>/v1/embeddings</code> 接口。当前发布版不改酒馆本体,因此这里不会依赖额外宿主补丁;若目标服务不支持浏览器直连,请改用支持 CORS 的服务或本地可直连端点 向量支持两种模式:后端索引优先,以及完全独立的直连兜底。后端模式会优先复用酒馆现成 provider直连模式则继续使用你自己的第二套 URL/Key/Model
</div>
<div class="bme-config-row">
<label for="bme-setting-embed-mode">向量模式</label>
<select id="bme-setting-embed-mode" class="bme-config-input">
<option value="backend">后端索引</option>
<option value="direct">直连兜底</option>
</select>
</div>
<div class="bme-config-row">
<label for="bme-setting-embed-backend-source">后端向量源</label>
<select id="bme-setting-embed-backend-source" class="bme-config-input">
<option value="openai">OpenAI</option>
<option value="openrouter">OpenRouter</option>
<option value="cohere">Cohere</option>
<option value="mistral">Mistral</option>
<option value="electronhub">Electron Hub</option>
<option value="chutes">Chutes</option>
<option value="nanogpt">NanoGPT</option>
<option value="ollama">Ollama</option>
<option value="llamacpp">llama.cpp</option>
<option value="vllm">vLLM</option>
</select>
</div>
<div class="bme-config-row">
<label for="bme-setting-embed-backend-model">后端模型</label>
<input id="bme-setting-embed-backend-model" class="bme-config-input" type="text" placeholder="text-embedding-3-small / nomic-embed-text / BAAI/bge-m3" />
</div>
<div class="bme-config-row">
<label for="bme-setting-embed-backend-url">后端 API 地址</label>
<input id="bme-setting-embed-backend-url" class="bme-config-input" type="text" placeholder="仅 Ollama / llama.cpp / vLLM 需要填写" />
</div>
<div class="bme-config-row inline">
<label class="checkbox_label" for="bme-setting-embed-auto-suffix">
<input id="bme-setting-embed-auto-suffix" type="checkbox" />
<span>自动补全 /embeddings 后缀</span>
</label>
</div>
<div class="bme-config-help">
如果当前页面是 HTTPS 而你填的是 HTTP本地浏览器可能会拦截混合内容远程部署时请优先使用 HTTPS 或宿主可访问的同源入口。
</div>
<div class="bme-config-help">
直连模式会使用下面这组独立配置:
</div> </div>
<div class="bme-config-row"> <div class="bme-config-row">
<label for="bme-setting-embed-url">Embedding API 地址</label> <label for="bme-setting-embed-url">Embedding API 地址</label>

View File

@@ -3,6 +3,10 @@
import { renderTemplateAsync } from "../../../templates.js"; import { renderTemplateAsync } from "../../../templates.js";
import { GraphRenderer } from "./graph-renderer.js"; import { GraphRenderer } from "./graph-renderer.js";
import { getNodeColors } from "./themes.js"; import { getNodeColors } from "./themes.js";
import {
getSuggestedBackendModel,
getVectorIndexStats,
} from "./vector-index.js";
let panelEl = null; let panelEl = null;
let overlayEl = null; let overlayEl = null;
@@ -171,6 +175,32 @@ function _refreshDashboard() {
`NODES: ${activeNodes.length} | EDGES: ${graph.edges.length}`, `NODES: ${activeNodes.length} | EDGES: ${graph.edges.length}`,
); );
const chatId = graph?.historyState?.chatId || "—";
const lastProcessed = graph?.historyState?.lastProcessedAssistantFloor ?? -1;
const dirtyFrom = graph?.historyState?.historyDirtyFrom;
const vectorStats = getVectorIndexStats(graph);
const vectorMode = graph?.vectorIndexState?.mode || "—";
const vectorSource = graph?.vectorIndexState?.source || "—";
const recovery = graph?.historyState?.lastRecoveryResult;
_setText("bme-status-chat-id", chatId);
_setText(
"bme-status-history",
Number.isFinite(dirtyFrom)
? `脏区从楼层 ${dirtyFrom} 开始,已处理到 ${lastProcessed}`
: `干净,已处理到楼层 ${lastProcessed}`,
);
_setText(
"bme-status-vector",
`${vectorMode}/${vectorSource} · total ${vectorStats.total} · indexed ${vectorStats.indexed} · stale ${vectorStats.stale} · pending ${vectorStats.pending}`,
);
_setText(
"bme-status-recovery",
recovery
? `${recovery.status} · from ${recovery.fromFloor ?? "—"} · ${recovery.reason || "—"}`
: "暂无恢复记录",
);
_renderRecentList("bme-recent-extract", _getLastExtract?.() || []); _renderRecentList("bme-recent-extract", _getLastExtract?.() || []);
_renderRecentList("bme-recent-recall", _getLastRecall?.() || []); _renderRecentList("bme-recent-recall", _getLastRecall?.() || []);
} }
@@ -413,6 +443,8 @@ function _bindActions() {
"bme-act-import": "import", "bme-act-import": "import",
"bme-act-rebuild": "rebuild", "bme-act-rebuild": "rebuild",
"bme-act-evolve": "evolve", "bme-act-evolve": "evolve",
"bme-act-vector-rebuild": "rebuildVectorIndex",
"bme-act-vector-reembed": "reembedDirect",
}; };
for (const [elementId, actionKey] of Object.entries(bindings)) { for (const [elementId, actionKey] of Object.entries(bindings)) {
@@ -435,6 +467,22 @@ function _bindActions() {
} }
}); });
} }
document.getElementById("bme-act-vector-range")?.addEventListener("click", async () => {
try {
const start = _parseOptionalInt(document.getElementById("bme-range-start")?.value);
const end = _parseOptionalInt(document.getElementById("bme-range-end")?.value);
await _actionHandlers.rebuildVectorRange?.(
Number.isFinite(start) && Number.isFinite(end)
? { start, end }
: null,
);
_refreshDashboard();
_refreshGraph();
} catch (error) {
console.error("[ST-BME] Action rebuildVectorRange failed:", error);
}
});
} }
function _refreshConfigTab() { function _refreshConfigTab() {
@@ -461,6 +509,26 @@ function _refreshConfigTab() {
"bme-setting-embed-model", "bme-setting-embed-model",
settings.embeddingModel || "text-embedding-3-small", settings.embeddingModel || "text-embedding-3-small",
); );
_setInputValue(
"bme-setting-embed-mode",
settings.embeddingTransportMode || "backend",
);
_setInputValue(
"bme-setting-embed-backend-source",
settings.embeddingBackendSource || "openai",
);
_setInputValue(
"bme-setting-embed-backend-model",
settings.embeddingBackendModel || getSuggestedBackendModel(settings.embeddingBackendSource || "openai"),
);
_setInputValue(
"bme-setting-embed-backend-url",
settings.embeddingBackendApiUrl || "",
);
_setCheckboxValue(
"bme-setting-embed-auto-suffix",
settings.embeddingAutoSuffix !== false,
);
_setInputValue("bme-setting-extract-prompt", settings.extractPrompt || ""); _setInputValue("bme-setting-extract-prompt", settings.extractPrompt || "");
_setInputValue("bme-setting-panel-theme", settings.panelTheme || "crimson"); _setInputValue("bme-setting-panel-theme", settings.panelTheme || "crimson");
@@ -510,6 +578,28 @@ function _bindConfigControls() {
bindText("bme-setting-embed-model", (value) => bindText("bme-setting-embed-model", (value) =>
_updateSettings?.({ embeddingModel: value.trim() }), _updateSettings?.({ embeddingModel: value.trim() }),
); );
bindText("bme-setting-embed-mode", (value) =>
_updateSettings?.({ embeddingTransportMode: value }),
);
bindText("bme-setting-embed-backend-source", (value) => {
const patch = { embeddingBackendSource: value };
const settings = _getSettings?.() || {};
const suggestedModel = getSuggestedBackendModel(value);
if (!settings.embeddingBackendModel || settings.embeddingBackendModel === getSuggestedBackendModel(settings.embeddingBackendSource || "openai")) {
patch.embeddingBackendModel = suggestedModel;
}
_updateSettings?.(patch);
_setInputValue("bme-setting-embed-backend-model", patch.embeddingBackendModel || settings.embeddingBackendModel || "");
});
bindText("bme-setting-embed-backend-model", (value) =>
_updateSettings?.({ embeddingBackendModel: value.trim() }),
);
bindText("bme-setting-embed-backend-url", (value) =>
_updateSettings?.({ embeddingBackendApiUrl: value.trim() }),
);
bindCheckbox("bme-setting-embed-auto-suffix", (checked) =>
_updateSettings?.({ embeddingAutoSuffix: checked }),
);
bindText("bme-setting-extract-prompt", (value) => bindText("bme-setting-extract-prompt", (value) =>
_updateSettings?.({ extractPrompt: value }), _updateSettings?.({ extractPrompt: value }),
); );
@@ -575,6 +665,11 @@ function _setCheckboxValue(id, checked) {
} }
} }
function _parseOptionalInt(value) {
const parsed = Number.parseInt(String(value ?? "").trim(), 10);
return Number.isFinite(parsed) ? parsed : null;
}
function _escHtml(str) { function _escHtml(str) {
const div = document.createElement("div"); const div = document.createElement("div");
div.textContent = String(str ?? ""); div.textContent = String(str ?? "");

View File

@@ -4,7 +4,6 @@
import { diffuseAndRank } from "./diffusion.js"; import { diffuseAndRank } from "./diffusion.js";
import { hybridScore, reinforceAccessBatch } from "./dynamics.js"; import { hybridScore, reinforceAccessBatch } from "./dynamics.js";
import { embedText, searchSimilar } from "./embedding.js";
import { import {
buildTemporalAdjacencyMap, buildTemporalAdjacencyMap,
getActiveNodes, getActiveNodes,
@@ -12,6 +11,7 @@ import {
getNodeEdges, getNodeEdges,
} from "./graph.js"; } from "./graph.js";
import { callLLMForJSON } from "./llm.js"; import { callLLMForJSON } from "./llm.js";
import { findSimilarNodesByText, validateVectorConfig } from "./vector-index.js";
/** /**
* 自适应阈值 * 自适应阈值
@@ -82,9 +82,13 @@ export async function retrieve({
} }
// ========== 第 1 层:向量预筛 ========== // ========== 第 1 层:向量预筛 ==========
if (nodeCount >= STRATEGY_THRESHOLDS.SMALL && embeddingConfig?.apiUrl) { if (
nodeCount >= STRATEGY_THRESHOLDS.SMALL &&
validateVectorConfig(embeddingConfig).valid
) {
console.log("[ST-BME] 第1层: 向量预筛"); console.log("[ST-BME] 第1层: 向量预筛");
vectorResults = await vectorPreFilter( vectorResults = await vectorPreFilter(
graph,
userMessage, userMessage,
activeNodes, activeNodes,
embeddingConfig, embeddingConfig,
@@ -270,20 +274,20 @@ export async function retrieve({
* 向量预筛选 * 向量预筛选
*/ */
async function vectorPreFilter( async function vectorPreFilter(
graph,
userMessage, userMessage,
activeNodes, activeNodes,
embeddingConfig, embeddingConfig,
topK, topK,
) { ) {
try { try {
const queryVec = await embedText(userMessage, embeddingConfig); return await findSimilarNodesByText(
if (!queryVec) return []; graph,
userMessage,
const candidates = activeNodes embeddingConfig,
.filter((n) => Array.isArray(n.embedding) && n.embedding.length > 0) topK,
.map((n) => ({ nodeId: n.id, embedding: n.embedding })); activeNodes,
);
return searchSimilar(queryVec, candidates, topK);
} catch (e) { } catch (e) {
console.error("[ST-BME] 向量预筛失败:", e); console.error("[ST-BME] 向量预筛失败:", e);
return []; return [];

352
runtime-state.js Normal file
View File

@@ -0,0 +1,352 @@
// ST-BME: 运行时状态与历史恢复辅助
const BATCH_JOURNAL_LIMIT = 24;
/**
 * Build the vector-store collection identifier for a chat.
 * @param {string} chatId - Chat identifier; falls back to "unknown-chat" when empty.
 * @returns {string} Namespaced collection id ("st-bme::<chatId>").
 */
export function buildVectorCollectionId(chatId) {
  const suffix = chatId || "unknown-chat";
  return `st-bme::${suffix}`;
}
/**
 * Create a pristine history-tracking state for a chat.
 * @param {string} [chatId=""] - Chat identifier to associate with the state.
 * @returns {object} Fresh history state: no processed floors, no dirty flag.
 */
export function createDefaultHistoryState(chatId = "") {
  const state = { chatId };
  state.lastProcessedAssistantFloor = -1;
  state.processedMessageHashes = {};
  state.historyDirtyFrom = null;
  state.lastMutationReason = "";
  state.lastRecoveryResult = null;
  return state;
}
/**
 * Create a pristine vector-index runtime state for a chat.
 * Starts in "backend" mode with empty hash maps and zeroed stats.
 * @param {string} [chatId=""] - Chat identifier used to derive the collection id.
 * @returns {object} Fresh vector-index state.
 */
export function createDefaultVectorIndexState(chatId = "") {
  const emptyStats = { total: 0, indexed: 0, stale: 0, pending: 0 };
  return {
    mode: "backend",
    collectionId: buildVectorCollectionId(chatId),
    source: "",
    modelScope: "",
    hashToNodeId: {},
    nodeToHash: {},
    dirty: false,
    lastSyncAt: 0,
    lastStats: emptyStats,
    lastWarning: "",
  };
}
/**
 * Create an empty batch journal (a fresh array on every call).
 * @returns {Array} New, empty journal.
 */
export function createDefaultBatchJournal() {
  const journal = [];
  return journal;
}
/**
 * Normalize a graph object in place so it always carries well-formed runtime
 * state: historyState, vectorIndexState, and a bounded batchJournal.
 * Layers any stored state over fresh defaults, repairs malformed sub-objects,
 * and mirrors lastProcessedAssistantFloor back onto the legacy
 * lastProcessedSeq field. When the derived collection id changes (chat
 * switched), the vector hash maps are cleared and the index marked dirty.
 * @param {object} graph - Graph state to normalize (mutated and returned).
 * @param {string} [chatId=""] - Current chat id; overrides the stored chatId.
 * @returns {object} The same graph object; non-object inputs pass through.
 */
export function normalizeGraphRuntimeState(graph, chatId = "") {
  // Pass null/non-object inputs through untouched.
  if (!graph || typeof graph !== "object") {
    return graph;
  }
  // Fill missing keys from defaults; stored values win over defaults.
  const historyState = {
    ...createDefaultHistoryState(chatId),
    ...(graph.historyState || {}),
  };
  const vectorIndexState = {
    ...createDefaultVectorIndexState(chatId),
    ...(graph.vectorIndexState || {}),
  };
  // Explicit chatId argument takes precedence over whatever was stored.
  historyState.chatId = chatId || historyState.chatId || "";
  // Fall back to the legacy lastProcessedSeq field for older graphs.
  if (!Number.isFinite(historyState.lastProcessedAssistantFloor)) {
    historyState.lastProcessedAssistantFloor = Number.isFinite(graph.lastProcessedSeq)
      ? graph.lastProcessedSeq
      : -1;
  }
  // Repair hash/state maps that are missing or have the wrong shape
  // (arrays are rejected because these must be plain key->value objects).
  if (
    !historyState.processedMessageHashes ||
    typeof historyState.processedMessageHashes !== "object" ||
    Array.isArray(historyState.processedMessageHashes)
  ) {
    historyState.processedMessageHashes = {};
  }
  if (
    !vectorIndexState.hashToNodeId ||
    typeof vectorIndexState.hashToNodeId !== "object" ||
    Array.isArray(vectorIndexState.hashToNodeId)
  ) {
    vectorIndexState.hashToNodeId = {};
  }
  if (
    !vectorIndexState.nodeToHash ||
    typeof vectorIndexState.nodeToHash !== "object" ||
    Array.isArray(vectorIndexState.nodeToHash)
  ) {
    vectorIndexState.nodeToHash = {};
  }
  if (!vectorIndexState.lastStats || typeof vectorIndexState.lastStats !== "object") {
    vectorIndexState.lastStats = createDefaultVectorIndexState(chatId).lastStats;
  }
  // Recompute the collection id; if it changed the persisted hash maps no
  // longer match the backing store, so reset them and flag for rebuild.
  const previousCollectionId = vectorIndexState.collectionId;
  vectorIndexState.collectionId = buildVectorCollectionId(chatId || historyState.chatId);
  if (previousCollectionId && previousCollectionId !== vectorIndexState.collectionId) {
    vectorIndexState.hashToNodeId = {};
    vectorIndexState.nodeToHash = {};
    vectorIndexState.dirty = true;
    vectorIndexState.lastWarning = "聊天标识变化,向量索引已标记为待重建";
  }
  graph.historyState = historyState;
  graph.vectorIndexState = vectorIndexState;
  // Keep only the newest BATCH_JOURNAL_LIMIT journal entries.
  graph.batchJournal = Array.isArray(graph.batchJournal)
    ? graph.batchJournal.slice(-BATCH_JOURNAL_LIMIT)
    : createDefaultBatchJournal();
  // Legacy field kept in sync for callers that still read lastProcessedSeq.
  graph.lastProcessedSeq = historyState.lastProcessedAssistantFloor;
  return graph;
}
/**
 * Deep-clone a graph via a JSON round-trip (mirrors how the graph is
 * serialized into chat_metadata). Nested journal entries have their
 * snapshotBefore.batchJournal emptied so snapshots never recursively embed
 * earlier journals.
 * @param {object} graph - Graph state (or any JSON-serializable value) to clone.
 * @returns {object} Independent deep copy.
 */
export function cloneGraphSnapshot(graph) {
  const copy = JSON.parse(JSON.stringify(graph));
  if (Array.isArray(copy.batchJournal)) {
    copy.batchJournal = copy.batchJournal.map((entry) =>
      entry?.snapshotBefore
        ? {
            ...entry,
            snapshotBefore: { ...entry.snapshotBefore, batchJournal: [] },
          }
        : entry,
    );
  }
  return copy;
}
/**
 * FNV-1a style 32-bit hash of a string, returned as a non-negative integer.
 * Iterates by code point; for astral characters only the leading surrogate
 * contributes (charCodeAt(0)) — still deterministic, which is all callers need.
 * @param {string} text - Input text; null/undefined/empty all hash as "".
 * @returns {number} Unsigned 32-bit hash value.
 */
export function stableHashString(text) {
  let hash = 2166136261; // FNV-1a 32-bit offset basis
  for (const char of String(text || "")) {
    hash ^= char.charCodeAt(0);
    hash = Math.imul(hash, 16777619); // FNV-1a 32-bit prime
  }
  // `>>> 0` already yields an unsigned (non-negative) 32-bit value, so the
  // original `Math.abs(...)` wrapper was redundant and has been removed.
  return hash >>> 0;
}
/**
 * Compute a stable fingerprint for a chat message covering exactly the fields
 * that matter for history-mutation detection: role flags, text, and swipe id.
 * @param {object} message - Chat message ({ is_user, is_system, mes, swipe_id }).
 * @returns {string} Hash encoded as a decimal string.
 */
export function buildMessageHash(message) {
  // Key order matters: JSON.stringify preserves insertion order, and the hash
  // must stay stable across versions.
  const normalized = {
    isUser: Boolean(message?.is_user),
    isSystem: Boolean(message?.is_system),
    text: String(message?.mes || ""),
    swipeId: Number.isFinite(message?.swipe_id) ? message.swipe_id : null,
  };
  return String(stableHashString(JSON.stringify(normalized)));
}
/**
 * Record per-floor message hashes for every floor up to (and including) the
 * last processed assistant floor, clamped to the chat length.
 * @param {Array} chat - Full chat message array.
 * @param {number} lastProcessedAssistantFloor - Highest floor already processed.
 * @returns {Object<number, string>} Map of floor index → message hash.
 */
export function snapshotProcessedMessageHashes(chat, lastProcessedAssistantFloor) {
  if (!Array.isArray(chat) || lastProcessedAssistantFloor < 0) {
    return {};
  }
  const limit = Math.min(lastProcessedAssistantFloor, chat.length - 1);
  const hashes = {};
  for (let floor = 0; floor <= limit; floor++) {
    hashes[floor] = buildMessageHash(chat[floor]);
  }
  return hashes;
}
/**
 * Compare the current chat against previously recorded message hashes to
 * detect edits, deletions, or truncation of already-processed history.
 * Scans tracked floors in ascending order and reports the earliest mismatch.
 * @param {Array} chat - Current chat message array.
 * @param {object} historyState - State with processedMessageHashes and
 *   lastProcessedAssistantFloor.
 * @returns {{dirty: boolean, earliestAffectedFloor: number|null, reason: string}}
 */
export function detectHistoryMutation(chat, historyState) {
  const cleanResult = () => ({
    dirty: false,
    earliestAffectedFloor: null,
    reason: "",
  });
  const lastFloor = historyState?.lastProcessedAssistantFloor ?? -1;
  const recordedHashes = historyState?.processedMessageHashes || {};
  if (!Array.isArray(chat) || lastFloor < 0) {
    return cleanResult();
  }
  const floors = Object.keys(recordedHashes)
    .map((key) => Number.parseInt(key, 10))
    .filter(Number.isFinite)
    .sort((a, b) => a - b);
  if (floors.length === 0) {
    return cleanResult();
  }
  for (const floor of floors) {
    // Tracked floor no longer exists → history was deleted or truncated.
    if (floor >= chat.length) {
      return {
        dirty: true,
        earliestAffectedFloor: floor,
        reason: `楼层 ${floor} 已不存在,检测到历史删除/截断`,
      };
    }
    // Hash mismatch → message text, role, or swipe changed.
    if (buildMessageHash(chat[floor]) !== recordedHashes[floor]) {
      return {
        dirty: true,
        earliestAffectedFloor: floor,
        reason: `楼层 ${floor} 内容或 swipe 已变化`,
      };
    }
  }
  // All tracked floors match, but the processed watermark points past the
  // current chat end → the tail was truncated.
  if (lastFloor >= chat.length) {
    return {
      dirty: true,
      earliestAffectedFloor: chat.length,
      reason: "已处理楼层超出当前聊天长度,检测到历史截断",
    };
  }
  return cleanResult();
}
/**
 * Flag the graph's history as dirty starting at the given floor and record a
 * pending recovery entry. If already dirty, keeps the earliest dirty floor.
 * @param {object} graph - Graph state (normalized and mutated in place).
 * @param {number} floor - Earliest affected floor; non-finite values fall back
 *   to the last processed assistant floor.
 * @param {string} [reason=""] - Human-readable mutation reason.
 */
export function markHistoryDirty(graph, floor, reason = "") {
  normalizeGraphRuntimeState(graph, graph?.historyState?.chatId || "");
  const history = graph.historyState;
  const effectiveFloor = Number.isFinite(floor)
    ? floor
    : history.lastProcessedAssistantFloor;
  const existingDirtyFrom = history.historyDirtyFrom;
  history.historyDirtyFrom = Number.isFinite(existingDirtyFrom)
    ? Math.min(existingDirtyFrom, effectiveFloor)
    : effectiveFloor;
  history.lastMutationReason = String(reason || "").trim();
  history.lastRecoveryResult = {
    status: "pending",
    at: Date.now(),
    fromFloor: history.historyDirtyFrom,
    reason: history.lastMutationReason,
  };
}
/**
 * Clear the dirty-history flag, optionally recording the recovery outcome.
 * @param {object} graph - Graph state (normalized and mutated in place).
 * @param {object|null} [result=null] - Recovery result to store; when falsy,
 *   the previous lastRecoveryResult is kept.
 */
export function clearHistoryDirty(graph, result = null) {
  normalizeGraphRuntimeState(graph, graph?.historyState?.chatId || "");
  const history = graph.historyState;
  history.historyDirtyFrom = null;
  history.lastMutationReason = "";
  if (result) {
    history.lastRecoveryResult = result;
  }
}
// Index nodes by id for O(1) lookups while diffing snapshots.
function buildNodeMap(nodes = []) {
  const byId = new Map();
  for (const node of nodes) {
    byId.set(node.id, node);
  }
  return byId;
}
// Index edges by id for O(1) lookups while diffing snapshots.
function buildEdgeMap(edges = []) {
  const byId = new Map();
  for (const edge of edges) {
    byId.set(edge.id, edge);
  }
  return byId;
}
// Structural inequality via JSON round-trip; differing key order also counts
// as a change, matching how snapshots are serialized.
function hasMeaningfulNodeChange(beforeNode, afterNode) {
  const before = JSON.stringify(beforeNode);
  const after = JSON.stringify(afterNode);
  return before !== after;
}
// Structural inequality via JSON round-trip (same approach as node diffing).
function hasMeaningfulEdgeChange(beforeEdge, afterEdge) {
  const before = JSON.stringify(beforeEdge);
  const after = JSON.stringify(afterEdge);
  return before !== after;
}
/**
 * Diff two graph snapshots (taken before/after a processing batch) into a
 * journal entry that supports later rollback.
 *
 * Recorded data:
 * - ids of nodes/edges present only in the after-snapshot (created);
 * - pre-change snapshots of nodes with any JSON-visible difference;
 * - pre-change snapshots of nodes whose `archived` flag flipped (these also
 *   appear in updatedNodeSnapshots — the duplication is intentional);
 * - pre-change snapshots of edges whose invalidAt/expiredAt changed; other
 *   edge updates are NOT individually recorded (the full snapshotBefore still
 *   covers complete rollback);
 * - the whole before-snapshot, plus metadata passed via `meta`.
 *
 * Deleted nodes/edges (present only in the before-snapshot) are not listed
 * separately; they are recoverable from snapshotBefore.
 *
 * @param {object} snapshotBefore - Graph snapshot taken before the batch.
 * @param {object} snapshotAfter - Graph snapshot taken after the batch.
 * @param {object} [meta={}] - Optional { processedRange, vectorHashesInserted,
 *   postProcessArtifacts }.
 * @returns {object} Journal entry with a unique id and createdAt timestamp.
 */
export function createBatchJournalEntry(snapshotBefore, snapshotAfter, meta = {}) {
  // Index both snapshots by id for O(1) before/after pairing.
  const beforeNodes = buildNodeMap(snapshotBefore?.nodes || []);
  const afterNodes = buildNodeMap(snapshotAfter?.nodes || []);
  const beforeEdges = buildEdgeMap(snapshotBefore?.edges || []);
  const afterEdges = buildEdgeMap(snapshotAfter?.edges || []);
  const createdNodeIds = [];
  const createdEdgeIds = [];
  const updatedNodeSnapshots = [];
  const archivedNodeSnapshots = [];
  const invalidatedEdgeSnapshots = [];
  for (const [nodeId, afterNode] of afterNodes.entries()) {
    // New node: only the id is needed (rollback simply removes it).
    if (!beforeNodes.has(nodeId)) {
      createdNodeIds.push(nodeId);
      continue;
    }
    const beforeNode = beforeNodes.get(nodeId);
    if (!hasMeaningfulNodeChange(beforeNode, afterNode)) continue;
    // Keep an independent copy of the pre-change node for rollback.
    updatedNodeSnapshots.push(cloneGraphSnapshot(beforeNode));
    if (beforeNode.archived !== afterNode.archived) {
      archivedNodeSnapshots.push(cloneGraphSnapshot(beforeNode));
    }
  }
  for (const [edgeId, afterEdge] of afterEdges.entries()) {
    if (!beforeEdges.has(edgeId)) {
      createdEdgeIds.push(edgeId);
      continue;
    }
    const beforeEdge = beforeEdges.get(edgeId);
    if (!hasMeaningfulEdgeChange(beforeEdge, afterEdge)) continue;
    // Only temporal-invalidation changes are snapshotted individually.
    if (
      beforeEdge.invalidAt !== afterEdge.invalidAt ||
      beforeEdge.expiredAt !== afterEdge.expiredAt
    ) {
      invalidatedEdgeSnapshots.push(cloneGraphSnapshot(beforeEdge));
    }
  }
  return {
    // Timestamp plus random suffix gives a practically unique id.
    id: `batch-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`,
    createdAt: Date.now(),
    processedRange: meta.processedRange || [-1, -1],
    createdNodeIds,
    createdEdgeIds,
    updatedNodeSnapshots,
    archivedNodeSnapshots,
    invalidatedEdgeSnapshots,
    // De-duplicate inserted vector hashes while preserving first-seen order.
    vectorHashesInserted: Array.isArray(meta.vectorHashesInserted)
      ? [...new Set(meta.vectorHashesInserted)]
      : [],
    postProcessArtifacts: Array.isArray(meta.postProcessArtifacts)
      ? meta.postProcessArtifacts
      : [],
    snapshotBefore,
  };
}
/**
 * Append a journal entry, trimming the journal to the newest
 * BATCH_JOURNAL_LIMIT entries.
 * @param {object} graph - Graph state (normalized and mutated in place).
 * @param {object} entry - Journal entry from createBatchJournalEntry.
 */
export function appendBatchJournal(graph, entry) {
  normalizeGraphRuntimeState(graph, graph?.historyState?.chatId || "");
  const journal = graph.batchJournal;
  journal.push(entry);
  const overflow = journal.length - BATCH_JOURNAL_LIMIT;
  if (overflow > 0) {
    graph.batchJournal = journal.slice(overflow);
  }
}
/**
 * Find the earliest batch journal entry whose processedRange reaches
 * dirtyFromFloor; its "before" snapshot is the rollback recovery point.
 * @param {object} graph - memory graph object
 * @param {number} dirtyFromFloor - earliest floor affected by a history mutation
 * @returns {{affectedIndex:number, journal:object, snapshotBefore:object}|null}
 *   null when no journal entry covers the floor, or the matching entry has
 *   no snapshotBefore to roll back to
 */
export function findJournalRecoveryPoint(graph, dirtyFromFloor) {
  const journals = Array.isArray(graph?.batchJournal) ? graph.batchJournal : [];
  for (let index = 0; index < journals.length; index++) {
    const entry = journals[index];
    const range = Array.isArray(entry?.processedRange)
      ? entry.processedRange
      : [-1, -1];
    if (!Number.isFinite(range[1]) || range[1] < dirtyFromFloor) continue;
    // First entry whose processed range touches the dirty region.
    if (!entry?.snapshotBefore) return null;
    return {
      affectedIndex: index,
      journal: entry,
      snapshotBefore: cloneGraphSnapshot(entry.snapshotBefore),
    };
  }
  return null;
}
/**
 * Build a uniform result object for a recovery operation, stamped with the
 * current time. Fields in `extra` are merged last and may override defaults.
 * @param {string} status - recovery status identifier
 * @param {object} [extra] - additional fields to merge into the result
 * @returns {object} { status, at, ...extra }
 */
export function buildRecoveryResult(status, extra = {}) {
  const result = { status, at: Date.now() };
  return Object.assign(result, extra);
}

66
tests/runtime-history.mjs Normal file
View File

@@ -0,0 +1,66 @@
// Regression tests for history-mutation detection and batch-journal recovery.
// Run directly with `node tests/runtime-history.mjs`; assertions throw on failure.
import assert from "node:assert/strict";
import {
  appendBatchJournal,
  cloneGraphSnapshot,
  createBatchJournalEntry,
  detectHistoryMutation,
  findJournalRecoveryPoint,
  snapshotProcessedMessageHashes,
} from "../runtime-state.js";
import { createEmptyGraph } from "../graph.js";
// Fixed four-message chat sample: floors 0-3, last assistant reply at floor 3.
const chat = [
  { is_user: true, mes: "你好" },
  { is_user: false, mes: "我记住了。" },
  { is_user: true, mes: "继续" },
  { is_user: false, mes: "新的回复" },
];
const hashes = snapshotProcessedMessageHashes(chat, 3);
// Scenario 1: untouched history must not be flagged dirty.
const cleanDetection = detectHistoryMutation(chat, {
  lastProcessedAssistantFloor: 3,
  processedMessageHashes: hashes,
});
assert.equal(cleanDetection.dirty, false);
// Scenario 2: editing floor 1 must report floor 1 as the earliest affected.
const editedChat = structuredClone(chat);
editedChat[1].mes = "我改过内容了。";
const editedDetection = detectHistoryMutation(editedChat, {
  lastProcessedAssistantFloor: 3,
  processedMessageHashes: hashes,
});
assert.equal(editedDetection.dirty, true);
assert.equal(editedDetection.earliestAffectedFloor, 1);
// Scenario 3: truncating to two messages must report the removed floor 2.
const truncatedChat = chat.slice(0, 2);
const truncatedDetection = detectHistoryMutation(truncatedChat, {
  lastProcessedAssistantFloor: 3,
  processedMessageHashes: hashes,
});
assert.equal(truncatedDetection.dirty, true);
assert.equal(truncatedDetection.earliestAffectedFloor, 2);
// Scenario 4: a journal entry covering floors 1-3 must yield a recovery point
// whose "before" snapshot still shows the pre-processing state.
const graph = createEmptyGraph();
graph.historyState.chatId = "chat-history-test";
const beforeSnapshot = cloneGraphSnapshot(graph);
graph.lastProcessedSeq = 3;
graph.historyState.lastProcessedAssistantFloor = 3;
const afterSnapshot = cloneGraphSnapshot(graph);
appendBatchJournal(
  graph,
  createBatchJournalEntry(beforeSnapshot, afterSnapshot, {
    processedRange: [1, 3],
    postProcessArtifacts: ["compression"],
    vectorHashesInserted: [1234],
  }),
);
const recoveryPoint = findJournalRecoveryPoint(graph, 2);
assert.ok(recoveryPoint);
assert.equal(recoveryPoint.journal.processedRange[1], 3);
// The before-snapshot must predate the batch (no assistant floor processed).
assert.equal(
  recoveryPoint.snapshotBefore.historyState.lastProcessedAssistantFloor,
  -1,
);
console.log("runtime-history tests passed");

71
tests/vector-config.mjs Normal file
View File

@@ -0,0 +1,71 @@
// Regression tests for the vector configuration helpers.
// The helpers are extracted from vector-index.js source text and evaluated in
// an isolated vm context, because importing the module directly would also
// pull in SillyTavern runtime dependencies (../../../../script.js etc.).
import assert from "node:assert/strict";
import fs from "node:fs/promises";
import path from "node:path";
import { fileURLToPath } from "node:url";
import vm from "node:vm";
// Regex-extract the pure config helpers from vector-index.js and evaluate
// them in a bare vm context, exposing the two functions under test.
async function loadVectorHelpers() {
  const __dirname = path.dirname(fileURLToPath(import.meta.url));
  const sourcePath = path.resolve(__dirname, "../vector-index.js");
  const source = await fs.readFile(sourcePath, "utf8");
  const pieces = [
    source.match(/export const BACKEND_VECTOR_SOURCES = \[[\s\S]*?\];/m)?.[0],
    source.match(/export const BACKEND_DEFAULT_MODELS = \{[\s\S]*?\};/m)?.[0],
    source.match(/const BACKEND_SOURCES_REQUIRING_API_URL = new Set\([\s\S]*?\);/m)?.[0],
    source.match(/export function normalizeOpenAICompatibleBaseUrl\(value, autoSuffix = true\) \{[\s\S]*?^\}/m)?.[0],
    source.match(/export function getVectorConfigFromSettings\(settings = \{\}\) \{[\s\S]*?^\}/m)?.[0],
    source.match(/export function isBackendVectorConfig\(config\) \{[\s\S]*?^\}/m)?.[0],
    source.match(/export function isDirectVectorConfig\(config\) \{[\s\S]*?^\}/m)?.[0],
    source.match(/export function validateVectorConfig\(config\) \{[\s\S]*?^\}/m)?.[0],
  ].filter(Boolean);
  // All eight fragments must be found; fewer means the source layout changed
  // and these extraction regexes need updating.
  if (pieces.length < 8) {
    throw new Error("无法从 vector-index.js 提取向量配置辅助函数");
  }
  const context = vm.createContext({});
  const script = new vm.Script(`
${pieces.join("\n\n").replaceAll("export ", "")}
this.getVectorConfigFromSettings = getVectorConfigFromSettings;
this.validateVectorConfig = validateVectorConfig;
`);
  script.runInContext(context);
  return {
    getVectorConfigFromSettings: context.getVectorConfigFromSettings,
    validateVectorConfig: context.validateVectorConfig,
  };
}
const { getVectorConfigFromSettings, validateVectorConfig } =
  await loadVectorHelpers();
// Backend mode with an empty model must fall back to the source default.
const backendConfig = getVectorConfigFromSettings({
  embeddingTransportMode: "backend",
  embeddingBackendSource: "openai",
  embeddingBackendModel: "",
});
assert.equal(backendConfig.mode, "backend");
assert.equal(backendConfig.source, "openai");
assert.equal(backendConfig.model, "text-embedding-3-small");
assert.equal(validateVectorConfig(backendConfig).valid, true);
// Direct mode must strip the trailing /embeddings path from the API URL.
const directConfig = getVectorConfigFromSettings({
  embeddingTransportMode: "direct",
  embeddingApiUrl: "https://example.com/v1/embeddings",
  embeddingApiKey: "sk-test",
  embeddingModel: "text-embedding-3-small",
});
assert.equal(directConfig.mode, "direct");
assert.equal(directConfig.apiUrl, "https://example.com/v1");
assert.equal(validateVectorConfig(directConfig).valid, true);
// vllm is a self-hosted source and must be rejected without an API URL.
const invalidBackendConfig = getVectorConfigFromSettings({
  embeddingTransportMode: "backend",
  embeddingBackendSource: "vllm",
  embeddingBackendApiUrl: "",
  embeddingBackendModel: "BAAI/bge-m3",
});
assert.equal(validateVectorConfig(invalidBackendConfig).valid, false);
console.log("vector-config tests passed");

641
vector-index.js Normal file
View File

@@ -0,0 +1,641 @@
// ST-BME: 向量模式、后端索引与直连兜底
import { getRequestHeaders } from "../../../../script.js";
import { embedBatch, embedText, searchSimilar } from "./embedding.js";
import { getActiveNodes } from "./graph.js";
import {
buildVectorCollectionId,
stableHashString,
} from "./runtime-state.js";
// Vector sources supported by the SillyTavern backend (/api/vector).
// Any other value in settings falls back to "openai" (see getVectorConfigFromSettings).
export const BACKEND_VECTOR_SOURCES = [
  "openai",
  "openrouter",
  "cohere",
  "mistral",
  "electronhub",
  "chutes",
  "nanogpt",
  "ollama",
  "llamacpp",
  "vllm",
];
// Self-hosted/local sources that require a user-supplied API URL
// (enforced by validateVectorConfig and buildBackendSourceRequest).
const BACKEND_SOURCES_REQUIRING_API_URL = new Set([
  "ollama",
  "llamacpp",
  "vllm",
]);
// Default embedding model per backend source, used when the user leaves the
// model field empty.
export const BACKEND_DEFAULT_MODELS = {
  openai: "text-embedding-3-small",
  openrouter: "openai/text-embedding-3-small",
  cohere: "embed-multilingual-v3.0",
  mistral: "mistral-embed",
  electronhub: "text-embedding-3-small",
  chutes: "chutes-qwen-qwen3-embedding-8b",
  nanogpt: "text-embedding-3-small",
  ollama: "nomic-embed-text",
  llamacpp: "text-embedding-3-small",
  vllm: "BAAI/bge-m3",
};
/**
 * Normalize an OpenAI-compatible base URL:
 * strips a trailing /chat/completions or /embeddings path segment and any
 * trailing slashes; when autoSuffix is enabled and the URL lacks a /v{N}
 * version segment, appends "/v1".
 * @param {string} value - raw URL as entered by the user
 * @param {boolean} [autoSuffix=true] - whether to auto-append the version suffix
 * @returns {string} normalized base URL ("" for empty input)
 */
export function normalizeOpenAICompatibleBaseUrl(value, autoSuffix = true) {
  let normalized = String(value || "")
    .trim()
    .replace(/\/+(chat\/completions|embeddings)$/i, "")
    .replace(/\/+$/, "");
  // Fix: this branch was a no-op (`normalized = normalized`), so autoSuffix
  // never actually appended the missing version segment.
  if (autoSuffix && normalized && !/\/v\d+$/i.test(normalized)) {
    normalized = `${normalized}/v1`;
  }
  return normalized;
}
/**
 * Derive the normalized vector configuration from plugin settings.
 * "direct" transport uses the user-supplied OpenAI-compatible endpoint;
 * everything else is treated as "backend", with unknown sources falling back
 * to openai and an empty model replaced by that source's default.
 * @param {object} [settings] - plugin settings object
 * @returns {object} normalized vector config
 */
export function getVectorConfigFromSettings(settings = {}) {
  const autoSuffix = settings.embeddingAutoSuffix !== false;
  if (settings.embeddingTransportMode === "direct") {
    return {
      mode: "direct",
      source: "direct",
      apiUrl: normalizeOpenAICompatibleBaseUrl(settings.embeddingApiUrl, autoSuffix),
      apiKey: String(settings.embeddingApiKey || "").trim(),
      model: String(settings.embeddingModel || "").trim(),
      autoSuffix,
    };
  }
  const requestedSource = settings.embeddingBackendSource;
  const source = BACKEND_VECTOR_SOURCES.includes(requestedSource)
    ? requestedSource
    : "openai";
  const model = String(
    settings.embeddingBackendModel || BACKEND_DEFAULT_MODELS[source] || "",
  ).trim();
  return {
    mode: "backend",
    source,
    apiUrl: normalizeOpenAICompatibleBaseUrl(settings.embeddingBackendApiUrl, autoSuffix),
    apiKey: "",
    model,
    autoSuffix,
  };
}
/**
 * Suggested default model for a backend vector source; unknown sources fall
 * back to OpenAI's small embedding model.
 * @param {string} source - backend vector source identifier
 * @returns {string} suggested model name
 */
export function getSuggestedBackendModel(source) {
  const suggested = BACKEND_DEFAULT_MODELS[source];
  return suggested || "text-embedding-3-small";
}
/**
 * Whether a vector config uses the backend (/api/vector) transport.
 * @param {object|null|undefined} config - vector config
 * @returns {boolean} true only for mode === "backend"
 */
export function isBackendVectorConfig(config) {
  if (config == null) return false;
  return config.mode === "backend";
}
/**
 * Whether a vector config uses the direct OpenAI-compatible transport.
 * @param {object|null|undefined} config - vector config
 * @returns {boolean} true only for mode === "direct"
 */
export function isDirectVectorConfig(config) {
  return (config ?? {}).mode === "direct";
}
/**
 * Build the "model scope" identifier for a vector config. A change in any
 * component (mode / source / normalized URL / model) yields a different scope,
 * which callers use to decide whether the index must be rebuilt.
 * @param {object|null} config - vector config
 * @returns {string} "direct|url|model" or "backend|source|url|model"; "" when no config
 */
export function getVectorModelScope(config) {
  if (!config) return "";
  const normalizedUrl = normalizeOpenAICompatibleBaseUrl(config.apiUrl, config.autoSuffix);
  const model = config.model || "";
  const parts = isDirectVectorConfig(config)
    ? ["direct", normalizedUrl, model]
    : ["backend", config.source || "", normalizedUrl, model];
  return parts.join("|");
}
/**
 * Validate a vector config for use.
 * Direct mode requires both API URL and model; backend mode requires a model,
 * and self-hosted sources (ollama/llamacpp/vllm) additionally require an API URL.
 * @param {object|null} config - vector config
 * @returns {{valid: boolean, error: string}} validation verdict and message
 */
export function validateVectorConfig(config) {
  if (!config) {
    return { valid: false, error: "未找到向量配置" };
  }
  if (isDirectVectorConfig(config)) {
    if (!config.apiUrl) {
      return { valid: false, error: "请填写直连 Embedding API 地址" };
    }
    if (!config.model) {
      return { valid: false, error: "请填写直连 Embedding 模型" };
    }
    return { valid: true, error: "" };
  }
  if (!config.model) {
    return { valid: false, error: "请填写后端向量模型" };
  }
  const needsApiUrl = BACKEND_SOURCES_REQUIRING_API_URL.has(config.source);
  if (needsApiUrl && !config.apiUrl) {
    return { valid: false, error: "当前后端向量源需要填写 API 地址" };
  }
  return { valid: true, error: "" };
}
/**
 * Flatten a node's fields into the text used for vectorization.
 * Preferred fields (summary/insight/title/...) come first, in fixed order,
 * emitted as bare values; remaining fields are emitted as "key: value".
 * Empty values and the "embedding" key are skipped; parts join with " | ".
 * @param {object|null} node - graph node
 * @returns {string} vectorization text, "" when nothing usable
 */
export function buildNodeVectorText(node) {
  const fields = node?.fields || {};
  const preferredKeys = [
    "summary",
    "insight",
    "title",
    "name",
    "state",
    "traits",
    "constraint",
    "goal",
    "participants",
    "suggestion",
    "status",
    "scope",
  ];
  // Render a field value; null means "skip" (empty array).
  const render = (value) => {
    if (Array.isArray(value)) {
      return value.length > 0 ? value.join(", ") : null;
    }
    if (typeof value === "object") return JSON.stringify(value);
    return String(value);
  };
  const parts = [];
  for (const key of preferredKeys) {
    const value = fields[key];
    if (value == null || value === "") continue;
    const text = render(value);
    if (text != null) parts.push(text);
  }
  const preferredSet = new Set(preferredKeys);
  for (const [key, value] of Object.entries(fields)) {
    if (preferredSet.has(key) || key === "embedding") continue;
    if (value == null || value === "") continue;
    const text = render(value);
    if (text != null) parts.push(`${key}: ${text}`);
  }
  return parts.join(" | ").trim();
}
/**
 * Compute a node's content hash for the vector index. The hash covers the
 * node id, its vectorization text, the end of its floor range and the model
 * scope — a change in any of them forces the node's vector to be rebuilt.
 * @param {object} node - graph node
 * @param {object} config - vector config
 * @returns {*} result of stableHashString over the combined payload
 */
export function buildNodeVectorHash(node, config) {
  const seqEnd = node?.seqRange?.[1] ?? node?.seq ?? 0;
  const payload = [
    node?.id || "",
    buildNodeVectorText(node),
    String(seqEnd),
    getVectorModelScope(config),
  ].join("::");
  return stableHashString(payload);
}
/**
 * Build the source-related fields shared by all backend /api/vector requests.
 * Self-hosted sources carry the user-supplied apiUrl; ollama additionally
 * sends keep=false.
 * @param {object} config - backend-mode vector config
 * @returns {object} fields to spread into the request body
 */
function buildBackendSourceRequest(config) {
  const body = { source: config.source, model: config.model };
  if (BACKEND_SOURCES_REQUIRING_API_URL.has(config.source)) {
    body.apiUrl = config.apiUrl;
  }
  if (config.source === "ollama") {
    // ollama-specific flag; presumably controls whether the model stays
    // loaded server-side — confirm against the backend vector API.
    body.keep = false;
  }
  return body;
}
/**
 * Select the nodes that should participate in the vector index: active,
 * not archived, and with non-empty vectorization text. When a range is given,
 * only nodes whose seqRange overlaps [start, end] are kept.
 * @param {object} graph - memory graph
 * @param {{start:number, end:number}|null} [range] - optional floor range
 * @returns {object[]} eligible nodes
 */
function getEligibleVectorNodes(graph, range = null) {
  let nodes = getActiveNodes(graph).filter((node) => !node.archived);
  const hasRange =
    range && Number.isFinite(range.start) && Number.isFinite(range.end);
  if (hasRange) {
    const lo = Math.min(range.start, range.end);
    const hi = Math.max(range.start, range.end);
    nodes = nodes.filter((node) => {
      const seqStart = node?.seqRange?.[0] ?? node?.seq ?? -1;
      const seqEnd = node?.seqRange?.[1] ?? node?.seq ?? -1;
      // Keep nodes whose [seqStart, seqEnd] intersects [lo, hi].
      return seqEnd >= lo && seqStart <= hi;
    });
  }
  return nodes.filter((node) => buildNodeVectorText(node).length > 0);
}
/**
 * Produce the desired vector-index entries (nodeId/hash/text/floor index)
 * for every eligible node.
 * @param {object} graph - memory graph
 * @param {object} config - vector config
 * @param {{start:number, end:number}|null} [range] - optional floor range
 * @returns {Array<{nodeId:string, hash:*, text:string, index:number}>}
 */
function buildDesiredVectorEntries(graph, config, range = null) {
  const entries = [];
  for (const node of getEligibleVectorNodes(graph, range)) {
    entries.push({
      nodeId: node.id,
      hash: buildNodeVectorHash(node, config),
      text: buildNodeVectorText(node),
      index: node?.seqRange?.[1] ?? node?.seq ?? 0,
    });
  }
  return entries;
}
/**
 * Compute sync statistics for the vector index.
 * indexed: desired entries whose hash already matches the stored mapping;
 * pending: desired entries still to be written;
 * stale: stored mappings that no longer match any desired entry (node gone,
 * content changed, or the hash->node mapping is inconsistent).
 * @param {object} graph - memory graph (reads graph.vectorIndexState)
 * @param {Array<{nodeId:string, hash:*}>} desiredEntries - desired entries
 * @returns {{total:number, indexed:number, stale:number, pending:number}}
 */
function computeVectorStats(graph, desiredEntries) {
  const state = graph.vectorIndexState || {};
  const nodeToHash = state.nodeToHash || {};
  const hashToNodeId = state.hashToNodeId || {};
  const desiredByNodeId = new Map(
    desiredEntries.map((entry) => [entry.nodeId, entry]),
  );
  let indexed = 0;
  for (const entry of desiredEntries) {
    if (nodeToHash[entry.nodeId] === entry.hash) indexed++;
  }
  const pending = desiredEntries.length - indexed;
  let stale = 0;
  for (const [nodeId, hash] of Object.entries(nodeToHash)) {
    const desired = desiredByNodeId.get(nodeId);
    const matches =
      desired && desired.hash === hash && hashToNodeId[hash] === nodeId;
    if (!matches) stale++;
  }
  return { total: desiredEntries.length, indexed, stale, pending };
}
/**
 * Purge a backend vector collection. Throws an Error carrying the backend's
 * response text (or HTTP status) on failure.
 * @param {string} collectionId - collection identifier
 */
async function purgeVectorCollection(collectionId) {
  const response = await fetch("/api/vector/purge", {
    method: "POST",
    headers: getRequestHeaders(),
    body: JSON.stringify({ collectionId }),
  });
  if (response.ok) return;
  const message = await response.text().catch(() => response.statusText);
  throw new Error(message || `HTTP ${response.status}`);
}
/**
 * Delete a batch of hashes from a backend vector collection. No-op for an
 * empty or non-array hash list; throws on a non-OK backend response.
 * @param {string} collectionId - collection identifier
 * @param {object} config - backend-mode vector config
 * @param {Array} hashes - hashes to delete
 */
async function deleteVectorHashes(collectionId, config, hashes) {
  if (!Array.isArray(hashes) || hashes.length === 0) return;
  const payload = {
    collectionId,
    hashes,
    ...buildBackendSourceRequest(config),
  };
  const response = await fetch("/api/vector/delete", {
    method: "POST",
    headers: getRequestHeaders(),
    body: JSON.stringify(payload),
  });
  if (response.ok) return;
  const message = await response.text().catch(() => response.statusText);
  throw new Error(message || `HTTP ${response.status}`);
}
/**
 * Insert a batch of entries (hash/text/index) into a backend vector
 * collection. No-op for an empty list; throws on a non-OK backend response.
 * @param {string} collectionId - collection identifier
 * @param {object} config - backend-mode vector config
 * @param {Array<{hash:*, text:string, index:number}>} entries - entries to insert
 */
async function insertVectorEntries(collectionId, config, entries) {
  if (!Array.isArray(entries) || entries.length === 0) return;
  // Only the three indexed fields are sent; any extra entry fields are dropped.
  const items = entries.map(({ hash, text, index }) => ({ hash, text, index }));
  const response = await fetch("/api/vector/insert", {
    method: "POST",
    headers: getRequestHeaders(),
    body: JSON.stringify({
      collectionId,
      items,
      ...buildBackendSourceRequest(config),
    }),
  });
  if (response.ok) return;
  const message = await response.text().catch(() => response.statusText);
  throw new Error(message || `HTTP ${response.status}`);
}
/**
 * Reset the graph's vector-index mappings: stamp the current mode/source/
 * model scope/collection id and clear both hash<->node maps so a subsequent
 * sync rebuilds the index from scratch.
 * @param {object} graph - memory graph
 * @param {object} config - vector config
 * @param {string} chatId - current chat id
 */
function resetVectorMappings(graph, config, chatId) {
  Object.assign(graph.vectorIndexState, {
    mode: config.mode,
    source: config.source || "",
    modelScope: getVectorModelScope(config),
    collectionId: buildVectorCollectionId(chatId),
    hashToNodeId: {},
    nodeToHash: {},
  });
}
/**
 * Synchronize the graph's eligible nodes into the vector index.
 * Backend mode stores vectors in a server-side collection and strips node
 * embeddings; direct mode stores embeddings on the nodes themselves.
 * Supports full rebuild (purge/dirty/scope change), incremental sync, and
 * range-limited forced refresh.
 * @param {object} graph - memory graph (mutates graph.vectorIndexState and node embeddings)
 * @param {object} config - vector config
 * @param {object} [options] - chatId / purge / force / optional floor range
 * @returns {Promise<{insertedHashes: Array, stats: object}>}
 */
export async function syncGraphVectorIndex(
  graph,
  config,
  {
    chatId = "",
    purge = false,
    force = false,
    range = null,
  } = {},
) {
  // Nothing to do without a graph or a config.
  if (!graph || !config) {
    return { insertedHashes: [], stats: { total: 0, indexed: 0, stale: 0, pending: 0 } };
  }
  // Invalid config: mark the index dirty and record the warning; sync will
  // happen once the configuration is fixed.
  const validation = validateVectorConfig(config);
  if (!validation.valid) {
    graph.vectorIndexState.lastWarning = validation.error;
    graph.vectorIndexState.dirty = true;
    return { insertedHashes: [], stats: graph.vectorIndexState.lastStats };
  }
  const state = graph.vectorIndexState;
  const collectionId = buildVectorCollectionId(chatId || graph?.historyState?.chatId);
  // Desired state: every entry that should currently be indexed (range-limited).
  const desiredEntries = buildDesiredVectorEntries(graph, config, range);
  const desiredByNodeId = new Map(desiredEntries.map((entry) => [entry.nodeId, entry]));
  const insertedHashes = [];
  const hasConcreteRange =
    range &&
    Number.isFinite(range.start) &&
    Number.isFinite(range.end);
  const rangedNodeIds = new Set(desiredEntries.map((entry) => entry.nodeId));
  if (isBackendVectorConfig(config)) {
    // Any change in mode/source/model scope/collection invalidates the old index.
    const scopeChanged =
      state.mode !== "backend" ||
      state.source !== config.source ||
      state.modelScope !== getVectorModelScope(config) ||
      state.collectionId !== collectionId;
    // Full rebuild: explicit purge, dirty flag, scope change, or an
    // unbounded forced refresh.
    const fullReset = purge || state.dirty || scopeChanged || (force && !hasConcreteRange);
    if (fullReset) {
      await purgeVectorCollection(collectionId);
      resetVectorMappings(graph, config, chatId);
      await insertVectorEntries(collectionId, config, desiredEntries);
      for (const entry of desiredEntries) {
        state.hashToNodeId[entry.hash] = entry.nodeId;
        state.nodeToHash[entry.nodeId] = entry.hash;
        insertedHashes.push(entry.hash);
      }
    } else {
      // Incremental sync: collect stale hashes to delete and new entries to insert.
      const hashesToDelete = [];
      const entriesToInsert = [];
      // Range-limited forced refresh: delete-and-reinsert every node in range.
      if (force && hasConcreteRange) {
        for (const entry of desiredEntries) {
          const currentHash = state.nodeToHash[entry.nodeId];
          if (currentHash) {
            hashesToDelete.push(currentHash);
            delete state.hashToNodeId[currentHash];
            delete state.nodeToHash[entry.nodeId];
          }
          entriesToInsert.push(entry);
        }
      }
      // Drop mappings whose node left the desired set or whose hash changed.
      for (const [nodeId, hash] of Object.entries(state.nodeToHash)) {
        // With a concrete range, never touch mappings of out-of-range nodes.
        if (hasConcreteRange && !rangedNodeIds.has(nodeId)) {
          continue;
        }
        const desired = desiredByNodeId.get(nodeId);
        if (!desired || desired.hash !== hash) {
          hashesToDelete.push(hash);
          delete state.nodeToHash[nodeId];
          delete state.hashToNodeId[hash];
        }
      }
      // Queue missing entries (already queued above during ranged force).
      for (const entry of desiredEntries) {
        if (force && hasConcreteRange) continue;
        if (state.nodeToHash[entry.nodeId] === entry.hash) continue;
        entriesToInsert.push(entry);
      }
      await deleteVectorHashes(collectionId, config, hashesToDelete);
      await insertVectorEntries(collectionId, config, entriesToInsert);
      for (const entry of entriesToInsert) {
        state.hashToNodeId[entry.hash] = entry.nodeId;
        state.nodeToHash[entry.nodeId] = entry.hash;
        insertedHashes.push(entry.hash);
      }
    }
    // Backend mode keeps vectors server-side; strip embedded vectors from nodes.
    for (const node of graph.nodes || []) {
      if (Array.isArray(node.embedding) && node.embedding.length > 0) {
        node.embedding = null;
      }
    }
  } else {
    // Direct mode: embeddings live on the nodes; mappings only track which
    // nodes already have an up-to-date embedding.
    const entriesToEmbed = [];
    const hashByNodeId = {};
    for (const entry of desiredEntries) {
      hashByNodeId[entry.nodeId] = entry.hash;
      const currentHash = state.nodeToHash?.[entry.nodeId];
      const node = graph.nodes.find((candidate) => candidate.id === entry.nodeId);
      const hasEmbedding = Array.isArray(node?.embedding) && node.embedding.length > 0;
      // Node already carries an embedding but has no mapping (e.g. legacy
      // data): adopt it without recomputing.
      if (!force && !currentHash && hasEmbedding) {
        state.hashToNodeId[entry.hash] = entry.nodeId;
        state.nodeToHash[entry.nodeId] = entry.hash;
        continue;
      }
      if (force || purge || currentHash !== entry.hash || !hasEmbedding) {
        entriesToEmbed.push(entry);
      }
    }
    if (purge || state.mode !== "direct") {
      // NOTE(review): on purge this also wipes the mappings just written by
      // the "adopt" branch above, so adopted nodes will be counted as pending
      // until the next sync — confirm this is intended.
      resetVectorMappings(graph, config, chatId);
    } else {
      for (const [nodeId, hash] of Object.entries(state.nodeToHash || {})) {
        if (hasConcreteRange && !rangedNodeIds.has(nodeId)) {
          continue;
        }
        if (!hashByNodeId[nodeId]) {
          delete state.nodeToHash[nodeId];
          delete state.hashToNodeId[hash];
        }
      }
    }
    if (entriesToEmbed.length > 0) {
      // Batch-embed, then write vectors back to nodes in order and register
      // the mappings; entries whose embedding came back empty are skipped.
      const embeddings = await embedBatch(
        entriesToEmbed.map((entry) => entry.text),
        config,
      );
      for (let index = 0; index < entriesToEmbed.length; index++) {
        const entry = entriesToEmbed[index];
        const node = graph.nodes.find((candidate) => candidate.id === entry.nodeId);
        if (!node) continue;
        if (embeddings[index]) {
          node.embedding = Array.from(embeddings[index]);
          state.hashToNodeId[entry.hash] = entry.nodeId;
          state.nodeToHash[entry.nodeId] = entry.hash;
          insertedHashes.push(entry.hash);
        }
      }
    }
    state.mode = "direct";
    state.source = "direct";
    state.modelScope = getVectorModelScope(config);
    state.collectionId = collectionId;
  }
  // Finalize: clear dirty flag and warning, recompute full (unranged) stats.
  state.dirty = false;
  state.lastWarning = "";
  state.lastSyncAt = Date.now();
  state.lastStats = computeVectorStats(graph, buildDesiredVectorEntries(graph, config));
  return {
    insertedHashes,
    stats: state.lastStats,
  };
}
/**
 * Retrieve the nodes most similar to a query text.
 * Direct mode embeds the query and compares it (via searchSimilar) against
 * candidates that carry an embedding. Backend mode queries /api/vector/query
 * and maps the returned hashes back to nodes; since the backend only returns
 * ranked hashes, scores are approximated from rank position (top ≈ 1,
 * decreasing linearly, floored at 0.01).
 * @param {object} graph - memory graph
 * @param {string} text - query text
 * @param {object} config - vector config
 * @param {number} [topK=10] - maximum number of results
 * @param {object[]|null} [candidates] - restrict search to these nodes; defaults to all eligible nodes
 * @returns {Promise<Array<{nodeId:string, score:number}>>} ranked matches (empty on failure)
 */
export async function findSimilarNodesByText(
  graph,
  text,
  config,
  topK = 10,
  candidates = null,
) {
  if (!text || !graph || !config) return [];
  const candidateNodes = Array.isArray(candidates)
    ? candidates
    : getEligibleVectorNodes(graph);
  if (candidateNodes.length === 0) return [];
  if (isDirectVectorConfig(config)) {
    const queryVec = await embedText(text, config);
    if (!queryVec) return [];
    return searchSimilar(
      queryVec,
      candidateNodes
        .filter((node) => Array.isArray(node.embedding) && node.embedding.length > 0)
        .map((node) => ({
          nodeId: node.id,
          embedding: node.embedding,
        })),
      topK,
    );
  }
  const validation = validateVectorConfig(config);
  if (!validation.valid) return [];
  const response = await fetch("/api/vector/query", {
    method: "POST",
    headers: getRequestHeaders(),
    body: JSON.stringify({
      // Fix: previously accessed graph.vectorIndexState.collectionId without
      // optional chaining, throwing a TypeError when the state was never
      // initialized; now consistent with the hashToNodeId lookup below.
      collectionId: graph.vectorIndexState?.collectionId,
      searchText: text,
      topK,
      threshold: 0,
      ...buildBackendSourceRequest(config),
    }),
  });
  if (!response.ok) {
    const errorText = await response.text().catch(() => response.statusText);
    console.warn("[ST-BME] 后端向量查询失败:", errorText);
    return [];
  }
  const data = await response.json().catch(() => ({ hashes: [] }));
  const hashes = Array.isArray(data?.hashes) ? data.hashes : [];
  const nodeIdByHash = graph.vectorIndexState?.hashToNodeId || {};
  const allowedIds = new Set(candidateNodes.map((node) => node.id));
  return hashes
    .map((hash, index) => ({
      nodeId: nodeIdByHash[hash],
      // Rank-based score approximation: backend returns hashes ordered by
      // similarity but no numeric scores.
      score: Math.max(0.01, 1 - index / Math.max(1, hashes.length)),
    }))
    .filter((entry) => entry.nodeId && allowedIds.has(entry.nodeId))
    .slice(0, topK);
}
/**
 * Probe the current vector configuration for connectivity.
 * Direct mode embeds a fixed probe text and reports the vector's dimension;
 * backend mode sends a single /api/vector/query probe (dimension unknown,
 * reported as 0). This function never throws: failures are reported through
 * the returned error field.
 * @param {object} config - vector config
 * @param {string} [chatId="connection-test"] - chat id used to build the probe collection id
 * @returns {Promise<{success:boolean, dimensions:number, error:string}>}
 */
export async function testVectorConnection(config, chatId = "connection-test") {
  const validation = validateVectorConfig(config);
  if (!validation.valid) {
    return { success: false, dimensions: 0, error: validation.error };
  }
  if (isDirectVectorConfig(config)) {
    try {
      const vec = await embedText("test connection", config);
      return vec
        ? { success: true, dimensions: vec.length, error: "" }
        : { success: false, dimensions: 0, error: "API 返回空结果" };
    } catch (error) {
      return { success: false, dimensions: 0, error: String(error) };
    }
  }
  try {
    const response = await fetch("/api/vector/query", {
      method: "POST",
      headers: getRequestHeaders(),
      body: JSON.stringify({
        collectionId: buildVectorCollectionId(chatId),
        searchText: "test connection",
        topK: 1,
        threshold: 0,
        ...buildBackendSourceRequest(config),
      }),
    });
    const payload = await response.text().catch(() => "");
    if (!response.ok) {
      return {
        success: false,
        dimensions: 0,
        error: payload || response.statusText,
      };
    }
    return { success: true, dimensions: 0, error: "" };
  } catch (error) {
    return { success: false, dimensions: 0, error: String(error) };
  }
}
/**
 * Read the statistics recorded by the most recent vector-index sync.
 * Returns an all-zero stats object when the graph, its vector state, or the
 * recorded stats are missing.
 * @param {object|null} graph - memory graph
 * @returns {{total:number, indexed:number, stale:number, pending:number}}
 */
export function getVectorIndexStats(graph) {
  const lastStats = graph?.vectorIndexState?.lastStats;
  return lastStats || { total: 0, indexed: 0, stale: 0, pending: 0 };
}