From 61db501db019a4b9cf235434d048d60841db350a Mon Sep 17 00:00:00 2001 From: youzini Date: Fri, 15 May 2026 15:14:17 +0000 Subject: [PATCH] feat(vector): gate local search by vector manifest --- runtime/runtime-state.js | 18 ++++ tests/vector-manifest.mjs | 102 +++++++++++++++++++++++ ui/panel.js | 15 +++- vector/vector-index.js | 168 +++++++++++++++++++++++++++++++++++++- 4 files changed, 300 insertions(+), 3 deletions(-) create mode 100644 tests/vector-manifest.mjs diff --git a/runtime/runtime-state.js b/runtime/runtime-state.js index eaf014e..ee5aba7 100644 --- a/runtime/runtime-state.js +++ b/runtime/runtime-state.js @@ -78,6 +78,8 @@ export function createDefaultVectorIndexState(chatId = "") { stale: 0, pending: 0, }, + currentVectorSpace: null, + manifest: null, lastWarning: "", lastIntegrityIssue: null, }; @@ -753,6 +755,20 @@ export function normalizeGraphRuntimeState(graph, chatId = "", options = {}) { vectorIndexState.lastStats = createDefaultVectorIndexState(chatId).lastStats; } + if ( + vectorIndexState.currentVectorSpace != null && + (typeof vectorIndexState.currentVectorSpace !== "object" || + Array.isArray(vectorIndexState.currentVectorSpace)) + ) { + vectorIndexState.currentVectorSpace = null; + } + if ( + vectorIndexState.manifest != null && + (typeof vectorIndexState.manifest !== "object" || + Array.isArray(vectorIndexState.manifest)) + ) { + vectorIndexState.manifest = null; + } if (!Array.isArray(vectorIndexState.replayRequiredNodeIds)) { vectorIndexState.replayRequiredNodeIds = []; } else { @@ -786,6 +802,8 @@ export function normalizeGraphRuntimeState(graph, chatId = "", options = {}) { vectorIndexState.hashToNodeId = {}; vectorIndexState.nodeToHash = {}; vectorIndexState.replayRequiredNodeIds = []; + vectorIndexState.currentVectorSpace = null; + vectorIndexState.manifest = null; vectorIndexState.dirty = true; vectorIndexState.dirtyReason = "chat-id-changed"; vectorIndexState.pendingRepairFromFloor = 0; diff --git a/tests/vector-manifest.mjs 
// Scenario tests for the vector manifest recorded by the local ("direct")
// vector index: manifest contents after a sync, search gating when the model
// changes, and search gating when the query embedding dimension changes.
import assert from "node:assert/strict";
import { addNode, createEmptyGraph, createNode } from "../graph/graph.js";
import {
  installResolveHooks,
  toDataModuleUrl,
} from "./helpers/register-hooks-compat.mjs";

// Must run before vector-index.js is imported below.
// NOTE(review): presumably redirects these host-application specifiers to the
// inline stub modules so the import graph resolves outside the host — confirm
// against helpers/register-hooks-compat.mjs.
installResolveHooks([
  {
    specifiers: ["../../../../../script.js"],
    url: toDataModuleUrl("export function getRequestHeaders() { return {}; }"),
  },
  {
    specifiers: ["../../../../extensions.js"],
    url: toDataModuleUrl("export const extension_settings = { st_bme: {} };"),
  },
]);

// Length of every fake embedding produced below. Scenarios reassign it to
// simulate a provider whose output dimension changes between sync and query.
let embeddingDim = 3;
// Deterministic fake embedders; no network is involved.
// NOTE(review): assumes the embedding layer reads this global override — the
// observedDim assertions below only hold if it does.
globalThis.__stBmeTestOverrides = {
  embedding: {
    async embedBatch(texts = []) {
      return texts.map((text, index) =>
        Array.from({ length: embeddingDim }, (_, dimIndex) =>
          dimIndex === 0 ? 1 : (index + dimIndex + String(text || "").length) / 100,
        ),
      );
    },
    async embedText(text = "") {
      return Array.from({ length: embeddingDim }, (_, dimIndex) =>
        dimIndex === 0 ? 1 : (dimIndex + String(text || "").length) / 100,
      );
    },
  },
};

// Dynamic import so that the hooks and the override above are installed first.
const {
  findSimilarNodesByText,
  getVectorModelScope,
  syncGraphVectorIndex,
} = await import("../vector/vector-index.js");

/**
 * Build a fresh graph with chat id "chat-vector-manifest" and a single event
 * node whose id is forced to "node-a".
 */
function createVectorGraph() {
  const graph = createEmptyGraph();
  graph.historyState.chatId = "chat-vector-manifest";
  const node = createNode({
    type: "event",
    fields: { summary: "Alice finds the old compass" },
    seq: 1,
  });
  node.id = "node-a";
  addNode(graph, node);
  return graph;
}

// Direct-mode embedding config shared by all scenarios. The apiKey value is a
// sentinel: scenario 1 asserts it never appears in the serialized manifest.
const baseConfig = {
  mode: "direct",
  apiUrl: "https://example.com/v1/embeddings",
  apiKey: "sk-hidden",
  model: "text-embedding-3-small",
};

// Scenario 1: a forced sync records a clean manifest with the observed
// dimension and model, a "vs_"-prefixed space id, and no API key.
{
  const graph = createVectorGraph();
  embeddingDim = 3;
  await syncGraphVectorIndex(graph, baseConfig, { chatId: graph.historyState.chatId, force: true });
  assert.equal(graph.vectorIndexState.manifest.status, "clean");
  assert.equal(graph.vectorIndexState.manifest.observedDim, 3);
  assert.equal(graph.vectorIndexState.manifest.model, "text-embedding-3-small");
  assert.equal(graph.vectorIndexState.manifest.vectorSpaceId.startsWith("vs_"), true);
  assert.equal(JSON.stringify(graph.vectorIndexState.manifest).includes("sk-hidden"), false);
}

// Scenario 2: searching with a different model than the one the index was
// built with returns no results, marks the state dirty, and leaves the
// manifest's vector space id untouched.
{
  const graph = createVectorGraph();
  embeddingDim = 3;
  await syncGraphVectorIndex(graph, baseConfig, { chatId: graph.historyState.chatId, force: true });
  const oldSpaceId = graph.vectorIndexState.manifest.vectorSpaceId;
  const changedModelConfig = { ...baseConfig, model: "text-embedding-3-large" };
  const results = await findSimilarNodesByText(graph, "compass", changedModelConfig, 5);
  assert.deepEqual(results, []);
  assert.equal(graph.vectorIndexState.dirty, true);
  assert.equal(graph.vectorIndexState.dirtyReason, "vector-space-mismatch");
  assert.equal(graph.vectorIndexState.manifest.vectorSpaceId, oldSpaceId);
  assert.notEqual(getVectorModelScope(baseConfig), getVectorModelScope(changedModelConfig));
  assert.equal(graph.vectorIndexState.lastSearchTimings.reason, "vector-space-mismatch");
}

// Scenario 3: same config, but the query embedding comes back with 4
// dimensions against a 3-dimensional index; the search returns no results and
// the manifest is flagged stale.
{
  const graph = createVectorGraph();
  embeddingDim = 3;
  await syncGraphVectorIndex(graph, baseConfig, { chatId: graph.historyState.chatId, force: true });
  embeddingDim = 4;
  const results = await findSimilarNodesByText(graph, "compass", baseConfig, 5);
  assert.deepEqual(results, []);
  assert.equal(graph.vectorIndexState.dirty, true);
  assert.equal(graph.vectorIndexState.dirtyReason, "query-dimension-mismatch");
  assert.equal(graph.vectorIndexState.manifest.status, "stale");
  assert.equal(graph.vectorIndexState.lastSearchTimings.reason, "query-dimension-mismatch");
}

console.log("vector-manifest tests passed");
/**
 * Determine the common embedding length of the graph nodes referenced by
 * `entries`.
 *
 * Nodes without an embedding (or with an empty one) and entries that reference
 * unknown node ids are ignored. Plain arrays and typed arrays (for example
 * `Float32Array`) are both accepted as embeddings.
 *
 * @param {{ nodes?: Array<{ id?: unknown, embedding?: unknown }> } | null | undefined} graph
 * @param {Array<{ nodeId?: unknown }> | null | undefined} [entries]
 * @returns {number} The shared length; `0` when no referenced node carries a
 *   non-empty embedding; `-1` when the referenced embeddings disagree in length.
 */
function getEmbeddingDimensionFromEntries(graph, entries = []) {
  // Index embedding lengths by stringified node id; a later duplicate id wins,
  // matching `new Map(pairs)` semantics.
  const embeddingLengthById = new Map();
  for (const node of graph?.nodes || []) {
    const embedding = node?.embedding;
    const isVector =
      Array.isArray(embedding) ||
      // DataView is an ArrayBuffer view too, but has no element `length`.
      (ArrayBuffer.isView(embedding) && !(embedding instanceof DataView));
    embeddingLengthById.set(String(node?.id || ""), isVector ? embedding.length : 0);
  }

  let dim = 0;
  for (const entry of entries || []) {
    const length = embeddingLengthById.get(String(entry?.nodeId || "")) || 0;
    if (length === 0) continue;
    if (dim === 0) {
      dim = length;
      continue;
    }
    if (length !== dim) return -1;
  }
  return dim;
}

/**
 * Rebuild `graph.vectorIndexState.manifest` (and `currentVectorSpace`) from the
 * outcome of a sync.
 *
 * @param {object} graph - Graph whose `vectorIndexState` is updated in place.
 * @param {object} config - Embedding config forwarded to `deriveVectorSpace`.
 * @param {object} [options]
 * @param {string} [options.backend="local"]
 * @param {string} [options.chatId=""] - Falls back to `graph.historyState.chatId`.
 * @param {string} [options.collectionId=""] - Falls back to the state's `collectionId`.
 * @param {number} [options.graphRevision=0]
 * @param {Array} [options.desiredEntries=[]] - Entries the index should contain;
 *   only the length is used. A non-array counts as empty.
 * @param {number} [options.observedDim=0] - Embedding dimension seen during the
 *   sync; a vector space is derived only when it is a finite number above zero.
 * @param {string} [options.status="clean"]
 * @param {number} [options.failedNodeCount=0] - Clamped to `[0, nodeCount]`.
 * @param {string} [options.lastError=""]
 * @returns {object | null} The new manifest, or `null` when the graph has no
 *   `vectorIndexState`.
 */
function updateVectorManifest(graph, config, {
  backend = "local",
  chatId = "",
  collectionId = "",
  graphRevision = 0,
  desiredEntries = [],
  observedDim = 0,
  status = "clean",
  failedNodeCount = 0,
  lastError = "",
} = {}) {
  if (!graph?.vectorIndexState) return null;
  const state = graph.vectorIndexState;

  const dim = Number(observedDim);
  const vectorSpace = Number.isFinite(dim) && dim > 0
    ? deriveVectorSpace(config, dim)
    : null;

  // Defaults only replace `undefined`, so guard against an explicit `null`.
  const nodeCount = Array.isArray(desiredEntries) ? desiredEntries.length : 0;
  // Keep the counters consistent: 0 <= failed <= nodeCount.
  const failedCount = Math.min(
    nodeCount,
    Math.max(0, Math.trunc(Number(failedNodeCount) || 0)),
  );

  const manifest = createVectorManifest({
    backend,
    chatId: chatId || graph?.historyState?.chatId || "",
    collectionId: collectionId || state.collectionId || "",
    graphRevision,
    vectorSpace,
    status,
    nodeCount,
    embeddedNodeCount: nodeCount - failedCount,
    failedNodeCount: failedCount,
    lastError,
  });
  state.currentVectorSpace = vectorSpace;
  state.manifest = manifest;
  return manifest;
}

/**
 * Flag the local vector index as stale: the existing manifest (or a fresh one
 * when none is usable) gets `status: "stale"`, `lastError: reason` and
 * `completedAt: 0`, and the state is marked dirty with the same reason.
 *
 * @param {object} graph - Graph whose `vectorIndexState` is updated in place;
 *   nothing happens when it has none.
 * @param {object} config - Unused; kept so existing call sites stay valid.
 * @param {string} [reason="vector-space-changed"] - Why the index is stale. An
 *   empty or nullish value falls back to the default so the dirty state never
 *   carries a blank reason.
 * @returns {void}
 */
function markLocalVectorManifestStale(graph, config, reason = "vector-space-changed") {
  if (!graph?.vectorIndexState) return;
  const state = graph.vectorIndexState;
  const staleReason = reason || "vector-space-changed";

  // Arrays are not valid manifests (normalization rejects them as well);
  // spreading one would copy index keys into the new manifest.
  const previousManifest =
    state.manifest != null &&
    typeof state.manifest === "object" &&
    !Array.isArray(state.manifest)
      ? state.manifest
      : null;

  state.manifest = {
    ...(previousManifest || createVectorManifest({ backend: "local", status: "stale" })),
    backend: previousManifest?.backend || "local",
    status: "stale",
    lastError: staleReason,
    completedAt: 0,
  };
  state.dirty = true;
  state.dirtyReason = staleReason;
  state.lastWarning = staleReason === "dimension-changed"
    ? "向量模型维度变化,索引已标记为待重建"
    : "向量模型配置变化,索引已标记为待重建";
}
"部分节点 embedding 生成失败,向量索引仍待修复" : ""; + const observedDim = getEmbeddingDimensionFromEntries(graph, desiredEntries); + if (observedDim < 0) { + updateVectorManifest(graph, config, { + backend: "local", + chatId, + collectionId, + graphRevision: graph?.meta?.revision || graph?.revision || 0, + desiredEntries, + observedDim: 0, + status: "failed", + failedNodeCount: desiredEntries.length, + lastError: "mixed-dimensions", + }); + state.dirty = true; + state.dirtyReason = "mixed-vector-dimensions"; + state.lastWarning = "检测到混合向量维度,索引已标记为待重建"; + } else if (observedDim > 0) { + updateVectorManifest(graph, config, { + backend: "local", + chatId, + collectionId, + graphRevision: graph?.meta?.revision || graph?.revision || 0, + desiredEntries, + observedDim, + status: directSyncHadFailures ? "dirty" : "clean", + failedNodeCount: directSyncHadFailures ? Math.max(1, desiredEntries.length - insertedHashes.length) : 0, + lastError: directSyncHadFailures ? "partial-embedding-failure" : "", + }); + } else { + updateVectorManifest(graph, config, { + backend: "local", + chatId, + collectionId, + graphRevision: graph?.meta?.revision || graph?.revision || 0, + desiredEntries, + observedDim: 0, + status: "missing", + failedNodeCount: desiredEntries.length, + lastError: "no-vectors", + }); + } } if (state.mode !== "direct") { @@ -1302,6 +1433,24 @@ export async function findSimilarNodesByText( } if (isDirectVectorConfig(config)) { + const state = graph?.vectorIndexState || {}; + const currentDim = Number(state.currentVectorSpace?.observedDim || state.manifest?.observedDim || 0); + const currentVectorSpace = currentDim > 0 + ? 
deriveVectorSpace(config, currentDim) + : state.currentVectorSpace; + if (!isVectorManifestCompatible(state.manifest, currentVectorSpace)) { + recordSearchTimings({ + success: false, + reason: "vector-space-mismatch", + resultCount: 0, + }); + if (state) { + state.dirty = true; + state.dirtyReason = "vector-space-mismatch"; + state.lastWarning = "向量空间不匹配,已切换到非向量召回并等待重建"; + } + return []; + } const queryEmbedStartedAt = nowMs(); const queryVec = await embedText(text, config, { signal, isQuery: true }); const queryEmbedMs = nowMs() - queryEmbedStartedAt; @@ -1314,6 +1463,23 @@ export async function findSimilarNodesByText( }); return []; } + if (currentDim > 0 && queryVec.length !== currentDim) { + recordSearchTimings({ + success: false, + reason: "query-dimension-mismatch", + queryDim: queryVec.length, + expectedDim: currentDim, + queryEmbedMs: roundMs(queryEmbedMs), + resultCount: 0, + }); + state.dirty = true; + state.dirtyReason = "query-dimension-mismatch"; + state.lastWarning = `查询向量维度 ${queryVec.length} 与索引维度 ${currentDim} 不一致,已切换到非向量召回`; + if (state.manifest) { + state.manifest = { ...state.manifest, status: "stale", lastError: "query-dimension-mismatch" }; + } + return []; + } const localSearchStartedAt = nowMs(); const results = searchSimilar(