From 1ef9e53320d22e82389992df8a4858a6b49519ba Mon Sep 17 00:00:00 2001 From: youzini Date: Fri, 15 May 2026 15:28:11 +0000 Subject: [PATCH] fix(vector): prevent incompatible vector reuse --- tests/authority-vector-primary.mjs | 57 +++++++++++++++++++++++++++++- tests/vector-manifest.mjs | 13 +++++++ vector/vector-index.js | 56 ++++++++++++++++++++++++++++- 3 files changed, 124 insertions(+), 2 deletions(-) diff --git a/tests/authority-vector-primary.mjs b/tests/authority-vector-primary.mjs index a5bf085..68a14ed 100644 --- a/tests/authority-vector-primary.mjs +++ b/tests/authority-vector-primary.mjs @@ -71,7 +71,12 @@ function createAuthorityVectorGraph() { return { graph, first, second }; } -function createMockTriviumClient({ failBulkUpsert = false, failSearch = false, failBmeVectorApply = false } = {}) { +function createMockTriviumClient({ + failBulkUpsert = false, + failSearch = false, + failBmeVectorApply = false, + failBmeVectorApplyCompatibility = false, +} = {}) { const calls = []; return { calls, @@ -156,6 +161,14 @@ function createMockTriviumClient({ failBulkUpsert = false, failSearch = false, f path: "/bme/vector-apply", }); } + if (failBmeVectorApplyCompatibility) { + throw new AuthorityHttpError("BME vector apply dimension mismatch", { + status: 400, + category: "validation", + payload: { details: { category: "vector-dimension-mismatch" } }, + path: "/bme/vector-apply", + }); + } return { ok: true, database: payload.database || "st_bme_vectors", @@ -239,6 +252,9 @@ assert.equal(isAuthorityVectorConfig(config), true); assert.equal(result.stats.indexed, 2); assert.equal(graph.vectorIndexState.dirty, false); + assert.equal(graph.vectorIndexState.manifest.status, "clean"); + assert.equal(graph.vectorIndexState.manifest.backend, "authority"); + assert.equal(graph.vectorIndexState.manifest.observedDim, 2); assert.equal(triviumClient.calls.filter(([name]) => name === "bmeVectorApply").length, 1); assert.equal(triviumClient.calls.some(([name]) => name === "purge"), false); assert.equal(triviumClient.calls.some(([name]) => name === "bulkUpsert"), false); @@ -275,6 +291,45 @@ assert.equal(isAuthorityVectorConfig(config), true); assert.equal(triviumClient.calls.some(([name]) => name === "bmeVectorApply"), false); } +{ + const { graph } = createAuthorityVectorGraph(); + const triviumClient = createMockTriviumClient({ failBmeVectorApplyCompatibility: true }); + const applyConfig = { ...config, bmeVectorApplyReady: true }; + const result = await syncGraphVectorIndexFromIndex(graph, applyConfig, { + chatId: "chat-authority-vector", + purge: true, + triviumClient, + }); + + assert.equal(graph.vectorIndexState.dirty, true); + assert.equal(result.errorCategory, "validation"); + assert.equal(triviumClient.calls.filter(([name]) => name === "bmeVectorApply").length, 1); + assert.equal(triviumClient.calls.some(([name]) => name === "purge"), false); + assert.equal(triviumClient.calls.some(([name]) => name === "bulkUpsert"), false); +} + +{ + const { graph, first, second } = createAuthorityVectorGraph(); + const triviumClient = createMockTriviumClient(); + const applyConfig = { ...config, bmeVectorApplyReady: true }; + await syncGraphVectorIndexFromIndex(graph, applyConfig, { + chatId: "chat-authority-vector", + purge: true, + triviumClient, + }); + const changedModelConfig = { ...applyConfig, model: "other-embedding-model" }; + const results = await findSimilarNodesByTextFromIndex( + graph, + "archive door", + changedModelConfig, + 5, + [first, second], + ); + assert.deepEqual(results, []); + assert.equal(graph.vectorIndexState.dirtyReason, "authority-vector-space-mismatch"); + assert.equal(graph.vectorIndexState.lastSearchTimings.reason, "authority-vector-space-mismatch"); +} + { const { graph } = createAuthorityVectorGraph(); const triviumClient = createMockTriviumClient({ failBmeVectorApply: true }); diff --git a/tests/vector-manifest.mjs b/tests/vector-manifest.mjs index f2db077..3579d27 100644 --- a/tests/vector-manifest.mjs +++ b/tests/vector-manifest.mjs @@ -99,4 +99,17 @@ const baseConfig = { assert.equal(graph.vectorIndexState.lastSearchTimings.reason, "query-dimension-mismatch"); } +{ + const graph = createVectorGraph(); + graph.nodes[0].embedding = [0.1, 0.2, 0.3]; + embeddingDim = 3; + const changedModelConfig = { ...baseConfig, model: "text-embedding-3-large" }; + await syncGraphVectorIndex(graph, changedModelConfig, { chatId: graph.historyState.chatId }); + assert.equal(graph.vectorIndexState.manifest.status, "clean"); + assert.equal(graph.vectorIndexState.manifest.model, "text-embedding-3-large"); + assert.equal(graph.nodes[0].embedding.length, 3); + assert.equal(graph.nodes[0].embedding[0], 1); + assert.notDeepEqual(graph.nodes[0].embedding, [0.1, 0.2, 0.3]); +} + console.log("vector-manifest tests passed"); diff --git a/vector/vector-index.js b/vector/vector-index.js index e0425b1..11e2484 100644 --- a/vector/vector-index.js +++ b/vector/vector-index.js @@ -651,6 +651,16 @@ function markLocalVectorManifestStale(graph, config, reason = "vector-space-chan : "向量模型配置变化,索引已标记为待重建"; } +function isVectorApplyCompatibilityError(error = null) { + const detailCategory = String(error?.payload?.details?.category || error?.details?.category || "").trim(); + const message = String(error?.message || "").toLowerCase(); + return detailCategory === "vector-dimension-mismatch" || + detailCategory === "vector-space-mismatch" || + message.includes("dimension mismatch") || + message.includes("vectorspaceid mismatch") || + message.includes("single vector dimension"); +} + function markBackendVectorStateDirty( graph, config, @@ -924,6 +934,18 @@ export async function syncGraphVectorIndex( ); authorityUpsertMs += nowMs() - applyStartedAt; authorityUpsertDiagnostics = applyResult?.diagnostics || null; + const observedDim = Number(applyResult?.manifest?.observedDim || getEmbeddingDimensionFromEntries(graph, desiredEntries) || 0); + if (observedDim > 0) { + updateVectorManifest(graph, config, { + backend: "authority", + chatId: effectiveChatId, + collectionId, + graphRevision: graph?.meta?.revision || graph?.revision || 0, + desiredEntries, + observedDim, + status: "clean", + }); + } authorityLinkDiagnostics = { operation: "bmeVectorApply:links", totalItems: Number(applyResult?.diagnostics?.linkItems || 0), @@ -934,6 +956,7 @@ export async function syncGraphVectorIndex( appliedViaBme = true; } catch (applyError) { if (isAbortError(applyError)) throw applyError; + if (isVectorApplyCompatibilityError(applyError)) throw applyError; console.warn("[ST-BME] BME 服务端向量 apply 失败,回退 Authority Trivium 旧路径:", applyError); } } @@ -1016,6 +1039,18 @@ export async function syncGraphVectorIndex( ); authorityUpsertMs += nowMs() - applyStartedAt; authorityUpsertDiagnostics = applyResult?.diagnostics || null; + const observedDim = Number(applyResult?.manifest?.observedDim || getEmbeddingDimensionFromEntries(graph, entriesToUpsert) || 0); + if (observedDim > 0) { + updateVectorManifest(graph, config, { + backend: "authority", + chatId: effectiveChatId, + collectionId, + graphRevision: graph?.meta?.revision || graph?.revision || 0, + desiredEntries, + observedDim, + status: "clean", + }); + } authorityLinkDiagnostics = { operation: "bmeVectorApply:links", totalItems: Number(applyResult?.diagnostics?.linkItems || 0), @@ -1025,6 +1060,7 @@ export async function syncGraphVectorIndex( appliedViaBme = true; } catch (applyError) { if (isAbortError(applyError)) throw applyError; + if (isVectorApplyCompatibilityError(applyError)) throw applyError; console.warn("[ST-BME] BME 服务端向量 apply 失败,回退 Authority Trivium 旧路径:", applyError); } } @@ -1230,7 +1266,7 @@ export async function syncGraphVectorIndex( const hasEmbedding = Array.isArray(node?.embedding) && node.embedding.length > 0; - if (!force && !currentHash && hasEmbedding) { + if (!directScopeChanged && !force && !currentHash && hasEmbedding) { state.hashToNodeId[entry.hash] = entry.nodeId; state.nodeToHash[entry.nodeId] = entry.hash; continue; @@ -1516,6 +1552,24 @@ export async function findSimilarNodesByText( } if (isAuthorityVectorConfig(config)) { + const state = graph?.vectorIndexState || {}; + if (config.bmeVectorApplyReady === true || config.bmeVectorManifestReady === true) { + const currentDim = Number(state.currentVectorSpace?.observedDim || state.manifest?.observedDim || 0); + const currentVectorSpace = currentDim > 0 + ? deriveVectorSpace(config, currentDim) + : state.currentVectorSpace; + if (!isVectorManifestCompatible(state.manifest, currentVectorSpace)) { + recordSearchTimings({ + success: false, + reason: "authority-vector-space-mismatch", + resultCount: 0, + }); + state.dirty = true; + state.dirtyReason = "authority-vector-space-mismatch"; + state.lastWarning = "Authority 向量空间不匹配,已切换到非向量召回并等待重建"; + return []; + } + } const requestStartedAt = nowMs(); try { const queryEmbedStartedAt = nowMs();