mirror of
https://github.com/Youzini-afk/ST-Bionic-Memory-Ecology.git
synced 2026-06-13 18:31:16 +08:00
Merge dev into main
# Conflicts: # manifest.json
This commit is contained in:
@@ -6,6 +6,6 @@
|
||||
"js": "index.js",
|
||||
"css": "style.css",
|
||||
"author": "Youzini",
|
||||
"version": "6.3.7",
|
||||
"version": "6.4.2",
|
||||
"homePage": "https://github.com/Youzini-afk/ST-Bionic-Memory-Ecology"
|
||||
}
|
||||
|
||||
@@ -78,6 +78,8 @@ export function createDefaultVectorIndexState(chatId = "") {
|
||||
stale: 0,
|
||||
pending: 0,
|
||||
},
|
||||
currentVectorSpace: null,
|
||||
manifest: null,
|
||||
lastWarning: "",
|
||||
lastIntegrityIssue: null,
|
||||
};
|
||||
@@ -753,6 +755,20 @@ export function normalizeGraphRuntimeState(graph, chatId = "", options = {}) {
|
||||
vectorIndexState.lastStats =
|
||||
createDefaultVectorIndexState(chatId).lastStats;
|
||||
}
|
||||
if (
|
||||
vectorIndexState.currentVectorSpace != null &&
|
||||
(typeof vectorIndexState.currentVectorSpace !== "object" ||
|
||||
Array.isArray(vectorIndexState.currentVectorSpace))
|
||||
) {
|
||||
vectorIndexState.currentVectorSpace = null;
|
||||
}
|
||||
if (
|
||||
vectorIndexState.manifest != null &&
|
||||
(typeof vectorIndexState.manifest !== "object" ||
|
||||
Array.isArray(vectorIndexState.manifest))
|
||||
) {
|
||||
vectorIndexState.manifest = null;
|
||||
}
|
||||
if (!Array.isArray(vectorIndexState.replayRequiredNodeIds)) {
|
||||
vectorIndexState.replayRequiredNodeIds = [];
|
||||
} else {
|
||||
@@ -786,6 +802,8 @@ export function normalizeGraphRuntimeState(graph, chatId = "", options = {}) {
|
||||
vectorIndexState.hashToNodeId = {};
|
||||
vectorIndexState.nodeToHash = {};
|
||||
vectorIndexState.replayRequiredNodeIds = [];
|
||||
vectorIndexState.currentVectorSpace = null;
|
||||
vectorIndexState.manifest = null;
|
||||
vectorIndexState.dirty = true;
|
||||
vectorIndexState.dirtyReason = "chat-id-changed";
|
||||
vectorIndexState.pendingRepairFromFloor = 0;
|
||||
|
||||
@@ -33,6 +33,7 @@ const {
|
||||
isAuthorityVectorConfig,
|
||||
normalizeAuthorityVectorConfig,
|
||||
queryAuthorityTriviumNeighbors,
|
||||
applyAuthorityBmeVectorManifest,
|
||||
} = await import("../vector/authority-vector-primary-adapter.js");
|
||||
const {
|
||||
findSimilarNodesByText: findSimilarNodesByTextFromIndex,
|
||||
@@ -70,7 +71,12 @@ function createAuthorityVectorGraph() {
|
||||
return { graph, first, second };
|
||||
}
|
||||
|
||||
function createMockTriviumClient({ failBulkUpsert = false, failSearch = false, failBmeVectorApply = false } = {}) {
|
||||
function createMockTriviumClient({
|
||||
failBulkUpsert = false,
|
||||
failSearch = false,
|
||||
failBmeVectorApply = false,
|
||||
failBmeVectorApplyCompatibility = false,
|
||||
} = {}) {
|
||||
const calls = [];
|
||||
return {
|
||||
calls,
|
||||
@@ -155,6 +161,14 @@ function createMockTriviumClient({ failBulkUpsert = false, failSearch = false, f
|
||||
path: "/bme/vector-apply",
|
||||
});
|
||||
}
|
||||
if (failBmeVectorApplyCompatibility) {
|
||||
throw new AuthorityHttpError("BME vector apply dimension mismatch", {
|
||||
status: 400,
|
||||
category: "validation",
|
||||
payload: { details: { category: "vector-dimension-mismatch" } },
|
||||
path: "/bme/vector-apply",
|
||||
});
|
||||
}
|
||||
return {
|
||||
ok: true,
|
||||
database: payload.database || "st_bme_vectors",
|
||||
@@ -238,16 +252,84 @@ assert.equal(isAuthorityVectorConfig(config), true);
|
||||
|
||||
assert.equal(result.stats.indexed, 2);
|
||||
assert.equal(graph.vectorIndexState.dirty, false);
|
||||
assert.equal(graph.vectorIndexState.manifest.status, "clean");
|
||||
assert.equal(graph.vectorIndexState.manifest.backend, "authority");
|
||||
assert.equal(graph.vectorIndexState.manifest.observedDim, 2);
|
||||
assert.equal(triviumClient.calls.filter(([name]) => name === "bmeVectorApply").length, 1);
|
||||
assert.equal(triviumClient.calls.some(([name]) => name === "purge"), false);
|
||||
assert.equal(triviumClient.calls.some(([name]) => name === "bulkUpsert"), false);
|
||||
const applyCall = triviumClient.calls.find(([name]) => name === "bmeVectorApply")?.[1];
|
||||
assert.equal(applyCall.items.length, 2);
|
||||
assert.equal(applyCall.links.length, 1);
|
||||
assert.equal(applyCall.observedDim, 2);
|
||||
assert.equal(String(applyCall.vectorSpaceId || "").startsWith("vs_"), true);
|
||||
assert.equal(applyCall.items.every((item) => item.payload?.vectorSpaceId === applyCall.vectorSpaceId), true);
|
||||
assert.equal(applyCall.items.every((item) => item.payload?.observedDim === 2), true);
|
||||
assert.equal(applyCall.items.every((item) => Array.isArray(item.vector) && item.vector.length > 0), true);
|
||||
assert.equal(result.timings.authorityDiagnostics.upsert.operation, "bmeVectorApply");
|
||||
}
|
||||
|
||||
{
|
||||
const { graph } = createAuthorityVectorGraph();
|
||||
const triviumClient = createMockTriviumClient();
|
||||
const entries = [
|
||||
{ nodeId: "node-a", text: "a", hash: "hash-a", index: 0 },
|
||||
{ nodeId: "node-b", text: "b", hash: "hash-b", index: 1 },
|
||||
];
|
||||
graph.nodes[0].embedding = [1, 0, 0];
|
||||
graph.nodes[1].embedding = [1, 0];
|
||||
await assert.rejects(
|
||||
() => applyAuthorityBmeVectorManifest(graph, { ...config, bmeVectorApplyReady: true }, entries, {
|
||||
namespace: "st-bme::chat-authority-vector",
|
||||
collectionId: "st-bme::chat-authority-vector",
|
||||
chatId: "chat-authority-vector",
|
||||
modelScope: "scope",
|
||||
triviumClient,
|
||||
}),
|
||||
/single vector dimension/,
|
||||
);
|
||||
assert.equal(triviumClient.calls.some(([name]) => name === "bmeVectorApply"), false);
|
||||
}
|
||||
|
||||
{
|
||||
const { graph } = createAuthorityVectorGraph();
|
||||
const triviumClient = createMockTriviumClient({ failBmeVectorApplyCompatibility: true });
|
||||
const applyConfig = { ...config, bmeVectorApplyReady: true };
|
||||
const result = await syncGraphVectorIndexFromIndex(graph, applyConfig, {
|
||||
chatId: "chat-authority-vector",
|
||||
purge: true,
|
||||
triviumClient,
|
||||
});
|
||||
|
||||
assert.equal(graph.vectorIndexState.dirty, true);
|
||||
assert.equal(result.errorCategory, "validation");
|
||||
assert.equal(triviumClient.calls.filter(([name]) => name === "bmeVectorApply").length, 1);
|
||||
assert.equal(triviumClient.calls.some(([name]) => name === "purge"), false);
|
||||
assert.equal(triviumClient.calls.some(([name]) => name === "bulkUpsert"), false);
|
||||
}
|
||||
|
||||
{
|
||||
const { graph, first, second } = createAuthorityVectorGraph();
|
||||
const triviumClient = createMockTriviumClient();
|
||||
const applyConfig = { ...config, bmeVectorApplyReady: true };
|
||||
await syncGraphVectorIndexFromIndex(graph, applyConfig, {
|
||||
chatId: "chat-authority-vector",
|
||||
purge: true,
|
||||
triviumClient,
|
||||
});
|
||||
const changedModelConfig = { ...applyConfig, model: "other-embedding-model" };
|
||||
const results = await findSimilarNodesByTextFromIndex(
|
||||
graph,
|
||||
"archive door",
|
||||
changedModelConfig,
|
||||
5,
|
||||
[first, second],
|
||||
);
|
||||
assert.deepEqual(results, []);
|
||||
assert.equal(graph.vectorIndexState.dirtyReason, "authority-vector-space-mismatch");
|
||||
assert.equal(graph.vectorIndexState.lastSearchTimings.reason, "authority-vector-space-mismatch");
|
||||
}
|
||||
|
||||
{
|
||||
const { graph } = createAuthorityVectorGraph();
|
||||
const triviumClient = createMockTriviumClient({ failBmeVectorApply: true });
|
||||
|
||||
115
tests/vector-manifest.mjs
Normal file
115
tests/vector-manifest.mjs
Normal file
@@ -0,0 +1,115 @@
|
||||
import assert from "node:assert/strict";
|
||||
import { addNode, createEmptyGraph, createNode } from "../graph/graph.js";
|
||||
import {
|
||||
installResolveHooks,
|
||||
toDataModuleUrl,
|
||||
} from "./helpers/register-hooks-compat.mjs";
|
||||
|
||||
installResolveHooks([
|
||||
{
|
||||
specifiers: ["../../../../../script.js"],
|
||||
url: toDataModuleUrl("export function getRequestHeaders() { return {}; }"),
|
||||
},
|
||||
{
|
||||
specifiers: ["../../../../extensions.js"],
|
||||
url: toDataModuleUrl("export const extension_settings = { st_bme: {} };"),
|
||||
},
|
||||
]);
|
||||
|
||||
// Number of components every stubbed embedding has; scenarios reassign it to
// simulate a provider whose output dimension changes.
let embeddingDim = 3;

/**
 * Builds one deterministic stand-in embedding.
 *
 * Component 0 is always 1; every other component is
 * (positionOffset + componentIndex + text length) / 100.
 */
function buildStubEmbedding(text, positionOffset) {
  const textLength = String(text || "").length;
  const vector = [];
  for (let dimIndex = 0; dimIndex < embeddingDim; dimIndex += 1) {
    vector.push(dimIndex === 0 ? 1 : (positionOffset + dimIndex + textLength) / 100);
  }
  return vector;
}

// Embedding stub published on globalThis under __stBmeTestOverrides.
globalThis.__stBmeTestOverrides = {
  embedding: {
    // Batch variant: the position of each text in the batch feeds into its vector.
    async embedBatch(texts = []) {
      return texts.map((text, index) => buildStubEmbedding(text, index));
    },
    // Single-text variant: same formula without a batch position.
    async embedText(text = "") {
      return buildStubEmbedding(text, 0);
    },
  },
};
|
||||
|
||||
const {
|
||||
findSimilarNodesByText,
|
||||
getVectorModelScope,
|
||||
syncGraphVectorIndex,
|
||||
} = await import("../vector/vector-index.js");
|
||||
|
||||
/**
 * Creates a graph for chat "chat-vector-manifest" that holds a single event
 * node with the fixed id "node-a".
 */
function createVectorGraph() {
  const vectorGraph = createEmptyGraph();
  vectorGraph.historyState.chatId = "chat-vector-manifest";

  const compassEvent = createNode({
    type: "event",
    fields: { summary: "Alice finds the old compass" },
    seq: 1,
  });
  // Pin the id so assertions can refer to the node deterministically.
  compassEvent.id = "node-a";

  addNode(vectorGraph, compassEvent);
  return vectorGraph;
}
|
||||
|
||||
const baseConfig = {
  mode: "direct",
  apiUrl: "https://example.com/v1/embeddings",
  apiKey: "sk-hidden",
  model: "text-embedding-3-small",
};

// Shared arrange step: a fresh graph force-synced with 3-dimensional stub embeddings.
async function buildIndexedGraph() {
  const graph = createVectorGraph();
  embeddingDim = 3;
  await syncGraphVectorIndex(graph, baseConfig, {
    chatId: graph.historyState.chatId,
    force: true,
  });
  return graph;
}

// Scenario 1: a forced sync records a clean manifest that never contains the API key.
{
  const graph = await buildIndexedGraph();
  const { manifest } = graph.vectorIndexState;
  assert.equal(manifest.status, "clean");
  assert.equal(manifest.observedDim, 3);
  assert.equal(manifest.model, "text-embedding-3-small");
  assert.equal(manifest.vectorSpaceId.startsWith("vs_"), true);
  assert.equal(JSON.stringify(manifest).includes("sk-hidden"), false);
}

// Scenario 2: searching with a different model returns nothing and marks the index dirty.
{
  const graph = await buildIndexedGraph();
  const oldSpaceId = graph.vectorIndexState.manifest.vectorSpaceId;
  const changedModelConfig = { ...baseConfig, model: "text-embedding-3-large" };

  const results = await findSimilarNodesByText(graph, "compass", changedModelConfig, 5);

  const state = graph.vectorIndexState;
  assert.deepEqual(results, []);
  assert.equal(state.dirty, true);
  assert.equal(state.dirtyReason, "vector-space-mismatch");
  assert.equal(state.manifest.vectorSpaceId, oldSpaceId);
  assert.notEqual(getVectorModelScope(baseConfig), getVectorModelScope(changedModelConfig));
  assert.equal(state.lastSearchTimings.reason, "vector-space-mismatch");
}

// Scenario 3: a query vector whose dimension differs from the index turns the manifest stale.
{
  const graph = await buildIndexedGraph();
  embeddingDim = 4;

  const results = await findSimilarNodesByText(graph, "compass", baseConfig, 5);

  const state = graph.vectorIndexState;
  assert.deepEqual(results, []);
  assert.equal(state.dirty, true);
  assert.equal(state.dirtyReason, "query-dimension-mismatch");
  assert.equal(state.manifest.status, "stale");
  assert.equal(state.lastSearchTimings.reason, "query-dimension-mismatch");
}

// Scenario 4: a non-forced sync under a different model replaces a pre-existing embedding.
{
  const graph = createVectorGraph();
  graph.nodes[0].embedding = [0.1, 0.2, 0.3];
  embeddingDim = 3;
  const changedModelConfig = { ...baseConfig, model: "text-embedding-3-large" };

  await syncGraphVectorIndex(graph, changedModelConfig, { chatId: graph.historyState.chatId });

  const { manifest } = graph.vectorIndexState;
  const [firstNode] = graph.nodes;
  assert.equal(manifest.status, "clean");
  assert.equal(manifest.model, "text-embedding-3-large");
  assert.equal(firstNode.embedding.length, 3);
  assert.equal(firstNode.embedding[0], 1);
  assert.notDeepEqual(firstNode.embedding, [0.1, 0.2, 0.3]);
}

console.log("vector-manifest tests passed");
|
||||
67
tests/vector-space.mjs
Normal file
67
tests/vector-space.mjs
Normal file
@@ -0,0 +1,67 @@
|
||||
import assert from "node:assert/strict";
|
||||
|
||||
import {
|
||||
createVectorManifest,
|
||||
deriveVectorSpace,
|
||||
isVectorManifestCompatible,
|
||||
normalizeVectorApiUrl,
|
||||
summarizeVectorSpaceChange,
|
||||
} from "../vector/vector-space.js";
|
||||
|
||||
// The query string and the trailing /embeddings segment are removed from the endpoint.
const normalizedEndpoint = normalizeVectorApiUrl("https://example.com/v1/embeddings?key=secret");
assert.equal(normalizedEndpoint, "https://example.com/v1");

const baseConfig = {
  mode: "direct",
  apiUrl: "https://example.com/v1/embeddings",
  apiKey: "sk-should-not-appear",
  model: "text-embedding-3-small",
};

// Asserts that `changedSpace` has a different id than `referenceSpace` and that
// the change is summarized with the expected reason.
function assertSpaceChanged(referenceSpace, changedSpace, expectedReason) {
  assert.notEqual(referenceSpace.vectorSpaceId, changedSpace.vectorSpaceId);
  assert.equal(summarizeVectorSpaceChange(referenceSpace, changedSpace), expectedReason);
}

// A different API key yields the same vector space id, and the key is not serialized.
const space1536 = deriveVectorSpace(baseConfig, 1536, { probedAt: 1 });
const sameSpace = deriveVectorSpace({ ...baseConfig, apiKey: "sk-different" }, 1536, {
  probedAt: 2,
});
assert.equal(space1536.vectorSpaceId, sameSpace.vectorSpaceId);
assert.equal(JSON.stringify(space1536).includes("sk-should-not-appear"), false);
assert.equal(space1536.observedDim, 1536);

// Dimension, model and endpoint changes each produce a new id and a specific reason.
const space3072 = deriveVectorSpace(baseConfig, 3072, { probedAt: 3 });
assertSpaceChanged(space1536, space3072, "dimension-changed");

const differentModel = deriveVectorSpace(
  { ...baseConfig, model: "text-embedding-3-large" },
  1536,
  { probedAt: 4 },
);
assertSpaceChanged(space1536, differentModel, "model-changed");

const differentEndpoint = deriveVectorSpace(
  { ...baseConfig, apiUrl: "https://other.example.com/v1/embeddings" },
  1536,
  { probedAt: 5 },
);
assertSpaceChanged(space1536, differentEndpoint, "endpoint-changed");

// Compatibility requires both a matching vector space and a non-stale manifest here.
const manifest = createVectorManifest({
  backend: "local",
  vectorSpace: space1536,
  status: "clean",
  nodeCount: 2,
  embeddedNodeCount: 2,
});
const staleManifest = { ...manifest, status: "stale" };
assert.equal(isVectorManifestCompatible(manifest, space1536), true);
assert.equal(isVectorManifestCompatible(manifest, space3072), false);
assert.equal(isVectorManifestCompatible(staleManifest, space1536), false);

console.log("vector-space tests passed");
|
||||
15
ui/panel.js
15
ui/panel.js
@@ -4408,6 +4408,18 @@ function _refreshDashboard() {
|
||||
const vectorStats = getVectorIndexStats(graph);
|
||||
const vectorMode = graph?.vectorIndexState?.mode || "—";
|
||||
const vectorSource = graph?.vectorIndexState?.source || "—";
|
||||
const vectorManifest = graph?.vectorIndexState?.manifest || null;
|
||||
const vectorManifestMeta = vectorManifest?.status
|
||||
? [
|
||||
`索引 ${vectorManifest.status}`,
|
||||
Number(vectorManifest.observedDim || 0) > 0
|
||||
? `${Number(vectorManifest.observedDim)}D`
|
||||
: "",
|
||||
vectorManifest.lastError ? `原因 ${vectorManifest.lastError}` : "",
|
||||
]
|
||||
.filter(Boolean)
|
||||
.join(" · ")
|
||||
: "索引未记录维度";
|
||||
const recovery = graph?.historyState?.lastRecoveryResult;
|
||||
const extractionStatus = _getLastExtractionStatus?.() || {};
|
||||
const lastBatchStatus = _getLatestBatchStatusSnapshot();
|
||||
@@ -4428,7 +4440,7 @@ function _refreshDashboard() {
|
||||
);
|
||||
_setText(
|
||||
"bme-status-vector",
|
||||
`${vectorMode}/${vectorSource} · total ${vectorStats.total} · indexed ${vectorStats.indexed} · stale ${vectorStats.stale} · pending ${vectorStats.pending}`,
|
||||
`${vectorMode}/${vectorSource} · ${vectorManifestMeta} · total ${vectorStats.total} · indexed ${vectorStats.indexed} · stale ${vectorStats.stale} · pending ${vectorStats.pending}`,
|
||||
);
|
||||
_setText(
|
||||
"bme-status-recovery",
|
||||
@@ -14554,4 +14566,3 @@ function _getNodeSnippet(node) {
|
||||
/**
 * Reports whether the window is at most 768 pixels wide.
 *
 * @returns {boolean} true when window.innerWidth <= 768.
 */
function _isMobile() {
  const { innerWidth } = window;
  return innerWidth <= 768;
}
|
||||
|
||||
|
||||
@@ -5,6 +5,7 @@ import {
|
||||
AuthorityHttpError,
|
||||
} from "../runtime/authority-http-client.js";
|
||||
import { embedBatch } from "./embedding.js";
|
||||
import { deriveVectorSpace } from "./vector-space.js";
|
||||
|
||||
export const AUTHORITY_VECTOR_MODE = "authority";
|
||||
export const AUTHORITY_VECTOR_SOURCE = "authority-trivium";
|
||||
@@ -872,7 +873,7 @@ export async function upsertAuthorityTriviumEntries(graph, config = {}, entries
|
||||
}
|
||||
|
||||
export async function applyAuthorityBmeVectorManifest(graph, config = {}, entries = [], options = {}) {
|
||||
const items = buildAuthorityVectorItems(graph, entries, options);
|
||||
let items = buildAuthorityVectorItems(graph, entries, options);
|
||||
const links = buildAuthorityLinkItems(graph, options).map((link) => ({
|
||||
src: buildNodeReference(link.fromId, options.namespace),
|
||||
dst: buildNodeReference(link.toId, options.namespace),
|
||||
@@ -883,6 +884,29 @@ export async function applyAuthorityBmeVectorManifest(graph, config = {}, entrie
|
||||
if (missingVector) {
|
||||
throw new Error("BME vector apply requires vector for every item");
|
||||
}
|
||||
const observedDim = items.reduce((dim, item) => {
|
||||
const vectorDim = normalizeVector(item?.vector || item?.embedding).length;
|
||||
if (!vectorDim) return dim;
|
||||
if (!dim) return vectorDim;
|
||||
return dim === vectorDim ? dim : -1;
|
||||
}, 0);
|
||||
if (observedDim < 0) {
|
||||
const error = new Error("BME vector apply requires a single vector dimension per batch");
|
||||
error.errorCategory = "vector-dimension-mismatch";
|
||||
error.errorDomain = "embedding";
|
||||
throw error;
|
||||
}
|
||||
const vectorSpace = observedDim > 0 ? deriveVectorSpace(config, observedDim) : null;
|
||||
if (vectorSpace?.vectorSpaceId) {
|
||||
items = items.map((item) => ({
|
||||
...item,
|
||||
payload: {
|
||||
...(item.payload || {}),
|
||||
vectorSpaceId: vectorSpace.vectorSpaceId,
|
||||
observedDim: vectorSpace.observedDim,
|
||||
},
|
||||
}));
|
||||
}
|
||||
throwIfAborted(options.signal);
|
||||
const client = createAuthorityTriviumClient(config, options);
|
||||
const startedAt = nowMs();
|
||||
@@ -900,6 +924,8 @@ export async function applyAuthorityBmeVectorManifest(graph, config = {}, entrie
|
||||
graphRevision: Math.max(0, Math.floor(Number(options.revision) || 0)),
|
||||
modelScope: String(options.modelScope || ""),
|
||||
embeddingMode: config.embeddingMode || "client",
|
||||
...(vectorSpace?.vectorSpaceId ? { vectorSpaceId: vectorSpace.vectorSpaceId } : {}),
|
||||
...(observedDim > 0 ? { observedDim } : {}),
|
||||
items,
|
||||
links,
|
||||
idempotencyKey: [
|
||||
|
||||
@@ -6,6 +6,12 @@ import { getActiveNodes } from "../graph/graph.js";
|
||||
import { describeMemoryScope, normalizeMemoryScope } from "../graph/memory-scope.js";
|
||||
import { resolveConfiguredTimeoutMs } from "../runtime/request-timeout.js";
|
||||
import { buildVectorCollectionId, stableHashString } from "../runtime/runtime-state.js";
|
||||
import {
|
||||
createVectorManifest,
|
||||
deriveVectorSpace,
|
||||
isVectorManifestCompatible,
|
||||
summarizeVectorSpaceChange,
|
||||
} from "./vector-space.js";
|
||||
import {
|
||||
AUTHORITY_VECTOR_MODE,
|
||||
AUTHORITY_VECTOR_SOURCE,
|
||||
@@ -580,6 +586,81 @@ function resetVectorMappings(graph, config, chatId) {
|
||||
graph.vectorIndexState.nodeToHash = {};
|
||||
}
|
||||
|
||||
/**
 * Determines the embedding length shared by the graph nodes that `entries` refer to.
 *
 * Entries are resolved through `entry.nodeId`; nodes without a non-empty
 * `embedding` array are ignored.
 *
 * @param {object} graph - Graph whose `nodes` carry optional `embedding` arrays.
 * @param {Array<object>} [entries] - Entries carrying a `nodeId`.
 * @returns {number} The common length, 0 when no referenced node has an
 *   embedding, or -1 when two referenced embeddings differ in length.
 */
function getEmbeddingDimensionFromEntries(graph, entries = []) {
  const nodeLookup = new Map();
  (graph?.nodes || []).forEach((candidate) => {
    nodeLookup.set(String(candidate?.id || ""), candidate);
  });

  let sharedDim = 0;
  for (const entry of entries || []) {
    const embedding = nodeLookup.get(String(entry?.nodeId || ""))?.embedding;
    const length = Array.isArray(embedding) ? embedding.length : 0;
    if (length === 0) continue;
    if (sharedDim === 0) {
      // The first embedding found fixes the expected dimension.
      sharedDim = length;
      continue;
    }
    if (length !== sharedDim) return -1;
  }
  return sharedDim;
}
|
||||
|
||||
/**
 * Builds a vector manifest from the given sync outcome and stores it, together
 * with the derived vector space, on `graph.vectorIndexState`.
 *
 * @param {object} graph - Graph whose `vectorIndexState` is updated.
 * @param {object} config - Embedding configuration passed to `deriveVectorSpace`.
 * @param {object} [options] - Sync outcome (backend, chatId, collectionId,
 *   graphRevision, desiredEntries, observedDim, status, failedNodeCount, lastError).
 * @returns {object|null} The stored manifest, or null when the graph has no
 *   `vectorIndexState`.
 */
function updateVectorManifest(graph, config, options = {}) {
  const {
    backend = "local",
    chatId = "",
    collectionId = "",
    graphRevision = 0,
    desiredEntries = [],
    observedDim = 0,
    status = "clean",
    failedNodeCount = 0,
    lastError = "",
  } = options;

  const indexState = graph?.vectorIndexState;
  if (!indexState) return null;

  // A vector space is only derived once a positive dimension has been observed.
  let vectorSpace = null;
  if (observedDim > 0) {
    vectorSpace = deriveVectorSpace(config, observedDim);
  }

  const totalNodes = desiredEntries.length;
  const manifest = createVectorManifest({
    backend,
    chatId: chatId || graph?.historyState?.chatId || "",
    collectionId: collectionId || indexState.collectionId || "",
    graphRevision,
    vectorSpace,
    status,
    nodeCount: totalNodes,
    embeddedNodeCount: Math.max(0, totalNodes - failedNodeCount),
    failedNodeCount,
    lastError,
  });

  indexState.currentVectorSpace = vectorSpace;
  indexState.manifest = manifest;
  return manifest;
}
|
||||
|
||||
/**
 * Flags the graph's vector index as needing a rebuild.
 *
 * The stored manifest (or a freshly created stale one when none exists) is
 * copied with status "stale", `lastError` set to `reason` and `completedAt`
 * reset to 0; the index state is marked dirty with the same reason and a
 * warning message is recorded.
 *
 * @param {object} graph - Graph whose `vectorIndexState` is updated; a graph
 *   without one is left untouched.
 * @param {object} config - Not read by this function.
 * @param {string} [reason] - Why the index became stale.
 */
function markLocalVectorManifestStale(graph, config, reason = "vector-space-changed") {
  const indexState = graph?.vectorIndexState;
  if (!indexState) return;

  const existing = indexState.manifest;
  const hasManifest = Boolean(existing) && typeof existing === "object";
  const base = hasManifest
    ? existing
    : createVectorManifest({ backend: "local", status: "stale" });

  const staleManifest = { ...base };
  staleManifest.backend = (hasManifest && existing.backend) || "local";
  staleManifest.status = "stale";
  staleManifest.lastError = reason;
  staleManifest.completedAt = 0;
  indexState.manifest = staleManifest;

  indexState.dirty = true;
  indexState.dirtyReason = reason;
  if (reason === "dimension-changed") {
    indexState.lastWarning = "向量模型维度变化,索引已标记为待重建";
  } else {
    indexState.lastWarning = "向量模型配置变化,索引已标记为待重建";
  }
}
|
||||
|
||||
/**
 * Decides whether a failed BME vector apply was rejected because the vectors
 * do not fit the target vector space (dimension or vector-space mismatch).
 *
 * A category is read from `error.payload.details.category`,
 * `error.details.category` and `error.errorCategory`; the last one is the tag
 * set on the mixed-dimension error thrown before the apply request is sent.
 * Categories are compared case-insensitively. As a fallback, the lower-cased
 * message is searched for the known mismatch phrases.
 *
 * @param {object|null} [error] - The caught error, if any.
 * @returns {boolean} true when the error describes a vector compatibility problem.
 */
function isVectorApplyCompatibilityError(error = null) {
  const compatibilityCategories = ["vector-dimension-mismatch", "vector-space-mismatch"];
  const reportedCategories = [
    error?.payload?.details?.category,
    error?.details?.category,
    error?.errorCategory,
  ].map((category) => String(category || "").trim().toLowerCase());
  if (reportedCategories.some((category) => compatibilityCategories.includes(category))) {
    return true;
  }

  // Fallback for errors that only describe the problem in their message text.
  const message = String(error?.message || "").toLowerCase();
  return ["dimension mismatch", "vectorspaceid mismatch", "single vector dimension"].some(
    (phrase) => message.includes(phrase),
  );
}
|
||||
|
||||
function markBackendVectorStateDirty(
|
||||
graph,
|
||||
config,
|
||||
@@ -853,6 +934,18 @@ export async function syncGraphVectorIndex(
|
||||
);
|
||||
authorityUpsertMs += nowMs() - applyStartedAt;
|
||||
authorityUpsertDiagnostics = applyResult?.diagnostics || null;
|
||||
const observedDim = Number(applyResult?.manifest?.observedDim || getEmbeddingDimensionFromEntries(graph, desiredEntries) || 0);
|
||||
if (observedDim > 0) {
|
||||
updateVectorManifest(graph, config, {
|
||||
backend: "authority",
|
||||
chatId: effectiveChatId,
|
||||
collectionId,
|
||||
graphRevision: graph?.meta?.revision || graph?.revision || 0,
|
||||
desiredEntries,
|
||||
observedDim,
|
||||
status: "clean",
|
||||
});
|
||||
}
|
||||
authorityLinkDiagnostics = {
|
||||
operation: "bmeVectorApply:links",
|
||||
totalItems: Number(applyResult?.diagnostics?.linkItems || 0),
|
||||
@@ -863,6 +956,7 @@ export async function syncGraphVectorIndex(
|
||||
appliedViaBme = true;
|
||||
} catch (applyError) {
|
||||
if (isAbortError(applyError)) throw applyError;
|
||||
if (isVectorApplyCompatibilityError(applyError)) throw applyError;
|
||||
console.warn("[ST-BME] BME 服务端向量 apply 失败,回退 Authority Trivium 旧路径:", applyError);
|
||||
}
|
||||
}
|
||||
@@ -945,6 +1039,18 @@ export async function syncGraphVectorIndex(
|
||||
);
|
||||
authorityUpsertMs += nowMs() - applyStartedAt;
|
||||
authorityUpsertDiagnostics = applyResult?.diagnostics || null;
|
||||
const observedDim = Number(applyResult?.manifest?.observedDim || getEmbeddingDimensionFromEntries(graph, entriesToUpsert) || 0);
|
||||
if (observedDim > 0) {
|
||||
updateVectorManifest(graph, config, {
|
||||
backend: "authority",
|
||||
chatId: effectiveChatId,
|
||||
collectionId,
|
||||
graphRevision: graph?.meta?.revision || graph?.revision || 0,
|
||||
desiredEntries,
|
||||
observedDim,
|
||||
status: "clean",
|
||||
});
|
||||
}
|
||||
authorityLinkDiagnostics = {
|
||||
operation: "bmeVectorApply:links",
|
||||
totalItems: Number(applyResult?.diagnostics?.linkItems || 0),
|
||||
@@ -954,6 +1060,7 @@ export async function syncGraphVectorIndex(
|
||||
appliedViaBme = true;
|
||||
} catch (applyError) {
|
||||
if (isAbortError(applyError)) throw applyError;
|
||||
if (isVectorApplyCompatibilityError(applyError)) throw applyError;
|
||||
console.warn("[ST-BME] BME 服务端向量 apply 失败,回退 Authority Trivium 旧路径:", applyError);
|
||||
}
|
||||
}
|
||||
@@ -1129,6 +1236,24 @@ export async function syncGraphVectorIndex(
|
||||
}
|
||||
}
|
||||
} else {
|
||||
const directScopeChanged =
|
||||
state.mode !== "direct" ||
|
||||
state.modelScope !== getVectorModelScope(config) ||
|
||||
state.collectionId !== collectionId;
|
||||
if (directScopeChanged && state.manifest?.vectorSpaceId) {
|
||||
const previous = state.currentVectorSpace || {
|
||||
vectorSpaceId: state.manifest.vectorSpaceId,
|
||||
observedDim: state.manifest.observedDim,
|
||||
model: state.manifest.model,
|
||||
normalizedApiUrl: state.manifest.normalizedApiUrl,
|
||||
};
|
||||
const current = deriveVectorSpace(config, Number(state.manifest.observedDim || 0));
|
||||
markLocalVectorManifestStale(
|
||||
graph,
|
||||
config,
|
||||
summarizeVectorSpaceChange(previous, current),
|
||||
);
|
||||
}
|
||||
const entriesToEmbed = [];
|
||||
const hashByNodeId = {};
|
||||
|
||||
@@ -1141,7 +1266,7 @@ export async function syncGraphVectorIndex(
|
||||
const hasEmbedding =
|
||||
Array.isArray(node?.embedding) && node.embedding.length > 0;
|
||||
|
||||
if (!force && !currentHash && hasEmbedding) {
|
||||
if (!directScopeChanged && !force && !currentHash && hasEmbedding) {
|
||||
state.hashToNodeId[entry.hash] = entry.nodeId;
|
||||
state.nodeToHash[entry.nodeId] = entry.hash;
|
||||
continue;
|
||||
@@ -1152,7 +1277,7 @@ export async function syncGraphVectorIndex(
|
||||
}
|
||||
}
|
||||
|
||||
if (purge || state.mode !== "direct") {
|
||||
if (purge || directScopeChanged) {
|
||||
resetVectorMappings(graph, config, chatId);
|
||||
} else {
|
||||
for (const [nodeId, hash] of Object.entries(state.nodeToHash || {})) {
|
||||
@@ -1201,9 +1326,51 @@ export async function syncGraphVectorIndex(
|
||||
state.modelScope = getVectorModelScope(config);
|
||||
state.collectionId = collectionId;
|
||||
state.dirty = directSyncHadFailures;
|
||||
state.dirtyReason = directSyncHadFailures ? "partial-embedding-failure" : "";
|
||||
state.lastWarning = directSyncHadFailures
|
||||
? "部分节点 embedding 生成失败,向量索引仍待修复"
|
||||
: "";
|
||||
const observedDim = getEmbeddingDimensionFromEntries(graph, desiredEntries);
|
||||
if (observedDim < 0) {
|
||||
updateVectorManifest(graph, config, {
|
||||
backend: "local",
|
||||
chatId,
|
||||
collectionId,
|
||||
graphRevision: graph?.meta?.revision || graph?.revision || 0,
|
||||
desiredEntries,
|
||||
observedDim: 0,
|
||||
status: "failed",
|
||||
failedNodeCount: desiredEntries.length,
|
||||
lastError: "mixed-dimensions",
|
||||
});
|
||||
state.dirty = true;
|
||||
state.dirtyReason = "mixed-vector-dimensions";
|
||||
state.lastWarning = "检测到混合向量维度,索引已标记为待重建";
|
||||
} else if (observedDim > 0) {
|
||||
updateVectorManifest(graph, config, {
|
||||
backend: "local",
|
||||
chatId,
|
||||
collectionId,
|
||||
graphRevision: graph?.meta?.revision || graph?.revision || 0,
|
||||
desiredEntries,
|
||||
observedDim,
|
||||
status: directSyncHadFailures ? "dirty" : "clean",
|
||||
failedNodeCount: directSyncHadFailures ? Math.max(1, desiredEntries.length - insertedHashes.length) : 0,
|
||||
lastError: directSyncHadFailures ? "partial-embedding-failure" : "",
|
||||
});
|
||||
} else {
|
||||
updateVectorManifest(graph, config, {
|
||||
backend: "local",
|
||||
chatId,
|
||||
collectionId,
|
||||
graphRevision: graph?.meta?.revision || graph?.revision || 0,
|
||||
desiredEntries,
|
||||
observedDim: 0,
|
||||
status: "missing",
|
||||
failedNodeCount: desiredEntries.length,
|
||||
lastError: "no-vectors",
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
if (state.mode !== "direct") {
|
||||
@@ -1302,6 +1469,24 @@ export async function findSimilarNodesByText(
|
||||
}
|
||||
|
||||
if (isDirectVectorConfig(config)) {
|
||||
const state = graph?.vectorIndexState || {};
|
||||
const currentDim = Number(state.currentVectorSpace?.observedDim || state.manifest?.observedDim || 0);
|
||||
const currentVectorSpace = currentDim > 0
|
||||
? deriveVectorSpace(config, currentDim)
|
||||
: state.currentVectorSpace;
|
||||
if (!isVectorManifestCompatible(state.manifest, currentVectorSpace)) {
|
||||
recordSearchTimings({
|
||||
success: false,
|
||||
reason: "vector-space-mismatch",
|
||||
resultCount: 0,
|
||||
});
|
||||
if (state) {
|
||||
state.dirty = true;
|
||||
state.dirtyReason = "vector-space-mismatch";
|
||||
state.lastWarning = "向量空间不匹配,已切换到非向量召回并等待重建";
|
||||
}
|
||||
return [];
|
||||
}
|
||||
const queryEmbedStartedAt = nowMs();
|
||||
const queryVec = await embedText(text, config, { signal, isQuery: true });
|
||||
const queryEmbedMs = nowMs() - queryEmbedStartedAt;
|
||||
@@ -1314,6 +1499,23 @@ export async function findSimilarNodesByText(
|
||||
});
|
||||
return [];
|
||||
}
|
||||
if (currentDim > 0 && queryVec.length !== currentDim) {
|
||||
recordSearchTimings({
|
||||
success: false,
|
||||
reason: "query-dimension-mismatch",
|
||||
queryDim: queryVec.length,
|
||||
expectedDim: currentDim,
|
||||
queryEmbedMs: roundMs(queryEmbedMs),
|
||||
resultCount: 0,
|
||||
});
|
||||
state.dirty = true;
|
||||
state.dirtyReason = "query-dimension-mismatch";
|
||||
state.lastWarning = `查询向量维度 ${queryVec.length} 与索引维度 ${currentDim} 不一致,已切换到非向量召回`;
|
||||
if (state.manifest) {
|
||||
state.manifest = { ...state.manifest, status: "stale", lastError: "query-dimension-mismatch" };
|
||||
}
|
||||
return [];
|
||||
}
|
||||
|
||||
const localSearchStartedAt = nowMs();
|
||||
const results = searchSimilar(
|
||||
@@ -1350,6 +1552,24 @@ export async function findSimilarNodesByText(
|
||||
}
|
||||
|
||||
if (isAuthorityVectorConfig(config)) {
|
||||
const state = graph?.vectorIndexState || {};
|
||||
if (config.bmeVectorApplyReady === true || config.bmeVectorManifestReady === true) {
|
||||
const currentDim = Number(state.currentVectorSpace?.observedDim || state.manifest?.observedDim || 0);
|
||||
const currentVectorSpace = currentDim > 0
|
||||
? deriveVectorSpace(config, currentDim)
|
||||
: state.currentVectorSpace;
|
||||
if (!isVectorManifestCompatible(state.manifest, currentVectorSpace)) {
|
||||
recordSearchTimings({
|
||||
success: false,
|
||||
reason: "authority-vector-space-mismatch",
|
||||
resultCount: 0,
|
||||
});
|
||||
state.dirty = true;
|
||||
state.dirtyReason = "authority-vector-space-mismatch";
|
||||
state.lastWarning = "Authority 向量空间不匹配,已切换到非向量召回并等待重建";
|
||||
return [];
|
||||
}
|
||||
}
|
||||
const requestStartedAt = nowMs();
|
||||
try {
|
||||
const queryEmbedStartedAt = nowMs();
|
||||
|
||||
129
vector/vector-space.js
Normal file
129
vector/vector-space.js
Normal file
@@ -0,0 +1,129 @@
|
||||
import { stableHashString } from "../runtime/runtime-state.js";
|
||||
|
||||
export const VECTOR_MANIFEST_VERSION = 1;
|
||||
|
||||
/**
 * Coerce a value to a trimmed string.
 *
 * Every falsy input (undefined, null, "", 0, false, NaN) collapses to "",
 * so callers can pass optional settings straight through.
 *
 * @param {*} value - Value to coerce.
 * @returns {string} Trimmed string form of `value`, or "" when `value` is falsy.
 */
function normalizeString(value) {
  return String(value || "").trim();
}
|
||||
|
||||
/**
 * Coerce a value to a trimmed, lower-cased string.
 *
 * @param {*} value - Value to coerce; handled by normalizeString first.
 * @returns {string} Lower-cased result of normalizeString(value).
 */
function normalizeLower(value) {
  return normalizeString(value).toLowerCase();
}
|
||||
|
||||
/**
 * Reduce an embedding endpoint URL to a canonical base form so that
 * equivalent spellings of the same endpoint compare equal.
 *
 * The result keeps only protocol, host and path. Query string, fragment,
 * trailing slashes and a trailing "/embeddings" segment are removed, and a
 * trailing "/v1" segment is lower-cased. Inputs starting with "/" are
 * treated as site-relative and returned without an origin.
 *
 * @param {*} value - Raw URL (absolute, site-relative, or free-form text).
 * @returns {string} Normalized URL, or "" when the input is empty.
 */
export function normalizeVectorApiUrl(value) {
  const raw = normalizeString(value);
  if (!raw) return "";
  try {
    // Site-relative paths need a base to parse; the placeholder origin is
    // stripped again before returning.
    const url = new URL(raw, raw.startsWith("/") ? "http://st-bme.local" : undefined);
    url.hash = "";
    url.search = "";
    let pathname = url.pathname.replace(/\/+$/, "");
    // The second replace only changes letter case: a trailing "/V1" becomes "/v1".
    pathname = pathname.replace(/\/embeddings$/i, "").replace(/\/v1$/i, "/v1");
    const normalized = `${url.protocol}//${url.host}${pathname}`.replace(/\/+$/, "");
    return raw.startsWith("/") ? normalized.replace(/^http:\/\/st-bme\.local/i, "") : normalized;
  } catch {
    // Not parseable as a URL (e.g. "host/path" without a scheme): fall back
    // to plain text trimming of query/fragment, trailing slashes and
    // a trailing "/embeddings".
    return raw.replace(/[?#].*$/, "").replace(/\/+$/, "").replace(/\/embeddings$/i, "");
  }
}
|
||||
|
||||
/**
 * Classify which kind of provider a vector config talks to.
 *
 * @param {object} [config] - Vector config; `mode` and `source` are read.
 * @returns {"authority-client"|"st-backend"|"direct-openai-compatible"}
 *   "authority-client" when `mode` is "authority" or `source` is
 *   "authority-trivium", "st-backend" when `mode` is "backend", otherwise
 *   "direct-openai-compatible".
 */
export function getVectorProviderKind(config = {}) {
  if (config?.mode === "authority" || config?.source === "authority-trivium") {
    return "authority-client";
  }
  if (config?.mode === "backend") {
    return "st-backend";
  }
  return "direct-openai-compatible";
}
|
||||
|
||||
/**
 * Report the embedding mode implied by a vector config.
 *
 * `mode === "backend"` takes precedence over `embeddingMode`.
 *
 * @param {object} [config] - Vector config; `mode` and `embeddingMode` are read.
 * @returns {"st-backend"|"server"|"client"} "st-backend" for backend mode,
 *   "server" when `embeddingMode` is "server", otherwise "client".
 */
export function getVectorEmbeddingMode(config = {}) {
  if (config?.mode === "backend") return "st-backend";
  if (config?.embeddingMode === "server") return "server";
  return "client";
}
|
||||
|
||||
/**
 * Build a descriptor of the embedding "vector space" implied by a config
 * and an observed embedding dimension.
 *
 * `vectorSpaceId` hashes provider kind, embedding mode, source, normalized
 * API URL, model and dimension together; it is "" while the dimension is
 * unknown (0). `settingsFingerprint` hashes the same fields without the
 * dimension.
 *
 * @param {object} [config] - Vector config (`mode`, `source`,
 *   `embeddingSource`, `embeddingMode`, `apiUrl`, `baseUrl`, `model` are read).
 * @param {number} [observedDim=0] - Observed embedding length; non-numeric or
 *   negative values are treated as 0 and fractions are floored.
 * @param {object} [extra] - Optional overrides/fallbacks: `providerKind` and
 *   `embeddingMode` override the config-derived values; `apiUrl` and `model`
 *   are used only when the config has none; `probedAt` sets the timestamp.
 * @returns {object} Descriptor with `vectorSpaceId`, `providerKind`,
 *   `embeddingMode`, `source`, `normalizedApiUrl`, `model`, `observedDim`,
 *   `settingsFingerprint` and `probedAt`.
 */
export function deriveVectorSpace(config = {}, observedDim = 0, extra = {}) {
  // Default parameters only cover `undefined`; tolerate an explicit `null`
  // the same way the sibling helpers do via optional chaining.
  const cfg = config ?? {};
  const opts = extra ?? {};

  const dim = Math.max(0, Math.floor(Number(observedDim) || 0));
  const providerKind = normalizeLower(opts.providerKind || getVectorProviderKind(cfg));
  const embeddingMode = normalizeLower(opts.embeddingMode || getVectorEmbeddingMode(cfg));
  const source = normalizeLower(cfg.embeddingSource || cfg.source || "");
  const normalizedApiUrl = normalizeVectorApiUrl(
    cfg.apiUrl || cfg.baseUrl || opts.apiUrl || "",
  );
  const model = normalizeString(cfg.model || opts.model || "");

  // Key order matters: this object is serialized and hashed, so reordering
  // the properties would change every derived id.
  const material = {
    providerKind,
    embeddingMode,
    source,
    normalizedApiUrl,
    model,
    observedDim: dim,
  };
  const vectorSpaceId = dim > 0
    ? `vs_${stableHashString(JSON.stringify(material))}`
    : "";

  return {
    vectorSpaceId,
    providerKind,
    embeddingMode,
    source,
    normalizedApiUrl,
    model,
    observedDim: dim,
    // JSON.stringify omits properties whose value is undefined, which drops
    // the dimension from the fingerprint input.
    settingsFingerprint: stableHashString(JSON.stringify({ ...material, observedDim: undefined })),
    probedAt: Number(opts.probedAt || Date.now()),
  };
}
|
||||
|
||||
/**
 * Create a manifest record describing the state of a vector index.
 *
 * All counters and the graph revision are clamped to non-negative integers.
 * `createdAt` is the current time; `completedAt` is the current time only
 * when `status` is "clean", otherwise 0.
 *
 * @param {object} [options]
 * @param {string} [options.backend="local"] - Backend label stored as-is.
 * @param {string} [options.chatId=""] - Chat identifier stored as-is.
 * @param {string} [options.collectionId=""] - Collection identifier stored as-is.
 * @param {number} [options.graphRevision=0] - Graph revision.
 * @param {object|null} [options.vectorSpace=null] - Vector space descriptor;
 *   `vectorSpaceId`, `observedDim`, `model` and `normalizedApiUrl` are copied.
 * @param {string} [options.status="missing"] - Manifest status.
 * @param {number} [options.nodeCount=0] - Total node count.
 * @param {number} [options.embeddedNodeCount=0] - Embedded node count.
 * @param {number} [options.failedNodeCount=0] - Failed node count.
 * @param {string} [options.lastError=""] - Last error text.
 * @returns {object} New manifest object.
 */
export function createVectorManifest({
  backend = "local",
  chatId = "",
  collectionId = "",
  graphRevision = 0,
  vectorSpace = null,
  status = "missing",
  nodeCount = 0,
  embeddedNodeCount = 0,
  failedNodeCount = 0,
  lastError = "",
} = {}) {
  // Shared clamp: non-numeric input becomes 0, fractions are floored,
  // negatives are raised to 0.
  const toCount = (value) => Math.max(0, Math.floor(Number(value) || 0));
  const now = Date.now();

  return {
    manifestVersion: VECTOR_MANIFEST_VERSION,
    backend,
    chatId,
    collectionId,
    graphRevision: toCount(graphRevision),
    vectorSpaceId: vectorSpace?.vectorSpaceId || "",
    observedDim: toCount(vectorSpace?.observedDim),
    model: vectorSpace?.model || "",
    normalizedApiUrl: vectorSpace?.normalizedApiUrl || "",
    status,
    nodeCount: toCount(nodeCount),
    embeddedNodeCount: toCount(embeddedNodeCount),
    failedNodeCount: toCount(failedNodeCount),
    createdAt: now,
    completedAt: status === "clean" ? now : 0,
    lastError: lastError || "",
  };
}
|
||||
|
||||
/**
 * Decide whether a stored manifest can be used with a given vector space.
 *
 * Compatible means: both arguments exist, the manifest status is "clean",
 * both carry a non-empty and identical `vectorSpaceId`, and their
 * `observedDim` values are numerically equal.
 *
 * @param {object|null|undefined} manifest - Stored manifest.
 * @param {object|null|undefined} vectorSpace - Current vector space descriptor.
 * @returns {boolean} True only when every condition above holds.
 */
export function isVectorManifestCompatible(manifest, vectorSpace) {
  if (!manifest || !vectorSpace) return false;
  if (manifest.status !== "clean") return false;
  if (!manifest.vectorSpaceId || !vectorSpace.vectorSpaceId) return false;
  if (manifest.vectorSpaceId !== vectorSpace.vectorSpaceId) return false;
  return Number(manifest.observedDim || 0) === Number(vectorSpace.observedDim || 0);
}
|
||||
|
||||
/**
 * Name the most significant difference between two vector space descriptors.
 *
 * Checks run in priority order: missing id, identical id, dimension, model,
 * endpoint; the first one that applies determines the result.
 *
 * @param {object|null|undefined} previous - Earlier descriptor.
 * @param {object|null|undefined} current - Newer descriptor.
 * @returns {"vector-space-missing"|"unchanged"|"dimension-changed"|"model-changed"|"endpoint-changed"|"vector-space-changed"}
 *   Reason code; "vector-space-changed" is the fallback when the ids differ
 *   but dimension, model and endpoint all match.
 */
export function summarizeVectorSpaceChange(previous, current) {
  if (!previous?.vectorSpaceId || !current?.vectorSpaceId) return "vector-space-missing";
  if (previous.vectorSpaceId === current.vectorSpaceId) return "unchanged";
  if (Number(previous.observedDim || 0) !== Number(current.observedDim || 0)) {
    return "dimension-changed";
  }
  if (previous.model !== current.model) return "model-changed";
  if (previous.normalizedApiUrl !== current.normalizedApiUrl) return "endpoint-changed";
  return "vector-space-changed";
}
|
||||
Reference in New Issue
Block a user