From 2a674a7563381e421d46d8c0fd5f9c4deba0527b Mon Sep 17 00:00:00 2001
From: youzini
Date: Fri, 15 May 2026 15:09:57 +0000
Subject: [PATCH] feat(vector): add vector space identity helpers

---
 tests/vector-space.mjs |  71 ++++++++++++++
 vector/vector-space.js | 211 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 282 insertions(+)
 create mode 100644 tests/vector-space.mjs
 create mode 100644 vector/vector-space.js

diff --git a/tests/vector-space.mjs b/tests/vector-space.mjs
new file mode 100644
--- /dev/null
+++ b/tests/vector-space.mjs
@@ -0,0 +1,71 @@
+import assert from "node:assert/strict";
+
+import {
+  createVectorManifest,
+  deriveVectorSpace,
+  isVectorManifestCompatible,
+  normalizeVectorApiUrl,
+  summarizeVectorSpaceChange,
+} from "../vector/vector-space.js";
+
+assert.equal(
+  normalizeVectorApiUrl("https://example.com/v1/embeddings?key=secret"),
+  "https://example.com/v1",
+);
+assert.equal(normalizeVectorApiUrl("/api/v1/embeddings/"), "/api/v1");
+assert.equal(normalizeVectorApiUrl("localhost:11434/v1/embeddings"), "localhost:11434/v1");
+assert.equal(normalizeVectorApiUrl("example.com/v1//embeddings"), "example.com/v1");
+
+const baseConfig = {
+  mode: "direct",
+  apiUrl: "https://example.com/v1/embeddings",
+  apiKey: "sk-should-not-appear",
+  model: "text-embedding-3-small",
+};
+
+const space1536 = deriveVectorSpace(baseConfig, 1536, { probedAt: 1 });
+const sameSpace = deriveVectorSpace(
+  { ...baseConfig, apiKey: "sk-different" },
+  1536,
+  { probedAt: 2 },
+);
+assert.equal(space1536.vectorSpaceId, sameSpace.vectorSpaceId);
+assert.equal(JSON.stringify(space1536).includes("sk-should-not-appear"), false);
+assert.equal(space1536.observedDim, 1536);
+assert.equal(deriveVectorSpace(null, 1536, null).vectorSpaceId.startsWith("vs_"), true);
+
+const space3072 = deriveVectorSpace(baseConfig, 3072, { probedAt: 3 });
+assert.notEqual(space1536.vectorSpaceId, space3072.vectorSpaceId);
+assert.equal(summarizeVectorSpaceChange(space1536, space3072), "dimension-changed");
+
+const differentModel = deriveVectorSpace(
+  { ...baseConfig, model: "text-embedding-3-large" },
+  1536,
+  { probedAt: 4 },
+);
+assert.notEqual(space1536.vectorSpaceId, differentModel.vectorSpaceId);
+assert.equal(summarizeVectorSpaceChange(space1536, differentModel), "model-changed");
+
+const differentEndpoint = deriveVectorSpace(
+  { ...baseConfig, apiUrl: "https://other.example.com/v1/embeddings" },
+  1536,
+  { probedAt: 5 },
+);
+assert.notEqual(space1536.vectorSpaceId, differentEndpoint.vectorSpaceId);
+assert.equal(summarizeVectorSpaceChange(space1536, differentEndpoint), "endpoint-changed");
+
+const manifest = createVectorManifest({
+  backend: "local",
+  vectorSpace: space1536,
+  status: "clean",
+  nodeCount: 2,
+  embeddedNodeCount: 2,
+});
+assert.equal(isVectorManifestCompatible(manifest, space1536), true);
+assert.equal(isVectorManifestCompatible(manifest, space3072), false);
+assert.equal(
+  isVectorManifestCompatible({ ...manifest, status: "stale" }, space1536),
+  false,
+);
+
+console.log("vector-space tests passed");
diff --git a/vector/vector-space.js b/vector/vector-space.js
new file mode 100644
--- /dev/null
+++ b/vector/vector-space.js
@@ -0,0 +1,211 @@
+import { stableHashString } from "../runtime/runtime-state.js";
+
+export const VECTOR_MANIFEST_VERSION = 1;
+
+// Placeholder origin used only to parse root-relative endpoints such as "/api/v1".
+const RELATIVE_URL_BASE = "http://st-bme.local";
+
+function normalizeString(value) {
+  return String(value || "").trim();
+}
+
+function normalizeLower(value) {
+  return normalizeString(value).toLowerCase();
+}
+
+// Textual fallback for endpoints that are not usable as http(s) URLs.
+function normalizeOpaqueApiUrl(raw) {
+  return raw
+    .replace(/[?#].*$/, "")
+    .replace(/\/+$/, "")
+    .replace(/\/embeddings$/i, "")
+    .replace(/\/+$/, "");
+}
+
+/**
+ * Reduce an embeddings endpoint to a canonical base URL: the query string,
+ * fragment, trailing slashes and a trailing `/embeddings` segment are removed;
+ * for http(s) URLs a trailing `/v1` segment is also lower-cased.
+ *
+ * @param {unknown} value - Endpoint as configured; root-relative paths are allowed.
+ * @returns {string} Canonical URL, or "" when the input is empty.
+ */
+export function normalizeVectorApiUrl(value) {
+  const raw = normalizeString(value);
+  if (!raw) return "";
+  const isRelative = raw.startsWith("/");
+  let url;
+  try {
+    url = new URL(raw, isRelative ? RELATIVE_URL_BASE : undefined);
+  } catch {
+    // Not parseable (for example a bare host name): normalize the text instead.
+    return normalizeOpaqueApiUrl(raw);
+  }
+  // "localhost:11434/v1" parses with "localhost:" as its scheme and an empty
+  // host; rebuilding it below would yield "localhost://11434/v1".
+  if (url.protocol !== "http:" && url.protocol !== "https:") {
+    return normalizeOpaqueApiUrl(raw);
+  }
+  const pathname = url.pathname
+    .replace(/\/+$/, "")
+    .replace(/\/embeddings$/i, "")
+    .replace(/\/v1$/i, "/v1");
+  const normalized = `${url.protocol}//${url.host}${pathname}`.replace(/\/+$/, "");
+  return isRelative ? normalized.replace(/^http:\/\/st-bme\.local/i, "") : normalized;
+}
+
+/**
+ * Map a configuration to its provider kind label.
+ *
+ * @param {object|null} [config] - Only `mode` and `source` are read.
+ * @returns {"authority-client"|"st-backend"|"direct-openai-compatible"}
+ */
+export function getVectorProviderKind(config = {}) {
+  if (config?.mode === "authority" || config?.source === "authority-trivium") {
+    return "authority-client";
+  }
+  if (config?.mode === "backend") {
+    return "st-backend";
+  }
+  return "direct-openai-compatible";
+}
+
+/**
+ * Map a configuration to its embedding mode label.
+ *
+ * @param {object|null} [config] - Only `mode` and `embeddingMode` are read.
+ * @returns {"st-backend"|"server"|"client"}
+ */
+export function getVectorEmbeddingMode(config = {}) {
+  if (config?.mode === "backend") return "st-backend";
+  if (config?.embeddingMode === "server") return "server";
+  return "client";
+}
+
+/**
+ * Build the vector space descriptor for a configuration and an observed
+ * embedding dimension. The id hashes provider kind, embedding mode, source,
+ * normalized API URL, model and dimension; the API key is not part of it.
+ *
+ * @param {object|null} [config] - Embedding settings (mode, source, apiUrl/baseUrl, model).
+ * @param {number} [observedDim] - Observed dimension; non-positive values give an empty id.
+ * @param {object|null} [extra] - `providerKind`/`embeddingMode` overrides, `apiUrl`/`model`
+ *   fallbacks and the `probedAt` timestamp (defaults to now).
+ * @returns {object} Descriptor with `vectorSpaceId`, the hashed fields,
+ *   `settingsFingerprint` (hash of the same fields without the dimension) and `probedAt`.
+ */
+export function deriveVectorSpace(config = {}, observedDim = 0, extra = {}) {
+  // Default parameters only cover `undefined`; accept `null` as the helpers above do.
+  const settings = config ?? {};
+  const options = extra ?? {};
+  const dim = Math.max(0, Math.floor(Number(observedDim) || 0));
+  const providerKind = normalizeLower(options.providerKind || getVectorProviderKind(settings));
+  const embeddingMode = normalizeLower(options.embeddingMode || getVectorEmbeddingMode(settings));
+  const source = normalizeLower(settings.embeddingSource || settings.source || "");
+  const normalizedApiUrl = normalizeVectorApiUrl(
+    settings.apiUrl || settings.baseUrl || options.apiUrl || "",
+  );
+  const model = normalizeString(settings.model || options.model || "");
+  const material = {
+    providerKind,
+    embeddingMode,
+    source,
+    normalizedApiUrl,
+    model,
+    observedDim: dim,
+  };
+  const vectorSpaceId = dim > 0
+    ? `vs_${stableHashString(JSON.stringify(material))}`
+    : "";
+  return {
+    vectorSpaceId,
+    providerKind,
+    embeddingMode,
+    source,
+    normalizedApiUrl,
+    model,
+    observedDim: dim,
+    // `undefined` values are dropped by JSON.stringify, so this hash ignores the dimension.
+    settingsFingerprint: stableHashString(JSON.stringify({ ...material, observedDim: undefined })),
+    probedAt: Number(options.probedAt || Date.now()),
+  };
+}
+
+/**
+ * Create a vector manifest record.
+ * Counts and `graphRevision` are coerced to non-negative integers.
+ *
+ * @param {object} [options]
+ * @param {object|null} [options.vectorSpace] - Descriptor from `deriveVectorSpace`.
+ * @param {string} [options.status] - `completedAt` is only set when this is "clean".
+ * @returns {object} Manifest stamped with `VECTOR_MANIFEST_VERSION` and `createdAt`.
+ */
+export function createVectorManifest({
+  backend = "local",
+  chatId = "",
+  collectionId = "",
+  graphRevision = 0,
+  vectorSpace = null,
+  status = "missing",
+  nodeCount = 0,
+  embeddedNodeCount = 0,
+  failedNodeCount = 0,
+  lastError = "",
+} = {}) {
+  const observedDim = Math.max(0, Math.floor(Number(vectorSpace?.observedDim) || 0));
+  const now = Date.now();
+  return {
+    manifestVersion: VECTOR_MANIFEST_VERSION,
+    backend,
+    chatId,
+    collectionId,
+    graphRevision: Math.max(0, Math.floor(Number(graphRevision) || 0)),
+    vectorSpaceId: vectorSpace?.vectorSpaceId || "",
+    observedDim,
+    model: vectorSpace?.model || "",
+    normalizedApiUrl: vectorSpace?.normalizedApiUrl || "",
+    status,
+    nodeCount: Math.max(0, Math.floor(Number(nodeCount) || 0)),
+    embeddedNodeCount: Math.max(0, Math.floor(Number(embeddedNodeCount) || 0)),
+    failedNodeCount: Math.max(0, Math.floor(Number(failedNodeCount) || 0)),
+    createdAt: now,
+    completedAt: status === "clean" ? now : 0,
+    lastError: lastError || "",
+  };
+}
+
+/**
+ * Check whether a manifest can be reused for the given vector space: it must be
+ * "clean" and match both the vector space id and the observed dimension.
+ *
+ * @param {object|null} manifest
+ * @param {object|null} vectorSpace
+ * @returns {boolean}
+ */
+export function isVectorManifestCompatible(manifest, vectorSpace) {
+  if (!manifest || !vectorSpace) return false;
+  if (manifest.status !== "clean") return false;
+  if (!manifest.vectorSpaceId || !vectorSpace.vectorSpaceId) return false;
+  if (manifest.vectorSpaceId !== vectorSpace.vectorSpaceId) return false;
+  return Number(manifest.observedDim || 0) === Number(vectorSpace.observedDim || 0);
+}
+
+/**
+ * Name the first difference between two vector space descriptors, checked in
+ * the order dimension, model, endpoint.
+ *
+ * @param {object|null} previous
+ * @param {object|null} current
+ * @returns {string} "vector-space-missing", "unchanged", "dimension-changed",
+ *   "model-changed", "endpoint-changed" or "vector-space-changed".
+ */
+export function summarizeVectorSpaceChange(previous, current) {
+  if (!previous?.vectorSpaceId || !current?.vectorSpaceId) return "vector-space-missing";
+  if (previous.vectorSpaceId === current.vectorSpaceId) return "unchanged";
+  if (Number(previous.observedDim || 0) !== Number(current.observedDim || 0)) {
+    return "dimension-changed";
+  }
+  if (previous.model !== current.model) return "model-changed";
+  if (previous.normalizedApiUrl !== current.normalizedApiUrl) return "endpoint-changed";
+  return "vector-space-changed";
+}