feat: shared ranking core + prompt node references; recall reuses shared core for base query/vector/diffusion; remove retriever-local duplicate helpers; add regression tests

This commit is contained in:
Youzini-afk
2026-04-12 14:59:22 +08:00
parent 4b4f77caff
commit dc5051f2ef
8 changed files with 1855 additions and 402 deletions

View File

@@ -23,9 +23,7 @@ import {
collectSupplementalAnchorNodeIds,
createCooccurrenceIndex,
isEligibleAnchorNode,
mergeVectorResults,
runResidualRecall,
splitIntentSegments,
} from "./retrieval-enhancer.js";
import {
MEMORY_SCOPE_BUCKETS,
@@ -36,6 +34,7 @@ import {
normalizeMemoryScope,
resolveScopeBucketWeight,
} from "../graph/memory-scope.js";
import { rankNodesForTaskContext } from "./shared-ranking.js";
import {
computeKnowledgeGateForNode,
listKnowledgeOwners,
@@ -54,8 +53,8 @@ import {
} from "../graph/story-timeline.js";
import { getActiveSummaryEntries } from "../graph/summary-state.js";
import { applyTaskRegex } from "../prompting/task-regex.js";
import { createPromptNodeReferenceMap } from "../prompting/prompt-node-references.js";
import { getSTContextForPrompt } from "../host/st-context.js";
import { findSimilarNodesByText, validateVectorConfig } from "../vector/vector-index.js";
function createAbortError(message = "操作已终止") {
const error = new Error(message);
@@ -241,14 +240,6 @@ function normalizeQueryText(value, maxLength = 400) {
return normalized.slice(0, Math.max(1, maxLength));
}
function createTextPreview(text, maxLength = 120) {
const normalized = normalizeQueryText(text, maxLength + 4);
if (!normalized) return "";
return normalized.length > maxLength
? `${normalized.slice(0, maxLength)}...`
: normalized;
}
function normalizeRecallSelectionList(values = [], maxLength = 64) {
const normalized = [];
const seen = new Set();
@@ -262,262 +253,23 @@ function normalizeRecallSelectionList(values = [], maxLength = 64) {
return normalized;
}
function getRecallCandidateLabel(node = {}) {
return String(
node?.fields?.title ||
node?.fields?.name ||
node?.fields?.summary ||
node?.fields?.insight ||
node?.fields?.belief ||
node?.id ||
"",
).trim();
}
function createRecallCandidateKeyMaps(candidates = []) {
const candidateKeyToNodeId = {};
const candidateKeyToCandidateMeta = {};
const nodeIdToCandidateKey = {};
for (const [index, candidate] of (Array.isArray(candidates) ? candidates : []).entries()) {
const node = candidate?.node || {};
const nodeId = String(candidate?.nodeId || node?.id || "").trim();
if (!nodeId) continue;
const candidateKey = `R${index + 1}`;
candidateKeyToNodeId[candidateKey] = nodeId;
nodeIdToCandidateKey[nodeId] = candidateKey;
candidateKeyToCandidateMeta[candidateKey] = {
nodeId,
type: String(node?.type || ""),
label: getRecallCandidateLabel(node),
scopeBucket: String(candidate?.scopeBucket || ""),
temporalBucket: String(candidate?.temporalBucket || ""),
const referenceMap = createPromptNodeReferenceMap(candidates, {
prefix: "R",
maxLength: 80,
buildMeta: ({ entry }) => ({
scopeBucket: String(entry?.scopeBucket || ""),
temporalBucket: String(entry?.temporalBucket || ""),
score:
Math.round(
(Number(candidate?.weightedScore ?? candidate?.finalScore) || 0) * 1000,
(Number(entry?.weightedScore ?? entry?.finalScore) || 0) * 1000,
) / 1000,
};
}
}),
});
return {
candidateKeyToNodeId,
candidateKeyToCandidateMeta,
nodeIdToCandidateKey,
};
}
function roundBlendWeight(value) {
return Math.round((Number(value) || 0) * 1000) / 1000;
}
function uniqueStrings(values = [], maxLength = 400) {
const result = [];
const seen = new Set();
for (const value of values) {
const text = normalizeQueryText(value, maxLength);
const key = text.toLowerCase();
if (!text || seen.has(key)) continue;
seen.add(key);
result.push(text);
}
return result;
}
function parseRecallContextLine(line = "") {
const raw = String(line ?? "").trim();
if (!raw) return null;
const bracketMatch = raw.match(/^\[(user|assistant)\]\s*:\s*([\s\S]*)$/i);
if (bracketMatch) {
const role = String(bracketMatch[1] || "").toLowerCase();
const text = normalizeQueryText(bracketMatch[2] || "");
return text ? { role, text } : null;
}
const plainMatch = raw.match(
/^(user|assistant|用户|助手|ai)\s*[:]\s*([\s\S]*)$/i,
);
if (!plainMatch) return null;
const roleToken = String(plainMatch[1] || "").toLowerCase();
const role =
roleToken === "assistant" || roleToken === "助手" || roleToken === "ai"
? "assistant"
: "user";
const text = normalizeQueryText(plainMatch[2] || "");
return text ? { role, text } : null;
}
function buildContextQueryBlend(
userMessage,
recentMessages = [],
{
enabled = true,
assistantWeight = 0.2,
previousUserWeight = 0.1,
maxTextLength = 400,
} = {},
) {
const currentText = normalizeQueryText(userMessage, maxTextLength);
const normalizedAssistantWeight = clampRange(assistantWeight, 0.2, 0, 1);
const normalizedPreviousUserWeight = clampRange(
previousUserWeight,
0.1,
0,
1,
);
const currentWeight = Math.max(
0,
1 - normalizedAssistantWeight - normalizedPreviousUserWeight,
);
let assistantText = "";
let previousUserText = "";
const parsedMessages = Array.isArray(recentMessages)
? recentMessages.map((line) => parseRecallContextLine(line)).filter(Boolean)
: [];
for (let index = parsedMessages.length - 1; index >= 0; index--) {
const item = parsedMessages[index];
if (!assistantText && item.role === "assistant") {
assistantText = normalizeQueryText(item.text, maxTextLength);
}
if (
!previousUserText &&
item.role === "user" &&
normalizeQueryText(item.text, maxTextLength).toLowerCase() !==
currentText.toLowerCase()
) {
previousUserText = normalizeQueryText(item.text, maxTextLength);
}
if (assistantText && previousUserText) break;
}
const rawParts = [
{
kind: "currentUser",
label: "当前用户消息",
text: currentText,
weight: enabled ? currentWeight : 1,
},
];
if (enabled && assistantText) {
rawParts.push({
kind: "assistantContext",
label: "最近 assistant 回复",
text: assistantText,
weight: normalizedAssistantWeight,
});
}
if (enabled && previousUserText) {
rawParts.push({
kind: "previousUser",
label: "上一条 user 消息",
text: previousUserText,
weight: normalizedPreviousUserWeight,
});
}
const dedupedParts = [];
const seen = new Set();
for (const part of rawParts) {
const text = normalizeQueryText(part.text, maxTextLength);
const key = text.toLowerCase();
if (!text || seen.has(key)) continue;
seen.add(key);
dedupedParts.push({
...part,
text,
});
}
if (dedupedParts.length === 0) {
return {
active: false,
parts: [],
currentText: "",
assistantText: "",
previousUserText: "",
combinedText: "",
};
}
const totalWeight = dedupedParts.reduce(
(sum, part) => sum + Math.max(0, Number(part.weight) || 0),
0,
);
const normalizedParts = dedupedParts.map((part) => ({
...part,
weight:
totalWeight > 0
? roundBlendWeight((Math.max(0, Number(part.weight) || 0) || 0) / totalWeight)
: roundBlendWeight(1 / dedupedParts.length),
}));
const combinedText =
normalizedParts.length <= 1
? normalizedParts[0]?.text || ""
: normalizedParts
.map((part) => `${part.label}:\n${part.text}`)
.join("\n\n");
return {
active: enabled && normalizedParts.length > 1,
parts: normalizedParts,
currentText: currentText || normalizedParts[0]?.text || "",
assistantText,
previousUserText,
combinedText,
};
}
function buildVectorQueryPlan(
blendPlan,
{ enableMultiIntent = true, maxSegments = 4 } = {},
) {
const plan = [];
let currentSegments = [];
for (const part of blendPlan?.parts || []) {
let queries = [part.text];
if (part.kind === "currentUser" && enableMultiIntent) {
currentSegments = splitIntentSegments(part.text, { maxSegments });
queries = uniqueStrings([
part.text,
...currentSegments.filter((item) => item !== part.text),
]);
} else {
queries = uniqueStrings([part.text]);
}
plan.push({
kind: part.kind,
label: part.label,
weight: part.weight,
queries,
});
}
return {
plan,
currentSegments,
};
}
function buildLexicalQuerySources(
userMessage,
{ enableMultiIntent = true, maxSegments = 4 } = {},
) {
const currentText = normalizeQueryText(userMessage, 400);
const segments = enableMultiIntent
? splitIntentSegments(currentText, { maxSegments })
: [];
return {
sources: uniqueStrings([currentText, ...segments]),
segments,
candidateKeyToNodeId: referenceMap.keyToNodeId,
candidateKeyToCandidateMeta: referenceMap.keyToMeta,
nodeIdToCandidateKey: referenceMap.nodeIdToKey,
};
}
@@ -722,13 +474,6 @@ function buildVisibilityTopHits(scoredNodes = [], maxCount = 6) {
}));
}
function scaleVectorResults(results = [], weight = 1) {
return (Array.isArray(results) ? results : []).map((item) => ({
...item,
score: (Number(item?.score) || 0) * Math.max(0, Number(weight) || 0),
}));
}
function pickActiveRegion(graph, optionValue = "") {
const direct = String(optionValue || "").trim();
if (direct) return direct;
@@ -1462,7 +1207,6 @@ export async function retrieve({
normalizedMaxRecallNodes,
llmCandidatePool,
);
const vectorValidation = validateVectorConfig(embeddingConfig);
const retrievalMeta = createRetrievalMeta(enableLLMRecall);
retrievalMeta.activeRegion = activeRegion;
retrievalMeta.activeRegionSource = activeRegionContext.source || "";
@@ -1490,29 +1234,6 @@ export async function retrieve({
retrievalMeta.knowledgeGateMode = enableCognitiveMemory
? "anchored-soft-visibility"
: "disabled";
const contextQueryBlend = buildContextQueryBlend(userMessage, recentMessages, {
enabled: enableContextQueryBlend,
assistantWeight: contextAssistantWeight,
previousUserWeight: contextPreviousUserWeight,
});
retrievalMeta.queryBlendActive = contextQueryBlend.active;
retrievalMeta.queryBlendParts = (contextQueryBlend.parts || []).map((part) => ({
kind: part.kind,
label: part.label,
weight: part.weight,
text: createTextPreview(part.text),
length: part.text.length,
}));
retrievalMeta.queryBlendWeights = Object.fromEntries(
(contextQueryBlend.parts || []).map((part) => [part.kind, part.weight]),
);
const lexicalQuery = buildLexicalQuerySources(
contextQueryBlend.currentText || userMessage,
{
enableMultiIntent,
maxSegments: multiIntentMaxSegments,
},
);
debugLog(
`[ST-BME] 检索开始: ${nodeCount} 个活跃节点${enableVisibility ? " (认知边界已启用)" : ""}`,
);
@@ -1567,49 +1288,85 @@ export async function retrieve({
},
});
}
const vectorStartedAt = nowMs();
if (enableVectorPrefilter && vectorValidation.valid) {
debugLog("[ST-BME] 第1层: 向量预筛");
const queryPlan = buildVectorQueryPlan(contextQueryBlend, {
const sharedRanking = await rankNodesForTaskContext({
graph,
userMessage,
recentMessages,
embeddingConfig,
signal,
options: {
topK: normalizedTopK,
diffusionTopK: normalizedDiffusionTopK,
enableVectorPrefilter,
enableGraphDiffusion,
enableContextQueryBlend,
enableMultiIntent,
maxSegments: multiIntentMaxSegments,
});
const groups = [];
retrievalMeta.segmentsUsed = queryPlan.currentSegments;
for (const part of queryPlan.plan) {
for (const queryText of part.queries) {
const results = await vectorPreFilter(
graph,
queryText,
activeNodes,
embeddingConfig,
normalizedTopK,
signal,
);
groups.push(scaleVectorResults(results, part.weight || 1));
}
}
const merged = mergeVectorResults(
groups,
Math.max(normalizedTopK * 2, 24),
);
retrievalMeta.vectorHits = merged.rawHitCount;
retrievalMeta.vectorMergedHits = merged.results.length;
vectorResults = merged.results;
} else if (enableVectorPrefilter) {
pushSkipReason(retrievalMeta, "vector-config-invalid");
}
retrievalMeta.timings.vector = roundMs(nowMs() - vectorStartedAt);
exactEntityAnchors.push(
...extractEntityAnchors(
contextQueryBlend.currentText || userMessage,
multiIntentMaxSegments,
contextAssistantWeight,
contextPreviousUserWeight,
teleportAlpha,
enableTemporalLinks,
temporalLinkStrength,
enableLexicalBoost,
lexicalWeight,
weights,
activeNodes,
),
},
});
const contextQueryBlend = sharedRanking.contextQueryBlend;
const lexicalQuery = sharedRanking.lexicalQuery;
retrievalMeta.queryBlendActive = Boolean(
sharedRanking?.diagnostics?.queryBlendActive,
);
retrievalMeta.queryBlendParts = Array.isArray(
sharedRanking?.diagnostics?.queryBlendParts,
)
? [...sharedRanking.diagnostics.queryBlendParts]
: [];
retrievalMeta.queryBlendWeights = {
...(sharedRanking?.diagnostics?.queryBlendWeights || {}),
};
retrievalMeta.segmentsUsed = Array.isArray(sharedRanking?.diagnostics?.segmentsUsed)
? [...sharedRanking.diagnostics.segmentsUsed]
: [];
retrievalMeta.vectorHits = Number(sharedRanking?.diagnostics?.vectorHits || 0);
retrievalMeta.vectorMergedHits = Number(
sharedRanking?.diagnostics?.vectorMergedHits || 0,
);
retrievalMeta.seedCount = Number(sharedRanking?.diagnostics?.seedCount || 0);
retrievalMeta.diffusionHits = Number(
sharedRanking?.diagnostics?.diffusionHits || 0,
);
retrievalMeta.lexicalBoostedNodes = Number(
sharedRanking?.diagnostics?.lexicalBoostedNodes || 0,
);
retrievalMeta.temporalSyntheticEdgeCount = Number(
sharedRanking?.diagnostics?.temporalSyntheticEdgeCount || 0,
);
retrievalMeta.teleportAlpha = Number(
sharedRanking?.diagnostics?.teleportAlpha || teleportAlpha,
);
retrievalMeta.lexicalTopHits = Array.isArray(
sharedRanking?.diagnostics?.lexicalTopHits,
)
? [...sharedRanking.diagnostics.lexicalTopHits]
: [];
retrievalMeta.timings.vector = Number(
sharedRanking?.diagnostics?.timings?.vector || 0,
);
retrievalMeta.timings.diffusion = Number(
sharedRanking?.diagnostics?.timings?.diffusion || 0,
);
for (const reason of sharedRanking?.diagnostics?.skipReasons || []) {
pushSkipReason(retrievalMeta, reason);
}
vectorResults = Array.isArray(sharedRanking?.vectorResults)
? [...sharedRanking.vectorResults]
: [];
diffusionResults = Array.isArray(sharedRanking?.diffusionResults)
? [...sharedRanking.diffusionResults]
: [];
exactEntityAnchors.push(...(sharedRanking?.exactEntityAnchors || []));
supplementalAnchorNodeIds = collectSupplementalAnchorNodeIds(
graph,
vectorResults,
@@ -1650,7 +1407,7 @@ export async function retrieve({
retrievalMeta.timings.residual = roundMs(nowMs() - residualStartedAt);
const diffusionStartedAt = nowMs();
if (enableGraphDiffusion) {
if (enableGraphDiffusion && (enableCrossRecall || residualResult.triggered)) {
debugLog("[ST-BME] 第2层: PEDSA 图扩散");
const seeds = [
...vectorResults.map((v) => ({ id: v.nodeId, energy: v.score })),
@@ -1705,9 +1462,11 @@ export async function retrieve({
return node && !node.archived;
});
}
retrievalMeta.diffusionHits = diffusionResults.length;
}
if (enableGraphDiffusion && (enableCrossRecall || residualResult.triggered)) {
retrievalMeta.timings.diffusion = roundMs(nowMs() - diffusionStartedAt);
}
retrievalMeta.diffusionHits = diffusionResults.length;
retrievalMeta.timings.diffusion = roundMs(nowMs() - diffusionStartedAt);
debugLog("[ST-BME] 第3层: 混合评分");
@@ -2259,7 +2018,9 @@ export async function retrieve({
retrievalMeta.timings.total = roundMs(nowMs() - startedAt);
return buildResult(graph, selectedNodeIds, schema, {
retrieval: retrievalMeta,
retrieval: {
...retrievalMeta,
},
scopeContext: {
enableScopedMemory,
enablePovMemory,
@@ -2295,62 +2056,6 @@ export async function retrieve({
});
}
/**
* 向量预筛选
*/
async function vectorPreFilter(
graph,
userMessage,
activeNodes,
embeddingConfig,
topK,
signal,
) {
try {
return await findSimilarNodesByText(
graph,
userMessage,
embeddingConfig,
topK,
activeNodes,
signal,
);
} catch (e) {
if (isAbortError(e)) {
throw e;
}
console.error("[ST-BME] 向量预筛失败:", e);
return [];
}
}
/**
* 实体锚点提取
* 从用户消息中提取名词/实体,匹配图中的节点名称
*/
function extractEntityAnchors(userMessage, activeNodes) {
const anchors = [];
const seen = new Set();
for (const node of activeNodes) {
const candidates = [node.fields?.name, node.fields?.title]
.filter((value) => typeof value === "string")
.map((value) => value.trim())
.filter((value) => value.length >= 2);
for (const candidate of candidates) {
if (!userMessage.includes(candidate)) continue;
const key = `${node.id}:${candidate}`;
if (seen.has(key)) continue;
seen.add(key);
anchors.push({ nodeId: node.id, entity: candidate });
break;
}
}
return anchors;
}
function buildResidualBasisNodes(
graph,
exactEntityAnchors,
@@ -2463,7 +2168,9 @@ async function llmRecall(
const fieldsStr = Object.entries(node.fields)
.map(([k, v]) => `${k}: ${v}`)
.join(", ");
const candidateKey = `R${index + 1}`;
const candidateKey =
nodeIdToCandidateKey[String(c?.nodeId || node?.id || "").trim()] ||
`R${index + 1}`;
return `[${candidateKey}] 类型=${typeLabel}, 作用域=${describeMemoryScope(node.scope)}, 时间=${storyTimeLabel || "未标注"}, 时间桶=${String(c.temporalBucket || STORY_TEMPORAL_BUCKETS.UNDATED)}, 召回桶=${describeScopeBucket(c.scopeBucket)}, 认知=${String(c.knowledgeMode || "unknown")}, 可见性=${(Number(c.knowledgeVisibilityScore) || 0).toFixed(3)}, ${fieldsStr} (评分=${(c.weightedScore ?? c.finalScore).toFixed(3)})`;
})
.join("\n");

752
retrieval/shared-ranking.js Normal file
View File

@@ -0,0 +1,752 @@
import { buildTemporalAdjacencyMap, getActiveNodes, getNode } from "../graph/graph.js";
import { findSimilarNodesByText, validateVectorConfig } from "../vector/vector-index.js";
import { hybridScore } from "./dynamics.js";
import { diffuseAndRank } from "./diffusion.js";
import { mergeVectorResults, splitIntentSegments } from "./retrieval-enhancer.js";
/**
 * Returns a millisecond timestamp used for measuring elapsed time.
 *
 * Uses `performance.now()` when the host exposes it and falls back to
 * `Date.now()` otherwise.
 *
 * @returns {number} Current time in milliseconds.
 */
function nowMs() {
  // Optional chaining alone does not protect against an *undeclared* global:
  // `performance?.now` still throws a ReferenceError when `performance` does
  // not exist. The `typeof` guard is what makes the fallback reachable.
  if (
    typeof performance !== "undefined" &&
    typeof performance?.now === "function"
  ) {
    return performance.now();
  }
  return Date.now();
}
/**
 * Rounds a duration to one decimal place.
 *
 * @param {*} value - Duration in milliseconds; anything that does not coerce
 *   to a non-zero number is treated as 0.
 * @returns {number} The value rounded to the nearest tenth.
 */
function roundMs(value) {
  const numeric = Number(value) || 0;
  const tenths = Math.round(numeric * 10);
  return tenths / 10;
}
/**
 * Coerces a value to an integer (rounding down) and validates it against a
 * lower bound.
 *
 * Despite the name, an out-of-range value is not clamped to `min`: the
 * `fallback` is returned instead.
 *
 * @param {*} value - Candidate value, coerced with `Number()` and floored.
 * @param {number} fallback - Returned when the value is not finite or is below `min`.
 * @param {number} [min=1] - Smallest accepted integer.
 * @returns {number} The floored value, or `fallback`.
 */
export function clampPositiveInt(value, fallback, min = 1) {
  const floored = Math.floor(Number(value));
  if (!Number.isFinite(floored)) {
    return fallback;
  }
  return floored >= min ? floored : fallback;
}
/**
 * Coerces a value to a number and clamps it into `[min, max]`.
 *
 * @param {*} value - Candidate value, coerced with `Number()`.
 * @param {number} fallback - Returned when the coerced value is not finite.
 * @param {number} [min=0] - Lower bound.
 * @param {number} [max=1] - Upper bound.
 * @returns {number} The clamped value, or `fallback` for non-finite input.
 */
export function clampRange(value, fallback, min = 0, max = 1) {
  const numeric = Number(value);
  if (Number.isFinite(numeric)) {
    const capped = Math.min(max, numeric);
    return Math.max(min, capped);
  }
  return fallback;
}
/**
 * Normalizes free text for use as a query: stringifies it, collapses every run
 * of whitespace (including line breaks) to a single space, trims the ends and
 * truncates the result.
 *
 * @param {*} value - Input text; `null`/`undefined` are treated as empty.
 * @param {number} [maxLength=400] - Maximum length of the result; values
 *   below 1 are raised to 1.
 * @returns {string} The normalized text, or `""` when nothing remains.
 */
export function normalizeQueryText(value, maxLength = 400) {
  const raw = value ?? "";
  const unixNewlines = String(raw).replace(/\r\n/g, "\n");
  const singleSpaced = unixNewlines.replace(/\s+/g, " ").trim();
  if (!singleSpaced) {
    return "";
  }
  const limit = Math.max(1, maxLength);
  return singleSpaced.slice(0, limit);
}
/**
 * Builds a short, single-line preview of a text.
 *
 * The text is normalized with `normalizeQueryText` (with a small look-ahead of
 * four characters so truncation can be detected) and, when longer than
 * `maxLength`, cut to `maxLength` characters followed by `...`.
 *
 * @param {*} text - Source text.
 * @param {number} [maxLength=120] - Maximum number of characters kept before the ellipsis.
 * @returns {string} The preview, or `""` for empty input.
 */
export function createTextPreview(text, maxLength = 120) {
  const candidate = normalizeQueryText(text, maxLength + 4);
  if (!candidate) {
    return "";
  }
  if (candidate.length > maxLength) {
    return `${candidate.slice(0, maxLength)}...`;
  }
  return candidate;
}
/**
 * Normalizes a list of texts and removes empty entries and case-insensitive
 * duplicates, keeping the first occurrence and the original order.
 *
 * @param {Iterable<*>} [values=[]] - Texts to normalize.
 * @param {number} [maxLength=400] - Per-entry length limit passed to `normalizeQueryText`.
 * @returns {string[]} The distinct normalized texts.
 */
function uniqueStrings(values = [], maxLength = 400) {
  const seenKeys = new Set();
  const unique = [];
  for (const raw of values) {
    const cleaned = normalizeQueryText(raw, maxLength);
    if (!cleaned) continue;
    // Duplicates are detected case-insensitively, but the first spelling wins.
    const foldedKey = cleaned.toLowerCase();
    if (seenKeys.has(foldedKey)) continue;
    seenKeys.add(foldedKey);
    unique.push(cleaned);
  }
  return unique;
}
/**
 * Parses one recent-message line into a `{ role, text }` pair.
 *
 * Two layouts are recognized:
 *   - `[user]: text` / `[assistant]: text`
 *   - `user: text`, `assistant: text`, `用户: text`, `助手: text`, `ai: text`
 * Role tokens are matched case-insensitively. `assistant`, `助手` and `ai` map
 * to the `assistant` role; every other recognized token maps to `user`.
 *
 * @param {*} [line=""] - Raw line.
 * @returns {{role: string, text: string} | null} The parsed entry, or `null`
 *   when the line is empty, has no recognized role prefix, or has no text.
 */
function parseContextLine(line = "") {
  const trimmed = String(line ?? "").trim();
  if (!trimmed) return null;

  // Shared tail: normalize the message body and reject empty results.
  const toEntry = (role, body) => {
    const text = normalizeQueryText(body || "");
    return text ? { role, text } : null;
  };

  const bracketed = /^\[(user|assistant)\]\s*:\s*([\s\S]*)$/i.exec(trimmed);
  if (bracketed) {
    return toEntry(String(bracketed[1] || "").toLowerCase(), bracketed[2]);
  }

  const plain = /^(user|assistant|用户|助手|ai)\s*[:]\s*([\s\S]*)$/i.exec(trimmed);
  if (!plain) return null;

  const token = String(plain[1] || "").toLowerCase();
  const assistantTokens = ["assistant", "助手", "ai"];
  const role = assistantTokens.includes(token) ? "assistant" : "user";
  return toEntry(role, plain[2]);
}
/**
 * Builds a weighted blend of query texts from the current user message and the
 * most recent conversation context.
 *
 * Up to three parts are produced: the current user message, the latest
 * assistant reply, and the latest earlier user message whose text differs
 * (case-insensitively) from the current one. Context parts are only added when
 * `enabled` is true. Parts with empty or duplicate text are dropped, and the
 * remaining weights are rescaled to sum to roughly 1 (each rounded to three
 * decimals).
 *
 * @param {*} userMessage - The current user message.
 * @param {Array<*>} [recentMessages=[]] - Recent lines in a format understood
 *   by `parseContextLine`; a non-array is treated as empty.
 * @param {object} [options]
 * @param {boolean} [options.enabled=true] - Whether context parts are blended in.
 * @param {number} [options.assistantWeight=0.2] - Raw weight of the assistant part (clamped to [0, 1]).
 * @param {number} [options.previousUserWeight=0.1] - Raw weight of the previous-user part (clamped to [0, 1]).
 * @param {number} [options.maxTextLength=400] - Length limit applied to every text.
 * @returns {{active: boolean, parts: Array<object>, currentText: string,
 *   assistantText: string, previousUserText: string, combinedText: string}}
 *   The blend plan; `active` is true only when blending is enabled and more
 *   than one part survived.
 */
export function buildContextQueryBlend(
  userMessage,
  recentMessages = [],
  {
    enabled = true,
    assistantWeight = 0.2,
    previousUserWeight = 0.1,
    maxTextLength = 400,
  } = {},
) {
  const clip = (value) => normalizeQueryText(value, maxTextLength);
  const nonNegative = (value) => Math.max(0, Number(value) || 0);
  const roundThousandth = (value) => Math.round(value * 1000) / 1000;

  const currentText = clip(userMessage);
  const currentKey = currentText.toLowerCase();
  const assistantShare = clampRange(assistantWeight, 0.2, 0, 1);
  const previousUserShare = clampRange(previousUserWeight, 0.1, 0, 1);
  // The current message receives whatever share the context parts leave over.
  const currentShare = Math.max(0, 1 - assistantShare - previousUserShare);

  const history = Array.isArray(recentMessages)
    ? recentMessages.map((line) => parseContextLine(line)).filter(Boolean)
    : [];

  // Scan newest-to-oldest for the latest assistant reply and the latest user
  // message that is not simply a repeat of the current one.
  let assistantText = "";
  let previousUserText = "";
  let cursor = history.length;
  while (cursor > 0) {
    cursor -= 1;
    const entry = history[cursor];
    const entryText = clip(entry.text);
    if (!assistantText && entry.role === "assistant") {
      assistantText = entryText;
    }
    if (
      !previousUserText &&
      entry.role === "user" &&
      entryText.toLowerCase() !== currentKey
    ) {
      previousUserText = entryText;
    }
    if (assistantText && previousUserText) break;
  }

  const candidates = [
    {
      kind: "currentUser",
      label: "当前用户消息",
      text: currentText,
      weight: enabled ? currentShare : 1,
    },
  ];
  if (enabled) {
    if (assistantText) {
      candidates.push({
        kind: "assistantContext",
        label: "最近 assistant 回复",
        text: assistantText,
        weight: assistantShare,
      });
    }
    if (previousUserText) {
      candidates.push({
        kind: "previousUser",
        label: "上一条 user 消息",
        text: previousUserText,
        weight: previousUserShare,
      });
    }
  }

  // Drop empty parts and parts whose text repeats an earlier part.
  const usedKeys = new Set();
  const distinctParts = [];
  for (const candidate of candidates) {
    const text = clip(candidate.text);
    const key = text.toLowerCase();
    if (!text || usedKeys.has(key)) continue;
    usedKeys.add(key);
    distinctParts.push({ ...candidate, text });
  }

  if (distinctParts.length === 0) {
    return {
      active: false,
      parts: [],
      currentText: "",
      assistantText: "",
      previousUserText: "",
      combinedText: "",
    };
  }

  const totalWeight = distinctParts.reduce(
    (sum, part) => sum + nonNegative(part.weight),
    0,
  );
  // Rescale by the surviving total; with no usable weight, share evenly.
  const parts = distinctParts.map((part) => ({
    ...part,
    weight:
      totalWeight > 0
        ? roundThousandth(nonNegative(part.weight) / totalWeight)
        : roundThousandth(1 / distinctParts.length),
  }));

  let combinedText;
  if (parts.length <= 1) {
    combinedText = parts[0]?.text || "";
  } else {
    combinedText = parts
      .map((part) => `${part.label}:\n${part.text}`)
      .join("\n\n");
  }

  return {
    active: enabled && parts.length > 1,
    parts,
    currentText: currentText || parts[0]?.text || "",
    assistantText,
    previousUserText,
    combinedText,
  };
}
/**
 * Expands a blend plan into the list of vector queries to run per part.
 *
 * Every part contributes its own text as a query. When multi-intent splitting
 * is enabled, the `currentUser` part additionally contributes the segments
 * returned by `splitIntentSegments`. Queries are de-duplicated per part.
 *
 * @param {{parts?: Array<object>}} blendPlan - Result of `buildContextQueryBlend`.
 * @param {object} [options]
 * @param {boolean} [options.enableMultiIntent=true] - Whether to split the current message into segments.
 * @param {number} [options.maxSegments=4] - Forwarded to `splitIntentSegments`.
 * @returns {{plan: Array<{kind: *, label: *, weight: *, queries: string[]}>,
 *   currentSegments: Array<*>}} The per-part query plan and the segments found
 *   for the current user message (empty when none were computed).
 */
export function buildVectorQueryPlan(
  blendPlan,
  { enableMultiIntent = true, maxSegments = 4 } = {},
) {
  const plan = [];
  let currentSegments = [];

  for (const part of blendPlan?.parts || []) {
    const candidates = [part.text];
    if (part.kind === "currentUser" && enableMultiIntent) {
      currentSegments = splitIntentSegments(part.text, { maxSegments });
      candidates.push(
        ...currentSegments.filter((segment) => segment !== part.text),
      );
    }
    plan.push({
      kind: part.kind,
      label: part.label,
      weight: part.weight,
      queries: uniqueStrings(candidates),
    });
  }

  return { plan, currentSegments };
}
/**
 * Builds the list of texts used for lexical matching: the normalized user
 * message plus, when multi-intent splitting is enabled, its intent segments.
 *
 * @param {*} userMessage - The message to match against node fields.
 * @param {object} [options]
 * @param {boolean} [options.enableMultiIntent=true] - Whether to add intent segments.
 * @param {number} [options.maxSegments=4] - Forwarded to `splitIntentSegments`.
 * @returns {{sources: string[], segments: Array<*>}} De-duplicated query
 *   sources and the raw segments (empty when splitting is disabled).
 */
export function buildLexicalQuerySources(
  userMessage,
  { enableMultiIntent = true, maxSegments = 4 } = {},
) {
  const currentText = normalizeQueryText(userMessage, 400);
  let segments = [];
  if (enableMultiIntent) {
    segments = splitIntentSegments(currentText, { maxSegments });
  }
  const sources = uniqueStrings([currentText, ...segments]);
  return { sources, segments };
}
/**
 * Normalizes text for lexical comparison: whitespace-collapsed, limited to 600
 * characters and lower-cased.
 *
 * @param {*} [value=""] - Input text.
 * @returns {string} The comparison-ready text.
 */
function normalizeLexicalText(value = "") {
  const bounded = normalizeQueryText(value, 600);
  return bounded.toLowerCase();
}
/**
 * Splits text into the distinct lexical units used for overlap scoring.
 *
 * Tokens are runs of `[a-z0-9]` or runs of characters in U+4E00–U+9FFF. Tokens
 * shorter than two characters are ignored. Tokens from the U+4E00–U+9FFF range
 * that are longer than two characters also contribute every two-character
 * window they contain.
 *
 * @param {*} [text=""] - Input text.
 * @returns {string[]} Distinct units in order of first appearance.
 */
function buildLexicalUnits(text = "") {
  const normalized = normalizeLexicalText(text);
  if (!normalized) return [];

  const tokens = normalized.match(/[a-z0-9]+|[\u4e00-\u9fff]+/g) || [];
  // A Set keeps first-insertion order, so it doubles as the de-duplication step.
  const units = new Set();
  for (const token of tokens) {
    if (token.length < 2) continue;
    units.add(token);
    if (token.length === 2 || !/[\u4e00-\u9fff]/.test(token)) continue;
    for (let start = 0; start + 2 <= token.length; start += 1) {
      units.add(token.slice(start, start + 2));
    }
  }
  return [...units];
}
/**
 * Computes the fraction of source units that also occur among the target units.
 *
 * @param {string[]} [sourceUnits=[]] - Units of the query side.
 * @param {string[]} [targetUnits=[]] - Units of the field side.
 * @returns {number} Matched source units divided by the number of source
 *   units; 0 when either list is empty.
 */
function computeTokenOverlapScore(sourceUnits = [], targetUnits = []) {
  if (!sourceUnits.length || !targetUnits.length) return 0;
  const lookup = new Set(targetUnits);
  const matched = [...sourceUnits].reduce(
    (count, unit) => (lookup.has(unit) ? count + 1 : count),
    0,
  );
  return matched / Math.max(1, sourceUnits.length);
}
/**
 * Scores how well one field text matches any of the query sources.
 *
 * For each non-empty source the score is:
 *   - `exact` when the normalized texts are identical;
 *   - otherwise the larger of `includes` (when one text contains the other and
 *     the shorter one has at least two characters) and the unit-overlap ratio
 *     multiplied by `overlap`.
 * The best score over all sources is returned, capped at 1.
 *
 * @param {*} fieldText - Field value to score.
 * @param {Iterable<*>} [querySources=[]] - Query texts.
 * @param {object} [weights]
 * @param {number} [weights.exact=1] - Score for an exact match.
 * @param {number} [weights.includes=0.9] - Score for a containment match.
 * @param {number} [weights.overlap=0.6] - Multiplier for the unit-overlap ratio.
 * @returns {number} Best match score in [0, 1]; 0 for an empty field.
 */
function scoreFieldMatch(
  fieldText,
  querySources = [],
  { exact = 1, includes = 0.9, overlap = 0.6 } = {},
) {
  const field = normalizeLexicalText(fieldText);
  if (!field) return 0;
  const fieldUnits = buildLexicalUnits(field);

  // Score of a single, already-normalized, non-empty source.
  const scoreSource = (source) => {
    if (source === field) return exact;
    const shorterLength = Math.min(source.length, field.length);
    const contained =
      shorterLength >= 2 && (source.includes(field) || field.includes(source));
    const overlapScore =
      computeTokenOverlapScore(buildLexicalUnits(source), fieldUnits) * overlap;
    return contained ? Math.max(includes, overlapScore) : overlapScore;
  };

  let best = 0;
  for (const sourceText of querySources) {
    const source = normalizeLexicalText(sourceText);
    if (!source) continue;
    best = Math.max(best, scoreSource(source));
  }
  return Math.min(1, best);
}
/**
 * Collects the trimmed, non-empty string values of the named fields of a node.
 *
 * A field may hold a string or an array; for arrays, every non-empty string
 * element is collected and other elements are ignored.
 *
 * @param {{fields?: object} | null | undefined} node - Node to read from.
 * @param {string[]} [fieldNames=[]] - Names of the fields to read, in order.
 * @returns {string[]} Collected texts in field order.
 */
function collectNodeLexicalTexts(node, fieldNames = []) {
  const collected = [];
  // Appends the trimmed text and reports whether the candidate was usable.
  const pushIfText = (candidate) => {
    if (typeof candidate !== "string") return false;
    const trimmed = candidate.trim();
    if (!trimmed) return false;
    collected.push(trimmed);
    return true;
  };

  for (const fieldName of fieldNames) {
    const value = node?.fields?.[fieldName];
    if (pushIfText(value)) continue;
    if (!Array.isArray(value)) continue;
    for (const item of value) {
      pushIfText(item);
    }
  }
  return collected;
}
/**
 * Computes a lexical relevance score for a node against the query sources.
 *
 * Three partial scores are combined:
 *   - primary: best match over the `name` and `title` fields;
 *   - secondary: best match over `summary`, `insight`, `state`, `traits`,
 *     `participants` and `status`, scored with lower weights;
 *   - token: match of all collected texts joined into one string.
 * The result is the largest of the primary score, the damped secondary and
 * token scores, and a weighted sum of all three, capped at 1.
 *
 * @param {{fields?: object} | null | undefined} node - Node to score.
 * @param {string[]} [querySources=[]] - Query texts; a non-array or empty list yields 0.
 * @returns {number} Lexical score in [0, 1].
 */
export function computeLexicalScore(node, querySources = []) {
  if (!node || !Array.isArray(querySources) || querySources.length === 0) {
    return 0;
  }

  const primaryTexts = collectNodeLexicalTexts(node, ["name", "title"]);
  const secondaryTexts = collectNodeLexicalTexts(node, [
    "summary",
    "insight",
    "state",
    "traits",
    "participants",
    "status",
  ]);

  // Best score of any text in the list under the given weights (0 when empty).
  const bestMatch = (texts, matchWeights) =>
    Math.max(
      0,
      ...texts.map((value) => scoreFieldMatch(value, querySources, matchWeights)),
    );

  const primaryScore = bestMatch(primaryTexts, {
    exact: 1,
    includes: 0.92,
    overlap: 0.72,
  });
  const secondaryScore = bestMatch(secondaryTexts, {
    exact: 0.82,
    includes: 0.68,
    overlap: 0.52,
  });
  const combinedText = [...primaryTexts, ...secondaryTexts].join(" ");
  const tokenScore = scoreFieldMatch(combinedText, querySources, {
    exact: 0.65,
    includes: 0.55,
    overlap: 0.45,
  });

  const partialScores = [primaryScore, secondaryScore, tokenScore];
  if (partialScores.every((score) => score <= 0)) {
    return 0;
  }

  const blended =
    primaryScore * 0.75 + secondaryScore * 0.35 + tokenScore * 0.2;
  const strongest = Math.max(
    primaryScore,
    secondaryScore * 0.82,
    tokenScore * 0.7,
    blended,
  );
  return Math.min(1, strongest);
}
/**
 * Returns copies of vector results with their scores multiplied by a weight.
 *
 * @param {Array<object>} [results=[]] - Vector hits; a non-array yields `[]`.
 * @param {number} [weight=1] - Scale factor; negative or non-numeric values count as 0.
 * @returns {Array<object>} New result objects with the scaled `score`
 *   (a missing or non-numeric score counts as 0). The input is not mutated.
 */
export function scaleVectorResults(results = [], weight = 1) {
  if (!Array.isArray(results)) {
    return [];
  }
  const factor = Math.max(0, Number(weight) || 0);
  return results.map((item) => {
    const baseScore = Number(item?.score) || 0;
    return { ...item, score: baseScore * factor };
  });
}
/**
 * Tells whether a thrown value is an abort error, judged by its `name`.
 *
 * @param {*} error - Any thrown value.
 * @returns {boolean} True when `error.name` is exactly `"AbortError"`.
 */
function isAbortError(error) {
  if (error === null || error === undefined) {
    return false;
  }
  return error.name === "AbortError";
}
/**
 * Runs a vector similarity lookup and degrades to an empty result on failure.
 *
 * Abort errors are re-thrown so cancellation still propagates; any other
 * error is logged and swallowed, which makes the lookup best-effort.
 *
 * @param {object} graph - Graph forwarded to `findSimilarNodesByText`.
 * @param {string} userMessage - Query text.
 * @param {Array<object>} activeNodes - Candidate nodes forwarded to the lookup.
 * @param {object} embeddingConfig - Embedding configuration forwarded to the lookup.
 * @param {number} topK - Maximum number of hits requested.
 * @param {AbortSignal} [signal] - Optional cancellation signal.
 * @returns {Promise<Array<object>>} The lookup result, or `[]` after a non-abort failure.
 * @throws Re-throws errors recognized by `isAbortError`.
 */
export async function vectorPreFilter(
  graph,
  userMessage,
  activeNodes,
  embeddingConfig,
  topK,
  signal,
) {
  let matches;
  try {
    matches = await findSimilarNodesByText(
      graph,
      userMessage,
      embeddingConfig,
      topK,
      activeNodes,
      signal,
    );
  } catch (failure) {
    if (!isAbortError(failure)) {
      console.error("[ST-BME] 向量预筛失败:", failure);
      return [];
    }
    throw failure;
  }
  return matches;
}
/**
 * Finds nodes whose `name` or `title` appears verbatim in the user message.
 *
 * Names are trimmed and must be at least two characters long. Matching is a
 * case-sensitive substring test. Each node contributes at most one anchor,
 * with `name` checked before `title`.
 *
 * @param {*} userMessage - Message to search; falsy values count as empty.
 * @param {Array<object>} activeNodes - Candidate nodes; a non-array yields `[]`.
 * @returns {Array<{nodeId: *, entity: string}>} One anchor per matching node, in node order.
 */
export function extractEntityAnchors(userMessage, activeNodes) {
  const haystack = String(userMessage || "");
  const nodes = Array.isArray(activeNodes) ? activeNodes : [];
  const emitted = new Set();
  const anchors = [];

  for (const node of nodes) {
    const names = [node?.fields?.name, node?.fields?.title].flatMap((value) => {
      if (typeof value !== "string") return [];
      const trimmed = value.trim();
      return trimmed.length >= 2 ? [trimmed] : [];
    });

    // First name that occurs in the message and was not already emitted for
    // this node id (guards against the same node appearing twice in the list).
    const match = names.find(
      (candidate) =>
        haystack.includes(candidate) && !emitted.has(`${node.id}:${candidate}`),
    );
    if (match === undefined) continue;

    emitted.add(`${node.id}:${match}`);
    anchors.push({ nodeId: node.id, entity: match });
  }
  return anchors;
}
/**
 * Builds a compact summary of the nodes with the highest lexical scores.
 *
 * Only nodes with a positive lexical score are considered. They are ordered by
 * lexical score, then by final score, both descending. At least one entry is
 * kept even when `maxCount` is below 1.
 *
 * @param {Array<object>} [scoredNodes=[]] - Scored entries carrying `nodeId`,
 *   `node`, `lexicalScore` and `finalScore`.
 * @param {number} [maxCount=5] - Maximum number of entries returned.
 * @returns {Array<{nodeId: *, type: string, label: *, lexicalScore: number,
 *   finalScore: number}>} Summaries with scores rounded to three decimals. The
 *   label is the first available of the node's name, title and summary, or
 *   else the node id.
 */
function buildLexicalTopHits(scoredNodes = [], maxCount = 5) {
  const asNumber = (value) => Number(value) || 0;
  const roundThousandth = (value) => Math.round(asNumber(value) * 1000) / 1000;

  const byLexicalThenFinal = (left, right) => {
    const lexicalGap =
      asNumber(right?.lexicalScore) - asNumber(left?.lexicalScore);
    return lexicalGap !== 0
      ? lexicalGap
      : asNumber(right?.finalScore) - asNumber(left?.finalScore);
  };

  // `filter` returns a fresh array, so sorting it leaves the input untouched.
  const ranked = scoredNodes.filter(
    (item) => asNumber(item?.lexicalScore) > 0,
  );
  ranked.sort(byLexicalThenFinal);

  const limit = Math.max(1, maxCount);
  return ranked
    .slice(0, limit)
    .map(({ nodeId, node, lexicalScore, finalScore }) => ({
      nodeId,
      type: node?.type || "",
      label:
        node?.fields?.name ||
        node?.fields?.title ||
        node?.fields?.summary ||
        nodeId,
      lexicalScore: roundThousandth(lexicalScore),
      finalScore: roundThousandth(finalScore),
    }));
}
/**
 * Runs the shared base ranking pipeline for a task context and returns both the
 * intermediate results and the final scored node list.
 *
 * Stages, in order:
 *   1. Build the context query blend, the vector query plan and the lexical
 *      query sources from the user message and recent messages.
 *   2. Vector prefilter: run every planned query, scale the hits by the weight
 *      of the blend part they came from, and merge them.
 *   3. Extract exact entity anchors from the current message.
 *   4. Graph diffusion seeded by the vector hits and the entity anchors.
 *   5. Hybrid scoring (graph, vector, lexical, importance, creation time) and
 *      sorting.
 *
 * @param {object} [params]
 * @param {object} params.graph - The memory graph.
 * @param {*} params.userMessage - The current user message.
 * @param {Array<*>} [params.recentMessages=[]] - Recent conversation lines.
 * @param {object} params.embeddingConfig - Embedding configuration, checked with `validateVectorConfig`.
 * @param {AbortSignal} [params.signal] - Optional cancellation signal forwarded to the vector lookups.
 * @param {object} [params.options={}] - Tuning options:
 *   `topK` (20), `diffusionTopK` (100), `enableVectorPrefilter` (true),
 *   `enableGraphDiffusion` (true), `enableContextQueryBlend` (true),
 *   `enableMultiIntent` (true), `multiIntentMaxSegments` (4),
 *   `contextAssistantWeight` (0.2), `contextPreviousUserWeight` (0.1),
 *   `enableLexicalBoost` (true), `lexicalWeight` (0.18, range 0–10),
 *   `teleportAlpha` (0.15), `enableTemporalLinks` (true),
 *   `temporalLinkStrength` (0.2), `maxTextLength` (400, minimum 32),
 *   `weights` (extra weights passed to `hybridScore`), and `activeNodes`
 *   (candidate nodes; defaults to `getActiveNodes(graph)`).
 * @returns {Promise<{activeNodes: Array<object>, contextQueryBlend: object,
 *   queryPlan: object, lexicalQuery: object, vectorResults: Array<object>,
 *   exactEntityAnchors: Array<object>, diffusionResults: Array<object>,
 *   scoredNodes: Array<object>, diagnostics: object}>} Ranking results.
 *   `scoredNodes` is sorted best-first; `diagnostics` carries counters, skip
 *   reasons and per-stage timings.
 * @throws Re-throws abort errors raised by the vector lookups.
 */
export async function rankNodesForTaskContext({
  graph,
  userMessage,
  recentMessages = [],
  embeddingConfig,
  signal = undefined,
  options = {},
} = {}) {
  // ---- Option normalization -------------------------------------------------
  const topK = clampPositiveInt(options.topK, 20);
  const diffusionTopK = clampPositiveInt(options.diffusionTopK, 100);
  const enableVectorPrefilter = options.enableVectorPrefilter ?? true;
  const enableGraphDiffusion = options.enableGraphDiffusion ?? true;
  const enableContextQueryBlend = options.enableContextQueryBlend ?? true;
  const enableMultiIntent = options.enableMultiIntent ?? true;
  const multiIntentMaxSegments = clampPositiveInt(
    options.multiIntentMaxSegments,
    4,
  );
  const contextAssistantWeight = clampRange(
    options.contextAssistantWeight,
    0.2,
    0,
    1,
  );
  const contextPreviousUserWeight = clampRange(
    options.contextPreviousUserWeight,
    0.1,
    0,
    1,
  );
  const enableLexicalBoost = options.enableLexicalBoost ?? true;
  const lexicalWeight = clampRange(options.lexicalWeight, 0.18, 0, 10);
  const teleportAlpha = clampRange(options.teleportAlpha, 0.15);
  const enableTemporalLinks = options.enableTemporalLinks ?? true;
  const temporalLinkStrength = clampRange(
    options.temporalLinkStrength,
    0.2,
    0,
    1,
  );
  const maxTextLength = clampPositiveInt(options.maxTextLength, 400, 32);
  const weights = options.weights ?? {};
  // Archived nodes are excluded whether the caller supplied the list or not.
  const activeNodes = Array.isArray(options.activeNodes)
    ? options.activeNodes.filter((node) => node && !node.archived)
    : getActiveNodes(graph).filter((node) => node && !node.archived);

  // ---- Query preparation ----------------------------------------------------
  const vectorValidation = validateVectorConfig(embeddingConfig);
  const contextQueryBlend = buildContextQueryBlend(userMessage, recentMessages, {
    enabled: enableContextQueryBlend,
    assistantWeight: contextAssistantWeight,
    previousUserWeight: contextPreviousUserWeight,
    maxTextLength,
  });
  const queryPlan = buildVectorQueryPlan(contextQueryBlend, {
    enableMultiIntent,
    maxSegments: multiIntentMaxSegments,
  });
  const lexicalQuery = buildLexicalQuerySources(
    contextQueryBlend.currentText || userMessage,
    {
      enableMultiIntent,
      maxSegments: multiIntentMaxSegments,
    },
  );

  const diagnostics = {
    queryBlendActive: contextQueryBlend.active,
    queryBlendParts: (contextQueryBlend.parts || []).map((part) => ({
      kind: part.kind,
      label: part.label,
      weight: part.weight,
      text: createTextPreview(part.text),
      length: part.text.length,
    })),
    queryBlendWeights: Object.fromEntries(
      (contextQueryBlend.parts || []).map((part) => [part.kind, part.weight]),
    ),
    segmentsUsed: [...(queryPlan.currentSegments || [])],
    vectorValidation,
    vectorHits: 0,
    vectorMergedHits: 0,
    seedCount: 0,
    diffusionHits: 0,
    temporalSyntheticEdgeCount: 0,
    teleportAlpha,
    lexicalBoostedNodes: 0,
    lexicalTopHits: [],
    skipReasons: [],
    timings: {
      vector: 0,
      diffusion: 0,
    },
  };

  // Nothing to rank: return the prepared query data with empty results.
  if (!graph || activeNodes.length === 0) {
    return {
      activeNodes,
      contextQueryBlend,
      queryPlan,
      lexicalQuery,
      vectorResults: [],
      exactEntityAnchors: [],
      diffusionResults: [],
      scoredNodes: [],
      diagnostics,
    };
  }

  // ---- Stage 1: vector prefilter -------------------------------------------
  let vectorResults = [];
  const vectorStartedAt = nowMs();
  if (enableVectorPrefilter && vectorValidation.valid) {
    const groups = [];
    for (const part of queryPlan.plan) {
      for (const queryText of part.queries) {
        const results = await vectorPreFilter(
          graph,
          queryText,
          activeNodes,
          embeddingConfig,
          topK,
          signal,
        );
        // `??` rather than `||`: a blend weight of 0 is legitimate (a context
        // weight configured as 0, or one rounded down to 0) and must silence
        // the part instead of being promoted to full weight 1.
        groups.push(scaleVectorResults(results, part.weight ?? 1));
      }
    }
    const merged = mergeVectorResults(groups, Math.max(topK * 2, 24));
    diagnostics.vectorHits = merged.rawHitCount;
    diagnostics.vectorMergedHits = merged.results.length;
    vectorResults = merged.results;
  } else if (enableVectorPrefilter) {
    diagnostics.skipReasons.push("vector-config-invalid");
  }
  diagnostics.timings.vector = roundMs(nowMs() - vectorStartedAt);

  // ---- Stage 2: exact entity anchors ---------------------------------------
  const exactEntityAnchors = extractEntityAnchors(
    contextQueryBlend.currentText || userMessage,
    activeNodes,
  );

  // ---- Stage 3: graph diffusion --------------------------------------------
  let diffusionResults = [];
  const diffusionStartedAt = nowMs();
  if (enableGraphDiffusion) {
    const seeds = [
      ...vectorResults.map((item) => ({ id: item.nodeId, energy: item.score })),
      // Exact name/title matches are seeded with a fixed energy of 2.0.
      ...exactEntityAnchors.map((item) => ({ id: item.nodeId, energy: 2.0 })),
    ];
    // Keep the highest energy per node; seeds without positive energy never
    // enter the map because the comparison baseline is 0.
    const seedMap = new Map();
    for (const seed of seeds) {
      const existing = seedMap.get(seed.id) || 0;
      if (seed.energy > existing) {
        seedMap.set(seed.id, seed.energy);
      }
    }
    const uniqueSeeds = [...seedMap.entries()].map(([id, energy]) => ({
      id,
      energy,
    }));
    diagnostics.seedCount = uniqueSeeds.length;
    if (uniqueSeeds.length > 0) {
      const adjacencyMap = buildTemporalAdjacencyMap(graph, {
        includeTemporalLinks: enableTemporalLinks,
        temporalLinkStrength,
      });
      diagnostics.temporalSyntheticEdgeCount =
        Number(adjacencyMap?.syntheticEdgeCount) || 0;
      diffusionResults = diffuseAndRank(adjacencyMap, uniqueSeeds, {
        maxSteps: 2,
        decayFactor: 0.6,
        topK: diffusionTopK,
        teleportAlpha,
      }).filter((item) => {
        // Drop hits whose node no longer exists or has been archived.
        const node = getNode(graph, item.nodeId);
        return node && !node.archived;
      });
    }
  }
  diagnostics.diffusionHits = diffusionResults.length;
  diagnostics.timings.diffusion = roundMs(nowMs() - diffusionStartedAt);

  // ---- Stage 4: hybrid scoring ---------------------------------------------
  const scoreMap = new Map();
  for (const item of vectorResults) {
    const entry = scoreMap.get(item.nodeId) || { graphScore: 0, vectorScore: 0 };
    entry.vectorScore = item.score;
    scoreMap.set(item.nodeId, entry);
  }
  for (const item of diffusionResults) {
    const entry = scoreMap.get(item.nodeId) || { graphScore: 0, vectorScore: 0 };
    entry.graphScore = item.energy;
    scoreMap.set(item.nodeId, entry);
  }
  // With no vector or diffusion hits at all, fall back to scoring every active
  // node so the remaining signals (lexical, importance, time) can still rank.
  if (scoreMap.size === 0) {
    for (const node of activeNodes) {
      if (!scoreMap.has(node.id)) {
        scoreMap.set(node.id, { graphScore: 0, vectorScore: 0 });
      }
    }
  }

  const scoredNodes = [];
  for (const [nodeId, scores] of scoreMap.entries()) {
    const node = getNode(graph, nodeId);
    if (!node || node.archived) continue;
    const lexicalScore = enableLexicalBoost
      ? computeLexicalScore(node, lexicalQuery.sources)
      : 0;
    const finalScore = hybridScore(
      {
        graphScore: scores.graphScore,
        vectorScore: scores.vectorScore,
        lexicalScore,
        importance: node.importance,
        createdTime: node.createdTime,
      },
      {
        ...weights,
        lexicalWeight: enableLexicalBoost ? lexicalWeight : 0,
      },
    );
    scoredNodes.push({
      nodeId,
      node,
      graphScore: scores.graphScore,
      vectorScore: scores.vectorScore,
      lexicalScore,
      finalScore,
      weightedScore: finalScore,
    });
  }

  // Best first; ties fall through to final score, lexical score and finally
  // the node id so the order is deterministic.
  scoredNodes.sort((left, right) => {
    const weightedDelta =
      (Number(right.weightedScore) || 0) - (Number(left.weightedScore) || 0);
    if (weightedDelta !== 0) return weightedDelta;
    const finalDelta =
      (Number(right.finalScore) || 0) - (Number(left.finalScore) || 0);
    if (finalDelta !== 0) return finalDelta;
    const lexicalDelta =
      (Number(right.lexicalScore) || 0) - (Number(left.lexicalScore) || 0);
    if (lexicalDelta !== 0) return lexicalDelta;
    return String(left.nodeId).localeCompare(String(right.nodeId));
  });

  diagnostics.lexicalBoostedNodes = scoredNodes.filter(
    (item) => (Number(item.lexicalScore) || 0) > 0,
  ).length;
  diagnostics.lexicalTopHits = buildLexicalTopHits(scoredNodes);

  return {
    activeNodes,
    contextQueryBlend,
    queryPlan,
    lexicalQuery,
    vectorResults,
    exactEntityAnchors,
    diffusionResults,
    scoredNodes,
    diagnostics,
  };
}