mirror of
https://github.com/Youzini-afk/ST-Bionic-Memory-Ecology.git
synced 2026-05-15 22:30:38 +08:00
Harden legacy extraction and recall JSON parsing
This commit is contained in:
135
extractor.js
135
extractor.js
@@ -67,6 +67,136 @@ function throwIfAborted(signal) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function resolveExtractionOperations(result, schema = []) {
|
||||||
|
const candidates = [
|
||||||
|
{ source: "operations", value: result?.operations },
|
||||||
|
{ source: "nodes", value: result?.nodes },
|
||||||
|
{ source: "memories", value: result?.memories },
|
||||||
|
{ source: "root", value: result },
|
||||||
|
];
|
||||||
|
|
||||||
|
for (const candidate of candidates) {
|
||||||
|
if (!Array.isArray(candidate.value)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
const normalized = normalizeExtractionOperations(candidate.value, schema);
|
||||||
|
if (normalized?.legacyCount > 0) {
|
||||||
|
console.info("[ST-BME] 兼容旧版扁平提取输出", {
|
||||||
|
source: candidate.source,
|
||||||
|
normalizedCount: normalized.legacyCount,
|
||||||
|
totalCount: normalized.operations.length,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
return normalized.operations;
|
||||||
|
}
|
||||||
|
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
function normalizeExtractionOperations(operations, schema = []) {
|
||||||
|
if (!Array.isArray(operations)) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
let legacyCount = 0;
|
||||||
|
const normalizedOperations = operations.map((operation) => {
|
||||||
|
const normalized = normalizeExtractionOperation(operation, schema);
|
||||||
|
if (normalized?.__legacyCompat) {
|
||||||
|
legacyCount += 1;
|
||||||
|
delete normalized.__legacyCompat;
|
||||||
|
}
|
||||||
|
return normalized;
|
||||||
|
});
|
||||||
|
|
||||||
|
return {
|
||||||
|
operations: normalizedOperations,
|
||||||
|
legacyCount,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
function normalizeExtractionOperation(operation, schema = []) {
|
||||||
|
if (!operation || typeof operation !== "object") {
|
||||||
|
return operation;
|
||||||
|
}
|
||||||
|
|
||||||
|
const normalized = { ...operation };
|
||||||
|
const normalizedAction = normalizeOperationAction(normalized);
|
||||||
|
if (normalizedAction) {
|
||||||
|
normalized.action = normalizedAction;
|
||||||
|
}
|
||||||
|
|
||||||
|
const typeDef = schema.find((entry) => entry?.id === normalized.type);
|
||||||
|
const normalizedFields = extractOperationFields(normalized, typeDef);
|
||||||
|
if (
|
||||||
|
normalized.action === "create" ||
|
||||||
|
normalized.action === "update" ||
|
||||||
|
(!normalized.action && Object.keys(normalizedFields).length > 0)
|
||||||
|
) {
|
||||||
|
normalized.fields = normalizedFields;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (
|
||||||
|
!normalized.action &&
|
||||||
|
typeDef &&
|
||||||
|
!normalized.nodeId &&
|
||||||
|
Object.keys(normalizedFields).length > 0
|
||||||
|
) {
|
||||||
|
normalized.action = "create";
|
||||||
|
normalized.__legacyCompat = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (
|
||||||
|
(normalized.action === "update" || normalized.action === "delete") &&
|
||||||
|
!normalized.nodeId &&
|
||||||
|
typeof normalized.id === "string" &&
|
||||||
|
normalized.id.trim()
|
||||||
|
) {
|
||||||
|
normalized.nodeId = normalized.id.trim();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (
|
||||||
|
normalized.action === "create" &&
|
||||||
|
!normalized.ref &&
|
||||||
|
typeof normalized.id === "string" &&
|
||||||
|
normalized.id.trim()
|
||||||
|
) {
|
||||||
|
normalized.ref = normalized.id.trim();
|
||||||
|
}
|
||||||
|
|
||||||
|
return normalized;
|
||||||
|
}
|
||||||
|
|
||||||
|
function normalizeOperationAction(operation = {}) {
|
||||||
|
const candidate = operation.action ?? operation.op ?? operation.operation;
|
||||||
|
return typeof candidate === "string" && candidate.trim()
|
||||||
|
? candidate.trim()
|
||||||
|
: "";
|
||||||
|
}
|
||||||
|
|
||||||
|
function extractOperationFields(operation = {}, typeDef = null) {
|
||||||
|
const fields = {
|
||||||
|
...(operation.fields && typeof operation.fields === "object"
|
||||||
|
? operation.fields
|
||||||
|
: {}),
|
||||||
|
};
|
||||||
|
|
||||||
|
const columnNames = Array.isArray(typeDef?.columns)
|
||||||
|
? typeDef.columns
|
||||||
|
.map((column) => String(column?.name || "").trim())
|
||||||
|
.filter(Boolean)
|
||||||
|
: [];
|
||||||
|
|
||||||
|
for (const fieldName of columnNames) {
|
||||||
|
if (fields[fieldName] !== undefined || operation[fieldName] === undefined) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
fields[fieldName] = operation[fieldName];
|
||||||
|
}
|
||||||
|
|
||||||
|
return fields;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 对未处理的对话楼层执行记忆提取
|
* 对未处理的对话楼层执行记忆提取
|
||||||
*
|
*
|
||||||
@@ -203,7 +333,8 @@ export async function extractMemories({
|
|||||||
});
|
});
|
||||||
throwIfAborted(signal);
|
throwIfAborted(signal);
|
||||||
|
|
||||||
if (!result || !Array.isArray(result.operations)) {
|
const operations = resolveExtractionOperations(result, schema);
|
||||||
|
if (!result || !Array.isArray(operations)) {
|
||||||
console.warn("[ST-BME] 提取 LLM 未返回有效操作");
|
console.warn("[ST-BME] 提取 LLM 未返回有效操作");
|
||||||
return {
|
return {
|
||||||
success: false,
|
success: false,
|
||||||
@@ -222,7 +353,7 @@ export async function extractMemories({
|
|||||||
const refMap = new Map();
|
const refMap = new Map();
|
||||||
const operationErrors = [];
|
const operationErrors = [];
|
||||||
|
|
||||||
for (const op of result.operations) {
|
for (const op of operations) {
|
||||||
try {
|
try {
|
||||||
switch (op.action) {
|
switch (op.action) {
|
||||||
case "create": {
|
case "create": {
|
||||||
|
|||||||
55
retriever.js
55
retriever.js
@@ -97,6 +97,44 @@ function buildRecallFallbackReason(llmResult) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function resolveRecallSelectedIds(result) {
|
||||||
|
if (Array.isArray(result)) {
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
const visited = new Set();
|
||||||
|
const queue = [{ value: result, depth: 0 }];
|
||||||
|
while (queue.length > 0) {
|
||||||
|
const current = queue.shift();
|
||||||
|
const value = current?.value;
|
||||||
|
const depth = Number(current?.depth) || 0;
|
||||||
|
if (!value || typeof value !== "object" || visited.has(value) || depth > 1) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
visited.add(value);
|
||||||
|
|
||||||
|
const directCandidates = [
|
||||||
|
value.selected_ids,
|
||||||
|
value.selectedIds,
|
||||||
|
value.node_ids,
|
||||||
|
value.nodeIds,
|
||||||
|
value.ids,
|
||||||
|
];
|
||||||
|
for (const candidate of directCandidates) {
|
||||||
|
if (Array.isArray(candidate)) {
|
||||||
|
return candidate;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
queue.push({ value: value.data, depth: depth + 1 });
|
||||||
|
queue.push({ value: value.result, depth: depth + 1 });
|
||||||
|
queue.push({ value: value.payload, depth: depth + 1 });
|
||||||
|
queue.push({ value: value.output, depth: depth + 1 });
|
||||||
|
}
|
||||||
|
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
function isAbortError(error) {
|
function isAbortError(error) {
|
||||||
return error?.name === "AbortError";
|
return error?.name === "AbortError";
|
||||||
}
|
}
|
||||||
@@ -1515,21 +1553,22 @@ async function llmRecall(
|
|||||||
returnFailureDetails: true,
|
returnFailureDetails: true,
|
||||||
});
|
});
|
||||||
const result = llmResult?.ok ? llmResult.data : null;
|
const result = llmResult?.ok ? llmResult.data : null;
|
||||||
|
const selectedIds = resolveRecallSelectedIds(result);
|
||||||
|
|
||||||
if (result?.selected_ids && Array.isArray(result.selected_ids)) {
|
if (Array.isArray(selectedIds)) {
|
||||||
// 校验 ID 有效性
|
// 校验 ID 有效性
|
||||||
const validIds = uniqueNodeIds(
|
const validIds = uniqueNodeIds(
|
||||||
result.selected_ids.filter((id) =>
|
selectedIds.filter((id) =>
|
||||||
candidates.some((c) => c.nodeId === id),
|
candidates.some((c) => c.nodeId === id),
|
||||||
),
|
),
|
||||||
).slice(0, maxNodes);
|
).slice(0, maxNodes);
|
||||||
|
|
||||||
if (validIds.length > 0 || result.selected_ids.length === 0) {
|
if (validIds.length > 0 || selectedIds.length === 0) {
|
||||||
return {
|
return {
|
||||||
selectedNodeIds: validIds,
|
selectedNodeIds: validIds,
|
||||||
status: "llm",
|
status: "llm",
|
||||||
reason:
|
reason:
|
||||||
validIds.length < result.selected_ids.length
|
validIds.length < selectedIds.length
|
||||||
? "LLM 返回了部分无效或超限 ID,已自动裁剪"
|
? "LLM 返回了部分无效或超限 ID,已自动裁剪"
|
||||||
: "LLM 精排完成",
|
: "LLM 精排完成",
|
||||||
};
|
};
|
||||||
@@ -1538,7 +1577,7 @@ async function llmRecall(
|
|||||||
|
|
||||||
// LLM 失败时回退到纯评分排序
|
// LLM 失败时回退到纯评分排序
|
||||||
const fallbackReason = llmResult?.ok
|
const fallbackReason = llmResult?.ok
|
||||||
? Array.isArray(result?.selected_ids)
|
? Array.isArray(selectedIds)
|
||||||
? "LLM 返回的候选 ID 无效,已回退到评分排序"
|
? "LLM 返回的候选 ID 无效,已回退到评分排序"
|
||||||
: "LLM 返回了无法识别的 JSON 结构,已回退到评分排序"
|
: "LLM 返回了无法识别的 JSON 结构,已回退到评分排序"
|
||||||
: buildRecallFallbackReason(llmResult);
|
: buildRecallFallbackReason(llmResult);
|
||||||
@@ -1546,7 +1585,11 @@ async function llmRecall(
|
|||||||
selectedNodeIds: candidates.slice(0, maxNodes).map((c) => c.nodeId),
|
selectedNodeIds: candidates.slice(0, maxNodes).map((c) => c.nodeId),
|
||||||
status: "fallback",
|
status: "fallback",
|
||||||
reason: fallbackReason,
|
reason: fallbackReason,
|
||||||
fallbackType: llmResult?.ok ? "invalid-candidate" : llmResult?.errorType || "unknown",
|
fallbackType: llmResult?.ok
|
||||||
|
? Array.isArray(selectedIds)
|
||||||
|
? "invalid-candidate"
|
||||||
|
: "invalid-structure"
|
||||||
|
: llmResult?.errorType || "unknown",
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -2105,6 +2105,62 @@ async function testExtractorFailsOnUnknownOperation() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async function testExtractorSupportsLegacyFlatNodeOperations() {
|
||||||
|
const graph = createEmptyGraph();
|
||||||
|
const restoreOverrides = pushTestOverrides({
|
||||||
|
llm: {
|
||||||
|
async callLLMForJSON() {
|
||||||
|
return {
|
||||||
|
operations: [
|
||||||
|
{
|
||||||
|
type: "event",
|
||||||
|
id: "evt-legacy",
|
||||||
|
title: "夜间喂食",
|
||||||
|
summary: "角色完成了一次深夜喂食。",
|
||||||
|
participants: "悟岳, 访客",
|
||||||
|
status: "resolved",
|
||||||
|
importance: 6,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
type: "character",
|
||||||
|
id: "char-legacy",
|
||||||
|
name: "悟岳",
|
||||||
|
state: "放松下来",
|
||||||
|
},
|
||||||
|
],
|
||||||
|
};
|
||||||
|
},
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
try {
|
||||||
|
const result = await extractMemories({
|
||||||
|
graph,
|
||||||
|
messages: [{ seq: 7, role: "assistant", content: "测试旧版扁平提取输出" }],
|
||||||
|
startSeq: 7,
|
||||||
|
endSeq: 7,
|
||||||
|
schema,
|
||||||
|
embeddingConfig: null,
|
||||||
|
settings: {},
|
||||||
|
});
|
||||||
|
|
||||||
|
assert.equal(result.success, true);
|
||||||
|
assert.equal(result.newNodes, 2);
|
||||||
|
assert.equal(graph.lastProcessedSeq, 7);
|
||||||
|
|
||||||
|
const eventNode = graph.nodes.find((node) => node.type === "event");
|
||||||
|
const characterNode = graph.nodes.find((node) => node.type === "character");
|
||||||
|
assert.ok(eventNode);
|
||||||
|
assert.ok(characterNode);
|
||||||
|
assert.equal(eventNode.fields?.title, "夜间喂食");
|
||||||
|
assert.equal(eventNode.fields?.summary, "角色完成了一次深夜喂食。");
|
||||||
|
assert.equal(characterNode.fields?.name, "悟岳");
|
||||||
|
assert.equal(characterNode.fields?.state, "放松下来");
|
||||||
|
} finally {
|
||||||
|
restoreOverrides();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
async function testConsolidatorMergeUpdatesSeqRange() {
|
async function testConsolidatorMergeUpdatesSeqRange() {
|
||||||
const graph = createEmptyGraph();
|
const graph = createEmptyGraph();
|
||||||
const target = createNode({
|
const target = createNode({
|
||||||
@@ -5023,6 +5079,7 @@ async function testLlmOutputRegexCleansResponseBeforeJsonParse() {
|
|||||||
await testCompressorMigratesEdgesToCompressedNode();
|
await testCompressorMigratesEdgesToCompressedNode();
|
||||||
await testVectorIndexKeepsDirtyOnDirectPartialEmbeddingFailure();
|
await testVectorIndexKeepsDirtyOnDirectPartialEmbeddingFailure();
|
||||||
await testExtractorFailsOnUnknownOperation();
|
await testExtractorFailsOnUnknownOperation();
|
||||||
|
await testExtractorSupportsLegacyFlatNodeOperations();
|
||||||
await testConsolidatorMergeUpdatesSeqRange();
|
await testConsolidatorMergeUpdatesSeqRange();
|
||||||
await testConsolidatorMergeFallbackKeepsNodeWhenTargetMissing();
|
await testConsolidatorMergeFallbackKeepsNodeWhenTargetMissing();
|
||||||
await testBatchJournalVectorDeltaCapturesRecoveryFields();
|
await testBatchJournalVectorDeltaCapturesRecoveryFields();
|
||||||
|
|||||||
@@ -329,6 +329,54 @@ assert.equal(state.llmOptions[0].returnFailureDetails, true);
|
|||||||
assert.equal(state.llmOptions[0].maxRetries, 2);
|
assert.equal(state.llmOptions[0].maxRetries, 2);
|
||||||
assert.equal(state.llmOptions[0].maxCompletionTokens, 512);
|
assert.equal(state.llmOptions[0].maxCompletionTokens, 512);
|
||||||
|
|
||||||
|
state.vectorCalls.length = 0;
|
||||||
|
state.diffusionCalls.length = 0;
|
||||||
|
state.llmCalls.length = 0;
|
||||||
|
state.llmOptions.length = 0;
|
||||||
|
state.llmCandidateCount = 0;
|
||||||
|
state.llmResponse = { selectedIds: ["rule-1"] };
|
||||||
|
const llmCamelCaseResult = await retrieve({
|
||||||
|
graph,
|
||||||
|
userMessage: "换个 JSON 键名也应该兼容",
|
||||||
|
recentMessages: [],
|
||||||
|
embeddingConfig: {},
|
||||||
|
schema,
|
||||||
|
options: {
|
||||||
|
topK: 4,
|
||||||
|
maxRecallNodes: 2,
|
||||||
|
enableVectorPrefilter: true,
|
||||||
|
enableGraphDiffusion: false,
|
||||||
|
enableLLMRecall: true,
|
||||||
|
llmCandidatePool: 2,
|
||||||
|
},
|
||||||
|
});
|
||||||
|
assert.deepEqual(Array.from(llmCamelCaseResult.selectedNodeIds), ["rule-1"]);
|
||||||
|
assert.equal(llmCamelCaseResult.meta.retrieval.llm.status, "llm");
|
||||||
|
|
||||||
|
state.vectorCalls.length = 0;
|
||||||
|
state.diffusionCalls.length = 0;
|
||||||
|
state.llmCalls.length = 0;
|
||||||
|
state.llmOptions.length = 0;
|
||||||
|
state.llmCandidateCount = 0;
|
||||||
|
state.llmResponse = { data: { selected_ids: ["rule-2"] } };
|
||||||
|
const llmNestedResult = await retrieve({
|
||||||
|
graph,
|
||||||
|
userMessage: "嵌套 JSON 结构也应该兼容",
|
||||||
|
recentMessages: [],
|
||||||
|
embeddingConfig: {},
|
||||||
|
schema,
|
||||||
|
options: {
|
||||||
|
topK: 4,
|
||||||
|
maxRecallNodes: 2,
|
||||||
|
enableVectorPrefilter: true,
|
||||||
|
enableGraphDiffusion: false,
|
||||||
|
enableLLMRecall: true,
|
||||||
|
llmCandidatePool: 2,
|
||||||
|
},
|
||||||
|
});
|
||||||
|
assert.deepEqual(Array.from(llmNestedResult.selectedNodeIds), ["rule-2"]);
|
||||||
|
assert.equal(llmNestedResult.meta.retrieval.llm.status, "llm");
|
||||||
|
|
||||||
state.vectorCalls.length = 0;
|
state.vectorCalls.length = 0;
|
||||||
state.diffusionCalls.length = 0;
|
state.diffusionCalls.length = 0;
|
||||||
state.llmCalls.length = 0;
|
state.llmCalls.length = 0;
|
||||||
|
|||||||
Reference in New Issue
Block a user