fix(extract): 增强 JSON 解析容错 + 扩充容器键白名单 + 单操作兜底 + 诊断改善

- extractJSON: 新增 trailing comma 容错和截断 JSON 自动闭合修复
- extractOperationsPayload: 容器键白名单从 5 扩充到 13,新增智能探测和单操作对象包装
- 错误日志现在会输出 result 的类型、键名和预览,方便排查
This commit is contained in:
Youzini-afk
2026-04-08 13:38:31 +08:00
parent c085520f27
commit b0cd52b1cc
2 changed files with 109 additions and 1 deletions

View File

@@ -1976,5 +1976,51 @@ function extractJSON(text) {
} }
} }
// 4. trailing comma 容错 (常见 LLM 错误: {"a": 1,} 或 [1, 2,])
if (startIdx >= 0) {
const lastEnd = trimmed.lastIndexOf(endChar);
if (lastEnd > startIdx) {
const candidate = trimmed
.slice(startIdx, lastEnd + 1)
.replace(/,\s*([}\]])/g, "$1");
try {
return JSON.parse(candidate);
} catch {
/* continue */
}
}
}
// 5. 截断 JSON 修复: 尝试补全不匹配的括号
if (startIdx >= 0) {
let candidate = trimmed.slice(startIdx);
// 先清理 trailing comma
candidate = candidate.replace(/,\s*$/g, "");
const opens = { "{": 0, "[": 0 };
const closes = { "}": "{", "]": "[" };
for (const ch of candidate) {
if (ch in opens) opens[ch]++;
if (ch in closes && opens[closes[ch]] > 0) opens[closes[ch]]--;
}
if (opens["["] > 0 || opens["{"] > 0) {
// 去除末尾不完整的 key-value 残片(如 "key": "未完...
candidate = candidate.replace(
/,?\s*"[^"]*"?\s*:\s*"?[^"}\]]*$/,
"",
);
candidate = candidate.replace(/,\s*$/g, "");
for (let i = 0; i < opens["["]; i++) candidate += "]";
for (let i = 0; i < opens["{"]; i++) candidate += "}";
candidate = candidate.replace(/,\s*([}\]])/g, "$1");
try {
return JSON.parse(candidate);
} catch {
/* continue */
}
}
}
return null; return null;
} }

View File

@@ -84,6 +84,14 @@ const EXTRACTION_RESULT_CONTAINER_KEYS = [
"items", "items",
"entries", "entries",
"memories", "memories",
"results",
"data",
"memory_operations",
"actions",
"output",
"extracted",
"extractions",
"memory_nodes",
]; ];
const EXTRACTION_OPERATION_META_KEYS = new Set([ const EXTRACTION_OPERATION_META_KEYS = new Set([
@@ -116,7 +124,22 @@ function isPlainObject(value) {
return Boolean(value) && typeof value === "object" && !Array.isArray(value); return Boolean(value) && typeof value === "object" && !Array.isArray(value);
} }
/**
 * Heuristically decide whether a value has the shape of a single extraction operation.
 *
 * A value qualifies when it passes `isPlainObject` and at least one of its
 * `action`, `op`, `operation` or `type` properties holds a string.
 *
 * @param {*} obj - Candidate value to inspect.
 * @returns {boolean} `true` when at least one discriminator property is a string,
 *   `false` otherwise (including when `obj` is not a plain object).
 */
function looksLikeSingleOperation(obj) {
  if (!isPlainObject(obj)) {
    return false;
  }
  // A single string-valued discriminator is enough; keys are probed in this order.
  const discriminatorKeys = ["action", "op", "operation", "type"];
  return discriminatorKeys.some((key) => typeof obj[key] === "string");
}
function extractOperationsPayload(result) { function extractOperationsPayload(result) {
// 直接是数组 → 直接返回
if (Array.isArray(result)) { if (Array.isArray(result)) {
return result; return result;
} }
@@ -124,12 +147,35 @@ function extractOperationsPayload(result) {
return null; return null;
} }
// 1. 优先匹配已知容器键
for (const key of EXTRACTION_RESULT_CONTAINER_KEYS) { for (const key of EXTRACTION_RESULT_CONTAINER_KEYS) {
if (Array.isArray(result[key])) { if (Array.isArray(result[key])) {
return result[key]; return result[key];
} }
} }
// 2. 智能探测:扫描对象中第一个值为非空数组且元素为对象的键
for (const [key, value] of Object.entries(result)) {
if (
Array.isArray(value) &&
value.length > 0 &&
isPlainObject(value[0])
) {
debugLog(
`[ST-BME] 自动探测到非标准容器键: "${key}" (${value.length} 项)`,
);
return value;
}
}
// 3. 单个操作对象兜底:如果整个结果看起来像一条操作,包装成数组
if (looksLikeSingleOperation(result)) {
debugLog(
"[ST-BME] LLM 返回了单个操作对象,自动包装为数组",
);
return [result];
}
return null; return null;
} }
@@ -455,7 +501,23 @@ export async function extractMemories({
const normalizedResult = normalizeExtractionResultPayload(result, schema); const normalizedResult = normalizeExtractionResultPayload(result, schema);
if (!normalizedResult || !Array.isArray(normalizedResult.operations)) { if (!normalizedResult || !Array.isArray(normalizedResult.operations)) {
console.warn("[ST-BME] 提取 LLM 未返回有效操作"); const diagType = result === null
? "null"
: Array.isArray(result)
? `array(len=${result.length})`
: typeof result;
const diagKeys = isPlainObject(result)
? Object.keys(result).slice(0, 10).join(", ")
: "";
const diagPreview = typeof result === "string"
? result.slice(0, 120)
: "";
console.warn(
`[ST-BME] 提取 LLM 未返回有效操作 ` +
`[type=${diagType}]` +
(diagKeys ? ` [keys=${diagKeys}]` : "") +
(diagPreview ? ` [preview=${diagPreview}]` : ""),
);
return { return {
success: false, success: false,
error: "提取 LLM 未返回有效操作", error: "提取 LLM 未返回有效操作",