From b0cd52b1ccb0299449e1b723dc3e4a799a8a60a8 Mon Sep 17 00:00:00 2001 From: Youzini-afk <13153778771cx@gmail.com> Date: Wed, 8 Apr 2026 13:38:31 +0800 Subject: [PATCH] =?UTF-8?q?fix(extract):=20=E5=A2=9E=E5=BC=BA=20JSON=20?= =?UTF-8?q?=E8=A7=A3=E6=9E=90=E5=AE=B9=E9=94=99=20+=20=E6=89=A9=E5=85=85?= =?UTF-8?q?=E5=AE=B9=E5=99=A8=E9=94=AE=E7=99=BD=E5=90=8D=E5=8D=95=20+=20?= =?UTF-8?q?=E5=8D=95=E6=93=8D=E4=BD=9C=E5=85=9C=E5=BA=95=20+=20=E8=AF=8A?= =?UTF-8?q?=E6=96=AD=E6=94=B9=E5=96=84?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - extractJSON: 新增 trailing comma 容错和截断 JSON 自动闭合修复 - extractOperationsPayload: 容器键白名单从 5 扩充到 13,新增智能探测和单操作对象包装 - 错误日志现在会输出 result 的类型、键名和预览,方便排查 --- llm/llm.js | 46 +++++++++++++++++++++++++++++ maintenance/extractor.js | 64 +++++++++++++++++++++++++++++++++++++++- 2 files changed, 109 insertions(+), 1 deletion(-) diff --git a/llm/llm.js b/llm/llm.js index 613f40a..aac9219 100644 --- a/llm/llm.js +++ b/llm/llm.js @@ -1976,5 +1976,51 @@ function extractJSON(text) { } } + // 4. trailing comma 容错 (常见 LLM 错误: {"a": 1,} 或 [1, 2,]) + if (startIdx >= 0) { + const lastEnd = trimmed.lastIndexOf(endChar); + if (lastEnd > startIdx) { + const candidate = trimmed + .slice(startIdx, lastEnd + 1) + .replace(/,\s*([}\]])/g, "$1"); + try { + return JSON.parse(candidate); + } catch { + /* continue */ + } + } + } + + // 5. 截断 JSON 修复: 尝试补全不匹配的括号 + if (startIdx >= 0) { + let candidate = trimmed.slice(startIdx); + // 先清理 trailing comma + candidate = candidate.replace(/,\s*$/g, ""); + + const opens = { "{": 0, "[": 0 }; + const closes = { "}": "{", "]": "[" }; + for (const ch of candidate) { + if (ch in opens) opens[ch]++; + if (ch in closes && opens[closes[ch]] > 0) opens[closes[ch]]--; + } + + if (opens["["] > 0 || opens["{"] > 0) { + // 去除末尾不完整的 key-value 残片(如 "key": "未完...) + candidate = candidate.replace( + /,?\s*"[^"]*"?\s*:\s*"?[^"}\]]*$/, + "", + ); + candidate = candidate.replace(/,\s*$/g, ""); + for (let i = 0; i < opens["["]; i++) candidate += "]"; + for (let i = 0; i < opens["{"]; i++) candidate += "}"; + candidate = candidate.replace(/,\s*([}\]])/g, "$1"); + try { + return JSON.parse(candidate); + } catch { + /* continue */ + } + } + } + return null; } diff --git a/maintenance/extractor.js b/maintenance/extractor.js index dc65e78..c49d8ae 100644 --- a/maintenance/extractor.js +++ b/maintenance/extractor.js @@ -84,6 +84,14 @@ const EXTRACTION_RESULT_CONTAINER_KEYS = [ "items", "entries", "memories", + "results", + "data", + "memory_operations", + "actions", + "output", + "extracted", + "extractions", + "memory_nodes", ]; const EXTRACTION_OPERATION_META_KEYS = new Set([ @@ -116,7 +124,22 @@ function isPlainObject(value) { return Boolean(value) && typeof value === "object" && !Array.isArray(value); } +/** + * 判断一个对象是否像一个 extraction 操作 + * (包含 action/op/operation/type 中的至少一个) + */ +function looksLikeSingleOperation(obj) { + if (!isPlainObject(obj)) return false; + return ( + typeof obj.action === "string" || + typeof obj.op === "string" || + typeof obj.operation === "string" || + typeof obj.type === "string" + ); +} + function extractOperationsPayload(result) { + // 直接是数组 → 直接返回 if (Array.isArray(result)) { return result; } @@ -124,12 +147,35 @@ function extractOperationsPayload(result) { return null; } + // 1. 优先匹配已知容器键 for (const key of EXTRACTION_RESULT_CONTAINER_KEYS) { if (Array.isArray(result[key])) { return result[key]; } } + // 2. 智能探测:扫描对象中第一个值为非空数组且元素为对象的键 + for (const [key, value] of Object.entries(result)) { + if ( + Array.isArray(value) && + value.length > 0 && + isPlainObject(value[0]) + ) { + debugLog( + `[ST-BME] 自动探测到非标准容器键: "${key}" (${value.length} 项)`, + ); + return value; + } + } + + // 3. 单个操作对象兜底:如果整个结果看起来像一条操作,包装成数组 + if (looksLikeSingleOperation(result)) { + debugLog( + "[ST-BME] LLM 返回了单个操作对象,自动包装为数组", + ); + return [result]; + } + return null; } @@ -455,7 +501,23 @@ export async function extractMemories({ const normalizedResult = normalizeExtractionResultPayload(result, schema); if (!normalizedResult || !Array.isArray(normalizedResult.operations)) { - console.warn("[ST-BME] 提取 LLM 未返回有效操作"); + const diagType = result === null + ? "null" + : Array.isArray(result) + ? `array(len=${result.length})` + : typeof result; + const diagKeys = isPlainObject(result) + ? Object.keys(result).slice(0, 10).join(", ") + : ""; + const diagPreview = typeof result === "string" + ? result.slice(0, 120) + : ""; + console.warn( + `[ST-BME] 提取 LLM 未返回有效操作 ` + + `[type=${diagType}]` + + (diagKeys ? ` [keys=${diagKeys}]` : "") + + (diagPreview ? ` [preview=${diagPreview}]` : ""), + ); return { success: false, error: "提取 LLM 未返回有效操作",