diff --git a/extractor.js b/extractor.js index 3a4f517..bdd1865 100644 --- a/extractor.js +++ b/extractor.js @@ -67,6 +67,219 @@ function throwIfAborted(signal) { } } +const EXTRACTION_RESULT_CONTAINER_KEYS = [ + "operations", + "nodes", + "items", + "entries", + "memories", +]; + +const EXTRACTION_OPERATION_META_KEYS = new Set([ + "action", + "op", + "operation", + "type", + "fields", + "nodeId", + "node_id", + "targetNodeId", + "target_node_id", + "sourceNodeId", + "source_node_id", + "ref", + "reference", + "id", + "links", + "relations", + "edges", + "importance", + "clusters", + "scope", + "seq", + "temporalStrength", + "temporal_strength", +]); + +function isPlainObject(value) { + return Boolean(value) && typeof value === "object" && !Array.isArray(value); +} + +function extractOperationsPayload(result) { + if (Array.isArray(result)) { + return result; + } + if (!isPlainObject(result)) { + return null; + } + + for (const key of EXTRACTION_RESULT_CONTAINER_KEYS) { + if (Array.isArray(result[key])) { + return result[key]; + } + } + + return null; +} + +function resolveExtractionAction(rawOp) { + const explicitAction = rawOp?.action ?? rawOp?.op ?? rawOp?.operation; + if (typeof explicitAction === "string" && explicitAction.trim()) { + return explicitAction.trim().toLowerCase(); + } + + if (rawOp?.type) { + if (rawOp?.nodeId || rawOp?.node_id) { + return "update"; + } + return "create"; + } + + return ""; +} + +function resolveExtractionTypeDef(schema, type) { + if (!Array.isArray(schema) || !type) { + return null; + } + return schema.find((entry) => entry?.id === type) || null; +} + +function resolveExtractionFieldNames(typeDef) { + return new Set( + Array.isArray(typeDef?.columns) + ? typeDef.columns + .map((column) => String(column?.name || "").trim()) + .filter(Boolean) + : [], + ); +} + +function resolveExtractionNodeId(rawOp) { + const nodeId = + rawOp?.nodeId ?? + rawOp?.node_id ?? + rawOp?.targetNodeId ?? + rawOp?.target_node_id ?? + rawOp?.id; + return nodeId == null || nodeId === "" ? "" : String(nodeId); +} + +function resolveExtractionRef(rawOp) { + const ref = rawOp?.ref ?? rawOp?.reference ?? rawOp?.id; + return ref == null || ref === "" ? "" : String(ref); +} + +function collectNormalizedOperationFields(rawOp, typeDef) { + const fieldNames = resolveExtractionFieldNames(typeDef); + const fields = isPlainObject(rawOp?.fields) ? { ...rawOp.fields } : {}; + + for (const [key, value] of Object.entries(rawOp || {})) { + if (key === "fields") { + continue; + } + + if (key === "scope") { + if (!isPlainObject(value) && (fieldNames.has("scope") || !typeDef)) { + fields.scope = value; + } + continue; + } + + if (EXTRACTION_OPERATION_META_KEYS.has(key)) { + continue; + } + + if (!typeDef || fieldNames.has(key)) { + fields[key] = value; + } + } + + return fields; +} + +function normalizeExtractionOperation(rawOp, schema) { + if (!isPlainObject(rawOp)) { + return rawOp; + } + + const action = resolveExtractionAction(rawOp); + const type = rawOp?.type == null ? "" : String(rawOp.type).trim(); + const typeDef = resolveExtractionTypeDef(schema, type); + const normalized = { + ...rawOp, + ...(action ? { action } : {}), + ...(type ? { type } : {}), + }; + + const nodeId = resolveExtractionNodeId(rawOp); + const ref = resolveExtractionRef(rawOp); + + if (action === "create") { + if (ref) { + normalized.ref = ref; + } + delete normalized.nodeId; + } else if ((action === "update" || action === "delete") && nodeId) { + normalized.nodeId = nodeId; + } + + if (Array.isArray(rawOp?.relations) && !Array.isArray(rawOp?.links)) { + normalized.links = rawOp.relations; + } else if (Array.isArray(rawOp?.edges) && !Array.isArray(rawOp?.links)) { + normalized.links = rawOp.edges; + } + + if (!Array.isArray(normalized.clusters) && normalized.clusters != null) { + normalized.clusters = [normalized.clusters].filter(Boolean); + } + + if (isPlainObject(rawOp?.scope)) { + normalized.scope = rawOp.scope; + } else if (action === "create" || action === "update") { + delete normalized.scope; + } + + if (action === "create" || action === "update") { + const fields = collectNormalizedOperationFields(rawOp, typeDef); + if (Object.keys(fields).length > 0) { + normalized.fields = fields; + } + } + + delete normalized.op; + delete normalized.operation; + delete normalized.node_id; + delete normalized.target_node_id; + delete normalized.source_node_id; + delete normalized.reference; + delete normalized.relations; + delete normalized.edges; + delete normalized.temporal_strength; + + return normalized; +} + +function normalizeExtractionResultPayload(result, schema) { + const operations = extractOperationsPayload(result); + if (!Array.isArray(operations)) { + return result; + } + + const normalizedOperations = operations.map((op) => + normalizeExtractionOperation(op, schema), + ); + + if (Array.isArray(result) || !isPlainObject(result)) { + return { operations: normalizedOperations }; + } + + return { + ...result, + operations: normalizedOperations, + }; +} + /** * 对未处理的对话楼层执行记忆提取 * @@ -202,8 +415,9 @@ export async function extractMemories({ onStreamProgress, }); throwIfAborted(signal); + const normalizedResult = normalizeExtractionResultPayload(result, schema); - if (!result || !Array.isArray(result.operations)) { + if (!normalizedResult || !Array.isArray(normalizedResult.operations)) { console.warn("[ST-BME] 提取 LLM 未返回有效操作"); return { success: false, @@ -222,7 +436,7 @@ export async function extractMemories({ const refMap = new Map(); const operationErrors = []; - for (const op of result.operations) { + for (const op of normalizedResult.operations) { try { switch (op.action) { case "create": { diff --git a/tests/p0-regressions.mjs b/tests/p0-regressions.mjs index 6349604..d279469 100644 --- a/tests/p0-regressions.mjs +++ b/tests/p0-regressions.mjs @@ -200,6 +200,11 @@ const schema = [ columns: [{ name: "name" }, { name: "state" }], latestOnly: true, }, + { + id: "synopsis", + label: "概要", + columns: [{ name: "summary" }, { name: "scope" }], + }, ]; function createBatchStageHarness() { @@ -2105,6 +2110,95 @@ async function testExtractorFailsOnUnknownOperation() { } } +async function testExtractorNormalizesFlatCreateOperation() { + const graph = createEmptyGraph(); + const restoreOverrides = pushTestOverrides({ + llm: { + async callLLMForJSON() { + return { + operations: [ + { + type: "event", + id: "evt1", + title: "午夜越界", + summary: "两人在午夜越界相见,留下了新的冲突线索。", + participants: "悟悟, 晗", + }, + ], + }; + }, + }, + }); + + try { + const result = await extractMemories({ + graph, + messages: [{ seq: 6, role: "assistant", content: "测试扁平 create" }], + startSeq: 6, + endSeq: 6, + schema, + embeddingConfig: null, + settings: {}, + }); + + assert.equal(result.success, true); + assert.equal(result.newNodes, 1); + assert.equal(graph.lastProcessedSeq, 6); + const created = graph.nodes.find((node) => !node.archived && node.type === "event"); + assert.ok(created); + assert.equal(created.fields.title, "午夜越界"); + assert.equal( + created.fields.summary, + "两人在午夜越界相见,留下了新的冲突线索。", + ); + assert.equal(created.fields.participants, "悟悟, 晗"); + } finally { + restoreOverrides(); + } +} + +async function testExtractorNormalizesArrayPayloadAndPreservesScopeField() { + const graph = createEmptyGraph(); + const restoreOverrides = pushTestOverrides({ + llm: { + async callLLMForJSON() { + return [ + { + type: "synopsis", + id: "syn1", + summary: "最近的整体剧情进入高压对峙阶段。", + scope: "20-2-2", + }, + ]; + }, + }, + }); + + try { + const result = await extractMemories({ + graph, + messages: [{ seq: 8, role: "assistant", content: "测试数组 payload" }], + startSeq: 8, + endSeq: 8, + schema, + embeddingConfig: null, + settings: {}, + }); + + assert.equal(result.success, true); + assert.equal(result.newNodes, 1); + const created = graph.nodes.find( + (node) => !node.archived && node.type === "synopsis", + ); + assert.ok(created); + assert.equal(created.fields.summary, "最近的整体剧情进入高压对峙阶段。"); + assert.equal(created.fields.scope, "20-2-2"); + assert.equal(created.scope?.layer, "objective"); + } finally { + restoreOverrides(); + } +} + async function testConsolidatorMergeUpdatesSeqRange() { const graph = createEmptyGraph(); const target = createNode({ @@ -5023,6 +5117,8 @@ async function testLlmOutputRegexCleansResponseBeforeJsonParse() { await testCompressorMigratesEdgesToCompressedNode(); await testVectorIndexKeepsDirtyOnDirectPartialEmbeddingFailure(); await testExtractorFailsOnUnknownOperation(); +await testExtractorNormalizesFlatCreateOperation(); +await testExtractorNormalizesArrayPayloadAndPreservesScopeField(); await testConsolidatorMergeUpdatesSeqRange(); await testConsolidatorMergeFallbackKeepsNodeWhenTargetMissing(); await testBatchJournalVectorDeltaCapturesRecoveryFields();