mirror of
https://github.com/Youzini-afk/ST-Bionic-Memory-Ecology.git
synced 2026-06-13 18:31:16 +08:00
feat(extraction): default to split extraction pipeline
This commit is contained in:
@@ -41,6 +41,7 @@ import {
|
||||
buildTaskLlmPayload,
|
||||
buildTaskPrompt,
|
||||
} from "../prompting/prompt-builder.js";
|
||||
import { isExtractProfileSplitSafe } from "../prompting/prompt-profiles.js";
|
||||
import { RELATION_TYPES } from "../graph/schema.js";
|
||||
import { applyTaskRegex } from "../prompting/task-regex.js";
|
||||
import { getSTContextForPrompt, getSTContextSnapshot } from "../host/st-context.js";
|
||||
@@ -1110,7 +1111,11 @@ async function applyExtractionPostCommit({
|
||||
}
|
||||
|
||||
/**
 * Resolve which extraction pipeline version should run for the given settings.
 *
 * The requested version is read from `settings.extractPipelineVersion`, then
 * trimmed and lower-cased. A missing or empty value falls back to "split-v1".
 * A "split-v1" request is downgraded to "legacy-single" when
 * `isExtractProfileSplitSafe(settings)` returns false.
 *
 * @param {object} [settings={}] - Settings object; null/undefined is tolerated.
 * @returns {string} The normalized pipeline version identifier.
 */
function resolveExtractPipelineVersion(settings = {}) {
  // `||` also maps an empty string to the default, which `??` would not.
  const requested = String(settings?.extractPipelineVersion || "split-v1")
    .trim()
    .toLowerCase();

  // Only the split pipeline is gated; any other requested value passes through.
  if (requested === "split-v1" && !isExtractProfileSplitSafe(settings)) {
    return "legacy-single";
  }
  return requested;
}
|
||||
|
||||
function shouldUseSplitExtractionPipeline(settings = {}) {
|
||||
|
||||
@@ -2035,6 +2035,48 @@ function shouldRefreshBuiltinDefaultProfile(taskType, profile = {}) {
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
 * Report whether the extract configuration in `settings` is untouched enough
 * to be routed through the split extraction pipeline.
 *
 * Returns false when a non-empty `settings.extractPrompt` is present, or when
 * the active extract profile is missing, is not the builtin default profile,
 * is flagged `metadata.migratedFromLegacy`, needs a builtin refresh, or differs
 * from the freshly created default profile in blocks, generation, input,
 * regex, promptMode or enabled state. Returns true when `settings` carries no
 * `taskProfiles.extract` entry at all.
 *
 * @param {object} [settings={}] - Settings object; null/undefined is tolerated.
 * @returns {boolean} True when the split pipeline may be used.
 */
export function isExtractProfileSplitSafe(settings = {}) {
  const legacyPrompt = String(settings?.extractPrompt || "").trim();
  if (legacyPrompt) return false;

  const extractConfig = settings?.taskProfiles?.extract;
  if (!extractConfig) return true;

  const candidates = Array.isArray(extractConfig?.profiles) ? extractConfig.profiles : [];
  const wantedId = String(extractConfig?.activeProfileId || DEFAULT_PROFILE_ID);
  const active = candidates.find((entry) => String(entry?.id || "") === wantedId);

  // Identity checks: the active profile must be the unmigrated builtin default.
  if (
    !active ||
    String(active?.id || "") !== DEFAULT_PROFILE_ID ||
    active?.builtin !== true ||
    active?.metadata?.migratedFromLegacy === true
  ) {
    return false;
  }

  const canonical = createDefaultTaskProfile("extract");
  if (shouldRefreshBuiltinDefaultProfile("extract", active)) return false;

  // Comparison is on serialized form, so it is sensitive to key order.
  const sameJson = (left, right) => JSON.stringify(left) === JSON.stringify(right);

  const blocksMatch = sameJson(
    buildPromptBlockComparisonPayload(active?.blocks || []),
    buildPromptBlockComparisonPayload(canonical.blocks || []),
  );
  if (!blocksMatch) return false;

  for (const section of ["generation", "input", "regex"]) {
    if (!sameJson(active?.[section] || {}, canonical[section] || {})) return false;
  }

  if (String(active?.promptMode || "") !== String(canonical.promptMode || "")) {
    return false;
  }

  // Anything other than an explicit `false` counts as enabled on both sides.
  const activeEnabled = active?.enabled !== false;
  const canonicalEnabled = canonical.enabled !== false;
  return activeEnabled === canonicalEnabled;
}
|
||||
|
||||
function createFallbackDefaultTaskProfile(taskType) {
|
||||
const legacyPromptField = LEGACY_PROMPT_FIELD_MAP[taskType];
|
||||
const templateStamp = getDefaultTaskProfileTemplateStamp(taskType);
|
||||
|
||||
@@ -37,7 +37,7 @@ export const defaultSettings = {
|
||||
extractIncludeStoryTime: true,
|
||||
extractIncludeSummaries: true,
|
||||
extractActionMode: "pending",
|
||||
extractPipelineVersion: "legacy-single",
|
||||
extractPipelineVersion: "split-v1",
|
||||
|
||||
// 召回设置
|
||||
recallEnabled: true,
|
||||
|
||||
@@ -109,7 +109,7 @@ assert.equal(defaultSettings.loadNativeHydrateThresholdRecords, 30000);
|
||||
assert.equal(defaultSettings.nativeRolloutVersion, 2);
assert.equal(defaultSettings.nativeEngineFailOpen, true);
assert.equal(defaultSettings.graphNativeForceDisable, false);
// The split pipeline is the default; the earlier "legacy-single" expectation
// contradicted this assertion and has been dropped.
assert.equal(defaultSettings.extractPipelineVersion, "split-v1");
assert.equal(defaultSettings.taskProfilesVersion, 3);
assert.equal(defaultSettings.extractObjectivePrompt, "");
assert.equal(defaultSettings.extractSubjectivePrompt, "");
|
||||
|
||||
@@ -65,6 +65,7 @@ installResolveHooks([
|
||||
const { createEmptyGraph, createNode, addNode } = await import("../graph/graph.js");
|
||||
const { DEFAULT_NODE_SCHEMA } = await import("../graph/schema.js");
|
||||
const { extractMemories } = await import("../maintenance/extractor.js");
|
||||
const { defaultSettings } = await import("../runtime/settings-defaults.js");
|
||||
|
||||
function setTestOverrides(overrides = {}) {
|
||||
globalThis.__stBmeTestOverrides = overrides;
|
||||
@@ -190,6 +191,106 @@ function characterKnowledgeEntries(graph) {
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Run `extractMemories` against a fresh character graph with a stubbed LLM and
 * record the `taskType` of every `callLLMForJSON` invocation, in call order.
 *
 * The stub answers "extract_objective" with `objectivePayload()`,
 * "extract_subjective" with `subjectivePayload()`, and every other task type
 * with an empty operations/cognitionUpdates/regionUpdates result.
 *
 * @param {object|undefined} settings - Passed as `params.settings` unless disabled.
 * @param {object} [options={}]
 * @param {boolean} [options.includeSettings] - When exactly `false`, the
 *   `settings` key is not added to the extract params.
 * @returns {Promise<{graph: object, result: object, capturedTaskTypes: string[]}>}
 */
async function captureTaskTypesForExtract(settings, options = {}) {
  const graph = createGraphWithCharacter();
  const capturedTaskTypes = [];

  const llmStub = {
    async callLLMForJSON(payload = {}) {
      const { taskType } = payload;
      capturedTaskTypes.push(taskType);
      switch (taskType) {
        case "extract_objective":
          return objectivePayload();
        case "extract_subjective":
          return subjectivePayload();
        default:
          // Covers the legacy "extract" task type and anything unexpected.
          return { operations: [], cognitionUpdates: [], regionUpdates: {} };
      }
    },
  };
  const restore = setTestOverrides({ llm: llmStub });

  try {
    const withSettings = options.includeSettings !== false;
    const params = withSettings
      ? { graph, ...baseExtractParams, settings }
      : { graph, ...baseExtractParams };
    const result = await extractMemories(params);
    return { graph, result, capturedTaskTypes };
  } finally {
    // Always undo the overrides, even when extraction throws.
    restore();
  }
}
|
||||
|
||||
/**
 * Deep-copy a JSON-compatible value through a stringify/parse round trip.
 *
 * @param {*} value - Value to copy.
 * @returns {*} An independent copy, or `undefined` when `value` has no JSON
 *   representation at the top level (for example `undefined` itself).
 */
function cloneJson(value) {
  const serialized = JSON.stringify(value);
  // JSON.stringify returns undefined for non-serializable top-level input, and
  // JSON.parse(undefined) would throw a SyntaxError.
  if (serialized === undefined) return undefined;
  return JSON.parse(serialized);
}
|
||||
|
||||
/**
 * Build settings whose active extract profile is a non-builtin copy of the
 * first default extract profile, with a sentinel line appended to the content
 * of its first prompt block. `extractPipelineVersion` is set to "split-v1".
 *
 * @returns {object} A settings object based on `defaultSettings`.
 */
function createCustomizedLegacyExtractProfileSettings() {
  const taskProfiles = cloneJson(defaultSettings.taskProfiles);
  const baseProfile = taskProfiles.extract.profiles[0];

  const sourceBlocks = Array.isArray(baseProfile.blocks) ? baseProfile.blocks : [];
  const customBlocks = sourceBlocks.map((block, position) => {
    if (position !== 0) return { ...block };
    // Only the first block is altered; the rest are shallow copies.
    const marked = `${String(block.content || "")}\nCUSTOM_LEGACY_EXTRACT_SENTINEL`;
    return { ...block, content: marked };
  });

  const customProfile = {
    ...baseProfile,
    id: "custom-legacy-extract-profile",
    name: "Custom legacy extract profile",
    builtin: false,
    blocks: customBlocks,
  };

  taskProfiles.extract = {
    activeProfileId: customProfile.id,
    profiles: [baseProfile, customProfile],
  };

  return Object.assign({}, defaultSettings, {
    extractPipelineVersion: "split-v1",
    taskProfiles,
  });
}
|
||||
|
||||
/**
 * Build settings from a cloned copy of the default task profiles, letting the
 * caller tweak the default extract profile before the settings are returned.
 * `extractPipelineVersion` is set to "split-v1".
 *
 * @param {Function} [mutator] - Called with (defaultProfile, extractConfig),
 *   where defaultProfile is the profile with id "default", or the first
 *   profile when no such id exists.
 * @returns {object} A settings object based on `defaultSettings`.
 */
function createDefaultExtractProfileSettings(mutator) {
  const taskProfiles = cloneJson(defaultSettings.taskProfiles);
  const extractConfig = taskProfiles.extract;
  const profileList = extractConfig.profiles || [];

  const byDefaultId = profileList.find((profile) => profile.id === "default");
  const defaultProfile = byDefaultId || profileList[0];

  if (mutator != null) {
    mutator(defaultProfile, taskProfiles.extract);
  }

  return Object.assign({}, defaultSettings, {
    extractPipelineVersion: "split-v1",
    taskProfiles,
  });
}
|
||||
|
||||
// Phase 4 default switch: when the caller passes no settings at all,
// extraction should run the split objective + subjective stages.
{
  const run = await captureTaskTypesForExtract(undefined, { includeSettings: false });
  const expectedTaskTypes = ["extract_objective", "extract_subjective"];

  assert.equal(run.result.success, true);
  assert.deepEqual(
    run.capturedTaskTypes,
    expectedTaskTypes,
    "extractMemories without explicit settings should default to split objective+subjective extraction",
  );
}
|
||||
|
||||
// Phase 4 default switch: a plain copy of defaultSettings should request
// split-v1 and therefore run the objective + subjective stages.
{
  const settingsCopy = { ...defaultSettings };
  const run = await captureTaskTypesForExtract(settingsCopy);
  const expectedTaskTypes = ["extract_objective", "extract_subjective"];

  assert.equal(run.result.success, true);
  assert.equal(defaultSettings.extractPipelineVersion, "split-v1");
  assert.deepEqual(
    run.capturedTaskTypes,
    expectedTaskTypes,
    "defaultSettings should call split objective+subjective extraction",
  );
}
|
||||
|
||||
// split-v1 calls objective then subjective, merges both stage outputs, and commits once.
|
||||
{
|
||||
const graph = createGraphWithCharacter();
|
||||
@@ -282,35 +383,92 @@ function characterKnowledgeEntries(graph) {
|
||||
}
|
||||
}
|
||||
|
||||
// Legacy guard: a non-empty legacy extractPrompt should force the single
// extract taskType path even when split-v1 is requested explicitly.
{
  const { result, capturedTaskTypes } = await captureTaskTypesForExtract({
    ...defaultSettings,
    extractPipelineVersion: "split-v1",
    extractPrompt: "CUSTOM LEGACY EXTRACT PROMPT",
  });

  assert.equal(result.success, true);
  assert.deepEqual(
    capturedTaskTypes,
    ["extract"],
    "non-empty extractPrompt should guard back to legacy taskType extract",
  );
}
|
||||
// Legacy guard: when the active extract task profile is a customized
// (non-builtin) one, extraction should stay on the single extract path.
{
  const customizedSettings = createCustomizedLegacyExtractProfileSettings();
  const run = await captureTaskTypesForExtract(customizedSettings);

  assert.equal(run.result.success, true);
  assert.deepEqual(
    run.capturedTaskTypes,
    ["extract"],
    "customized active taskProfiles.extract profile should guard back to legacy taskType extract",
  );
}
|
||||
|
||||
// Legacy guard: explicitly requesting "legacy-single" must always keep the
// single extract path.
{
  const legacySettings = { ...defaultSettings, extractPipelineVersion: "legacy-single" };
  const run = await captureTaskTypesForExtract(legacySettings);

  assert.equal(run.result.success, true);
  assert.deepEqual(run.capturedTaskTypes, ["extract"]);
}
|
||||
|
||||
// Legacy guard: a default-looking profile flagged as migrated from legacy is
// treated conservatively and stays on the single extract path.
{
  const markMigrated = (profile) => {
    profile.metadata = { ...(profile.metadata || {}), migratedFromLegacy: true };
  };
  const run = await captureTaskTypesForExtract(createDefaultExtractProfileSettings(markMigrated));

  assert.equal(run.result.success, true);
  assert.deepEqual(run.capturedTaskTypes, ["extract"]);
}
|
||||
|
||||
// Legacy guard: a default profile carrying a stale template fingerprint is
// treated conservatively and stays on the single extract path.
{
  const markStale = (profile) => {
    profile.metadata = {
      ...(profile.metadata || {}),
      defaultTemplateFingerprint: "stale-fingerprint",
    };
  };
  const run = await captureTaskTypesForExtract(createDefaultExtractProfileSettings(markStale));

  assert.equal(run.result.success, true);
  assert.deepEqual(run.capturedTaskTypes, ["extract"]);
}
|
||||
|
||||
// Legacy guard: editing the content of the default profile keeps extraction on
// the single extract path, even though its id and builtin flag are unchanged.
{
  const appendSentinel = (profile) => {
    profile.blocks = (profile.blocks || []).map((block, position) => {
      if (position !== 0) return { ...block };
      const marked = `${String(block.content || "")}\nCUSTOM_DEFAULT_PROFILE_SENTINEL`;
      return { ...block, content: marked };
    });
  };
  const run = await captureTaskTypesForExtract(createDefaultExtractProfileSettings(appendSentinel));

  assert.equal(run.result.success, true);
  assert.deepEqual(run.capturedTaskTypes, ["extract"]);
}

console.log("extractor-split-pipeline tests passed");
|
||||
|
||||
Reference in New Issue
Block a user