From 922b716ff66a17413dd317000237734095476ace Mon Sep 17 00:00:00 2001 From: Youzini-afk <13153778771cx@gmail.com> Date: Thu, 9 Apr 2026 00:28:12 +0800 Subject: [PATCH] Fix host regex reuse for extraction inputs --- maintenance/extractor.js | 1 + prompting/injection-sanitizer.js | 122 ++++++++++++++++++++++++++++-- prompting/prompt-builder.js | 96 ++++++++++++++++++++++- prompting/task-regex.js | 60 ++++++++++++++- tests/prompt-builder-defaults.mjs | 77 +++++++++++++++++++ tests/task-regex.mjs | 86 ++++++++++++++++++++- ui/panel.js | 6 +- 7 files changed, 432 insertions(+), 16 deletions(-) diff --git a/maintenance/extractor.js b/maintenance/extractor.js index e1ec19e..e456227 100644 --- a/maintenance/extractor.js +++ b/maintenance/extractor.js @@ -722,6 +722,7 @@ export async function extractMemories({ schema: schemaDescription, schemaDescription, recentMessages: dialogueText, + chatMessages: messages, dialogueText, graphStats: graphOverview, graphOverview, diff --git a/prompting/injection-sanitizer.js b/prompting/injection-sanitizer.js index 722e4bf..dd1673a 100644 --- a/prompting/injection-sanitizer.js +++ b/prompting/injection-sanitizer.js @@ -26,6 +26,90 @@ function normalizeReasons(reasons = []) { : []; } +function normalizeMessageLikeRole(value = "", isUser = false) { + if (typeof value === "string") { + const normalized = value.trim().toLowerCase(); + if (normalized === "user") { + return "user"; + } + if (normalized === "assistant") { + return "assistant"; + } + } + return isUser ? "user" : "assistant"; +} + +function getStructuredMessageDescriptor(value) { + if (!value || typeof value !== "object" || Array.isArray(value)) { + return null; + } + + if (typeof value.content === "string") { + const role = normalizeMessageLikeRole(value.role, false); + return { + contentKey: "content", + role, + sourceType: role === "user" ? "user_input" : "ai_output", + depth: Number.isFinite(Number(value.depth)) ? Number(value.depth) : null, + }; + } + + if (typeof value.mes === "string") { + const role = normalizeMessageLikeRole("", Boolean(value.is_user)); + return { + contentKey: "mes", + role, + sourceType: role === "user" ? "user_input" : "ai_output", + depth: Number.isFinite(Number(value.depth)) ? Number(value.depth) : null, + }; + } + + return null; +} + +function mergeFormatterOptions(baseOptions = null, overrides = {}) { + const base = + baseOptions && typeof baseOptions === "object" ? baseOptions : {}; + const merged = { + ...base, + ...overrides, + }; + + if (merged.isPrompt == null) { + merged.isPrompt = true; + } + if (merged.isMarkdown == null) { + merged.isMarkdown = false; + } + + if (!Number.isFinite(Number(merged.depth))) { + delete merged.depth; + } else { + merged.depth = Number(merged.depth); + } + + return merged; +} + +function buildMessageFormatterOptions( + baseOptions = null, + descriptor = null, + index = -1, + total = 0, +) { + let depth = + descriptor?.depth != null && Number.isFinite(Number(descriptor.depth)) + ? Number(descriptor.depth) + : null; + if (!Number.isFinite(depth) && Number.isFinite(index) && total > 0) { + depth = Math.max(total - index - 1, 0); + } + + return Number.isFinite(depth) + ? mergeFormatterOptions(baseOptions, { depth }) + : mergeFormatterOptions(baseOptions); +} + function pushUnique(target = [], value = "") { const normalized = String(value || "").trim(); if (!normalized || target.includes(normalized)) { @@ -134,12 +218,17 @@ export function sanitizeInjectionText( recordSanitizerDebug(debugState, path, sanitizerResult, stage); const afterSanitizer = String(sanitizerResult.text || ""); - const hostReuseResult = eligible && applyHostRegex && regexSourceType + const normalizedFormatterOptions = mergeFormatterOptions(formatterOptions); + const hostReuseResult = + eligible && + applyHostRegex && + regexSourceType && + afterSanitizer.length > 0 ? applyHostRegexReuse(settings, taskType, afterSanitizer, { sourceType: regexSourceType, role, debugCollector: regexCollector, - formatterOptions, + formatterOptions: normalizedFormatterOptions, }) : { text: afterSanitizer, @@ -284,6 +373,7 @@ export function sanitizeInjectionStructuredValue( const sanitizedArray = []; let changed = false; for (let index = 0; index < value.length; index += 1) { + const messageDescriptor = getStructuredMessageDescriptor(value[index]); const childResult = sanitizeInjectionStructuredValue( settings, taskType, @@ -297,7 +387,14 @@ export function sanitizeInjectionStructuredValue( sanitizationEligible, regexSourceType, role, - formatterOptions, + formatterOptions: messageDescriptor + ? buildMessageFormatterOptions( + formatterOptions, + messageDescriptor, + index, + value.length, + ) + : formatterOptions, debugState, regexCollector, applySanitizer, @@ -335,6 +432,7 @@ export function sanitizeInjectionStructuredValue( seen.add(value); const originalLooksMvuContainer = looksLikeMvuStateContainer(value); + const messageDescriptor = getStructuredMessageDescriptor(value); const sanitizedObject = {}; let changed = false; let keptEntries = 0; @@ -359,6 +457,8 @@ export function sanitizeInjectionStructuredValue( continue; } + const isMessageContentField = + messageDescriptor && key === messageDescriptor.contentKey; const childResult = sanitizeInjectionStructuredValue( settings, taskType, @@ -370,13 +470,21 @@ export function sanitizeInjectionStructuredValue( blockedContents, contentOrigin, sanitizationEligible, - regexSourceType, - role, - formatterOptions, + regexSourceType: isMessageContentField + ? messageDescriptor.sourceType + : regexSourceType, + role: isMessageContentField ? messageDescriptor.role : role, + formatterOptions: isMessageContentField + ? buildMessageFormatterOptions(formatterOptions, messageDescriptor) + : formatterOptions, debugState, regexCollector, applySanitizer, - applyHostRegex, + applyHostRegex: messageDescriptor + ? isMessageContentField + ? applyHostRegex && Boolean(messageDescriptor.sourceType) + : false + : applyHostRegex, stripMvuContainers, seen, }, diff --git a/prompting/prompt-builder.js b/prompting/prompt-builder.js index 9a0e6da..d217115 100644 --- a/prompting/prompt-builder.js +++ b/prompting/prompt-builder.js @@ -269,12 +269,67 @@ function messageUsesWorldInfoContent(message = {}) { return String(message?.source || "") === "worldInfo-atDepth"; } +function getPromptMessageLikeDescriptor(value) { + if (!value || typeof value !== "object" || Array.isArray(value)) { + return null; + } + + if (typeof value.content === "string") { + const role = String(value.role || "assistant").trim().toLowerCase(); + return { + content: String(value.content || ""), + role: role === "user" ? "user" : "assistant", + seq: Number.isFinite(Number(value.seq)) ? Number(value.seq) : null, + }; + } + + if (typeof value.mes === "string") { + return { + content: String(value.mes || ""), + role: value.is_user === true ? "user" : "assistant", + seq: Number.isFinite(Number(value.seq)) ? Number(value.seq) : null, + }; + } + + return null; +} + +function isPromptMessageArray(value) { + return ( + Array.isArray(value) && + value.length > 0 && + value.every((entry) => getPromptMessageLikeDescriptor(entry)) + ); +} + +function formatPromptMessageTranscript(value) { + const entries = Array.isArray(value) ? value : [value]; + return entries + .map((entry, index) => { + const descriptor = getPromptMessageLikeDescriptor(entry); + if (!descriptor) { + return ""; + } + const seqLabel = + descriptor.seq != null ? `#${descriptor.seq}` : `#${index + 1}`; + return `${seqLabel} [${descriptor.role}]: ${descriptor.content}`; + }) + .filter(Boolean) + .join("\n\n"); +} + function stringifyInterpolatedValue(value) { if (value == null) return ""; if (typeof value === "string") return value; if (typeof value === "number" || typeof value === "boolean") { return String(value); } + if (getPromptMessageLikeDescriptor(value)) { + return formatPromptMessageTranscript(value); + } + if (isPromptMessageArray(value)) { + return formatPromptMessageTranscript(value); + } try { return JSON.stringify(value, null, 2); @@ -632,6 +687,32 @@ function sanitizePromptMessages( .filter(Boolean); } +function resolveStructuredMessageSanitizerInput(fieldName = "", context = {}, value) { + const normalizedFieldName = String(fieldName || "").trim(); + if (!["recentMessages", "dialogueText"].includes(normalizedFieldName)) { + return { + value, + renderAsTranscript: false, + }; + } + + if ( + typeof value === "string" && + Array.isArray(context?.chatMessages) && + isPromptMessageArray(context.chatMessages) + ) { + return { + value: context.chatMessages, + renderAsTranscript: true, + }; + } + + return { + value, + renderAsTranscript: false, + }; +} + function sanitizePromptContextInputs( settings = {}, taskType, @@ -697,13 +778,19 @@ function sanitizePromptContextInputs( continue; } const value = sanitizedContext[fieldName]; + const structuredSanitizerInput = resolveStructuredMessageSanitizerInput( + fieldName, + context, + value, + ); + const valueForSanitizer = structuredSanitizerInput.value; const regexStage = INPUT_REGEX_STAGE_BY_FIELD[fieldName] || ""; const regexRole = INPUT_REGEX_ROLE_BY_FIELD[fieldName] || "system"; const regexSourceType = INPUT_HOST_REGEX_SOURCE_BY_FIELD[fieldName] || ""; const sanitized = sanitizeInjectionStructuredValue( settings, taskType, - value, + valueForSanitizer, { fieldName, path: fieldName, @@ -720,12 +807,15 @@ function sanitizePromptContextInputs( }, ); let sanitizedValue = sanitized.omit - ? Array.isArray(value) + ? Array.isArray(valueForSanitizer) ? [] - : typeof value === "string" + : typeof valueForSanitizer === "string" ? "" : null : sanitized.value; + if (structuredSanitizerInput.renderAsTranscript) { + sanitizedValue = stringifyInterpolatedValue(sanitizedValue); + } sanitizedValue = applyLocalRegexToStructuredValue( sanitizedValue, regexStage, diff --git a/prompting/task-regex.js b/prompting/task-regex.js index 07f4979..8242946 100644 --- a/prompting/task-regex.js +++ b/prompting/task-regex.js @@ -777,6 +777,49 @@ function normalizeHostRegexSourceType(sourceType = "") { return ""; } +function normalizeHostFormatterOptions(formatterOptions = null) { + const normalized = + formatterOptions && typeof formatterOptions === "object" + ? { ...formatterOptions } + : {}; + if (normalized.isPrompt == null) { + normalized.isPrompt = true; + } + if (normalized.isMarkdown == null) { + normalized.isMarkdown = false; + } + if (!Number.isFinite(Number(normalized.depth))) { + delete normalized.depth; + } else { + normalized.depth = Number(normalized.depth); + } + return normalized; +} + +function ruleMatchesFormatterDepth(rule, formatterOptions = null) { + const depth = Number(formatterOptions?.depth); + if (!Number.isFinite(depth)) { + return true; + } + if ( + rule?.minDepth != null && + Number.isFinite(Number(rule.minDepth)) && + Number(rule.minDepth) >= -1 && + depth < Number(rule.minDepth) + ) { + return false; + } + if ( + rule?.maxDepth != null && + Number.isFinite(Number(rule.maxDepth)) && + Number(rule.maxDepth) >= 0 && + depth > Number(rule.maxDepth) + ) { + return false; + } + return true; +} + function buildHostRegexExecutionState(regexHost = null) { const formatterAvailable = typeof regexHost?.formatAsTavernRegexedString === "function"; @@ -835,9 +878,15 @@ function shouldReuseTavernRuleForSourceType(rule, sourceType = "", role = "syste } if (normalizedSourceType === "user_input") { + if (role === "mixed") { + return rule.sourceFlags.user !== false || rule.sourceFlags.assistant !== false; + } return rule.sourceFlags.user !== false; } if (normalizedSourceType === "ai_output") { + if (role === "mixed") { + return rule.sourceFlags.user !== false || rule.sourceFlags.assistant !== false; + } if (role === "user") { return rule.sourceFlags.user !== false; } @@ -934,11 +983,13 @@ function applyHostRegexReuseFallback( { sourceType = "", role = "system", + formatterOptions = null, } = {}, ) { let output = String(input || ""); const appliedRules = []; const normalizedSourceType = normalizeHostRegexSourceType(sourceType); + const normalizedFormatterOptions = normalizeHostFormatterOptions(formatterOptions); for (const rule of Array.isArray(tavernRules) ? tavernRules : []) { if (!shouldReuseTavernRuleForPrompt(rule, "host-fallback")) { @@ -947,6 +998,9 @@ function applyHostRegexReuseFallback( if (!shouldReuseTavernRuleForSourceType(rule, normalizedSourceType, role)) { continue; } + if (!ruleMatchesFormatterDepth(rule, normalizedFormatterOptions)) { + continue; + } const result = applyOneRule(output, rule, ""); if (result.error) { @@ -986,6 +1040,7 @@ export function applyHostRegexReuse( const input = typeof text === "string" ? text : ""; const normalizedTaskType = String(taskType || "").trim(); const normalizedSourceType = normalizeHostRegexSourceType(sourceType); + const normalizedFormatterOptions = normalizeHostFormatterOptions(formatterOptions); const profile = getActiveTaskProfile(settings, normalizedTaskType); const regexConfig = profile?.regex || {}; const regexHost = getRegexHost(); @@ -1064,9 +1119,7 @@ export function applyHostRegexReuse( input, normalizedSourceType, "prompt", - formatterOptions && typeof formatterOptions === "object" - ? formatterOptions - : undefined, + normalizedFormatterOptions, ) ?? input, ); pushDebug(debugCollector, { @@ -1101,6 +1154,7 @@ export function applyHostRegexReuse( const fallback = applyHostRegexReuseFallback(input, tavernRules, { sourceType: normalizedSourceType, role, + formatterOptions: normalizedFormatterOptions, }); const fallbackReason = executionState.mode === "host-unavailable" diff --git a/tests/prompt-builder-defaults.mjs b/tests/prompt-builder-defaults.mjs index a4ac8b6..85acbe4 100644 --- a/tests/prompt-builder-defaults.mjs +++ b/tests/prompt-builder-defaults.mjs @@ -51,6 +51,7 @@ registerHooks({ const { buildTaskLlmPayload, buildTaskPrompt } = await import("../prompting/prompt-builder.js"); const { createDefaultTaskProfiles } = await import("../prompting/prompt-profiles.js"); +const { initializeHostAdapter } = await import("../host/adapter/index.js"); const settings = { taskProfilesVersion: 3, @@ -146,4 +147,80 @@ assert.match(String(recallFormatBlock?.content || ""), /active_owner_keys/); assert.match(String(recallFormatBlock?.content || ""), /active_owner_scores/); assert.match(String(recallRulesBlock?.content || ""), /剧情时间/); +const formatterCalls = []; +initializeHostAdapter({ + regexProvider: { + getTavernRegexes() { + return []; + }, + isCharacterTavernRegexesEnabled() { + return true; + }, + formatAsTavernRegexedString(text, source, destination, options) { + formatterCalls.push({ text, source, destination, options }); + if (source === "ai_output") { + return String(text || "").replace(/.*?<\/action>/g, ""); + } + if (source === "user_input") { + return String(text || "").replace(/|<\/u>/g, ""); + } + return String(text || ""); + }, + }, +}); + +const regexAwarePromptBuild = await buildTaskPrompt(settings, "extract", { + taskName: "extract", + charDescription: "", + userPersona: "", + recentMessages: "这里会被 chatMessages 回填", + chatMessages: [ + { + seq: 36, + role: "assistant", + content: "挥手继续说明", + }, + { + seq: 37, + role: "user", + content: "用户输入", + }, + ], + graphStats: "node_count=1", + schema: "event(title, summary)", + currentRange: "36 ~ 37", +}); +const regexAwarePayload = buildTaskLlmPayload( + regexAwarePromptBuild, + "fallback-user", +); +const regexAwareRecentBlock = regexAwarePayload.promptMessages.find( + (message) => message.sourceKey === "recentMessages", +); +assert.match(String(regexAwareRecentBlock?.content || ""), /#36 \[assistant\]: 继续说明/); +assert.match(String(regexAwareRecentBlock?.content || ""), /#37 \[user\]: 用户输入/); +assert.doesNotMatch(String(regexAwareRecentBlock?.content || ""), /action||<\/u>/i); +assert.equal( + formatterCalls.some( + (call) => + call.source === "ai_output" && + call.destination === "prompt" && + call.options?.depth === 1 && + call.options?.isPrompt === true, + ), + true, +); +assert.equal( + formatterCalls.some( + (call) => + call.source === "user_input" && + call.destination === "prompt" && + call.options?.depth === 0 && + call.options?.isPrompt === true, + ), + true, +); + +initializeHostAdapter({}); + console.log("prompt-builder-defaults tests passed"); diff --git a/tests/task-regex.mjs b/tests/task-regex.mjs index 3b05ea4..d66a5fa 100644 --- a/tests/task-regex.mjs +++ b/tests/task-regex.mjs @@ -300,8 +300,8 @@ try { isCharacterTavernRegexesEnabled() { return true; }, - formatAsTavernRegexedString(text, source, destination) { - formatterCalls.push({ text, source, destination }); + formatAsTavernRegexedString(text, source, destination, options) { + formatterCalls.push({ text, source, destination, options }); return String(text || "").replace(/Alpha/g, "HOST"); }, }, @@ -330,6 +330,10 @@ try { text: "Alpha Beta", source: "user_input", destination: "prompt", + options: { + isPrompt: true, + isMarkdown: false, + }, }, ]); assert.equal(fullBridgeDebug.entries[0].executionMode, "host-real"); @@ -398,6 +402,84 @@ try { assert.equal(fallbackOutput.text, "C1"); assert.equal(fallbackDebug.entries[0].executionMode, "host-fallback"); + setTestContext({ + extensionSettings: { + regex: [ + createTavernRule("depth-aware", "/Gamma/g", "DEPTH", { + placement: [PLACEMENT.WORLD_INFO], + minDepth: 1, + maxDepth: 1, + }), + ], + preset_allowed_regex: {}, + character_allowed_regex: [], + }, + }); + initializeHostAdapter({}); + const depthMissResult = applyHostRegexReuse( + buildSettings({ + sources: { + global: true, + preset: false, + character: false, + }, + }), + "extract", + "Gamma", + { + sourceType: "world_info", + role: "system", + formatterOptions: { + depth: 0, + }, + debugCollector: { entries: [] }, + }, + ); + const depthHitResult = applyHostRegexReuse( + buildSettings({ + sources: { + global: true, + preset: false, + character: false, + }, + }), + "extract", + "Gamma", + { + sourceType: "world_info", + role: "system", + formatterOptions: { + depth: 1, + }, + debugCollector: { entries: [] }, + }, + ); + assert.equal(depthMissResult.text, "Gamma"); + assert.equal(depthHitResult.text, "DEPTH"); + + setTestContext({ + extensionSettings: fallbackExtensionSettings, + presetScripts: [ + createTavernRule("preset-fallback", "/G1/g", "P1", { + promptOnly: true, + }), + ], + characters: [ + { + avatar: "hero.png", + data: { + extensions: { + regex_scripts: [ + createTavernRule("character-fallback", "/P1/g", "C1", { + promptOnly: true, + }), + ], + }, + }, + }, + ], + }); + initializeHostAdapter({}); const fallbackInspect = inspectTaskRegexReuse(buildSettings(), "extract"); assert.equal(fallbackInspect.activeRuleCount, 3); assert.deepEqual( diff --git a/ui/panel.js b/ui/panel.js index 1c53da7..81e38d1 100644 --- a/ui/panel.js +++ b/ui/panel.js @@ -6233,7 +6233,11 @@ function _renderRegexReuseBadges(rule = {}) { text: "仅 Prompt", }); } - if (rule.promptStageMode !== "skip" && rule.promptStageApplies === false) { + if ( + rule.sourceType === "local" && + rule.promptStageMode !== "skip" && + rule.promptStageApplies === false + ) { badges.push({ className: "is-skip", text: "当前任务未启用",