From 5cc33fabda32667d5d5eff541adf46cd62c39adf Mon Sep 17 00:00:00 2001 From: Youzini-afk <13153778771cx@gmail.com> Date: Sun, 5 Apr 2026 00:30:54 +0800 Subject: [PATCH] Fix task regex final input pipeline --- llm.js | 166 ++++++++++++++++++++++++++++++++--- panel.js | 68 +++++++++++--- prompt-builder.js | 8 +- prompt-profiles.js | 94 ++++++++++++++++++-- task-regex.js | 35 ++++---- tests/prompt-builder-mvu.mjs | 19 +++- tests/task-regex.mjs | 73 +++++++++++++++ 7 files changed, 412 insertions(+), 51 deletions(-) diff --git a/llm.js b/llm.js index 2931f8d..0c8f406 100644 --- a/llm.js +++ b/llm.js @@ -215,6 +215,104 @@ function applyTaskOutputRegexStages(taskType, text) { }; } +function applyTaskFinalInputRegex(taskType, messages = []) { + const normalizedMessages = (Array.isArray(messages) ? messages : []) + .map((message) => { + if (!message || typeof message !== "object") { + return null; + } + const role = String(message.role || "").trim().toLowerCase(); + if (!["system", "user", "assistant"].includes(role)) { + return null; + } + return { + ...message, + role, + content: String(message.content || ""), + }; + }) + .filter(Boolean); + const normalizedTaskType = String(taskType || "").trim(); + + if (!normalizedTaskType || normalizedMessages.length === 0) { + const cleanedMessages = normalizedMessages.filter((message) => + String(message.content || "").trim(), + ); + return { + messages: cleanedMessages, + debug: { + stage: "input.finalPrompt", + changed: cleanedMessages.length !== normalizedMessages.length, + applied: false, + rawMessageCount: normalizedMessages.length, + cleanedMessageCount: cleanedMessages.length, + droppedMessageCount: normalizedMessages.length - cleanedMessages.length, + stages: [], + }, + }; + } + + const settings = extension_settings[MODULE_NAME] || {}; + const regexDebug = { entries: [] }; + let changed = false; + let droppedMessageCount = 0; + const cleanedMessages = normalizedMessages + .map((message) => { + const originalContent = String(message.content || ""); + const cleanedContent = applyTaskRegex( + settings, + normalizedTaskType, + "input.finalPrompt", + originalContent, + regexDebug, + message.role, + ); + if (cleanedContent !== originalContent) { + changed = true; + } + if (!String(cleanedContent || "").trim()) { + droppedMessageCount += 1; + return null; + } + return { + ...message, + content: cleanedContent, + }; + }) + .filter(Boolean); + const normalizedEntries = normalizeRegexDebugEntries(regexDebug); + const applied = normalizedEntries.some( + (entry) => entry.appliedRules.length > 0, + ); + + return { + messages: cleanedMessages, + debug: { + stage: "input.finalPrompt", + changed: changed || droppedMessageCount > 0, + applied, + rawMessageCount: normalizedMessages.length, + cleanedMessageCount: cleanedMessages.length, + droppedMessageCount, + stages: normalizedEntries, + }, + }; +} + +function attachRequestCleaningToPromptExecution( + promptExecutionSummary, + requestCleaning, +) { + const base = + promptExecutionSummary && typeof promptExecutionSummary === "object" + ? cloneRuntimeDebugValue(promptExecutionSummary, {}) + : {}; + if (requestCleaning && typeof requestCleaning === "object") { + base.requestCleaning = cloneRuntimeDebugValue(requestCleaning, null); + } + return base; +} + function buildEffectiveLlmRoute( hasDedicatedConfig, privateRequestSource, @@ -1477,7 +1575,7 @@ export async function callLLMForJSON({ for (let attempt = 0; attempt <= maxRetries; attempt++) { try { - const messages = buildJsonAttemptMessages( + const assembledMessages = buildJsonAttemptMessages( systemPrompt, userPrompt, attempt, @@ -1485,7 +1583,25 @@ export async function callLLMForJSON({ additionalMessages, promptMessages, ); - const response = await callDedicatedOpenAICompatible(messages, { + const requestCleaning = applyTaskFinalInputRegex( + taskType, + assembledMessages, + ); + const promptExecutionSnapshot = attachRequestCleaningToPromptExecution( + promptExecutionSummary, + requestCleaning.debug, + ); + recordTaskLlmRequest( + taskType || privateRequestSource, + { + requestCleaning: requestCleaning.debug, + promptExecution: promptExecutionSnapshot, + }, + { + merge: true, + }, + ); + const response = await callDedicatedOpenAICompatible(requestCleaning.messages, { signal, jsonMode: true, taskType, @@ -1500,8 +1616,9 @@ export async function callLLMForJSON({ recordTaskLlmRequest( taskType || privateRequestSource, { + requestCleaning: requestCleaning.debug, responseCleaning: outputCleanup.debug, - promptExecution: promptExecutionSummary, + promptExecution: promptExecutionSnapshot, }, { merge: true, @@ -1592,19 +1709,48 @@ export async function callLLM(systemPrompt, userPrompt, options = {}) { return await override(systemPrompt, userPrompt, options); } - const messages = [ + const taskType = String(options.taskType || "").trim(); + const privateRequestSource = resolvePrivateRequestSource( + taskType, + options.requestSource || options.source || "diagnostic:call-llm", + { allowAnonymous: true }, + ); + const promptExecutionSummary = buildPromptExecutionSummary( + options.debugContext || null, + ); + const assembledMessages = [ { role: "system", content: systemPrompt }, { role: "user", content: userPrompt }, ]; + const requestCleaning = applyTaskFinalInputRegex(taskType, assembledMessages); + const promptExecutionSnapshot = attachRequestCleaningToPromptExecution( + promptExecutionSummary, + requestCleaning.debug, + ); try { - const response = await callDedicatedOpenAICompatible(messages, { - signal: options.signal, - taskType: options.taskType || "", - requestSource: - options.requestSource || options.source || "diagnostic:call-llm", + recordTaskLlmRequest(taskType || privateRequestSource, { + requestCleaning: requestCleaning.debug, + promptExecution: promptExecutionSnapshot, + }, { + merge: true, }); - return response?.content || null; + const response = await callDedicatedOpenAICompatible(requestCleaning.messages, { + signal: options.signal, + taskType, + requestSource: privateRequestSource, + }); + const responseText = + typeof response?.content === "string" ? response.content : ""; + const outputCleanup = applyTaskOutputRegexStages(taskType, responseText); + recordTaskLlmRequest(taskType || privateRequestSource, { + requestCleaning: requestCleaning.debug, + responseCleaning: outputCleanup.debug, + promptExecution: promptExecutionSnapshot, + }, { + merge: true, + }); + return outputCleanup.cleanedText || null; } catch (e) { console.error("[ST-BME] LLM 调用失败:", e); return null; diff --git a/panel.js b/panel.js index cdc6f5e..fdbfa24 100644 --- a/panel.js +++ b/panel.js @@ -20,6 +20,8 @@ import { getLegacyPromptFieldForTask, getTaskTypeOptions, importTaskProfile as parseImportedTaskProfile, + isTaskRegexStageEnabled, + normalizeTaskRegexStages, restoreDefaultTaskProfile, setActiveTaskProfileId, upsertTaskProfile, @@ -143,8 +145,46 @@ const TASK_PROFILE_GENERATION_GROUPS = [ ]; const TASK_PROFILE_REGEX_STAGES = [ - { key: "input", label: "输入阶段", desc: "对发送给 LLM 的 prompt 执行正则替换。" }, - { key: "output", label: "输出阶段", desc: "对 LLM 返回的结果执行正则替换。" }, + { + key: "input", + label: "输入总开关", + desc: "控制全部输入阶段;未单独覆写的细分阶段会跟随它。", + }, + { + key: "input.userMessage", + label: "输入: 用户消息", + desc: "处理当前 userMessage。", + }, + { + key: "input.recentMessages", + label: "输入: 最近上下文", + desc: "处理 recentMessages、chatMessages、dialogueText。", + }, + { + key: "input.candidateText", + label: "输入: 候选与摘要", + desc: "处理 candidateText、candidateNodes、nodeContent 和各类摘要。", + }, + { + key: "input.finalPrompt", + label: "输入: 发送前最终消息", + desc: "在最终 messages 全部组装完成、真正发送给 LLM 前统一清洗。", + }, + { + key: "output", + label: "输出总开关", + desc: "控制全部输出阶段;未单独覆写的细分阶段会跟随它。", + }, + { + key: "output.rawResponse", + label: "输出: 原始响应", + desc: "LLM 原始文本到手后先清洗一次。", + }, + { + key: "output.beforeParse", + label: "输出: 解析前", + desc: "在 JSON 提取/解析前再清洗一次。", + }, ]; let panelEl = null; @@ -3342,6 +3382,7 @@ function _renderTaskGenerationTab(state) { function _renderTaskRegexTab(state) { const regex = state.profile.regex || {}; + const normalizedStages = normalizeTaskRegexStages(regex.stages || {}); return `
@@ -3415,14 +3456,14 @@ function _renderTaskRegexTab(state) { ${_escHtml(stage.label)} ${_escHtml(stage.desc)} - - - `, - ).join("")} + + + `, + ).join("")}
@@ -3754,8 +3795,13 @@ function _renderTaskDebugLlmCard(taskType, llmRequest) { 输出清洗 ${_escHtml(llmRequest.responseCleaning?.applied ? "已生效" : "未生效")} +
+ 发送前输入清洗 + ${_escHtml(llmRequest.requestCleaning?.applied ? "已生效" : "未生效")} +
${_renderDebugDetails("提示词执行摘要", llmRequest.promptExecution || null)} + ${_renderDebugDetails("发送前输入清洗", llmRequest.requestCleaning || null)} ${_renderDebugDetails("实际请求路径", llmRequest.effectiveRoute || null)} ${_renderDebugDetails("输出清洗", llmRequest.responseCleaning || null)} ${_renderDebugDetails("实际保留参数", llmRequest.filteredGeneration || {})} @@ -4623,7 +4669,7 @@ function _normalizeTaskProfileDraft(profile = {}) { stages: { input: true, output: true, - ...(draft.regex?.stages || {}), + ...normalizeTaskRegexStages(draft.regex?.stages || {}), }, localRules: Array.isArray(draft.regex?.localRules) ? draft.regex.localRules.map((rule) => ({ diff --git a/prompt-builder.js b/prompt-builder.js index 0ce9c71..601f728 100644 --- a/prompt-builder.js +++ b/prompt-builder.js @@ -526,7 +526,7 @@ function sanitizePromptMessages( messages = [], { blockedContents = [], - regexStage = "input.finalPrompt", + regexStage = "", debugState = null, regexCollector = null, } = {}, @@ -646,7 +646,7 @@ function sanitizeWorldInfoEntries( { mode: "aggressive", blockedContents, - regexStage: "input.finalPrompt", + regexStage: "", role: entry?.role || "system", regexCollector, }, @@ -728,7 +728,7 @@ function sanitizeWorldInfoContext( { mode: "aggressive", blockedContents: runtimeBlockedContents, - regexStage: "input.finalPrompt", + regexStage: "", role: message?.role || "system", regexCollector, }, @@ -1107,7 +1107,7 @@ export async function buildTaskPrompt(settings = {}, taskType, context = {}) { { mode: "final-safe", blockedContents: worldInfoRuntimeBlockedContents, - regexStage: "input.finalPrompt", + regexStage: "", role, regexCollector: promptRegexInput, }, diff --git a/prompt-profiles.js b/prompt-profiles.js index ffdac17..9369151 100644 --- a/prompt-profiles.js +++ b/prompt-profiles.js @@ -569,6 +569,88 @@ function normalizeRegexLocalRule(rule = {}, taskType = "task", index = 0) { }; } +const TASK_REGEX_STAGE_ALIAS_MAP = Object.freeze({ + finalPrompt: "input.finalPrompt", + rawResponse: "output.rawResponse", + beforeParse: "output.beforeParse", +}); + +const TASK_REGEX_STAGE_GROUPS = Object.freeze({ + input: Object.freeze([ + "input.userMessage", + "input.recentMessages", + "input.candidateText", + "input.finalPrompt", + ]), + output: Object.freeze([ + "output.rawResponse", + "output.beforeParse", + ]), +}); + +function normalizeRegexStageKey(stageKey = "") { + const normalized = String(stageKey || "").trim(); + return TASK_REGEX_STAGE_ALIAS_MAP[normalized] || normalized; +} + +export function normalizeTaskRegexStages(stages = {}) { + const source = + stages && typeof stages === "object" && !Array.isArray(stages) ? stages : {}; + const normalized = { ...source }; + + for (const [legacyKey, canonicalKey] of Object.entries( + TASK_REGEX_STAGE_ALIAS_MAP, + )) { + if ( + !Object.prototype.hasOwnProperty.call(normalized, canonicalKey) && + Object.prototype.hasOwnProperty.call(normalized, legacyKey) + ) { + normalized[canonicalKey] = Boolean(normalized[legacyKey]); + } + delete normalized[legacyKey]; + } + + for (const [groupKey, stageKeys] of Object.entries(TASK_REGEX_STAGE_GROUPS)) { + if (normalized[groupKey] === false) { + continue; + } + const allSpecificStagesFalse = + stageKeys.length > 0 && + stageKeys.every((stageKey) => normalized[stageKey] === false); + if (!allSpecificStagesFalse) { + continue; + } + for (const stageKey of stageKeys) { + delete normalized[stageKey]; + } + } + + return normalized; +} + +export function isTaskRegexStageEnabled(stages = {}, stageKey = "") { + const normalizedStages = normalizeTaskRegexStages(stages); + const normalizedStageKey = normalizeRegexStageKey(stageKey); + + if (!normalizedStageKey) { + return normalizedStages.input !== false; + } + + if (Object.prototype.hasOwnProperty.call(normalizedStages, normalizedStageKey)) { + return normalizedStages[normalizedStageKey] !== false; + } + + if (normalizedStageKey.startsWith("input.")) { + return normalizedStages.input !== false; + } + + if (normalizedStageKey.startsWith("output.")) { + return normalizedStages.output !== false; + } + + return normalizedStages[normalizedStageKey] !== false; +} + function normalizeTaskProfilesState(taskProfiles = {}) { return ensureTaskProfiles({ taskProfiles }); } @@ -741,7 +823,7 @@ function createFallbackDefaultTaskProfile(taskType) { preset: true, character: true, }, - stages: { + stages: normalizeTaskRegexStages({ finalPrompt: true, "input.userMessage": false, "input.recentMessages": false, @@ -751,7 +833,7 @@ function createFallbackDefaultTaskProfile(taskType) { beforeParse: false, "output.rawResponse": false, "output.beforeParse": false, - }, + }), localRules: [], }, metadata: { @@ -799,10 +881,10 @@ export function createDefaultTaskProfile(taskType) { ...fallback.regex.sources, ...(template?.regex?.sources || {}), }, - stages: { + stages: normalizeTaskRegexStages({ ...fallback.regex.stages, ...(template?.regex?.stages || {}), - }, + }), localRules: Array.isArray(template?.regex?.localRules) ? template.regex.localRules.map((rule, index) => normalizeRegexLocalRule(rule, taskType, index), @@ -978,10 +1060,10 @@ export function normalizeTaskProfile(taskType, profile = {}, settings = {}) { ...base.regex.sources, ...(profile?.regex?.sources || {}), }, - stages: { + stages: normalizeTaskRegexStages({ ...base.regex.stages, ...(profile?.regex?.stages || {}), - }, + }), localRules: Array.isArray(profile?.regex?.localRules) ? profile.regex.localRules.map((rule, index) => normalizeRegexLocalRule(rule, taskType, index), diff --git a/task-regex.js b/task-regex.js index 92de906..8dbe9db 100644 --- a/task-regex.js +++ b/task-regex.js @@ -4,7 +4,11 @@ import { extension_settings, getContext } from "../../../extensions.js"; import { getHostAdapter } from "./host-adapter/index.js"; -import { getActiveTaskProfile } from "./prompt-profiles.js"; +import { + getActiveTaskProfile, + isTaskRegexStageEnabled, + normalizeTaskRegexStages, +} from "./prompt-profiles.js"; const HTML_TAG_PATTERN = /<\/?(?:div|span|p|br|hr|img|details|summary|section|article|aside|header|footer|nav|ul|ol|li|table|tr|td|th|h[1-6]|a|em|strong|blockquote|pre|code|svg|path)\b/i; @@ -320,26 +324,19 @@ function collectLocalRules(regexConfig = {}) { function shouldApplyRuleForStage(rule, stage = "", stagesConfig = {}) { const normalizedStage = String(stage || "").trim(); - if ( - normalizedStage && - Object.prototype.hasOwnProperty.call(stagesConfig, normalizedStage) - ) { - return ( - stagesConfig[normalizedStage] !== false && - rule.destinationFlags.prompt !== false - ); + if (rule.destinationFlags.prompt === false) { + return false; } - if (PROMPT_STAGES.has(normalizedStage)) { - return ( - stagesConfig.input !== false && rule.destinationFlags.prompt !== false - ); + + if (!normalizedStage) { + return isTaskRegexStageEnabled(stagesConfig, "input"); } - if (OUTPUT_STAGES.has(normalizedStage)) { - return ( - stagesConfig.output !== false && rule.destinationFlags.prompt !== false - ); + + if (PROMPT_STAGES.has(normalizedStage) || OUTPUT_STAGES.has(normalizedStage)) { + return isTaskRegexStageEnabled(stagesConfig, normalizedStage); } - return stagesConfig.input !== false && rule.destinationFlags.prompt !== false; + + return isTaskRegexStageEnabled(stagesConfig, normalizedStage); } function shouldApplyRuleForRole(rule, role = "system") { @@ -398,7 +395,7 @@ export function applyTaskRegex( } // 阶段检查已移到 shouldApplyRuleForStage 中,无需单独 gate - const stagesConfig = regexConfig?.stages || {}; + const stagesConfig = normalizeTaskRegexStages(regexConfig?.stages || {}); const tavernRules = collectTavernRules(regexConfig); const localRules = collectLocalRules(regexConfig); diff --git a/tests/prompt-builder-mvu.mjs b/tests/prompt-builder-mvu.mjs index 5e5eb40..40c4063 100644 --- a/tests/prompt-builder-mvu.mjs +++ b/tests/prompt-builder-mvu.mjs @@ -254,7 +254,8 @@ try { assert.match(promptBuild.systemPrompt, /GOOD_RECENT/); assert.match(JSON.stringify(promptBuild.executionMessages), /GOOD_CANDIDATE/); - assert.match(promptBuild.systemPrompt, /FINAL_GOOD/); + assert.match(promptBuild.systemPrompt, /FINAL_BAD/); + assert.doesNotMatch(promptBuild.systemPrompt, /FINAL_GOOD/); assert.equal( promptBuild.debug.mvu.sanitizedFields.some((entry) => entry.name === "userMessage"), true, @@ -454,6 +455,8 @@ try { const payload = buildTaskLlmPayload(promptBuild, "unused fallback"); assert.equal(payload.systemPrompt, ""); + assert.match(JSON.stringify(payload.promptMessages), /FINAL_BAD/); + assert.doesNotMatch(JSON.stringify(payload.promptMessages), /FINAL_GOOD/); const result = await llm.callLLMForJSON({ systemPrompt: payload.systemPrompt, userPrompt: payload.userPrompt, @@ -466,6 +469,8 @@ try { assert.deepEqual(result, { ok: true }); assert.equal(capturedBodies.length, 1); + assert.match(JSON.stringify(capturedBodies[0].messages), /FINAL_GOOD/); + assert.doesNotMatch(JSON.stringify(capturedBodies[0].messages), /FINAL_BAD/); assert.doesNotMatch( JSON.stringify(capturedBodies[0].messages), /status_current_variable|updatevariable|StatusPlaceHolderImpl|stat_data|display_data|delta_data|get_message_variable/i, @@ -478,6 +483,18 @@ try { assert.ok(runtimePromptBuild); assert.ok(runtimeLlmRequest); + assert.match(JSON.stringify(runtimeLlmRequest.messages), /FINAL_GOOD/); + assert.equal(runtimeLlmRequest.requestCleaning?.applied, true); + assert.equal( + runtimeLlmRequest.requestCleaning?.stages?.length > 0, + true, + ); + assert.equal( + runtimeLlmRequest.requestCleaning?.stages?.every( + (entry) => entry.stage === "input.finalPrompt", + ), + true, + ); assert.doesNotMatch( JSON.stringify(runtimePromptBuild.executionMessages), /status_current_variable|updatevariable|StatusPlaceHolderImpl|stat_data|display_data|delta_data|get_message_variable/i, diff --git a/tests/task-regex.mjs b/tests/task-regex.mjs index b236281..ecb1ed8 100644 --- a/tests/task-regex.mjs +++ b/tests/task-regex.mjs @@ -365,6 +365,79 @@ try { assert.equal(exactStageResult, "JSON"); assert.deepEqual(exactStageDebug.entries[0].appliedRules, []); + const legacyStageCompatibilitySettings = { + taskProfilesVersion: 1, + taskProfiles: { + extract: { + activeProfileId: "legacy-stage-compat", + profiles: [ + { + id: "legacy-stage-compat", + taskType: "extract", + regex: { + enabled: true, + inheritStRegex: false, + sources: { + global: false, + preset: false, + character: false, + }, + stages: { + input: true, + output: true, + "input.userMessage": false, + "input.recentMessages": false, + "input.candidateText": false, + "input.finalPrompt": false, + "output.rawResponse": false, + "output.beforeParse": false, + }, + localRules: [ + createRule("legacy-input-user", "/Alpha/g", "A1"), + createRule("legacy-output-raw", "/Omega/g", "O1", { + source: { + user_input: false, + ai_output: true, + }, + }), + ], + }, + }, + ], + }, + }, + }; + + const legacyStageInputDebug = { entries: [] }; + const legacyStageInputResult = applyTaskRegex( + legacyStageCompatibilitySettings, + "extract", + "input.userMessage", + "Alpha", + legacyStageInputDebug, + "user", + ); + assert.equal(legacyStageInputResult, "A1"); + assert.deepEqual( + legacyStageInputDebug.entries[0].appliedRules.map((item) => item.id), + ["legacy-input-user"], + ); + + const legacyStageOutputDebug = { entries: [] }; + const legacyStageOutputResult = applyTaskRegex( + legacyStageCompatibilitySettings, + "extract", + "output.rawResponse", + "Omega", + legacyStageOutputDebug, + "assistant", + ); + assert.equal(legacyStageOutputResult, "O1"); + assert.deepEqual( + legacyStageOutputDebug.entries[0].appliedRules.map((item) => item.id), + ["legacy-output-raw"], + ); + console.log("task-regex tests passed"); } finally { if (originalSillyTavern === undefined) {