From 5cc33fabda32667d5d5eff541adf46cd62c39adf Mon Sep 17 00:00:00 2001
From: Youzini-afk <13153778771cx@gmail.com>
Date: Sun, 5 Apr 2026 00:30:54 +0800
Subject: [PATCH] Fix task regex final input pipeline
---
llm.js | 166 ++++++++++++++++++++++++++++++++---
panel.js | 68 +++++++++++---
prompt-builder.js | 8 +-
prompt-profiles.js | 94 ++++++++++++++++++--
task-regex.js | 35 ++++----
tests/prompt-builder-mvu.mjs | 19 +++-
tests/task-regex.mjs | 73 +++++++++++++++
7 files changed, 412 insertions(+), 51 deletions(-)
diff --git a/llm.js b/llm.js
index 2931f8d..0c8f406 100644
--- a/llm.js
+++ b/llm.js
@@ -215,6 +215,104 @@ function applyTaskOutputRegexStages(taskType, text) {
};
}
+function applyTaskFinalInputRegex(taskType, messages = []) { // Runs the "input.finalPrompt" regex stage over the assembled messages; returns { messages, debug }.
+ const normalizedMessages = (Array.isArray(messages) ? messages : []) // non-array input is treated as an empty list
+ .map((message) => {
+ if (!message || typeof message !== "object") { // entries that are not objects are skipped
+ return null;
+ }
+ const role = String(message.role || "").trim().toLowerCase();
+ if (!["system", "user", "assistant"].includes(role)) { // only these three roles are kept
+ return null;
+ }
+ return {
+ ...message,
+ role,
+ content: String(message.content || ""), // falsy content becomes ""
+ };
+ })
+ .filter(Boolean); // removes the nulls produced for skipped entries
+ const normalizedTaskType = String(taskType || "").trim();
+
+ if (!normalizedTaskType || normalizedMessages.length === 0) { // no task type or no messages: skip regex, only drop blank messages
+ const cleanedMessages = normalizedMessages.filter((message) =>
+ String(message.content || "").trim(),
+ );
+ return {
+ messages: cleanedMessages,
+ debug: {
+ stage: "input.finalPrompt",
+ changed: cleanedMessages.length !== normalizedMessages.length,
+ applied: false, // applyTaskRegex is never called on this path
+ rawMessageCount: normalizedMessages.length,
+ cleanedMessageCount: cleanedMessages.length,
+ droppedMessageCount: normalizedMessages.length - cleanedMessages.length,
+ stages: [],
+ },
+ };
+ }
+
+ const settings = extension_settings[MODULE_NAME] || {};
+ const regexDebug = { entries: [] }; // collector handed to every applyTaskRegex call below
+ let changed = false;
+ let droppedMessageCount = 0;
+ const cleanedMessages = normalizedMessages
+ .map((message) => {
+ const originalContent = String(message.content || "");
+ const cleanedContent = applyTaskRegex( // cleans this message's content for its role at the "input.finalPrompt" stage
+ settings,
+ normalizedTaskType,
+ "input.finalPrompt",
+ originalContent,
+ regexDebug,
+ message.role,
+ );
+ if (cleanedContent !== originalContent) {
+ changed = true;
+ }
+ if (!String(cleanedContent || "").trim()) { // blank after cleaning: drop the message and count it
+ droppedMessageCount += 1;
+ return null;
+ }
+ return {
+ ...message,
+ content: cleanedContent,
+ };
+ })
+ .filter(Boolean);
+ const normalizedEntries = normalizeRegexDebugEntries(regexDebug);
+ const applied = normalizedEntries.some( // true when any debug entry lists at least one applied rule
+ (entry) => entry.appliedRules.length > 0,
+ );
+
+ return {
+ messages: cleanedMessages,
+ debug: {
+ stage: "input.finalPrompt",
+ changed: changed || droppedMessageCount > 0, // dropping a message also counts as a change
+ applied,
+ rawMessageCount: normalizedMessages.length,
+ cleanedMessageCount: cleanedMessages.length,
+ droppedMessageCount,
+ stages: normalizedEntries,
+ },
+ };
+}
+
+function attachRequestCleaningToPromptExecution( // Returns the prompt-execution summary passed through cloneRuntimeDebugValue, with `requestCleaning` added.
+ promptExecutionSummary,
+ requestCleaning,
+) {
+ const base =
+ promptExecutionSummary && typeof promptExecutionSummary === "object"
+ ? cloneRuntimeDebugValue(promptExecutionSummary, {})
+ : {}; // a missing or non-object summary starts from an empty object
+ if (requestCleaning && typeof requestCleaning === "object") { // attach only when cleaning debug info was provided
+ base.requestCleaning = cloneRuntimeDebugValue(requestCleaning, null);
+ }
+ return base;
+}
+
function buildEffectiveLlmRoute(
hasDedicatedConfig,
privateRequestSource,
@@ -1477,7 +1575,7 @@ export async function callLLMForJSON({
for (let attempt = 0; attempt <= maxRetries; attempt++) {
try {
- const messages = buildJsonAttemptMessages(
+ const assembledMessages = buildJsonAttemptMessages(
systemPrompt,
userPrompt,
attempt,
@@ -1485,7 +1583,25 @@ export async function callLLMForJSON({
additionalMessages,
promptMessages,
);
- const response = await callDedicatedOpenAICompatible(messages, {
+ const requestCleaning = applyTaskFinalInputRegex(
+ taskType,
+ assembledMessages,
+ );
+ const promptExecutionSnapshot = attachRequestCleaningToPromptExecution(
+ promptExecutionSummary,
+ requestCleaning.debug,
+ );
+ recordTaskLlmRequest(
+ taskType || privateRequestSource,
+ {
+ requestCleaning: requestCleaning.debug,
+ promptExecution: promptExecutionSnapshot,
+ },
+ {
+ merge: true,
+ },
+ );
+ const response = await callDedicatedOpenAICompatible(requestCleaning.messages, {
signal,
jsonMode: true,
taskType,
@@ -1500,8 +1616,9 @@ export async function callLLMForJSON({
recordTaskLlmRequest(
taskType || privateRequestSource,
{
+ requestCleaning: requestCleaning.debug,
responseCleaning: outputCleanup.debug,
- promptExecution: promptExecutionSummary,
+ promptExecution: promptExecutionSnapshot,
},
{
merge: true,
@@ -1592,19 +1709,48 @@ export async function callLLM(systemPrompt, userPrompt, options = {}) {
return await override(systemPrompt, userPrompt, options);
}
- const messages = [
+ const taskType = String(options.taskType || "").trim();
+ const privateRequestSource = resolvePrivateRequestSource(
+ taskType,
+ options.requestSource || options.source || "diagnostic:call-llm",
+ { allowAnonymous: true },
+ );
+ const promptExecutionSummary = buildPromptExecutionSummary(
+ options.debugContext || null,
+ );
+ const assembledMessages = [
{ role: "system", content: systemPrompt },
{ role: "user", content: userPrompt },
];
+ const requestCleaning = applyTaskFinalInputRegex(taskType, assembledMessages);
+ const promptExecutionSnapshot = attachRequestCleaningToPromptExecution(
+ promptExecutionSummary,
+ requestCleaning.debug,
+ );
try {
- const response = await callDedicatedOpenAICompatible(messages, {
- signal: options.signal,
- taskType: options.taskType || "",
- requestSource:
- options.requestSource || options.source || "diagnostic:call-llm",
+ recordTaskLlmRequest(taskType || privateRequestSource, {
+ requestCleaning: requestCleaning.debug,
+ promptExecution: promptExecutionSnapshot,
+ }, {
+ merge: true,
});
- return response?.content || null;
+ const response = await callDedicatedOpenAICompatible(requestCleaning.messages, {
+ signal: options.signal,
+ taskType,
+ requestSource: privateRequestSource,
+ });
+ const responseText =
+ typeof response?.content === "string" ? response.content : "";
+ const outputCleanup = applyTaskOutputRegexStages(taskType, responseText);
+ recordTaskLlmRequest(taskType || privateRequestSource, {
+ requestCleaning: requestCleaning.debug,
+ responseCleaning: outputCleanup.debug,
+ promptExecution: promptExecutionSnapshot,
+ }, {
+ merge: true,
+ });
+ return outputCleanup.cleanedText || null;
} catch (e) {
console.error("[ST-BME] LLM 调用失败:", e);
return null;
diff --git a/panel.js b/panel.js
index cdc6f5e..fdbfa24 100644
--- a/panel.js
+++ b/panel.js
@@ -20,6 +20,8 @@ import {
getLegacyPromptFieldForTask,
getTaskTypeOptions,
importTaskProfile as parseImportedTaskProfile,
+ isTaskRegexStageEnabled,
+ normalizeTaskRegexStages,
restoreDefaultTaskProfile,
setActiveTaskProfileId,
upsertTaskProfile,
@@ -143,8 +145,46 @@ const TASK_PROFILE_GENERATION_GROUPS = [
];
const TASK_PROFILE_REGEX_STAGES = [
- { key: "input", label: "输入阶段", desc: "对发送给 LLM 的 prompt 执行正则替换。" },
- { key: "output", label: "输出阶段", desc: "对 LLM 返回的结果执行正则替换。" },
+ {
+ key: "input", // desc gloss: master switch for all input stages; stages without their own override follow it
+ label: "输入总开关",
+ desc: "控制全部输入阶段;未单独覆写的细分阶段会跟随它。",
+ },
+ {
+ key: "input.userMessage", // desc gloss: processes the current userMessage
+ label: "输入: 用户消息",
+ desc: "处理当前 userMessage。",
+ },
+ {
+ key: "input.recentMessages", // desc gloss: processes recentMessages, chatMessages, dialogueText
+ label: "输入: 最近上下文",
+ desc: "处理 recentMessages、chatMessages、dialogueText。",
+ },
+ {
+ key: "input.candidateText", // desc gloss: processes candidateText, candidateNodes, nodeContent and the various summaries
+ label: "输入: 候选与摘要",
+ desc: "处理 candidateText、candidateNodes、nodeContent 和各类摘要。",
+ },
+ {
+ key: "input.finalPrompt", // desc gloss: one cleaning pass after the final messages are assembled, right before sending to the LLM
+ label: "输入: 发送前最终消息",
+ desc: "在最终 messages 全部组装完成、真正发送给 LLM 前统一清洗。",
+ },
+ {
+ key: "output", // desc gloss: master switch for all output stages; stages without their own override follow it
+ label: "输出总开关",
+ desc: "控制全部输出阶段;未单独覆写的细分阶段会跟随它。",
+ },
+ {
+ key: "output.rawResponse", // desc gloss: first cleaning pass once the raw LLM text arrives
+ label: "输出: 原始响应",
+ desc: "LLM 原始文本到手后先清洗一次。",
+ },
+ {
+ key: "output.beforeParse", // desc gloss: another cleaning pass before JSON extraction/parsing
+ label: "输出: 解析前",
+ desc: "在 JSON 提取/解析前再清洗一次。",
+ },
];
let panelEl = null;
@@ -3342,6 +3382,7 @@ function _renderTaskGenerationTab(state) {
function _renderTaskRegexTab(state) {
const regex = state.profile.regex || {};
+ const normalizedStages = normalizeTaskRegexStages(regex.stages || {});
return `
@@ -3754,8 +3795,13 @@ function _renderTaskDebugLlmCard(taskType, llmRequest) {
输出清洗
${_escHtml(llmRequest.responseCleaning?.applied ? "已生效" : "未生效")}
+
+ 发送前输入清洗
+ ${_escHtml(llmRequest.requestCleaning?.applied ? "已生效" : "未生效")}
+
${_renderDebugDetails("提示词执行摘要", llmRequest.promptExecution || null)}
+ ${_renderDebugDetails("发送前输入清洗", llmRequest.requestCleaning || null)}
${_renderDebugDetails("实际请求路径", llmRequest.effectiveRoute || null)}
${_renderDebugDetails("输出清洗", llmRequest.responseCleaning || null)}
${_renderDebugDetails("实际保留参数", llmRequest.filteredGeneration || {})}
@@ -4623,7 +4669,7 @@ function _normalizeTaskProfileDraft(profile = {}) {
stages: {
input: true,
output: true,
- ...(draft.regex?.stages || {}),
+ ...normalizeTaskRegexStages(draft.regex?.stages || {}),
},
localRules: Array.isArray(draft.regex?.localRules)
? draft.regex.localRules.map((rule) => ({
diff --git a/prompt-builder.js b/prompt-builder.js
index 0ce9c71..601f728 100644
--- a/prompt-builder.js
+++ b/prompt-builder.js
@@ -526,7 +526,7 @@ function sanitizePromptMessages(
messages = [],
{
blockedContents = [],
- regexStage = "input.finalPrompt",
+ regexStage = "",
debugState = null,
regexCollector = null,
} = {},
@@ -646,7 +646,7 @@ function sanitizeWorldInfoEntries(
{
mode: "aggressive",
blockedContents,
- regexStage: "input.finalPrompt",
+ regexStage: "",
role: entry?.role || "system",
regexCollector,
},
@@ -728,7 +728,7 @@ function sanitizeWorldInfoContext(
{
mode: "aggressive",
blockedContents: runtimeBlockedContents,
- regexStage: "input.finalPrompt",
+ regexStage: "",
role: message?.role || "system",
regexCollector,
},
@@ -1107,7 +1107,7 @@ export async function buildTaskPrompt(settings = {}, taskType, context = {}) {
{
mode: "final-safe",
blockedContents: worldInfoRuntimeBlockedContents,
- regexStage: "input.finalPrompt",
+ regexStage: "",
role,
regexCollector: promptRegexInput,
},
diff --git a/prompt-profiles.js b/prompt-profiles.js
index ffdac17..9369151 100644
--- a/prompt-profiles.js
+++ b/prompt-profiles.js
@@ -569,6 +569,88 @@ function normalizeRegexLocalRule(rule = {}, taskType = "task", index = 0) {
};
}
+const TASK_REGEX_STAGE_ALIAS_MAP = Object.freeze({ // un-prefixed (legacy) stage keys -> canonical "input.*" / "output.*" keys
+ finalPrompt: "input.finalPrompt",
+ rawResponse: "output.rawResponse",
+ beforeParse: "output.beforeParse",
+});
+
+const TASK_REGEX_STAGE_GROUPS = Object.freeze({ // group switch key -> the specific stage keys that belong to that group
+ input: Object.freeze([
+ "input.userMessage",
+ "input.recentMessages",
+ "input.candidateText",
+ "input.finalPrompt",
+ ]),
+ output: Object.freeze([
+ "output.rawResponse",
+ "output.beforeParse",
+ ]),
+});
+
+function normalizeRegexStageKey(stageKey = "") { // Trims a stage key and resolves an alias to its canonical key.
+ const normalized = String(stageKey || "").trim();
+ return TASK_REGEX_STAGE_ALIAS_MAP[normalized] || normalized; // keys without an alias pass through unchanged
+}
+
+export function normalizeTaskRegexStages(stages = {}) { // Returns a shallow copy of the stage flags with alias keys migrated and all-false groups reset.
+ const source =
+ stages && typeof stages === "object" && !Array.isArray(stages) ? stages : {}; // non-object or array input is treated as {}
+ const normalized = { ...source };
+
+ for (const [legacyKey, canonicalKey] of Object.entries(
+ TASK_REGEX_STAGE_ALIAS_MAP,
+ )) {
+ if ( // copy the alias value (coerced to boolean) only when the canonical key is not already set
+ !Object.prototype.hasOwnProperty.call(normalized, canonicalKey) &&
+ Object.prototype.hasOwnProperty.call(normalized, legacyKey)
+ ) {
+ normalized[canonicalKey] = Boolean(normalized[legacyKey]);
+ }
+ delete normalized[legacyKey]; // alias keys never survive normalization
+ }
+
+ for (const [groupKey, stageKeys] of Object.entries(TASK_REGEX_STAGE_GROUPS)) {
+ if (normalized[groupKey] === false) { // group explicitly disabled: leave its specific flags untouched
+ continue;
+ }
+ const allSpecificStagesFalse = // true only when every specific stage of the group is literally false
+ stageKeys.length > 0 &&
+ stageKeys.every((stageKey) => normalized[stageKey] === false);
+ if (!allSpecificStagesFalse) { // at least one stage is true or unset: keep the explicit flags
+ continue;
+ }
+ for (const stageKey of stageKeys) {
+ delete normalized[stageKey]; // removed so the stage is answered by the group switch — presumably for profiles saved with older all-false defaults (TODO confirm)
+ }
+ }
+
+ return normalized;
+}
+
+export function isTaskRegexStageEnabled(stages = {}, stageKey = "") { // Reports whether a stage is enabled: its own flag wins, otherwise the "input" / "output" group flag decides.
+ const normalizedStages = normalizeTaskRegexStages(stages);
+ const normalizedStageKey = normalizeRegexStageKey(stageKey);
+
+ if (!normalizedStageKey) { // an empty stage key is answered by the "input" switch
+ return normalizedStages.input !== false;
+ }
+
+ if (Object.prototype.hasOwnProperty.call(normalizedStages, normalizedStageKey)) { // an explicit flag for this exact key takes precedence
+ return normalizedStages[normalizedStageKey] !== false; // anything other than literal false counts as enabled
+ }
+
+ if (normalizedStageKey.startsWith("input.")) {
+ return normalizedStages.input !== false;
+ }
+
+ if (normalizedStageKey.startsWith("output.")) {
+ return normalizedStages.output !== false;
+ }
+
+ return normalizedStages[normalizedStageKey] !== false; // no own flag and no group prefix: evaluates to enabled
+}
+
function normalizeTaskProfilesState(taskProfiles = {}) {
return ensureTaskProfiles({ taskProfiles });
}
@@ -741,7 +823,7 @@ function createFallbackDefaultTaskProfile(taskType) {
preset: true,
character: true,
},
- stages: {
+ stages: normalizeTaskRegexStages({
finalPrompt: true,
"input.userMessage": false,
"input.recentMessages": false,
@@ -751,7 +833,7 @@ function createFallbackDefaultTaskProfile(taskType) {
beforeParse: false,
"output.rawResponse": false,
"output.beforeParse": false,
- },
+ }),
localRules: [],
},
metadata: {
@@ -799,10 +881,10 @@ export function createDefaultTaskProfile(taskType) {
...fallback.regex.sources,
...(template?.regex?.sources || {}),
},
- stages: {
+ stages: normalizeTaskRegexStages({
...fallback.regex.stages,
...(template?.regex?.stages || {}),
- },
+ }),
localRules: Array.isArray(template?.regex?.localRules)
? template.regex.localRules.map((rule, index) =>
normalizeRegexLocalRule(rule, taskType, index),
@@ -978,10 +1060,10 @@ export function normalizeTaskProfile(taskType, profile = {}, settings = {}) {
...base.regex.sources,
...(profile?.regex?.sources || {}),
},
- stages: {
+ stages: normalizeTaskRegexStages({
...base.regex.stages,
...(profile?.regex?.stages || {}),
- },
+ }),
localRules: Array.isArray(profile?.regex?.localRules)
? profile.regex.localRules.map((rule, index) =>
normalizeRegexLocalRule(rule, taskType, index),
diff --git a/task-regex.js b/task-regex.js
index 92de906..8dbe9db 100644
--- a/task-regex.js
+++ b/task-regex.js
@@ -4,7 +4,11 @@
import { extension_settings, getContext } from "../../../extensions.js";
import { getHostAdapter } from "./host-adapter/index.js";
-import { getActiveTaskProfile } from "./prompt-profiles.js";
+import {
+ getActiveTaskProfile,
+ isTaskRegexStageEnabled,
+ normalizeTaskRegexStages,
+} from "./prompt-profiles.js";
const HTML_TAG_PATTERN =
/<\/?(?:div|span|p|br|hr|img|details|summary|section|article|aside|header|footer|nav|ul|ol|li|table|tr|td|th|h[1-6]|a|em|strong|blockquote|pre|code|svg|path)\b/i;
@@ -320,26 +324,19 @@ function collectLocalRules(regexConfig = {}) {
function shouldApplyRuleForStage(rule, stage = "", stagesConfig = {}) {
const normalizedStage = String(stage || "").trim();
- if (
- normalizedStage &&
- Object.prototype.hasOwnProperty.call(stagesConfig, normalizedStage)
- ) {
- return (
- stagesConfig[normalizedStage] !== false &&
- rule.destinationFlags.prompt !== false
- );
+ if (rule.destinationFlags.prompt === false) { // a rule whose prompt destination flag is false never applies
+ return false;
}
- if (PROMPT_STAGES.has(normalizedStage)) {
- return (
- stagesConfig.input !== false && rule.destinationFlags.prompt !== false
- );
+
+ if (!normalizedStage) { // no stage given: decided by the "input" switch
+ return isTaskRegexStageEnabled(stagesConfig, "input");
}
- if (OUTPUT_STAGES.has(normalizedStage)) {
- return (
- stagesConfig.output !== false && rule.destinationFlags.prompt !== false
- );
+
+ if (PROMPT_STAGES.has(normalizedStage) || OUTPUT_STAGES.has(normalizedStage)) { // known stage; the fallback return below makes the identical call
+ return isTaskRegexStageEnabled(stagesConfig, normalizedStage);
}
- return stagesConfig.input !== false && rule.destinationFlags.prompt !== false;
+
+ return isTaskRegexStageEnabled(stagesConfig, normalizedStage); // stage not in either set: same lookup
}
function shouldApplyRuleForRole(rule, role = "system") {
@@ -398,7 +395,7 @@ export function applyTaskRegex(
}
// 阶段检查已移到 shouldApplyRuleForStage 中,无需单独 gate
- const stagesConfig = regexConfig?.stages || {};
+ const stagesConfig = normalizeTaskRegexStages(regexConfig?.stages || {});
const tavernRules = collectTavernRules(regexConfig);
const localRules = collectLocalRules(regexConfig);
diff --git a/tests/prompt-builder-mvu.mjs b/tests/prompt-builder-mvu.mjs
index 5e5eb40..40c4063 100644
--- a/tests/prompt-builder-mvu.mjs
+++ b/tests/prompt-builder-mvu.mjs
@@ -254,7 +254,8 @@ try {
assert.match(promptBuild.systemPrompt, /GOOD_RECENT/);
assert.match(JSON.stringify(promptBuild.executionMessages), /GOOD_CANDIDATE/);
- assert.match(promptBuild.systemPrompt, /FINAL_GOOD/);
+ assert.match(promptBuild.systemPrompt, /FINAL_BAD/);
+ assert.doesNotMatch(promptBuild.systemPrompt, /FINAL_GOOD/);
assert.equal(
promptBuild.debug.mvu.sanitizedFields.some((entry) => entry.name === "userMessage"),
true,
@@ -454,6 +455,8 @@ try {
const payload = buildTaskLlmPayload(promptBuild, "unused fallback");
assert.equal(payload.systemPrompt, "");
+ assert.match(JSON.stringify(payload.promptMessages), /FINAL_BAD/);
+ assert.doesNotMatch(JSON.stringify(payload.promptMessages), /FINAL_GOOD/);
const result = await llm.callLLMForJSON({
systemPrompt: payload.systemPrompt,
userPrompt: payload.userPrompt,
@@ -466,6 +469,8 @@ try {
assert.deepEqual(result, { ok: true });
assert.equal(capturedBodies.length, 1);
+ assert.match(JSON.stringify(capturedBodies[0].messages), /FINAL_GOOD/);
+ assert.doesNotMatch(JSON.stringify(capturedBodies[0].messages), /FINAL_BAD/);
assert.doesNotMatch(
JSON.stringify(capturedBodies[0].messages),
/status_current_variable|updatevariable|StatusPlaceHolderImpl|stat_data|display_data|delta_data|get_message_variable/i,
@@ -478,6 +483,18 @@ try {
assert.ok(runtimePromptBuild);
assert.ok(runtimeLlmRequest);
+ assert.match(JSON.stringify(runtimeLlmRequest.messages), /FINAL_GOOD/);
+ assert.equal(runtimeLlmRequest.requestCleaning?.applied, true);
+ assert.equal(
+ runtimeLlmRequest.requestCleaning?.stages?.length > 0,
+ true,
+ );
+ assert.equal(
+ runtimeLlmRequest.requestCleaning?.stages?.every(
+ (entry) => entry.stage === "input.finalPrompt",
+ ),
+ true,
+ );
assert.doesNotMatch(
JSON.stringify(runtimePromptBuild.executionMessages),
/status_current_variable|updatevariable|StatusPlaceHolderImpl|stat_data|display_data|delta_data|get_message_variable/i,
diff --git a/tests/task-regex.mjs b/tests/task-regex.mjs
index b236281..ecb1ed8 100644
--- a/tests/task-regex.mjs
+++ b/tests/task-regex.mjs
@@ -365,6 +365,79 @@ try {
assert.equal(exactStageResult, "JSON");
assert.deepEqual(exactStageDebug.entries[0].appliedRules, []);
+ const legacyStageCompatibilitySettings = {
+ taskProfilesVersion: 1,
+ taskProfiles: {
+ extract: {
+ activeProfileId: "legacy-stage-compat",
+ profiles: [
+ {
+ id: "legacy-stage-compat",
+ taskType: "extract",
+ regex: {
+ enabled: true,
+ inheritStRegex: false,
+ sources: {
+ global: false,
+ preset: false,
+ character: false,
+ },
+ stages: {
+ input: true,
+ output: true,
+ "input.userMessage": false,
+ "input.recentMessages": false,
+ "input.candidateText": false,
+ "input.finalPrompt": false,
+ "output.rawResponse": false,
+ "output.beforeParse": false,
+ },
+ localRules: [
+ createRule("legacy-input-user", "/Alpha/g", "A1"),
+ createRule("legacy-output-raw", "/Omega/g", "O1", {
+ source: {
+ user_input: false,
+ ai_output: true,
+ },
+ }),
+ ],
+ },
+ },
+ ],
+ },
+ },
+ };
+
+ const legacyStageInputDebug = { entries: [] };
+ const legacyStageInputResult = applyTaskRegex(
+ legacyStageCompatibilitySettings,
+ "extract",
+ "input.userMessage",
+ "Alpha",
+ legacyStageInputDebug,
+ "user",
+ );
+ assert.equal(legacyStageInputResult, "A1");
+ assert.deepEqual(
+ legacyStageInputDebug.entries[0].appliedRules.map((item) => item.id),
+ ["legacy-input-user"],
+ );
+
+ const legacyStageOutputDebug = { entries: [] };
+ const legacyStageOutputResult = applyTaskRegex(
+ legacyStageCompatibilitySettings,
+ "extract",
+ "output.rawResponse",
+ "Omega",
+ legacyStageOutputDebug,
+ "assistant",
+ );
+ assert.equal(legacyStageOutputResult, "O1");
+ assert.deepEqual(
+ legacyStageOutputDebug.entries[0].appliedRules.map((item) => item.id),
+ ["legacy-output-raw"],
+ );
+
console.log("task-regex tests passed");
} finally {
if (originalSillyTavern === undefined) {