Fix task regex final input pipeline

This commit is contained in:
Youzini-afk
2026-04-05 00:30:54 +08:00
parent 1eeb6f05b5
commit 5cc33fabda
7 changed files with 412 additions and 51 deletions

166
llm.js
View File

@@ -215,6 +215,104 @@ function applyTaskOutputRegexStages(taskType, text) {
};
}
/**
 * Runs the "input.finalPrompt" regex stage over the fully assembled message
 * list right before it is sent to the LLM.
 *
 * @param {string} taskType - Task identifier used to pick the regex profile.
 * @param {Array<object>} messages - Assembled chat messages ({ role, content }).
 * @returns {{ messages: Array<object>, debug: object }} Cleaned messages plus
 *   a debug summary (counts, applied flag, per-rule stage entries).
 */
function applyTaskFinalInputRegex(taskType, messages = []) {
  const sourceList = Array.isArray(messages) ? messages : [];
  // Keep only well-formed messages with a recognized role; coerce content to string.
  const normalizedMessages = [];
  for (const entry of sourceList) {
    if (!entry || typeof entry !== "object") {
      continue;
    }
    const role = String(entry.role || "").trim().toLowerCase();
    if (role !== "system" && role !== "user" && role !== "assistant") {
      continue;
    }
    normalizedMessages.push({
      ...entry,
      role,
      content: String(entry.content || ""),
    });
  }
  const normalizedTaskType = String(taskType || "").trim();
  if (!normalizedTaskType || normalizedMessages.length === 0) {
    // No task context (or nothing to clean): only drop blank messages.
    const keptMessages = normalizedMessages.filter((entry) =>
      String(entry.content || "").trim(),
    );
    return {
      messages: keptMessages,
      debug: {
        stage: "input.finalPrompt",
        changed: keptMessages.length !== normalizedMessages.length,
        applied: false,
        rawMessageCount: normalizedMessages.length,
        cleanedMessageCount: keptMessages.length,
        droppedMessageCount: normalizedMessages.length - keptMessages.length,
        stages: [],
      },
    };
  }
  const settings = extension_settings[MODULE_NAME] || {};
  const regexDebug = { entries: [] };
  let changed = false;
  let droppedMessageCount = 0;
  const cleanedMessages = [];
  for (const entry of normalizedMessages) {
    const before = String(entry.content || "");
    const after = applyTaskRegex(
      settings,
      normalizedTaskType,
      "input.finalPrompt",
      before,
      regexDebug,
      entry.role,
    );
    if (after !== before) {
      changed = true;
    }
    // Messages emptied by the regex pass are dropped entirely.
    if (!String(after || "").trim()) {
      droppedMessageCount += 1;
      continue;
    }
    cleanedMessages.push({
      ...entry,
      content: after,
    });
  }
  const normalizedEntries = normalizeRegexDebugEntries(regexDebug);
  const applied = normalizedEntries.some(
    (entry) => entry.appliedRules.length > 0,
  );
  return {
    messages: cleanedMessages,
    debug: {
      stage: "input.finalPrompt",
      changed: changed || droppedMessageCount > 0,
      applied,
      rawMessageCount: normalizedMessages.length,
      cleanedMessageCount: cleanedMessages.length,
      droppedMessageCount,
      stages: normalizedEntries,
    },
  };
}
/**
 * Builds a prompt-execution snapshot that also carries the request-cleaning
 * debug payload, cloning both so later mutation cannot leak into the record.
 *
 * @param {object|null} promptExecutionSummary - Existing execution summary.
 * @param {object|null} requestCleaning - Debug info from the final-input regex pass.
 * @returns {object} Cloned summary, with `requestCleaning` attached when present.
 */
function attachRequestCleaningToPromptExecution(
  promptExecutionSummary,
  requestCleaning,
) {
  let snapshot = {};
  if (promptExecutionSummary && typeof promptExecutionSummary === "object") {
    snapshot = cloneRuntimeDebugValue(promptExecutionSummary, {});
  }
  const hasCleaning = requestCleaning && typeof requestCleaning === "object";
  if (hasCleaning) {
    snapshot.requestCleaning = cloneRuntimeDebugValue(requestCleaning, null);
  }
  return snapshot;
}
function buildEffectiveLlmRoute(
hasDedicatedConfig,
privateRequestSource,
@@ -1477,7 +1575,7 @@ export async function callLLMForJSON({
for (let attempt = 0; attempt <= maxRetries; attempt++) {
try {
const messages = buildJsonAttemptMessages(
const assembledMessages = buildJsonAttemptMessages(
systemPrompt,
userPrompt,
attempt,
@@ -1485,7 +1583,25 @@ export async function callLLMForJSON({
additionalMessages,
promptMessages,
);
const response = await callDedicatedOpenAICompatible(messages, {
const requestCleaning = applyTaskFinalInputRegex(
taskType,
assembledMessages,
);
const promptExecutionSnapshot = attachRequestCleaningToPromptExecution(
promptExecutionSummary,
requestCleaning.debug,
);
recordTaskLlmRequest(
taskType || privateRequestSource,
{
requestCleaning: requestCleaning.debug,
promptExecution: promptExecutionSnapshot,
},
{
merge: true,
},
);
const response = await callDedicatedOpenAICompatible(requestCleaning.messages, {
signal,
jsonMode: true,
taskType,
@@ -1500,8 +1616,9 @@ export async function callLLMForJSON({
recordTaskLlmRequest(
taskType || privateRequestSource,
{
requestCleaning: requestCleaning.debug,
responseCleaning: outputCleanup.debug,
promptExecution: promptExecutionSummary,
promptExecution: promptExecutionSnapshot,
},
{
merge: true,
@@ -1592,19 +1709,48 @@ export async function callLLM(systemPrompt, userPrompt, options = {}) {
return await override(systemPrompt, userPrompt, options);
}
const messages = [
const taskType = String(options.taskType || "").trim();
const privateRequestSource = resolvePrivateRequestSource(
taskType,
options.requestSource || options.source || "diagnostic:call-llm",
{ allowAnonymous: true },
);
const promptExecutionSummary = buildPromptExecutionSummary(
options.debugContext || null,
);
const assembledMessages = [
{ role: "system", content: systemPrompt },
{ role: "user", content: userPrompt },
];
const requestCleaning = applyTaskFinalInputRegex(taskType, assembledMessages);
const promptExecutionSnapshot = attachRequestCleaningToPromptExecution(
promptExecutionSummary,
requestCleaning.debug,
);
try {
const response = await callDedicatedOpenAICompatible(messages, {
signal: options.signal,
taskType: options.taskType || "",
requestSource:
options.requestSource || options.source || "diagnostic:call-llm",
recordTaskLlmRequest(taskType || privateRequestSource, {
requestCleaning: requestCleaning.debug,
promptExecution: promptExecutionSnapshot,
}, {
merge: true,
});
return response?.content || null;
const response = await callDedicatedOpenAICompatible(requestCleaning.messages, {
signal: options.signal,
taskType,
requestSource: privateRequestSource,
});
const responseText =
typeof response?.content === "string" ? response.content : "";
const outputCleanup = applyTaskOutputRegexStages(taskType, responseText);
recordTaskLlmRequest(taskType || privateRequestSource, {
requestCleaning: requestCleaning.debug,
responseCleaning: outputCleanup.debug,
promptExecution: promptExecutionSnapshot,
}, {
merge: true,
});
return outputCleanup.cleanedText || null;
} catch (e) {
console.error("[ST-BME] LLM 调用失败:", e);
return null;

View File

@@ -20,6 +20,8 @@ import {
getLegacyPromptFieldForTask,
getTaskTypeOptions,
importTaskProfile as parseImportedTaskProfile,
isTaskRegexStageEnabled,
normalizeTaskRegexStages,
restoreDefaultTaskProfile,
setActiveTaskProfileId,
upsertTaskProfile,
@@ -143,8 +145,46 @@ const TASK_PROFILE_GENERATION_GROUPS = [
];
// Regex-stage toggles shown in the task profile UI. Each key must be unique:
// the renderer binds one checkbox per `key` (via data-regex-stage), so the
// stale pre-split "input"/"output" entries are removed — they duplicated the
// new master-switch keys and would render two toggles bound to the same flag.
const TASK_PROFILE_REGEX_STAGES = [
  {
    key: "input",
    label: "输入总开关",
    desc: "控制全部输入阶段;未单独覆写的细分阶段会跟随它。",
  },
  {
    key: "input.userMessage",
    label: "输入: 用户消息",
    desc: "处理当前 userMessage。",
  },
  {
    key: "input.recentMessages",
    label: "输入: 最近上下文",
    desc: "处理 recentMessages、chatMessages、dialogueText。",
  },
  {
    key: "input.candidateText",
    label: "输入: 候选与摘要",
    desc: "处理 candidateText、candidateNodes、nodeContent 和各类摘要。",
  },
  {
    key: "input.finalPrompt",
    label: "输入: 发送前最终消息",
    desc: "在最终 messages 全部组装完成、真正发送给 LLM 前统一清洗。",
  },
  {
    key: "output",
    label: "输出总开关",
    desc: "控制全部输出阶段;未单独覆写的细分阶段会跟随它。",
  },
  {
    key: "output.rawResponse",
    label: "输出: 原始响应",
    desc: "LLM 原始文本到手后先清洗一次。",
  },
  {
    key: "output.beforeParse",
    label: "输出: 解析前",
    desc: "在 JSON 提取/解析前再清洗一次。",
  },
];
let panelEl = null;
@@ -3342,6 +3382,7 @@ function _renderTaskGenerationTab(state) {
function _renderTaskRegexTab(state) {
const regex = state.profile.regex || {};
const normalizedStages = normalizeTaskRegexStages(regex.stages || {});
return `
<div class="bme-task-tab-body">
<div class="bme-task-regex-top">
@@ -3415,14 +3456,14 @@ function _renderTaskRegexTab(state) {
<span class="bme-toggle-title">${_escHtml(stage.label)}</span>
<span class="bme-toggle-desc">${_escHtml(stage.desc)}</span>
</span>
<input
type="checkbox"
data-regex-stage="${_escAttr(stage.key)}"
${(regex.stages?.[stage.key] ?? true) ? "checked" : ""}
/>
</label>
`,
).join("")}
<input
type="checkbox"
data-regex-stage="${_escAttr(stage.key)}"
${isTaskRegexStageEnabled(normalizedStages, stage.key) ? "checked" : ""}
/>
</label>
`,
).join("")}
</div>
</div>
@@ -3754,8 +3795,13 @@ function _renderTaskDebugLlmCard(taskType, llmRequest) {
<span class="bme-debug-kv-key">输出清洗</span>
<span class="bme-debug-kv-value">${_escHtml(llmRequest.responseCleaning?.applied ? "已生效" : "未生效")}</span>
</div>
<div class="bme-debug-kv-item">
<span class="bme-debug-kv-key">发送前输入清洗</span>
<span class="bme-debug-kv-value">${_escHtml(llmRequest.requestCleaning?.applied ? "已生效" : "未生效")}</span>
</div>
</div>
${_renderDebugDetails("提示词执行摘要", llmRequest.promptExecution || null)}
${_renderDebugDetails("发送前输入清洗", llmRequest.requestCleaning || null)}
${_renderDebugDetails("实际请求路径", llmRequest.effectiveRoute || null)}
${_renderDebugDetails("输出清洗", llmRequest.responseCleaning || null)}
${_renderDebugDetails("实际保留参数", llmRequest.filteredGeneration || {})}
@@ -4623,7 +4669,7 @@ function _normalizeTaskProfileDraft(profile = {}) {
stages: {
input: true,
output: true,
...(draft.regex?.stages || {}),
...normalizeTaskRegexStages(draft.regex?.stages || {}),
},
localRules: Array.isArray(draft.regex?.localRules)
? draft.regex.localRules.map((rule) => ({

View File

@@ -526,7 +526,7 @@ function sanitizePromptMessages(
messages = [],
{
blockedContents = [],
regexStage = "input.finalPrompt",
regexStage = "",
debugState = null,
regexCollector = null,
} = {},
@@ -646,7 +646,7 @@ function sanitizeWorldInfoEntries(
{
mode: "aggressive",
blockedContents,
regexStage: "input.finalPrompt",
regexStage: "",
role: entry?.role || "system",
regexCollector,
},
@@ -728,7 +728,7 @@ function sanitizeWorldInfoContext(
{
mode: "aggressive",
blockedContents: runtimeBlockedContents,
regexStage: "input.finalPrompt",
regexStage: "",
role: message?.role || "system",
regexCollector,
},
@@ -1107,7 +1107,7 @@ export async function buildTaskPrompt(settings = {}, taskType, context = {}) {
{
mode: "final-safe",
blockedContents: worldInfoRuntimeBlockedContents,
regexStage: "input.finalPrompt",
regexStage: "",
role,
regexCollector: promptRegexInput,
},

View File

@@ -569,6 +569,88 @@ function normalizeRegexLocalRule(rule = {}, taskType = "task", index = 0) {
};
}
// Legacy single-word stage keys mapped to their canonical dotted names.
const TASK_REGEX_STAGE_ALIAS_MAP = Object.freeze({
  finalPrompt: "input.finalPrompt",
  rawResponse: "output.rawResponse",
  beforeParse: "output.beforeParse",
});
// Master-switch key -> the specific sub-stage keys it governs.
const TASK_REGEX_STAGE_GROUPS = Object.freeze({
  input: Object.freeze([
    "input.userMessage",
    "input.recentMessages",
    "input.candidateText",
    "input.finalPrompt",
  ]),
  output: Object.freeze([
    "output.rawResponse",
    "output.beforeParse",
  ]),
});
/**
 * Resolves a stage key to its canonical dotted form.
 *
 * @param {string} stageKey - Raw stage key (may be a legacy alias).
 * @returns {string} Canonical key, or the trimmed input when no alias matches.
 */
function normalizeRegexStageKey(stageKey = "") {
  const trimmed = String(stageKey || "").trim();
  return TASK_REGEX_STAGE_ALIAS_MAP[trimmed] || trimmed;
}
/**
 * Normalizes a stage-toggle map: migrates legacy alias keys to canonical
 * ones and clears "all sub-stages false" patterns left by older defaults so
 * that sub-stages follow their group switch again.
 *
 * @param {object} stages - Raw stage flags (possibly legacy-shaped).
 * @returns {object} New normalized flag map; input is never mutated.
 */
export function normalizeTaskRegexStages(stages = {}) {
  const isPlainObject =
    stages && typeof stages === "object" && !Array.isArray(stages);
  const result = isPlainObject ? { ...stages } : {};
  // Step 1: migrate legacy keys, canonical value wins when both exist.
  for (const [legacyKey, canonicalKey] of Object.entries(
    TASK_REGEX_STAGE_ALIAS_MAP,
  )) {
    const hasLegacy = Object.prototype.hasOwnProperty.call(result, legacyKey);
    const hasCanonical = Object.prototype.hasOwnProperty.call(
      result,
      canonicalKey,
    );
    if (hasLegacy && !hasCanonical) {
      result[canonicalKey] = Boolean(result[legacyKey]);
    }
    delete result[legacyKey];
  }
  // Step 2: an enabled group whose specific stages are ALL false is the old
  // default shape — drop the specifics so they inherit from the group.
  for (const [groupKey, stageKeys] of Object.entries(TASK_REGEX_STAGE_GROUPS)) {
    if (result[groupKey] === false || stageKeys.length === 0) {
      continue;
    }
    const everySpecificDisabled = stageKeys.every(
      (stageKey) => result[stageKey] === false,
    );
    if (!everySpecificDisabled) {
      continue;
    }
    for (const stageKey of stageKeys) {
      delete result[stageKey];
    }
  }
  return result;
}
/**
 * Reports whether a regex stage is enabled, honoring explicit per-stage
 * overrides first and falling back to the group master switch.
 *
 * @param {object} stages - Stage flags (normalized internally).
 * @param {string} stageKey - Stage to query (alias or canonical).
 * @returns {boolean} True unless the stage (or its governing switch) is false.
 */
export function isTaskRegexStageEnabled(stages = {}, stageKey = "") {
  const normalizedStages = normalizeTaskRegexStages(stages);
  const key = normalizeRegexStageKey(stageKey);
  if (!key) {
    // Empty key historically means "the input pipeline".
    return normalizedStages.input !== false;
  }
  if (Object.prototype.hasOwnProperty.call(normalizedStages, key)) {
    return normalizedStages[key] !== false;
  }
  let fallbackKey = key;
  if (key.startsWith("input.")) {
    fallbackKey = "input";
  } else if (key.startsWith("output.")) {
    fallbackKey = "output";
  }
  return normalizedStages[fallbackKey] !== false;
}
/**
 * Wraps a raw task-profiles map in the settings shape expected by
 * ensureTaskProfiles and returns the normalized state it produces.
 *
 * @param {object} taskProfiles - Raw per-task profile map.
 * @returns {object} Normalized task-profiles state.
 */
function normalizeTaskProfilesState(taskProfiles = {}) {
  const settingsShape = { taskProfiles };
  return ensureTaskProfiles(settingsShape);
}
@@ -741,7 +823,7 @@ function createFallbackDefaultTaskProfile(taskType) {
preset: true,
character: true,
},
stages: {
stages: normalizeTaskRegexStages({
finalPrompt: true,
"input.userMessage": false,
"input.recentMessages": false,
@@ -751,7 +833,7 @@ function createFallbackDefaultTaskProfile(taskType) {
beforeParse: false,
"output.rawResponse": false,
"output.beforeParse": false,
},
}),
localRules: [],
},
metadata: {
@@ -799,10 +881,10 @@ export function createDefaultTaskProfile(taskType) {
...fallback.regex.sources,
...(template?.regex?.sources || {}),
},
stages: {
stages: normalizeTaskRegexStages({
...fallback.regex.stages,
...(template?.regex?.stages || {}),
},
}),
localRules: Array.isArray(template?.regex?.localRules)
? template.regex.localRules.map((rule, index) =>
normalizeRegexLocalRule(rule, taskType, index),
@@ -978,10 +1060,10 @@ export function normalizeTaskProfile(taskType, profile = {}, settings = {}) {
...base.regex.sources,
...(profile?.regex?.sources || {}),
},
stages: {
stages: normalizeTaskRegexStages({
...base.regex.stages,
...(profile?.regex?.stages || {}),
},
}),
localRules: Array.isArray(profile?.regex?.localRules)
? profile.regex.localRules.map((rule, index) =>
normalizeRegexLocalRule(rule, taskType, index),

View File

@@ -4,7 +4,11 @@
import { extension_settings, getContext } from "../../../extensions.js";
import { getHostAdapter } from "./host-adapter/index.js";
import { getActiveTaskProfile } from "./prompt-profiles.js";
import {
getActiveTaskProfile,
isTaskRegexStageEnabled,
normalizeTaskRegexStages,
} from "./prompt-profiles.js";
const HTML_TAG_PATTERN =
/<\/?(?:div|span|p|br|hr|img|details|summary|section|article|aside|header|footer|nav|ul|ol|li|table|tr|td|th|h[1-6]|a|em|strong|blockquote|pre|code|svg|path)\b/i;
@@ -320,26 +324,19 @@ function collectLocalRules(regexConfig = {}) {
function shouldApplyRuleForStage(rule, stage = "", stagesConfig = {}) {
const normalizedStage = String(stage || "").trim();
if (
normalizedStage &&
Object.prototype.hasOwnProperty.call(stagesConfig, normalizedStage)
) {
return (
stagesConfig[normalizedStage] !== false &&
rule.destinationFlags.prompt !== false
);
if (rule.destinationFlags.prompt === false) {
return false;
}
if (PROMPT_STAGES.has(normalizedStage)) {
return (
stagesConfig.input !== false && rule.destinationFlags.prompt !== false
);
if (!normalizedStage) {
return isTaskRegexStageEnabled(stagesConfig, "input");
}
if (OUTPUT_STAGES.has(normalizedStage)) {
return (
stagesConfig.output !== false && rule.destinationFlags.prompt !== false
);
if (PROMPT_STAGES.has(normalizedStage) || OUTPUT_STAGES.has(normalizedStage)) {
return isTaskRegexStageEnabled(stagesConfig, normalizedStage);
}
return stagesConfig.input !== false && rule.destinationFlags.prompt !== false;
return isTaskRegexStageEnabled(stagesConfig, normalizedStage);
}
function shouldApplyRuleForRole(rule, role = "system") {
@@ -398,7 +395,7 @@ export function applyTaskRegex(
}
// 阶段检查已移到 shouldApplyRuleForStage 中,无需单独 gate
const stagesConfig = regexConfig?.stages || {};
const stagesConfig = normalizeTaskRegexStages(regexConfig?.stages || {});
const tavernRules = collectTavernRules(regexConfig);
const localRules = collectLocalRules(regexConfig);

View File

@@ -254,7 +254,8 @@ try {
assert.match(promptBuild.systemPrompt, /GOOD_RECENT/);
assert.match(JSON.stringify(promptBuild.executionMessages), /GOOD_CANDIDATE/);
assert.match(promptBuild.systemPrompt, /FINAL_GOOD/);
assert.match(promptBuild.systemPrompt, /FINAL_BAD/);
assert.doesNotMatch(promptBuild.systemPrompt, /FINAL_GOOD/);
assert.equal(
promptBuild.debug.mvu.sanitizedFields.some((entry) => entry.name === "userMessage"),
true,
@@ -454,6 +455,8 @@ try {
const payload = buildTaskLlmPayload(promptBuild, "unused fallback");
assert.equal(payload.systemPrompt, "");
assert.match(JSON.stringify(payload.promptMessages), /FINAL_BAD/);
assert.doesNotMatch(JSON.stringify(payload.promptMessages), /FINAL_GOOD/);
const result = await llm.callLLMForJSON({
systemPrompt: payload.systemPrompt,
userPrompt: payload.userPrompt,
@@ -466,6 +469,8 @@ try {
assert.deepEqual(result, { ok: true });
assert.equal(capturedBodies.length, 1);
assert.match(JSON.stringify(capturedBodies[0].messages), /FINAL_GOOD/);
assert.doesNotMatch(JSON.stringify(capturedBodies[0].messages), /FINAL_BAD/);
assert.doesNotMatch(
JSON.stringify(capturedBodies[0].messages),
/status_current_variable|updatevariable|StatusPlaceHolderImpl|stat_data|display_data|delta_data|get_message_variable/i,
@@ -478,6 +483,18 @@ try {
assert.ok(runtimePromptBuild);
assert.ok(runtimeLlmRequest);
assert.match(JSON.stringify(runtimeLlmRequest.messages), /FINAL_GOOD/);
assert.equal(runtimeLlmRequest.requestCleaning?.applied, true);
assert.equal(
runtimeLlmRequest.requestCleaning?.stages?.length > 0,
true,
);
assert.equal(
runtimeLlmRequest.requestCleaning?.stages?.every(
(entry) => entry.stage === "input.finalPrompt",
),
true,
);
assert.doesNotMatch(
JSON.stringify(runtimePromptBuild.executionMessages),
/status_current_variable|updatevariable|StatusPlaceHolderImpl|stat_data|display_data|delta_data|get_message_variable/i,

View File

@@ -365,6 +365,79 @@ try {
assert.equal(exactStageResult, "JSON");
assert.deepEqual(exactStageDebug.entries[0].appliedRules, []);
const legacyStageCompatibilitySettings = {
taskProfilesVersion: 1,
taskProfiles: {
extract: {
activeProfileId: "legacy-stage-compat",
profiles: [
{
id: "legacy-stage-compat",
taskType: "extract",
regex: {
enabled: true,
inheritStRegex: false,
sources: {
global: false,
preset: false,
character: false,
},
stages: {
input: true,
output: true,
"input.userMessage": false,
"input.recentMessages": false,
"input.candidateText": false,
"input.finalPrompt": false,
"output.rawResponse": false,
"output.beforeParse": false,
},
localRules: [
createRule("legacy-input-user", "/Alpha/g", "A1"),
createRule("legacy-output-raw", "/Omega/g", "O1", {
source: {
user_input: false,
ai_output: true,
},
}),
],
},
},
],
},
},
};
const legacyStageInputDebug = { entries: [] };
const legacyStageInputResult = applyTaskRegex(
legacyStageCompatibilitySettings,
"extract",
"input.userMessage",
"Alpha",
legacyStageInputDebug,
"user",
);
assert.equal(legacyStageInputResult, "A1");
assert.deepEqual(
legacyStageInputDebug.entries[0].appliedRules.map((item) => item.id),
["legacy-input-user"],
);
const legacyStageOutputDebug = { entries: [] };
const legacyStageOutputResult = applyTaskRegex(
legacyStageCompatibilitySettings,
"extract",
"output.rawResponse",
"Omega",
legacyStageOutputDebug,
"assistant",
);
assert.equal(legacyStageOutputResult, "O1");
assert.deepEqual(
legacyStageOutputDebug.entries[0].appliedRules.map((item) => item.id),
["legacy-output-raw"],
);
console.log("task-regex tests passed");
} finally {
if (originalSillyTavern === undefined) {