feat(regex): move contamination cleanup to default global preset

This commit is contained in:
Youzini-afk
2026-04-11 19:38:17 +08:00
parent 78a451dfe4
commit 30046dd66b
5 changed files with 254 additions and 5 deletions

View File

@@ -896,6 +896,119 @@ const DEFAULT_TASK_REGEX_STAGES = Object.freeze({
output: false,
});
/**
 * Builds one built-in contamination-cleanup rule spec.
 *
 * Only the id, display name and pattern differ between the built-in rules;
 * every other field is identical, so it is filled in here. Fresh `source` and
 * `destination` objects are created on each call so no two specs share a
 * nested object.
 *
 * @param {string} id - Stable rule identifier.
 * @param {string} scriptName - Display name stored as `script_name`.
 * @param {string} findRegex - Slash-delimited pattern string including flags.
 * @returns {object} A rule spec with the shared field values applied.
 */
function makeDefaultContaminationRuleSpec(id, scriptName, findRegex) {
  return {
    id,
    script_name: scriptName,
    enabled: true,
    find_regex: findRegex,
    replace_string: "",
    trim_strings: "",
    source: { user_input: true, ai_output: true },
    destination: { prompt: true, display: false },
    min_depth: 0,
    max_depth: 9999,
  };
}

/**
 * Built-in rule specs for the default global task regex preset.
 *
 * Each row is `[id, script_name, find_regex]`. The outer array is frozen;
 * the rule objects inside it are not.
 */
const DEFAULT_GLOBAL_TASK_REGEX_RULE_SPECS = Object.freeze(
  [
    [
      "default-contamination-thinking-blocks",
      "默认清理thinking/analysis/reasoning",
      "/<(think|thinking|analysis|reasoning)\\b[^>]*>[\\s\\S]*?<\\/\\1>/gi",
    ],
    [
      "default-contamination-choice-blocks",
      "默认清理choice",
      "/(?:<choice\\b[^>]*>[\\s\\S]*?<\\/choice>|<choice\\b[^>]*\\/?>)/gi",
    ],
    [
      "default-contamination-updatevariable-tags",
      "默认清理UpdateVariable",
      "/(?:<updatevariable\\b[^>]*>[\\s\\S]*?<\\/updatevariable>|<updatevariable\\b[^>]*\\/?>)/gi",
    ],
    [
      "default-contamination-status-current-variable-tags",
      "默认清理status_current_variable",
      "/(?:<status_current_variable\\b[^>]*>[\\s\\S]*?<\\/status_current_variable>|<status_current_variable\\b[^>]*\\/?>)/gi",
    ],
    [
      "default-contamination-status-placeholder-tags",
      "默认清理StatusPlaceHolderImpl",
      "/<StatusPlaceHolderImpl\\b[^>]*\\/?>/gi",
    ],
  ].map(([id, scriptName, findRegex]) =>
    makeDefaultContaminationRuleSpec(id, scriptName, findRegex),
  ),
);
/**
 * Produces a fresh, normalized copy of the built-in global regex rules.
 *
 * Each spec is shallow-copied with its `source` and `destination` objects
 * copied as well (missing ones become empty objects), then passed to
 * `normalizeRegexLocalRule` with the "global" scope and the spec's position
 * in the defaults list.
 *
 * @returns {Array} One normalized rule per default spec, in spec order.
 */
function cloneDefaultGlobalTaskRegexRules() {
  const normalizedRules = [];
  for (const [position, spec] of DEFAULT_GLOBAL_TASK_REGEX_RULE_SPECS.entries()) {
    // Copy the nested objects too, so the result never aliases the shared specs.
    const detachedSpec = {
      ...spec,
      source: { ...(spec.source || {}) },
      destination: { ...(spec.destination || {}) },
    };
    normalizedRules.push(
      normalizeRegexLocalRule(detachedSpec, "global", position),
    );
  }
  return normalizedRules;
}
function normalizeRegexStageKey(stageKey = "") {
const normalized = String(stageKey || "").trim();
return TASK_REGEX_STAGE_ALIAS_MAP[normalized] || normalized;
@@ -939,7 +1052,7 @@ export function createDefaultGlobalTaskRegex() {
character: true,
},
stages: normalizeTaskRegexStages(DEFAULT_TASK_REGEX_STAGES),
localRules: [],
localRules: cloneDefaultGlobalTaskRegexRules(),
};
}
@@ -978,6 +1091,11 @@ export function normalizeGlobalTaskRegex(config = {}, taskType = "global") {
const defaults = createDefaultGlobalTaskRegex();
const source =
config && typeof config === "object" && !Array.isArray(config) ? config : {};
const normalizedTaskType = String(taskType || "").trim().toLowerCase();
const defaultLocalRules = normalizedTaskType === "global" ? defaults.localRules : [];
const rawLocalRules = Array.isArray(source.localRules)
? source.localRules
: defaultLocalRules;
return {
enabled: source.enabled !== false,
@@ -990,7 +1108,7 @@ export function normalizeGlobalTaskRegex(config = {}, taskType = "global") {
...normalizeTaskRegexStages(defaults.stages),
...normalizeTaskRegexStages(source.stages || {}),
},
localRules: dedupeRegexRules(source.localRules, taskType),
localRules: dedupeRegexRules(rawLocalRules, taskType),
};
}

View File

@@ -70,6 +70,17 @@ assert.equal(defaultSettings.taskProfilesVersion, 3);
assert.ok(defaultSettings.taskProfiles);
assert.ok(defaultSettings.taskProfiles.extract);
assert.ok(defaultSettings.taskProfiles.recall);
// Default settings must include a global task regex config whose local rules
// are exactly the five built-in contamination-cleanup rules, in this order.
assert.ok(defaultSettings.globalTaskRegex);
assert.deepEqual(
defaultSettings.globalTaskRegex.localRules.map((rule) => rule.id),
[
"default-contamination-thinking-blocks",
"default-contamination-choice-blocks",
"default-contamination-updatevariable-tags",
"default-contamination-status-current-variable-tags",
"default-contamination-status-placeholder-tags",
],
);
const migratedSettings = mergePersistedSettings({
maintenanceAutoMinNewNodes: 7,

View File

@@ -46,7 +46,10 @@ installResolveHooks([
]);
const { buildTaskLlmPayload, buildTaskPrompt } = await import("../prompting/prompt-builder.js");
const { createDefaultTaskProfiles } = await import("../prompting/prompt-profiles.js");
const {
createDefaultGlobalTaskRegex,
createDefaultTaskProfiles,
} = await import("../prompting/prompt-profiles.js");
const { initializeHostAdapter } = await import("../host/adapter/index.js");
const settings = {
@@ -145,6 +148,28 @@ assert.match(String(recallFormatBlock?.content || ""), /selected_keys/);
assert.match(String(recallRulesBlock?.content || ""), /剧情时间/);
assert.match(String(recallRulesBlock?.content || ""), /评分召回/);
// Build a "recall" prompt with the default task profiles and the default
// global regex preset. Each of the three inputs carries contamination tags.
const globalRegexPromptBuild = await buildTaskPrompt(
{
taskProfilesVersion: 3,
taskProfiles: createDefaultTaskProfiles(),
globalTaskRegex: createDefaultGlobalTaskRegex(),
},
"recall",
{
taskName: "recall",
recentMessages:
"最近消息 <thinking>隐藏思维</thinking> <choice>1. 隐藏选项</choice>",
userMessage:
"用户输入 <updatevariable>secret</updatevariable> <status_current_variable>hp=3</status_current_variable>",
candidateNodes:
"候选节点 <StatusPlaceHolderImpl/> <analysis>隐藏分析</analysis>",
},
);
// The whole build result is serialized, so an opening tag left in any field
// fails the check; matching is case-insensitive.
assert.doesNotMatch(
JSON.stringify(globalRegexPromptBuild),
/<thinking|<choice|<updatevariable|<status_current_variable|<StatusPlaceHolderImpl|<analysis/i,
);
const formatterCalls = [];
initializeHostAdapter({
regexProvider: {

View File

@@ -427,10 +427,17 @@ legacyRegexSettings.taskProfiles.extract.profiles.push(
);
const migratedLegacyRegex = migratePerTaskRegexToGlobal(legacyRegexSettings);
assert.equal(migratedLegacyRegex.changed, true);
assert.equal(migratedLegacyRegex.settings.globalTaskRegex.enabled, false);
assert.equal(migratedLegacyRegex.settings.globalTaskRegex.enabled, true);
assert.deepEqual(
migratedLegacyRegex.settings.globalTaskRegex.localRules.map((rule) => rule.script_name),
["隐藏规则"],
[
"默认清理thinking/analysis/reasoning",
"默认清理choice",
"默认清理UpdateVariable",
"默认清理status_current_variable",
"默认清理StatusPlaceHolderImpl",
"隐藏规则",
],
);
assert.deepEqual(
migratedLegacyRegex.settings.taskProfiles.extract.profiles.find(

View File

@@ -175,6 +175,7 @@ try {
"../prompting/task-regex.js"
);
const {
createDefaultGlobalTaskRegex,
createDefaultTaskProfiles,
isTaskRegexStageEnabled,
normalizeTaskProfile,
@@ -933,6 +934,93 @@ try {
["prompt-output"],
);
// The default global regex config must list the five built-in cleanup rules
// in this exact order.
const defaultGlobalRegex = createDefaultGlobalTaskRegex();
assert.deepEqual(
defaultGlobalRegex.localRules.map((rule) => rule.id),
[
"default-contamination-thinking-blocks",
"default-contamination-choice-blocks",
"default-contamination-updatevariable-tags",
"default-contamination-status-current-variable-tags",
"default-contamination-status-placeholder-tags",
],
);
// Run the default global preset over text that contains one instance of every
// tag the built-in rules target, between two plain-text marker lines.
const globalDefaultDebug = { entries: [] };
const globalDefaultResult = applyTaskRegex(
{
taskProfiles: createDefaultTaskProfiles(),
globalTaskRegex: createDefaultGlobalTaskRegex(),
},
"extract",
"input.recentMessages",
[
"前缀",
"<thinking>内部思维</thinking>",
"<choice>1. 选项</choice>",
"<UpdateVariable>hp=1</UpdateVariable>",
"<status_current_variable>hp=1</status_current_variable>",
"<StatusPlaceHolderImpl/>",
"尾巴",
].join("\n"),
globalDefaultDebug,
"system",
);
// The plain-text markers must survive the cleanup.
assert.match(globalDefaultResult, /前缀/);
assert.match(globalDefaultResult, /尾巴/);
// No targeted opening tag may remain; the check is case-insensitive, so the
// mixed-case <UpdateVariable> input is covered too.
assert.doesNotMatch(
globalDefaultResult,
/<choice|<thinking|<updatevariable|<status_current_variable|<StatusPlaceHolderImpl/i,
);
// The debug record must report all five default rules as applied, in preset
// order, and count five local rules.
assert.deepEqual(
globalDefaultDebug.entries[0].appliedRules.map((item) => item.id),
[
"default-contamination-thinking-blocks",
"default-contamination-choice-blocks",
"default-contamination-updatevariable-tags",
"default-contamination-status-current-variable-tags",
"default-contamination-status-placeholder-tags",
],
);
assert.equal(globalDefaultDebug.entries[0].sourceCount.local, 5);
// An explicitly empty localRules array must be honored as "no local rules"
// rather than replaced by the built-in defaults. All three `sources` flags
// and inheritStRegex are switched off in this config as well.
const explicitEmptyGlobalDebug = { entries: [] };
const explicitEmptyGlobalResult = applyTaskRegex(
{
taskProfiles: createDefaultTaskProfiles(),
globalTaskRegex: {
enabled: true,
inheritStRegex: false,
sources: {
global: false,
preset: false,
character: false,
},
stages: {
"input.userMessage": true,
"input.recentMessages": true,
"input.candidateText": true,
"input.finalPrompt": false,
"output.rawResponse": false,
"output.beforeParse": false,
output: false,
},
localRules: [],
},
},
"extract",
"input.recentMessages",
"<choice>保留</choice><thinking>保留</thinking>",
explicitEmptyGlobalDebug,
"system",
);
// The input must come back unchanged.
assert.equal(
explicitEmptyGlobalResult,
"<choice>保留</choice><thinking>保留</thinking>",
);
// The debug record must show no applied rules and zero local rules.
assert.deepEqual(explicitEmptyGlobalDebug.entries[0].appliedRules, []);
assert.equal(explicitEmptyGlobalDebug.entries[0].sourceCount.local, 0);
console.log("task-regex tests passed");
} finally {
if (originalSillyTavern === undefined) {