Fix host regex reuse for extraction inputs

This commit is contained in:
Youzini-afk
2026-04-09 00:28:12 +08:00
parent e57b693418
commit 922b716ff6
7 changed files with 432 additions and 16 deletions

View File

@@ -722,6 +722,7 @@ export async function extractMemories({
schema: schemaDescription,
schemaDescription,
recentMessages: dialogueText,
chatMessages: messages,
dialogueText,
graphStats: graphOverview,
graphOverview,

View File

@@ -26,6 +26,90 @@ function normalizeReasons(reasons = []) {
: [];
}
/**
 * Resolve a role-like value to either "user" or "assistant".
 *
 * A string that equals "user" or "assistant" after trimming and lowercasing
 * is returned in that normalized form. Anything else (other strings,
 * non-strings) falls back to the `isUser` flag.
 *
 * @param {*} value - Candidate role label.
 * @param {boolean} isUser - Fallback selector used when `value` is not a recognized role.
 * @returns {"user"|"assistant"} The normalized role.
 */
function normalizeMessageLikeRole(value = "", isUser = false) {
  const fallbackRole = isUser ? "user" : "assistant";
  if (typeof value !== "string") {
    return fallbackRole;
  }
  const candidate = value.trim().toLowerCase();
  return candidate === "user" || candidate === "assistant"
    ? candidate
    : fallbackRole;
}
/**
 * Describe a message-like object so its text field can be sanitized with the
 * right role, regex source type and depth.
 *
 * Two shapes are recognized:
 *   - `{ role, content }` — role comes from `role` (defaulting to assistant);
 *   - `{ mes, is_user }`  — role comes from the truthiness of `is_user`.
 *
 * @param {*} value - Candidate message object.
 * @returns {{contentKey: "content"|"mes", role: string, sourceType: "user_input"|"ai_output", depth: number|null}|null}
 *   The descriptor, or `null` when `value` is not a plain message-like object.
 */
function getStructuredMessageDescriptor(value) {
  if (!value || typeof value !== "object" || Array.isArray(value)) {
    return null;
  }

  let contentKey;
  let role;
  if (typeof value.content === "string") {
    contentKey = "content";
    role = normalizeMessageLikeRole(value.role, false);
  } else if (typeof value.mes === "string") {
    contentKey = "mes";
    role = normalizeMessageLikeRole("", Boolean(value.is_user));
  } else {
    return null;
  }

  // Only real numbers and non-blank strings are depth candidates.
  // `Number(null)`, `Number("")` and `Number(true)` are finite, so coercing
  // blindly would report an absent depth as 0 (or 1) and suppress the
  // index-based depth fallback used by callers.
  const rawDepth = value.depth;
  const isDepthCandidate =
    typeof rawDepth === "number" ||
    (typeof rawDepth === "string" && rawDepth.trim() !== "");
  const parsedDepth = isDepthCandidate ? Number(rawDepth) : Number.NaN;

  return {
    contentKey,
    role,
    sourceType: role === "user" ? "user_input" : "ai_output",
    depth: Number.isFinite(parsedDepth) ? parsedDepth : null,
  };
}
/**
 * Merge formatter options and apply prompt-oriented defaults.
 *
 * `overrides` win over `baseOptions`. After merging, `isPrompt` defaults to
 * `true` and `isMarkdown` to `false` when they are null/undefined, and
 * `depth` is either a finite number or absent.
 *
 * @param {object|null} baseOptions - Options to start from; non-objects are ignored.
 * @param {object} overrides - Options layered on top of the base.
 * @returns {object} A new options object; neither input is mutated.
 */
function mergeFormatterOptions(baseOptions = null, overrides = {}) {
  const base =
    baseOptions && typeof baseOptions === "object" ? baseOptions : {};
  const merged = {
    ...base,
    ...overrides,
  };
  if (merged.isPrompt == null) {
    merged.isPrompt = true;
  }
  if (merged.isMarkdown == null) {
    merged.isMarkdown = false;
  }

  // Only real numbers and non-blank strings count as a depth. Blind
  // `Number()` coercion maps null, "" and booleans to finite values, which
  // would turn "no depth" into depth 0 and enable depth filtering.
  const rawDepth = merged.depth;
  const isDepthCandidate =
    typeof rawDepth === "number" ||
    (typeof rawDepth === "string" && rawDepth.trim() !== "");
  const parsedDepth = isDepthCandidate ? Number(rawDepth) : Number.NaN;
  if (Number.isFinite(parsedDepth)) {
    merged.depth = parsedDepth;
  } else {
    delete merged.depth;
  }
  return merged;
}
/**
 * Build formatter options for one message, resolving its depth.
 *
 * The descriptor's own depth is used when it is a finite number. Otherwise
 * the depth is derived from the message position as
 * `max(total - index - 1, 0)`, provided `index` is finite and `total > 0`.
 * When no finite depth results, the base options are merged without a
 * depth override.
 *
 * @param {object|null} baseOptions - Options forwarded to `mergeFormatterOptions`.
 * @param {{depth?: number|null}|null} descriptor - Message descriptor, if any.
 * @param {number} index - Position of the message in its array.
 * @param {number} total - Length of that array.
 * @returns {object} The result of `mergeFormatterOptions`.
 */
function buildMessageFormatterOptions(
  baseOptions = null,
  descriptor = null,
  index = -1,
  total = 0,
) {
  const declaredDepth = descriptor?.depth;
  let resolvedDepth = null;
  if (declaredDepth != null && Number.isFinite(Number(declaredDepth))) {
    resolvedDepth = Number(declaredDepth);
  } else if (Number.isFinite(index) && total > 0) {
    // Positional fallback: the last entry of the array gets depth 0.
    resolvedDepth = Math.max(total - index - 1, 0);
  }

  if (!Number.isFinite(resolvedDepth)) {
    return mergeFormatterOptions(baseOptions);
  }
  return mergeFormatterOptions(baseOptions, { depth: resolvedDepth });
}
function pushUnique(target = [], value = "") {
const normalized = String(value || "").trim();
if (!normalized || target.includes(normalized)) {
@@ -134,12 +218,17 @@ export function sanitizeInjectionText(
recordSanitizerDebug(debugState, path, sanitizerResult, stage);
const afterSanitizer = String(sanitizerResult.text || "");
const hostReuseResult = eligible && applyHostRegex && regexSourceType
const normalizedFormatterOptions = mergeFormatterOptions(formatterOptions);
const hostReuseResult =
eligible &&
applyHostRegex &&
regexSourceType &&
afterSanitizer.length > 0
? applyHostRegexReuse(settings, taskType, afterSanitizer, {
sourceType: regexSourceType,
role,
debugCollector: regexCollector,
formatterOptions,
formatterOptions: normalizedFormatterOptions,
})
: {
text: afterSanitizer,
@@ -284,6 +373,7 @@ export function sanitizeInjectionStructuredValue(
const sanitizedArray = [];
let changed = false;
for (let index = 0; index < value.length; index += 1) {
const messageDescriptor = getStructuredMessageDescriptor(value[index]);
const childResult = sanitizeInjectionStructuredValue(
settings,
taskType,
@@ -297,7 +387,14 @@ export function sanitizeInjectionStructuredValue(
sanitizationEligible,
regexSourceType,
role,
formatterOptions,
formatterOptions: messageDescriptor
? buildMessageFormatterOptions(
formatterOptions,
messageDescriptor,
index,
value.length,
)
: formatterOptions,
debugState,
regexCollector,
applySanitizer,
@@ -335,6 +432,7 @@ export function sanitizeInjectionStructuredValue(
seen.add(value);
const originalLooksMvuContainer = looksLikeMvuStateContainer(value);
const messageDescriptor = getStructuredMessageDescriptor(value);
const sanitizedObject = {};
let changed = false;
let keptEntries = 0;
@@ -359,6 +457,8 @@ export function sanitizeInjectionStructuredValue(
continue;
}
const isMessageContentField =
messageDescriptor && key === messageDescriptor.contentKey;
const childResult = sanitizeInjectionStructuredValue(
settings,
taskType,
@@ -370,13 +470,21 @@ export function sanitizeInjectionStructuredValue(
blockedContents,
contentOrigin,
sanitizationEligible,
regexSourceType,
role,
formatterOptions,
regexSourceType: isMessageContentField
? messageDescriptor.sourceType
: regexSourceType,
role: isMessageContentField ? messageDescriptor.role : role,
formatterOptions: isMessageContentField
? buildMessageFormatterOptions(formatterOptions, messageDescriptor)
: formatterOptions,
debugState,
regexCollector,
applySanitizer,
applyHostRegex,
applyHostRegex: messageDescriptor
? isMessageContentField
? applyHostRegex && Boolean(messageDescriptor.sourceType)
: false
: applyHostRegex,
stripMvuContainers,
seen,
},

View File

@@ -269,12 +269,67 @@ function messageUsesWorldInfoContent(message = {}) {
return String(message?.source || "") === "worldInfo-atDepth";
}
/**
 * Extract the text, role and sequence number from a message-like object.
 *
 * Two shapes are recognized:
 *   - `{ role, content }` — role is "user" only when `role` normalizes to "user";
 *   - `{ mes, is_user }`  — role is "user" only when `is_user === true`.
 *
 * @param {*} value - Candidate message object.
 * @returns {{content: string, role: "user"|"assistant", seq: number|null}|null}
 *   The descriptor, or `null` when `value` is not a plain message-like object.
 */
function getPromptMessageLikeDescriptor(value) {
  if (!value || typeof value !== "object" || Array.isArray(value)) {
    return null;
  }

  // Only real numbers and non-blank strings are seq candidates. Blind
  // `Number()` coercion maps null, "" and booleans to finite values, which
  // would label a message without a seq as "#0" instead of letting the
  // transcript fall back to its positional label.
  const rawSeq = value.seq;
  const isSeqCandidate =
    typeof rawSeq === "number" ||
    (typeof rawSeq === "string" && rawSeq.trim() !== "");
  const parsedSeq = isSeqCandidate ? Number(rawSeq) : Number.NaN;
  const seq = Number.isFinite(parsedSeq) ? parsedSeq : null;

  if (typeof value.content === "string") {
    const role = String(value.role || "assistant").trim().toLowerCase();
    return {
      content: value.content,
      role: role === "user" ? "user" : "assistant",
      seq,
    };
  }
  if (typeof value.mes === "string") {
    return {
      content: value.mes,
      role: value.is_user === true ? "user" : "assistant",
      seq,
    };
  }
  return null;
}
/**
 * Tell whether `value` is a non-empty array made up entirely of
 * message-like entries, as judged by `getPromptMessageLikeDescriptor`.
 *
 * @param {*} value - Candidate array.
 * @returns {boolean} `true` only for a non-empty array with no unrecognized entry.
 */
function isPromptMessageArray(value) {
  if (!Array.isArray(value) || value.length === 0) {
    return false;
  }
  const hasUnrecognizedEntry = value.some(
    (entry) => !getPromptMessageLikeDescriptor(entry),
  );
  return !hasUnrecognizedEntry;
}
/**
 * Render one message or an array of messages as a plain-text transcript.
 *
 * Each recognized message becomes `#<label> [<role>]: <content>`, where the
 * label is the message's `seq` when present and its 1-based position in the
 * input otherwise. Unrecognized entries are dropped, and the remaining lines
 * are joined with a blank line between them.
 *
 * @param {*} value - A message-like object or an array of them.
 * @returns {string} The transcript; empty when nothing is recognized.
 */
function formatPromptMessageTranscript(value) {
  const messages = Array.isArray(value) ? value : [value];
  const lines = [];
  messages.forEach((message, position) => {
    const info = getPromptMessageLikeDescriptor(message);
    if (!info) {
      return;
    }
    // The positional label uses the index in the input, so dropped entries
    // still leave gaps in the numbering.
    const label = info.seq != null ? `#${info.seq}` : `#${position + 1}`;
    lines.push(`${label} [${info.role}]: ${info.content}`);
  });
  return lines.join("\n\n");
}
function stringifyInterpolatedValue(value) {
if (value == null) return "";
if (typeof value === "string") return value;
if (typeof value === "number" || typeof value === "boolean") {
return String(value);
}
if (getPromptMessageLikeDescriptor(value)) {
return formatPromptMessageTranscript(value);
}
if (isPromptMessageArray(value)) {
return formatPromptMessageTranscript(value);
}
try {
return JSON.stringify(value, null, 2);
@@ -632,6 +687,32 @@ function sanitizePromptMessages(
.filter(Boolean);
}
/**
 * Choose what to feed the sanitizer for a prompt context field.
 *
 * For the `recentMessages` and `dialogueText` fields, a string value is
 * swapped for `context.chatMessages` when that is a valid message array;
 * the result is then flagged to be rendered back into a transcript.
 * In every other case the value is passed through unchanged.
 *
 * @param {string} fieldName - Context field being sanitized.
 * @param {object} context - Prompt context that may carry `chatMessages`.
 * @param {*} value - The field's current value.
 * @returns {{value: *, renderAsTranscript: boolean}} The sanitizer input.
 */
function resolveStructuredMessageSanitizerInput(fieldName = "", context = {}, value) {
  const field = String(fieldName || "").trim();
  const isTranscriptField =
    field === "recentMessages" || field === "dialogueText";
  const useChatMessages =
    isTranscriptField &&
    typeof value === "string" &&
    Array.isArray(context?.chatMessages) &&
    isPromptMessageArray(context.chatMessages);

  return useChatMessages
    ? { value: context.chatMessages, renderAsTranscript: true }
    : { value, renderAsTranscript: false };
}
function sanitizePromptContextInputs(
settings = {},
taskType,
@@ -697,13 +778,19 @@ function sanitizePromptContextInputs(
continue;
}
const value = sanitizedContext[fieldName];
const structuredSanitizerInput = resolveStructuredMessageSanitizerInput(
fieldName,
context,
value,
);
const valueForSanitizer = structuredSanitizerInput.value;
const regexStage = INPUT_REGEX_STAGE_BY_FIELD[fieldName] || "";
const regexRole = INPUT_REGEX_ROLE_BY_FIELD[fieldName] || "system";
const regexSourceType = INPUT_HOST_REGEX_SOURCE_BY_FIELD[fieldName] || "";
const sanitized = sanitizeInjectionStructuredValue(
settings,
taskType,
value,
valueForSanitizer,
{
fieldName,
path: fieldName,
@@ -720,12 +807,15 @@ function sanitizePromptContextInputs(
},
);
let sanitizedValue = sanitized.omit
? Array.isArray(value)
? Array.isArray(valueForSanitizer)
? []
: typeof value === "string"
: typeof valueForSanitizer === "string"
? ""
: null
: sanitized.value;
if (structuredSanitizerInput.renderAsTranscript) {
sanitizedValue = stringifyInterpolatedValue(sanitizedValue);
}
sanitizedValue = applyLocalRegexToStructuredValue(
sanitizedValue,
regexStage,

View File

@@ -777,6 +777,49 @@ function normalizeHostRegexSourceType(sourceType = "") {
return "";
}
/**
 * Copy formatter options and fill in the defaults used for prompt output.
 *
 * `isPrompt` defaults to `true` and `isMarkdown` to `false` when they are
 * null/undefined. `depth` ends up as a finite number or is removed.
 *
 * @param {object|null} formatterOptions - Caller-supplied options; non-objects are treated as empty.
 * @returns {object} A new, normalized options object; the input is not mutated.
 */
function normalizeHostFormatterOptions(formatterOptions = null) {
  const normalized =
    formatterOptions && typeof formatterOptions === "object"
      ? { ...formatterOptions }
      : {};
  if (normalized.isPrompt == null) {
    normalized.isPrompt = true;
  }
  if (normalized.isMarkdown == null) {
    normalized.isMarkdown = false;
  }

  // Only real numbers and non-blank strings count as a depth. Blind
  // `Number()` coercion maps null, "" and booleans to finite values, which
  // would turn "no depth" into depth 0 and make depth-bounded rules filter
  // text that carries no depth information at all.
  const rawDepth = normalized.depth;
  const isDepthCandidate =
    typeof rawDepth === "number" ||
    (typeof rawDepth === "string" && rawDepth.trim() !== "");
  const parsedDepth = isDepthCandidate ? Number(rawDepth) : Number.NaN;
  if (Number.isFinite(parsedDepth)) {
    normalized.depth = parsedDepth;
  } else {
    delete normalized.depth;
  }
  return normalized;
}
/**
 * Check whether a rule's depth window admits the depth in `formatterOptions`.
 *
 * When the options carry no finite depth, every rule matches. Otherwise a
 * rule is rejected if the depth is below its `minDepth` (honoured only when
 * finite and >= -1) or above its `maxDepth` (honoured only when finite
 * and >= 0).
 *
 * @param {{minDepth?: *, maxDepth?: *}|null} rule - Rule carrying optional depth bounds.
 * @param {{depth?: *}|null} formatterOptions - Options that may carry a depth.
 * @returns {boolean} `true` when the rule may be applied at this depth.
 */
function ruleMatchesFormatterDepth(rule, formatterOptions = null) {
  const messageDepth = Number(formatterOptions?.depth);
  if (!Number.isFinite(messageDepth)) {
    return true;
  }

  // Returns the numeric bound when it is set, finite and not below `floor`;
  // otherwise null, meaning the bound is ignored.
  const readBound = (raw, floor) => {
    if (raw == null) {
      return null;
    }
    const bound = Number(raw);
    return Number.isFinite(bound) && bound >= floor ? bound : null;
  };

  const lowerBound = readBound(rule?.minDepth, -1);
  const upperBound = readBound(rule?.maxDepth, 0);
  const belowMinimum = lowerBound !== null && messageDepth < lowerBound;
  const aboveMaximum = upperBound !== null && messageDepth > upperBound;
  return !(belowMinimum || aboveMaximum);
}
function buildHostRegexExecutionState(regexHost = null) {
const formatterAvailable =
typeof regexHost?.formatAsTavernRegexedString === "function";
@@ -835,9 +878,15 @@ function shouldReuseTavernRuleForSourceType(rule, sourceType = "", role = "syste
}
if (normalizedSourceType === "user_input") {
if (role === "mixed") {
return rule.sourceFlags.user !== false || rule.sourceFlags.assistant !== false;
}
return rule.sourceFlags.user !== false;
}
if (normalizedSourceType === "ai_output") {
if (role === "mixed") {
return rule.sourceFlags.user !== false || rule.sourceFlags.assistant !== false;
}
if (role === "user") {
return rule.sourceFlags.user !== false;
}
@@ -934,11 +983,13 @@ function applyHostRegexReuseFallback(
{
sourceType = "",
role = "system",
formatterOptions = null,
} = {},
) {
let output = String(input || "");
const appliedRules = [];
const normalizedSourceType = normalizeHostRegexSourceType(sourceType);
const normalizedFormatterOptions = normalizeHostFormatterOptions(formatterOptions);
for (const rule of Array.isArray(tavernRules) ? tavernRules : []) {
if (!shouldReuseTavernRuleForPrompt(rule, "host-fallback")) {
@@ -947,6 +998,9 @@ function applyHostRegexReuseFallback(
if (!shouldReuseTavernRuleForSourceType(rule, normalizedSourceType, role)) {
continue;
}
if (!ruleMatchesFormatterDepth(rule, normalizedFormatterOptions)) {
continue;
}
const result = applyOneRule(output, rule, "");
if (result.error) {
@@ -986,6 +1040,7 @@ export function applyHostRegexReuse(
const input = typeof text === "string" ? text : "";
const normalizedTaskType = String(taskType || "").trim();
const normalizedSourceType = normalizeHostRegexSourceType(sourceType);
const normalizedFormatterOptions = normalizeHostFormatterOptions(formatterOptions);
const profile = getActiveTaskProfile(settings, normalizedTaskType);
const regexConfig = profile?.regex || {};
const regexHost = getRegexHost();
@@ -1064,9 +1119,7 @@ export function applyHostRegexReuse(
input,
normalizedSourceType,
"prompt",
formatterOptions && typeof formatterOptions === "object"
? formatterOptions
: undefined,
normalizedFormatterOptions,
) ?? input,
);
pushDebug(debugCollector, {
@@ -1101,6 +1154,7 @@ export function applyHostRegexReuse(
const fallback = applyHostRegexReuseFallback(input, tavernRules, {
sourceType: normalizedSourceType,
role,
formatterOptions: normalizedFormatterOptions,
});
const fallbackReason =
executionState.mode === "host-unavailable"

View File

@@ -51,6 +51,7 @@ registerHooks({
const { buildTaskLlmPayload, buildTaskPrompt } = await import("../prompting/prompt-builder.js");
const { createDefaultTaskProfiles } = await import("../prompting/prompt-profiles.js");
const { initializeHostAdapter } = await import("../host/adapter/index.js");
const settings = {
taskProfilesVersion: 3,
@@ -146,4 +147,80 @@ assert.match(String(recallFormatBlock?.content || ""), /active_owner_keys/);
assert.match(String(recallFormatBlock?.content || ""), /active_owner_scores/);
assert.match(String(recallRulesBlock?.content || ""), /剧情时间/);
// --- Host regex reuse for extraction inputs -------------------------------
// Records every call made to the stub formatter so the assertions below can
// inspect the source, destination and options it was invoked with.
const formatterCalls = [];
// Install a stub regex provider: it exposes no tavern rules, and its
// formatter strips <action>…</action> from ai_output text and <u>/</u> tags
// from user_input text, leaving any other source untouched.
initializeHostAdapter({
regexProvider: {
getTavernRegexes() {
return [];
},
isCharacterTavernRegexesEnabled() {
return true;
},
formatAsTavernRegexedString(text, source, destination, options) {
formatterCalls.push({ text, source, destination, options });
if (source === "ai_output") {
return String(text || "").replace(/<action>.*?<\/action>/g, "");
}
if (source === "user_input") {
return String(text || "").replace(/<u>|<\/u>/g, "");
}
return String(text || "");
},
},
});
// Build an "extract" prompt where recentMessages is a placeholder string and
// the structured messages are supplied through chatMessages.
const regexAwarePromptBuild = await buildTaskPrompt(settings, "extract", {
taskName: "extract",
charDescription: "",
userPersona: "",
recentMessages: "这里会被 chatMessages 回填",
chatMessages: [
{
seq: 36,
role: "assistant",
content: "<action>挥手</action>继续说明",
},
{
seq: 37,
role: "user",
content: "用户<u>输入</u>",
},
],
graphStats: "node_count=1",
schema: "event(title, summary)",
currentRange: "36 ~ 37",
});
const regexAwarePayload = buildTaskLlmPayload(
regexAwarePromptBuild,
"fallback-user",
);
const regexAwareRecentBlock = regexAwarePayload.promptMessages.find(
(message) => message.sourceKey === "recentMessages",
);
// The recentMessages block must be a transcript rendered from chatMessages,
// labelled by seq and role, with the stub formatter's replacements applied.
assert.match(String(regexAwareRecentBlock?.content || ""), /#36 \[assistant\]: 继续说明/);
assert.match(String(regexAwareRecentBlock?.content || ""), /#37 \[user\]: 用户输入/);
assert.doesNotMatch(String(regexAwareRecentBlock?.content || ""), /action|<u>|<\/u>/i);
// The assistant message (first of two) must reach the formatter as ai_output
// with depth 1 and isPrompt set.
assert.equal(
formatterCalls.some(
(call) =>
call.source === "ai_output" &&
call.destination === "prompt" &&
call.options?.depth === 1 &&
call.options?.isPrompt === true,
),
true,
);
// The user message (last of two) must reach the formatter as user_input with
// depth 0 and isPrompt set.
assert.equal(
formatterCalls.some(
(call) =>
call.source === "user_input" &&
call.destination === "prompt" &&
call.options?.depth === 0 &&
call.options?.isPrompt === true,
),
true,
);
// Re-initialize the host adapter with an empty config.
// NOTE(review): presumably this removes the stub provider installed above — confirm.
initializeHostAdapter({});
console.log("prompt-builder-defaults tests passed");

View File

@@ -300,8 +300,8 @@ try {
isCharacterTavernRegexesEnabled() {
return true;
},
formatAsTavernRegexedString(text, source, destination) {
formatterCalls.push({ text, source, destination });
formatAsTavernRegexedString(text, source, destination, options) {
formatterCalls.push({ text, source, destination, options });
return String(text || "").replace(/Alpha/g, "HOST");
},
},
@@ -330,6 +330,10 @@ try {
text: "Alpha Beta",
source: "user_input",
destination: "prompt",
options: {
isPrompt: true,
isMarkdown: false,
},
},
]);
assert.equal(fullBridgeDebug.entries[0].executionMode, "host-real");
@@ -398,6 +402,84 @@ try {
assert.equal(fallbackOutput.text, "C1");
assert.equal(fallbackDebug.entries[0].executionMode, "host-fallback");
// --- Depth-aware rule handling --------------------------------------------
// Register a single global rule whose depth window is exactly 1
// (minDepth = maxDepth = 1) and that rewrites "Gamma" to "DEPTH".
setTestContext({
extensionSettings: {
regex: [
createTavernRule("depth-aware", "/Gamma/g", "DEPTH", {
placement: [PLACEMENT.WORLD_INFO],
minDepth: 1,
maxDepth: 1,
}),
],
preset_allowed_regex: {},
character_allowed_regex: [],
},
});
// Initialize the host adapter with an empty config (no regex provider passed).
// NOTE(review): presumably this makes applyHostRegexReuse take its fallback path — confirm.
initializeHostAdapter({});
// Depth 0 lies outside the rule's window.
const depthMissResult = applyHostRegexReuse(
buildSettings({
sources: {
global: true,
preset: false,
character: false,
},
}),
"extract",
"Gamma",
{
sourceType: "world_info",
role: "system",
formatterOptions: {
depth: 0,
},
debugCollector: { entries: [] },
},
);
// Depth 1 lies inside the rule's window.
const depthHitResult = applyHostRegexReuse(
buildSettings({
sources: {
global: true,
preset: false,
character: false,
},
}),
"extract",
"Gamma",
{
sourceType: "world_info",
role: "system",
formatterOptions: {
depth: 1,
},
debugCollector: { entries: [] },
},
);
// Outside the window the text is untouched; inside it the rule is applied.
assert.equal(depthMissResult.text, "Gamma");
assert.equal(depthHitResult.text, "DEPTH");
// Switch the test context to fallbackExtensionSettings plus one preset script
// and one character script, then re-initialize the host adapter, before the
// inspection assertions that follow.
setTestContext({
extensionSettings: fallbackExtensionSettings,
presetScripts: [
createTavernRule("preset-fallback", "/G1/g", "P1", {
promptOnly: true,
}),
],
characters: [
{
avatar: "hero.png",
data: {
extensions: {
regex_scripts: [
createTavernRule("character-fallback", "/P1/g", "C1", {
promptOnly: true,
}),
],
},
},
},
],
});
initializeHostAdapter({});
const fallbackInspect = inspectTaskRegexReuse(buildSettings(), "extract");
assert.equal(fallbackInspect.activeRuleCount, 3);
assert.deepEqual(

View File

@@ -6233,7 +6233,11 @@ function _renderRegexReuseBadges(rule = {}) {
text: "仅 Prompt",
});
}
if (rule.promptStageMode !== "skip" && rule.promptStageApplies === false) {
if (
rule.sourceType === "local" &&
rule.promptStageMode !== "skip" &&
rule.promptStageApplies === false
) {
badges.push({
className: "is-skip",
text: "当前任务未启用",