// Mirror of https://github.com/Youzini-afk/ST-Bionic-Memory-Ecology.git
// Synced 2026-05-15 22:30:38 +08:00 - 572 lines, 15 KiB, JavaScript
import { sanitizeMvuContent } from "./mvu-compat.js";
import { applyHostRegexReuse } from "./task-regex.js";
/**
 * Provenance labels attached to prompt content.
 *
 * Within this module, content labelled TEMPLATE_OWNED is never sanitized,
 * and HOST_INJECTED is the default origin assumed by the sanitize helpers.
 * The object is frozen so the labels cannot be reassigned at runtime.
 */
export const PROMPT_CONTENT_ORIGIN = Object.freeze({
  TEMPLATE_OWNED: "template-owned",
  HOST_INJECTED: "host-injected",
  WORLD_INFO_RENDERED: "world-info-rendered",
});
/**
 * Maps a caller-facing mode name onto the mode string handed to the
 * MVU content sanitizer.
 *
 * @param {string} [mode="injection-safe"] Caller-facing mode name.
 * @returns {"final-safe"|"aggressive"} "final-safe" only when the trimmed
 *   mode is exactly "final-injection-safe"; every other value (including
 *   empty or non-string input) yields "aggressive".
 */
function normalizeSanitizerMode(mode = "injection-safe") {
  const requested = String(mode || "").trim();
  return requested === "final-injection-safe" ? "final-safe" : "aggressive";
}
/**
 * Decides whether a piece of content may be sanitized at all.
 *
 * @param {{sanitizationEligible?: boolean, contentOrigin?: string}} [options]
 * @returns {boolean} false when the caller opted out explicitly
 *   (`sanitizationEligible === false`) or when the content is template-owned;
 *   true otherwise, including when no origin is supplied.
 */
function isSanitizationEligible(options = {}) {
  if (options?.sanitizationEligible === false) {
    return false;
  }
  const origin = String(options?.contentOrigin || "");
  return origin !== PROMPT_CONTENT_ORIGIN.TEMPLATE_OWNED;
}
/**
 * Coerces a list of sanitizer reasons into trimmed, non-empty strings.
 *
 * @param {unknown} [reasons=[]] Candidate list of reasons.
 * @returns {string[]} Cleaned reasons; an empty array when the input is not
 *   an array. Falsy entries and entries that trim to "" are dropped.
 */
function normalizeReasons(reasons = []) {
  if (!Array.isArray(reasons)) {
    return [];
  }
  return reasons
    .map((item) => String(item || "").trim())
    .filter(Boolean);
}
/**
 * Reduces a message role to either "user" or "assistant".
 *
 * @param {unknown} [value=""] Role candidate; only strings are inspected
 *   (case-insensitively, after trimming).
 * @param {boolean} [isUser=false] Fallback flag used when `value` is not
 *   exactly "user" or "assistant" (for example "system" or a non-string).
 * @returns {"user"|"assistant"}
 */
function normalizeMessageLikeRole(value = "", isUser = false) {
  if (typeof value === "string") {
    const normalized = value.trim().toLowerCase();
    if (normalized === "user" || normalized === "assistant") {
      return normalized;
    }
  }
  return isUser ? "user" : "assistant";
}
/**
 * Recognizes message-shaped plain objects and describes how to treat them.
 *
 * Two shapes are recognized, checked in this order:
 *   - `{ content: string, role? }` - role taken from `role`;
 *   - `{ mes: string, is_user? }`  - role taken from the `is_user` flag.
 *
 * @param {unknown} value Candidate value.
 * @returns {{contentKey: "content"|"mes", role: "user"|"assistant",
 *   sourceType: "user_input"|"ai_output", depth: number|null}|null}
 *   Descriptor, or null when `value` is not a plain object of either shape.
 *   `depth` is the numeric form of `value.depth`, or null when that is
 *   absent, null, or not a finite number.
 */
function getStructuredMessageDescriptor(value) {
  if (!value || typeof value !== "object" || Array.isArray(value)) {
    return null;
  }

  let contentKey;
  let role;
  if (typeof value.content === "string") {
    contentKey = "content";
    role = normalizeMessageLikeRole(value.role, false);
  } else if (typeof value.mes === "string") {
    contentKey = "mes";
    role = normalizeMessageLikeRole("", Boolean(value.is_user));
  } else {
    return null;
  }

  // Number(null) is 0, so null must be excluded explicitly; otherwise a
  // message carrying `depth: null` would be reported as depth 0.
  const rawDepth = value.depth;
  const depth =
    rawDepth != null && Number.isFinite(Number(rawDepth))
      ? Number(rawDepth)
      : null;

  return {
    contentKey,
    role,
    sourceType: role === "user" ? "user_input" : "ai_output",
    depth,
  };
}
/**
 * Builds the formatter options object passed to the host regex layer.
 *
 * @param {object|null} [baseOptions=null] Base options; ignored unless it is
 *   an object.
 * @param {object} [overrides={}] Values that win over `baseOptions`.
 * @returns {object} A new object (inputs are not mutated) where:
 *   - `isPrompt` defaults to true and `isMarkdown` to false when nullish;
 *   - `depth` is coerced to a number, or removed when it is nullish or not
 *     a finite number.
 */
function mergeFormatterOptions(baseOptions = null, overrides = {}) {
  const base =
    baseOptions && typeof baseOptions === "object" ? baseOptions : {};
  const merged = { ...base, ...overrides };

  merged.isPrompt ??= true;
  merged.isMarkdown ??= false;

  // Number(null) is 0, so a nullish depth must be rejected before coercion;
  // otherwise "no depth" would silently turn into depth 0.
  if (merged.depth == null || !Number.isFinite(Number(merged.depth))) {
    delete merged.depth;
  } else {
    merged.depth = Number(merged.depth);
  }

  return merged;
}
/**
 * Produces formatter options for one message, resolving its depth.
 *
 * Depth resolution order:
 *   1. `descriptor.depth` when it is a finite number;
 *   2. otherwise the distance from the end of the list
 *      (`total - index - 1`, floored at 0) when a valid position is known;
 *   3. otherwise no depth override is applied.
 *
 * @param {object|null} [baseOptions=null] Options to extend.
 * @param {{depth?: number|null}|null} [descriptor=null] Message descriptor.
 * @param {number} [index=-1] Position in the list; negative means unknown.
 * @param {number} [total=0] Length of the list.
 * @returns {object} Result of mergeFormatterOptions.
 */
function buildMessageFormatterOptions(
  baseOptions = null,
  descriptor = null,
  index = -1,
  total = 0,
) {
  let depth =
    descriptor?.depth != null && Number.isFinite(Number(descriptor.depth))
      ? Number(descriptor.depth)
      : null;

  // A negative index is the "position unknown" default; deriving a depth
  // from it would yield `total`, which is past the end of the list.
  const hasPosition = Number.isFinite(index) && index >= 0 && total > 0;
  if (!Number.isFinite(depth) && hasPosition) {
    depth = Math.max(total - index - 1, 0);
  }

  return Number.isFinite(depth)
    ? mergeFormatterOptions(baseOptions, { depth })
    : mergeFormatterOptions(baseOptions);
}
/**
 * Appends a trimmed string to `target` unless it is empty or already there.
 *
 * @param {string[]} [target=[]] Array mutated in place.
 * @param {unknown} [value=""] Value to add; stringified and trimmed first.
 * @returns {void}
 */
function pushUnique(target = [], value = "") {
  const normalized = String(value || "").trim();
  if (normalized && !target.includes(normalized)) {
    target.push(normalized);
  }
}
/**
 * Creates a fresh debug-state record for the injection sanitizer.
 *
 * Every call returns a new object with new arrays, so separate runs never
 * share state. The `sanitized*` / `sanitizer*` fields are filled in by
 * recordSanitizerDebug, the `hostReuse*` / `regexExecutionMode` /
 * `hostFormatter*` / `fallbackReason` fields by recordHostReuseDebug, and
 * `finalMessageStripCount` by sanitizeInjectionMessages.
 * `worldInfoBlockedContentHits` is not written by this module.
 *
 * @returns {object} Debug state with zeroed counters and empty lists.
 */
export function createEmptyInjectionSanitizerDebug() {
  return {
    sanitizedFieldCount: 0,
    sanitizedFields: [],
    finalMessageStripCount: 0,
    worldInfoBlockedContentHits: 0,
    sanitizerAppliedFields: [],
    sanitizerHitKinds: [],
    hostReuseAppliedFields: [],
    hostReuseSkippedDisplayOnlyRules: 0,
    regexExecutionMode: "host-unavailable",
    hostFormatterAvailable: false,
    hostFormatterSource: "",
    fallbackReason: "",
  };
}
/**
 * Logs one sanitizer outcome into the debug state.
 *
 * Nothing is recorded when there is no debug state or when the result
 * neither changed nor dropped the content.
 *
 * @param {object|null} debugState Debug record mutated in place.
 * @param {string} path Field path the result belongs to.
 * @param {{changed?: boolean, dropped?: boolean, reasons?: string[],
 *   blockedHitCount?: number}} [result={}] Sanitizer outcome.
 * @param {string} [stage=""] Label of the sanitization stage.
 * @returns {void}
 */
function recordSanitizerDebug(debugState, path, result = {}, stage = "") {
  if (!debugState || (!result.changed && !result.dropped)) {
    return;
  }

  const reasons = normalizeReasons(result.reasons);
  debugState.sanitizedFields.push({
    name: String(path || ""),
    stage: String(stage || ""),
    changed: Boolean(result.changed),
    dropped: Boolean(result.dropped),
    reasons,
    blockedHitCount: Number(result.blockedHitCount || 0),
  });
  // The counter always mirrors the list length rather than being incremented.
  debugState.sanitizedFieldCount = debugState.sanitizedFields.length;

  pushUnique(debugState.sanitizerAppliedFields, path);
  for (const reason of reasons) {
    pushUnique(debugState.sanitizerHitKinds, reason);
  }
}
/**
 * Copies the outcome of a host-regex reuse pass into the debug state.
 *
 * The execution mode keeps its previous value when the result carries none.
 * The formatter fields and fallback reason are overwritten on every call.
 * The skipped display-only rule count keeps the largest value seen so far.
 *
 * @param {object|null} debugState Debug record mutated in place.
 * @param {string} path Field path, stored when the pass changed the text.
 * @param {object} [result={}] Host reuse outcome; ignored unless an object.
 * @returns {void}
 */
function recordHostReuseDebug(debugState, path, result = {}) {
  if (!debugState || !result || typeof result !== "object") {
    return;
  }

  debugState.regexExecutionMode = String(
    result.executionMode || debugState.regexExecutionMode || "host-unavailable",
  );
  debugState.hostFormatterAvailable = Boolean(result.formatterAvailable);
  debugState.hostFormatterSource = String(result.formatterSource || "");
  debugState.fallbackReason = String(result.fallbackReason || "");
  debugState.hostReuseSkippedDisplayOnlyRules = Math.max(
    Number(debugState.hostReuseSkippedDisplayOnlyRules || 0),
    Number(result.skippedDisplayOnlyRuleCount || 0),
  );

  if (result.changed) {
    pushUnique(debugState.hostReuseAppliedFields, path);
  }
}
/**
 * Sanitizes one text value in two optional stages: the MVU content
 * sanitizer, then host regex reuse.
 *
 * Content is processed only when it is eligible: `sanitizationEligible` is
 * truthy and the origin is not template-owned. The host regex stage
 * additionally needs `applyHostRegex`, a non-empty `regexSourceType`, and
 * non-empty text left over from the first stage.
 *
 * @param {object} [settings={}] Passed through to applyHostRegexReuse.
 * @param {string} taskType Passed through to applyHostRegexReuse.
 * @param {unknown} text Text to sanitize; non-strings are treated as "".
 * @param {object} [options]
 * @param {string} [options.mode="injection-safe"] Caller-facing sanitizer mode.
 * @param {Array} [options.blockedContents=[]] Forwarded to the sanitizer.
 * @param {string} [options.contentOrigin] One of PROMPT_CONTENT_ORIGIN.
 * @param {boolean} [options.sanitizationEligible=true]
 * @param {string} [options.regexSourceType=""] Source type for host regex.
 * @param {string} [options.role="system"]
 * @param {object|null} [options.formatterOptions=null]
 * @param {object|null} [options.debugState=null] Debug record to update.
 * @param {object|null} [options.regexCollector=null] Forwarded as
 *   `debugCollector` to applyHostRegexReuse.
 * @param {boolean} [options.applySanitizer=true]
 * @param {boolean} [options.applyHostRegex=true]
 * @param {string} [options.path=""] Field path used in debug entries.
 * @param {string} [options.stage=""] Stage label used in debug entries.
 * @returns {{text: string, changed: boolean, dropped: boolean,
 *   reasons: string[], blockedHitCount: number, artifactRemovedCount: number,
 *   hostReuseChanged: boolean, executionMode: string,
 *   formatterAvailable: boolean, formatterSource: string,
 *   fallbackReason: string, skippedDisplayOnlyRuleCount: number}}
 */
export function sanitizeInjectionText(
  settings = {},
  taskType,
  text,
  {
    mode = "injection-safe",
    blockedContents = [],
    contentOrigin = PROMPT_CONTENT_ORIGIN.HOST_INJECTED,
    sanitizationEligible = true,
    regexSourceType = "",
    role = "system",
    formatterOptions = null,
    debugState = null,
    regexCollector = null,
    applySanitizer = true,
    applyHostRegex = true,
    path = "",
    stage = "",
  } = {},
) {
  const originalText = typeof text === "string" ? text : "";
  const eligible =
    sanitizationEligible &&
    isSanitizationEligible({ sanitizationEligible, contentOrigin });

  // Stage 1: MVU content sanitizer (or a no-op result when skipped).
  const sanitizerResult =
    eligible && applySanitizer
      ? sanitizeMvuContent(originalText, {
          mode: normalizeSanitizerMode(mode),
          blockedContents,
        })
      : {
          text: originalText,
          changed: false,
          dropped: false,
          reasons: [],
          blockedHitCount: 0,
          artifactRemovedCount: 0,
        };
  recordSanitizerDebug(debugState, path, sanitizerResult, stage);

  // Stage 2: host regex reuse (or a placeholder result when skipped).
  const afterSanitizer = String(sanitizerResult.text || "");
  const hostReuseAttempted = Boolean(
    eligible && applyHostRegex && regexSourceType && afterSanitizer.length > 0,
  );
  const hostReuseResult = hostReuseAttempted
    ? applyHostRegexReuse(settings, taskType, afterSanitizer, {
        sourceType: regexSourceType,
        role,
        debugCollector: regexCollector,
        formatterOptions: mergeFormatterOptions(formatterOptions),
      })
    : {
        text: afterSanitizer,
        changed: false,
        executionMode: "host-unavailable",
        formatterAvailable: false,
        formatterSource: "",
        fallbackReason: "",
        skippedDisplayOnlyRuleCount: 0,
      };

  // Record only passes that actually ran. Recording the placeholder of a
  // skipped pass would reset the execution mode and formatter details that
  // an earlier real pass stored in the same debug state.
  if (hostReuseAttempted) {
    recordHostReuseDebug(debugState, path, hostReuseResult);
  }

  const finalText = String(hostReuseResult.text || "");
  return {
    text: finalText,
    changed: finalText !== originalText,
    dropped: Boolean(sanitizerResult.dropped),
    reasons: normalizeReasons(sanitizerResult.reasons),
    blockedHitCount: Number(sanitizerResult.blockedHitCount || 0),
    artifactRemovedCount: Number(sanitizerResult.artifactRemovedCount || 0),
    hostReuseChanged: Boolean(hostReuseResult.changed),
    executionMode: String(hostReuseResult.executionMode || "host-unavailable"),
    formatterAvailable: Boolean(hostReuseResult.formatterAvailable),
    formatterSource: String(hostReuseResult.formatterSource || ""),
    fallbackReason: String(hostReuseResult.fallbackReason || ""),
    skippedDisplayOnlyRuleCount: Number(
      hostReuseResult.skippedDisplayOnlyRuleCount || 0,
    ),
  };
}
/**
 * Reports whether a value contains, at any depth, an object with one of the
 * MVU state keys (`stat_data`, `display_data`, `delta_data`, `$internal`).
 * Keys are compared after trimming and lower-casing.
 *
 * @param {unknown} value Value to inspect; non-objects are never containers.
 * @param {WeakSet<object>} [seen] Objects already visited, which keeps the
 *   walk finite on cyclic structures.
 * @returns {boolean}
 */
function looksLikeMvuStateContainer(value, seen = new WeakSet()) {
  if (!value || typeof value !== "object" || seen.has(value)) {
    return false;
  }
  seen.add(value);

  if (Array.isArray(value)) {
    return value.some((item) => looksLikeMvuStateContainer(item, seen));
  }

  const hasStateKey = Object.keys(value).some((key) =>
    ["stat_data", "display_data", "delta_data", "$internal"].includes(
      String(key || "").trim().toLowerCase(),
    ),
  );
  if (hasStateKey) {
    return true;
  }

  return Object.values(value).some((item) =>
    looksLikeMvuStateContainer(item, seen),
  );
}
/**
 * Decides whether an object entry must be stripped as MVU state.
 *
 * @param {string} key Entry key; compared after trimming and lower-casing.
 * @param {unknown} value Entry value, inspected only for variable containers.
 * @returns {string} "mvu_state_key_removed" for a direct MVU state key,
 *   "mvu_variables_container_removed" for a `variables` /
 *   `message_variables` / `chat_variables` entry whose value holds MVU
 *   state, or "" when the entry should be kept.
 */
function getMvuObjectKeyStripReason(key, value) {
  const normalizedKey = String(key || "").trim().toLowerCase();

  const isStateKey = [
    "stat_data",
    "display_data",
    "delta_data",
    "$internal",
  ].includes(normalizedKey);
  if (isStateKey) {
    return "mvu_state_key_removed";
  }

  const isVariablesKey = [
    "variables",
    "message_variables",
    "chat_variables",
  ].includes(normalizedKey);
  if (isVariablesKey && looksLikeMvuStateContainer(value)) {
    return "mvu_variables_container_removed";
  }

  return "";
}
/**
 * Appends one segment to a debug path.
 *
 * Segments starting with "[" are treated as index segments and are glued on
 * without a dot (`a` + `[0]` gives `a[0]`); other segments are joined with
 * a dot. With an empty base, an index segment loses its first and last
 * character (`[0]` gives `0`).
 *
 * @param {string} [basePath=""] Path built so far.
 * @param {string} [segment=""] Segment to add; empty returns `basePath`.
 * @returns {string}
 */
function joinStructuredPath(basePath = "", segment = "") {
  const normalizedSegment = String(segment || "");
  if (!normalizedSegment) {
    return basePath;
  }

  const isIndexSegment = normalizedSegment.startsWith("[");
  if (!basePath) {
    return isIndexSegment ? normalizedSegment.slice(1, -1) : normalizedSegment;
  }
  return isIndexSegment
    ? `${basePath}${normalizedSegment}`
    : `${basePath}.${normalizedSegment}`;
}
/**
 * Recursively sanitizes a structured value (string, array, or plain object)
 * and returns a sanitized copy; the input is not mutated.
 *
 * - Strings go through sanitizeInjectionText.
 * - Arrays are rebuilt without the children flagged for omission.
 * - Objects are rebuilt without MVU state keys (when `stripMvuContainers`)
 *   and without omitted children. For message-shaped objects only the
 *   content field gets host regex applied, using the message's own role,
 *   source type, and depth.
 * - Any other value is returned untouched.
 *
 * `seen` tracks only the containers on the current recursion path. A
 * container reached again through a cycle is returned as-is; a container
 * merely referenced from several places is sanitized at every occurrence.
 *
 * @param {object} [settings={}] Passed through to sanitizeInjectionText.
 * @param {string} taskType Passed through to sanitizeInjectionText.
 * @param {unknown} value Value to sanitize.
 * @param {object} [options] Same options as sanitizeInjectionText, plus:
 * @param {string} [options.fieldName=""] Name of the top-level field.
 * @param {string} [options.path=fieldName] Debug path of `value`.
 * @param {boolean} [options.stripMvuContainers=true] Remove MVU state keys.
 * @param {WeakSet<object>} [options.seen] Containers on the current path.
 * @returns {{value: unknown, changed: boolean, omit: boolean,
 *   details: object|null}} `omit` tells the parent to leave the value out;
 *   `details` carries the sanitizeInjectionText result for strings.
 */
export function sanitizeInjectionStructuredValue(
  settings = {},
  taskType,
  value,
  {
    fieldName = "",
    path = fieldName,
    mode = "injection-safe",
    blockedContents = [],
    contentOrigin = PROMPT_CONTENT_ORIGIN.HOST_INJECTED,
    sanitizationEligible = true,
    regexSourceType = "",
    role = "system",
    formatterOptions = null,
    debugState = null,
    regexCollector = null,
    applySanitizer = true,
    applyHostRegex = true,
    stripMvuContainers = true,
    seen = new WeakSet(),
  } = {},
) {
  if (typeof value === "string") {
    const sanitized = sanitizeInjectionText(settings, taskType, value, {
      mode,
      blockedContents,
      contentOrigin,
      sanitizationEligible,
      regexSourceType,
      role,
      formatterOptions,
      debugState,
      regexCollector,
      applySanitizer,
      applyHostRegex,
      path,
      stage: mode,
    });
    return {
      value: sanitized.text,
      changed: Boolean(sanitized.changed || sanitized.dropped),
      // Omit only text that had content before and has none left.
      omit:
        !String(sanitized.text || "").trim() &&
        String(value || "").trim().length > 0,
      details: sanitized,
    };
  }

  const untouched = { value, changed: false, omit: false, details: null };
  if (!value || typeof value !== "object") {
    return untouched;
  }
  // Already on the current recursion path: a cycle, so stop descending.
  if (seen.has(value)) {
    return untouched;
  }

  // Options every child inherits unchanged.
  const inherited = {
    fieldName,
    mode,
    blockedContents,
    contentOrigin,
    sanitizationEligible,
    debugState,
    regexCollector,
    applySanitizer,
    stripMvuContainers,
    seen,
  };

  seen.add(value);
  try {
    if (Array.isArray(value)) {
      const sanitizedArray = [];
      let changed = false;
      for (let index = 0; index < value.length; index += 1) {
        const messageDescriptor = getStructuredMessageDescriptor(value[index]);
        const childResult = sanitizeInjectionStructuredValue(
          settings,
          taskType,
          value[index],
          {
            ...inherited,
            path: joinStructuredPath(path, `[${index}]`),
            regexSourceType,
            role,
            // Message entries get a depth derived from their list position.
            formatterOptions: messageDescriptor
              ? buildMessageFormatterOptions(
                  formatterOptions,
                  messageDescriptor,
                  index,
                  value.length,
                )
              : formatterOptions,
            applyHostRegex,
          },
        );
        if (childResult.omit) {
          changed = true;
          continue;
        }
        sanitizedArray.push(childResult.value);
        if (childResult.changed) {
          changed = true;
        }
      }
      return {
        value: sanitizedArray,
        changed: changed || sanitizedArray.length !== value.length,
        omit: value.length > 0 && sanitizedArray.length === 0,
        details: null,
      };
    }

    const originalLooksMvuContainer = looksLikeMvuStateContainer(value);
    const messageDescriptor = getStructuredMessageDescriptor(value);
    const sanitizedObject = {};
    let changed = false;
    let keptEntries = 0;

    for (const [key, entryValue] of Object.entries(value)) {
      const stripReason = stripMvuContainers
        ? getMvuObjectKeyStripReason(key, entryValue)
        : "";
      if (stripReason) {
        changed = true;
        recordSanitizerDebug(
          debugState,
          joinStructuredPath(path, key),
          {
            changed: true,
            dropped: true,
            reasons: [stripReason],
            blockedHitCount: 0,
          },
          mode,
        );
        continue;
      }

      const isMessageContentField = Boolean(
        messageDescriptor && key === messageDescriptor.contentKey,
      );
      // Inside a message object, host regex runs on the content field only.
      let childApplyHostRegex = applyHostRegex;
      if (messageDescriptor) {
        childApplyHostRegex = isMessageContentField
          ? applyHostRegex && Boolean(messageDescriptor.sourceType)
          : false;
      }

      const childResult = sanitizeInjectionStructuredValue(
        settings,
        taskType,
        entryValue,
        {
          ...inherited,
          path: joinStructuredPath(path, key),
          regexSourceType: isMessageContentField
            ? messageDescriptor.sourceType
            : regexSourceType,
          role: isMessageContentField ? messageDescriptor.role : role,
          formatterOptions: isMessageContentField
            ? buildMessageFormatterOptions(formatterOptions, messageDescriptor)
            : formatterOptions,
          applyHostRegex: childApplyHostRegex,
        },
      );
      if (childResult.omit) {
        changed = true;
        continue;
      }
      sanitizedObject[key] = childResult.value;
      keptEntries += 1;
      if (childResult.changed) {
        changed = true;
      }
    }

    return {
      value: sanitizedObject,
      changed,
      // An MVU container that ends up empty is dropped from its parent.
      omit: originalLooksMvuContainer && keptEntries === 0,
      details: null,
    };
  } finally {
    // Leave the path on the way out so that a second, non-cyclic reference
    // to this container is sanitized too instead of being returned raw.
    seen.delete(value);
  }
}
/**
 * Runs the final sanitization pass over a list of prompt messages.
 *
 * A message is processed only when it carries `sanitizationEligible === true`
 * and its `contentOrigin` is not template-owned (a missing origin counts as
 * template-owned). All other messages are returned as the same object.
 * Processed messages are copied with the sanitized `content`; those whose
 * content ends up blank are removed from the result. Host regex is never
 * applied in this pass.
 *
 * @param {object} [settings={}] Passed through to sanitizeInjectionText.
 * @param {string} taskType Passed through to sanitizeInjectionText.
 * @param {Array<object>} [messages=[]] Messages; non-arrays count as empty.
 * @param {object} [options]
 * @param {Array} [options.blockedContents=[]] Forwarded to the sanitizer.
 * @param {object|null} [options.debugState=null] Debug record;
 *   `finalMessageStripCount` is incremented per changed or dropped message.
 * @param {object|null} [options.regexCollector=null]
 * @returns {Array<object>} Sanitized message list.
 */
export function sanitizeInjectionMessages(
  settings = {},
  taskType,
  messages = [],
  {
    blockedContents = [],
    debugState = null,
    regexCollector = null,
  } = {},
) {
  const source = Array.isArray(messages) ? messages : [];

  return source
    .map((message, index) => {
      const contentOrigin =
        String(message?.contentOrigin || "").trim() ||
        PROMPT_CONTENT_ORIGIN.TEMPLATE_OWNED;
      const sanitizationEligible =
        message?.sanitizationEligible === true &&
        contentOrigin !== PROMPT_CONTENT_ORIGIN.TEMPLATE_OWNED;
      if (!sanitizationEligible) {
        return message;
      }

      const sanitized = sanitizeInjectionText(
        settings,
        taskType,
        String(message?.content || ""),
        {
          mode: "final-injection-safe",
          blockedContents,
          contentOrigin,
          sanitizationEligible,
          regexSourceType: String(message?.regexSourceType || ""),
          role: message?.role || "system",
          debugState,
          regexCollector,
          applySanitizer: true,
          applyHostRegex: false,
          path: `message[${index}]`,
          stage: "final-injection-safe",
        },
      );

      if (debugState && (sanitized.changed || sanitized.dropped)) {
        debugState.finalMessageStripCount += 1;
      }
      // Returning null marks the message for removal by the filter below.
      if (!String(sanitized.text || "").trim()) {
        return null;
      }
      return { ...message, content: sanitized.text };
    })
    .filter(Boolean);
}