mirror of
https://github.com/Youzini-afk/ST-Bionic-Memory-Ecology.git
synced 2026-05-15 22:30:38 +08:00
feat: improve shujuku-compatible extraction and recall input
This commit is contained in:
453
maintenance/extraction-context.js
Normal file
453
maintenance/extraction-context.js
Normal file
@@ -0,0 +1,453 @@
|
||||
/**
 * Split a free-form configuration string into trimmed, non-empty entries.
 *
 * Entries are separated by commas and/or line breaks (CR or LF); runs of
 * separators count as a single separator.
 *
 * @param {*} [value=""] Raw configuration text. Falsy values are treated as "".
 * @returns {string[]} The entries in their original order, without blanks.
 */
function splitConfigText(value = "") {
  return String(value || "")
    .split(/[\r\n,]+/)
    .map((entry) => entry.trim())
    .filter(Boolean);
}
|
||||
|
||||
/**
 * Escape regular-expression metacharacters so the text can be embedded in a
 * RegExp pattern and matched literally.
 *
 * @param {*} [value=""] Text to escape. Falsy values are treated as "".
 * @returns {string} The text with each of . * + ? ^ $ { } ( ) | [ ] \ prefixed by a backslash.
 */
function escapeRegex(value = "") {
  return String(value || "").replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
}
|
||||
|
||||
/**
 * Normalize one raw rule definition into a rule object.
 *
 * Accepted inputs:
 * - a string: treated as a tag name;
 * - an object with `tag` (or `name`): a tag rule, with optional `label`;
 * - an object with `start`/`open`/`begin` and `end`/`close`/`finish`:
 *   a literal boundary rule, with optional `label` and `caseSensitive`.
 *
 * @param {*} rawRule Raw rule definition.
 * @param {string} [mode="exclude"] Mode stored on the rule and used in its id.
 * @param {number} [index=0] Position of the rule in its list; used in the id.
 * @returns {object|null} A rule of kind "tag" or "boundary", or null when the
 *   input is empty, not a string/plain object, or lacks both a tag and a
 *   complete start/end pair.
 */
function normalizeBoundaryRule(rawRule, mode = "exclude", index = 0) {
  const buildTagRule = (tag, label) => ({
    id: `${mode}:tag:${index}:${tag}`,
    mode,
    kind: "tag",
    label,
    tag,
  });

  if (typeof rawRule === "string") {
    const tag = rawRule.trim();
    return tag ? buildTagRule(tag, tag) : null;
  }

  if (!rawRule || typeof rawRule !== "object" || Array.isArray(rawRule)) {
    return null;
  }

  const tag = String(rawRule.tag || rawRule.name || "").trim();
  if (tag) {
    // A blank label falls back to the tag name.
    return buildTagRule(tag, String(rawRule.label || tag).trim() || tag);
  }

  const start = String(rawRule.start ?? rawRule.open ?? rawRule.begin ?? "").trim();
  const end = String(rawRule.end ?? rawRule.close ?? rawRule.finish ?? "").trim();
  if (!start || !end) {
    return null;
  }

  const defaultLabel = `${start} … ${end}`;
  return {
    id: `${mode}:boundary:${index}`,
    mode,
    kind: "boundary",
    label: String(rawRule.label || defaultLabel).trim() || defaultLabel,
    start,
    end,
    // Case-insensitive unless the caller explicitly passes `true`.
    caseSensitive: rawRule.caseSensitive === true,
  };
}
|
||||
|
||||
/**
 * Build the normalized rule list for one mode from two configuration sources.
 *
 * `rawRules` contributes its elements when it is an array, or itself as a
 * single candidate when it is any other non-empty value. `rawTags` is split
 * with `splitConfigText` and each entry is appended after the `rawRules`
 * candidates. Candidates that `normalizeBoundaryRule` rejects are dropped.
 *
 * @param {*} [rawRules=null] Rule definitions (array or single value).
 * @param {*} [rawTags=""] Comma/newline separated tag names.
 * @param {string} [mode="exclude"] Mode passed to every normalized rule.
 * @returns {object[]} Normalized rules. The index passed to each rule is its
 *   position in the combined candidate list, before rejected ones are removed.
 */
function normalizeBoundaryRules(rawRules = null, rawTags = "", mode = "exclude") {
  let candidates = [];
  if (Array.isArray(rawRules)) {
    candidates = [...rawRules];
  } else if (rawRules !== null && rawRules !== undefined && rawRules !== "") {
    candidates = [rawRules];
  }
  candidates.push(...splitConfigText(rawTags));

  return candidates
    .map((candidate, index) => normalizeBoundaryRule(candidate, mode, index))
    .filter(Boolean);
}
|
||||
|
||||
/**
 * Apply a tag rule to the LAST `<tag ...>...</tag>` pair found in the text.
 *
 * Tag matching is case-insensitive and the opening tag may carry attributes.
 * In "extract" mode the output is the trimmed inner content of that pair;
 * in any other mode the output is the text with that one pair removed, trimmed.
 * Earlier occurrences of the same tag are left untouched.
 *
 * @param {*} text Text to process. Falsy values are treated as "".
 * @param {object} rule Rule with `tag`, and optionally `mode` and `label`.
 * @returns {{changed: boolean, output: string, ruleLabel: string, matchedText: string}}
 *   `changed` is false (and `output` is the input) when the rule has no tag or
 *   nothing matched.
 */
function applyTagBoundaryRule(text, rule) {
  const input = String(text || "");
  const notApplied = {
    changed: false,
    output: input,
    ruleLabel: String(rule?.label || ""),
    matchedText: "",
  };

  const escapedTag = escapeRegex(rule?.tag || "");
  if (!escapedTag) {
    return notApplied;
  }

  const regex = new RegExp(
    `<${escapedTag}\\b[^>]*>([\\s\\S]*?)<\\/${escapedTag}>`,
    "gi",
  );
  // Walk every match and keep only the final one.
  let match = null;
  for (const candidate of input.matchAll(regex)) {
    match = candidate;
  }
  if (!match) {
    return notApplied;
  }

  const matchedText = String(match[0] || "");
  const ruleLabel = String(rule?.label || rule?.tag || "");

  if (rule?.mode === "extract") {
    return {
      changed: true,
      output: String(match[1] || "").trim(),
      ruleLabel,
      matchedText,
    };
  }

  // matchAll results always carry a non-negative integer `index`, so the
  // slice positions need no further validation.
  const before = input.slice(0, match.index);
  const after = input.slice(match.index + matchedText.length);
  return {
    changed: true,
    output: `${before}${after}`.trim(),
    ruleLabel,
    matchedText,
  };
}
|
||||
|
||||
/**
 * Apply a literal start/end boundary rule to the text.
 *
 * The span used is the one that begins at the LAST occurrence of `start` and
 * ends at the first occurrence of `end` after it. In "extract" mode the output
 * is the trimmed text between the markers; in any other mode the output is the
 * text with the whole span (markers included) removed, trimmed.
 *
 * Matching is case-insensitive unless `rule.caseSensitive === true`.
 *
 * @param {*} text Text to process. Falsy values are treated as "".
 * @param {object} rule Rule with `start`, `end`, and optionally `mode`,
 *   `label` and `caseSensitive`.
 * @returns {{changed: boolean, output: string, ruleLabel: string, matchedText: string}}
 *   `changed` is false (and `output` is the input) when a marker is missing
 *   from the rule or from the text.
 */
function applyLiteralBoundaryRule(text, rule) {
  const input = String(text || "");
  const start = String(rule?.start || "");
  const end = String(rule?.end || "");
  const ruleLabel = String(rule?.label || "");
  const notApplied = { changed: false, output: input, ruleLabel, matchedText: "" };

  if (!start || !end) {
    return notApplied;
  }

  // Lower-case one code point at a time and keep the original character
  // whenever lower-casing would change its length (e.g. U+0130). Indices found
  // in the folded string are therefore valid in the original string, which a
  // whole-string toLowerCase() does not guarantee.
  const foldCase = (value) =>
    Array.from(value, (ch) => {
      const lower = ch.toLowerCase();
      return lower.length === ch.length ? lower : ch;
    }).join("");
  const normalize = rule?.caseSensitive === true ? (value) => value : foldCase;

  const haystack = normalize(input);
  const startNeedle = normalize(start);
  const endNeedle = normalize(end);

  const startIndex = haystack.lastIndexOf(startNeedle);
  if (startIndex < 0) {
    return notApplied;
  }

  const endIndex = haystack.indexOf(endNeedle, startIndex + startNeedle.length);
  if (endIndex < 0) {
    return notApplied;
  }

  const spanEnd = endIndex + end.length;
  const matchedText = input.slice(startIndex, spanEnd);
  const output =
    rule?.mode === "extract"
      ? input.slice(startIndex + start.length, endIndex)
      : `${input.slice(0, startIndex)}${input.slice(spanEnd)}`;

  return {
    changed: true,
    output: output.trim(),
    ruleLabel,
    matchedText,
  };
}
|
||||
|
||||
/**
 * Dispatch a rule to the handler for its `kind`.
 *
 * @param {*} text Text to process.
 * @param {object} rule Rule whose `kind` is "tag" or "boundary".
 * @returns {{changed: boolean, output: string, ruleLabel: string, matchedText: string}}
 *   The handler's result, or an unchanged result for any other kind.
 */
function applyBoundaryRule(text, rule) {
  switch (rule?.kind) {
    case "tag":
      return applyTagBoundaryRule(text, rule);
    case "boundary":
      return applyLiteralBoundaryRule(text, rule);
    default:
      // Unknown or missing kind: leave the text as it is.
      return {
        changed: false,
        output: String(text || ""),
        ruleLabel: String(rule?.label || ""),
        matchedText: "",
      };
  }
}
|
||||
|
||||
/**
 * Try the rules in order and return the result of the first one that changes
 * the text. Later rules are not evaluated once one has matched.
 *
 * @param {*} text Text to process. Falsy values are treated as "".
 * @param {object[]} [rules=[]] Rules to try; a non-array is treated as empty.
 * @returns {{changed: boolean, output: string, operation: (object|null)}}
 *   On a match, `operation` is `{ mode: "extract", rule, matchedLength }`;
 *   otherwise `output` is the input and `operation` is null.
 */
function applyFirstExtractRule(text, rules = []) {
  const input = String(text || "");
  const candidates = Array.isArray(rules) ? rules : [];

  for (const rule of candidates) {
    const result = applyBoundaryRule(input, rule);
    if (!result.changed) {
      continue;
    }
    return {
      changed: true,
      output: result.output,
      operation: {
        mode: "extract",
        rule: result.ruleLabel,
        matchedLength: String(result.matchedText || "").length,
      },
    };
  }

  return { changed: false, output: input, operation: null };
}
|
||||
|
||||
/**
 * Apply every rule in order, each one to the output of the previous one.
 *
 * @param {*} text Text to process. Falsy values are treated as "".
 * @param {object[]} [rules=[]] Rules to apply; a non-array is treated as empty.
 * @returns {{changed: boolean, output: string, operations: object[]}}
 *   `operations` holds one `{ mode: "exclude", rule, matchedLength }` entry per
 *   rule that changed the text; `changed` is true when the final output
 *   differs from the input.
 */
function applyExcludeRules(text, rules = []) {
  const input = String(text || "");
  const operations = [];
  let output = input;

  for (const rule of Array.isArray(rules) ? rules : []) {
    const result = applyBoundaryRule(output, rule);
    if (result.changed) {
      output = result.output;
      operations.push({
        mode: "exclude",
        rule: result.ruleLabel,
        matchedLength: String(result.matchedText || "").length,
      });
    }
  }

  return {
    changed: output !== input,
    output,
    operations,
  };
}
|
||||
|
||||
/**
 * Map an arbitrary role value onto "user", "assistant" or "system".
 *
 * The value is stringified, trimmed and lower-cased. Anything other than
 * "user" or "system" (including empty input and aliases such as "ai")
 * resolves to "assistant".
 *
 * @param {*} [value=""] Raw role value.
 * @returns {"user"|"assistant"|"system"} The normalized role.
 */
function normalizeRole(value = "") {
  const role = String(value || "assistant").trim().toLowerCase();
  return role === "user" || role === "system" ? role : "assistant";
}
|
||||
|
||||
/**
 * Pick the text of a message, preferring `content` over `mes`.
 *
 * A field is used only when it holds a string; an empty string counts.
 *
 * @param {object} [message={}] Message object.
 * @returns {string} The first string found, or "" when neither field is a string.
 */
function resolveMessageContent(message = {}) {
  for (const field of ["content", "mes"]) {
    const value = message?.[field];
    if (typeof value === "string") {
      return value;
    }
  }
  return "";
}
|
||||
|
||||
/**
 * Pick the raw text of a message, preferring `rawContent`, then `mes`, then
 * `content`.
 *
 * A field is used only when it holds a string; an empty string counts.
 *
 * @param {object} [message={}] Message object.
 * @returns {string} The first string found, or "" when none of the fields is a string.
 */
function resolveMessageRawContent(message = {}) {
  for (const field of ["rawContent", "mes", "content"]) {
    const value = message?.[field];
    if (typeof value === "string") {
      return value;
    }
  }
  return "";
}
|
||||
|
||||
/**
 * Determine the display name of a message's speaker.
 *
 * The first non-nullish value among `speaker`, `name` and `displayName` is
 * used when it is non-blank after trimming. Otherwise the name depends on the
 * role: `names.userName` for "user", `names.charName` for "assistant" (each
 * with a built-in default when blank), and the role itself for anything else.
 *
 * @param {object} [message={}] Message object.
 * @param {string} [role="assistant"] Role of the message.
 * @param {{userName?: string, charName?: string}} [names={}] Fallback names.
 * @returns {string} The speaker name.
 */
function resolveSpeakerName(message = {}, role = "assistant", names = {}) {
  const explicitSpeaker = String(
    message?.speaker ?? message?.name ?? message?.displayName ?? "",
  ).trim();
  if (explicitSpeaker) {
    return explicitSpeaker;
  }

  switch (role) {
    case "user":
      return String(names?.userName || "用户").trim() || "用户";
    case "assistant":
      return String(names?.charName || "角色").trim() || "角色";
    default:
      return role || "assistant";
  }
}
|
||||
|
||||
/**
 * Convert a raw chat message into the normalized shape used for extraction.
 *
 * The role comes from `message.role`, or from `message.is_user` when no role
 * is set. `content` and `rawContent` are trimmed; `rawContent` falls back to
 * `content` when no raw text is available.
 *
 * @param {object} [message={}] Raw message.
 * @param {number} [index=0] Position of the message in its list.
 * @param {{userName?: string, charName?: string}} [names={}] Fallback speaker names.
 * @returns {{index: number, seq: (number|null), role: string, speaker: string,
 *   name: string, content: string, rawContent: string, sourceType: string}}
 *   `sourceType` is "user_input" for the user role and "ai_output" otherwise.
 */
function normalizeExtractionMessage(message = {}, index = 0, names = {}) {
  // Only numbers, bigints and non-blank strings can carry a sequence number.
  // A plain Number() coercion would turn null, "" and [] into 0 and booleans
  // into 0/1, silently inventing a seq for messages that have none.
  const parseSeq = (value) => {
    const isCandidate =
      typeof value === "number" ||
      typeof value === "bigint" ||
      (typeof value === "string" && value.trim() !== "");
    if (!isCandidate) {
      return null;
    }
    const parsed = Number(value);
    return Number.isFinite(parsed) ? parsed : null;
  };

  const role = normalizeRole(
    message?.role ?? (message?.is_user === true ? "user" : "assistant"),
  );
  const content = String(resolveMessageContent(message) || "").trim();
  const rawContent = String(resolveMessageRawContent(message) || content).trim();
  const speaker = resolveSpeakerName(message, role, names);

  return {
    index,
    seq: parseSeq(message?.seq),
    role,
    speaker,
    name: speaker,
    content,
    rawContent,
    sourceType: role === "user" ? "user_input" : "ai_output",
  };
}
|
||||
|
||||
/**
 * Count messages per normalized role.
 *
 * @param {object[]} [messages=[]] Messages; a non-array is treated as empty.
 * @returns {{user: number, assistant: number, system: number}} Counts keyed by
 *   role. Messages without a role are counted as "assistant".
 */
function countRoles(messages = []) {
  const counts = { user: 0, assistant: 0, system: 0 };
  for (const message of Array.isArray(messages) ? messages : []) {
    const role = normalizeRole(message?.role || "assistant");
    counts[role] = Number(counts[role] || 0) + 1;
  }
  return counts;
}
|
||||
|
||||
/**
 * Render messages as a plain-text transcript.
 *
 * Each message becomes `#<seq> [<role>|<speaker>]: <content>`; the `|<speaker>`
 * part is omitted when the message has no speaker or name. Messages are
 * separated by a blank line. A message with empty content is still rendered,
 * as a header with no text after it.
 *
 * @param {object[]} [messages=[]] Messages; a non-array is treated as empty.
 * @returns {string} The transcript, or "" when there are no messages.
 */
export function formatExtractionTranscript(messages = []) {
  // Only numbers, bigints and non-blank strings can carry a sequence number.
  // A plain Number() coercion maps null and "" to 0, which would label every
  // message lacking a seq as "#0" instead of using its position.
  const parseSeq = (value) => {
    const isCandidate =
      typeof value === "number" ||
      typeof value === "bigint" ||
      (typeof value === "string" && value.trim() !== "");
    if (!isCandidate) {
      return null;
    }
    const parsed = Number(value);
    return Number.isFinite(parsed) ? parsed : null;
  };

  return (Array.isArray(messages) ? messages : [])
    .map((message, index) => {
      const seq = parseSeq(message?.seq);
      // Fall back to the 1-based position when the message has no usable seq.
      const seqLabel = `#${seq === null ? index + 1 : seq}`;
      const role = normalizeRole(message?.role || "assistant");
      const speaker = String(message?.speaker || message?.name || "").trim();
      const speakerLabel = speaker ? `|${speaker}` : "";
      return `${seqLabel} [${role}${speakerLabel}]: ${String(message?.content || "")}`;
    })
    .join("\n\n");
}
|
||||
|
||||
/**
 * Build the raw and filtered extraction input for a list of chat messages.
 *
 * Steps:
 * 1. Normalize every message and drop those whose content and raw content
 *    are both blank.
 * 2. For assistant messages only, apply the first matching extract rule and
 *    then every exclude rule. Messages of other roles are passed through.
 * 3. Drop messages whose content is empty after filtering.
 * 4. Render a transcript of the unfiltered and of the filtered messages.
 *
 * Rules are read from `settings.extractAssistantExtractRules` /
 * `settings.extractAssistantExtractTags` and
 * `settings.extractAssistantExcludeRules` / `settings.extractAssistantExcludeTags`.
 *
 * @param {object[]} [messages=[]] Raw messages; a non-array is treated as empty.
 * @param {object} [options]
 * @param {object} [options.settings={}] Settings holding the rule configuration.
 * @param {string} [options.userName=""] Fallback speaker name for user messages.
 * @param {string} [options.charName=""] Fallback speaker name for assistant messages.
 * @returns {{rawMessages: object[], filteredMessages: object[], rawTranscript: string,
 *   filteredTranscript: string, debug: object}} The normalized and filtered
 *   messages, their transcripts, and a `debug` object with counts, the active
 *   rule labels and the per-message operations.
 */
export function buildExtractionInputContext(
  messages = [],
  { settings = {}, userName = "", charName = "" } = {},
) {
  const normalizedMessages = (Array.isArray(messages) ? messages : [])
    .map((message, index) =>
      normalizeExtractionMessage(message, index, { userName, charName }),
    )
    .filter(
      (message) =>
        String(message?.content || "").trim().length > 0 ||
        String(message?.rawContent || "").trim().length > 0,
    );

  const extractRules = normalizeBoundaryRules(
    settings?.extractAssistantExtractRules,
    settings?.extractAssistantExtractTags,
    "extract",
  );
  const excludeRules = normalizeBoundaryRules(
    settings?.extractAssistantExcludeRules,
    settings?.extractAssistantExcludeTags,
    "exclude",
  );

  const filteredMessages = [];
  const messageOperations = [];
  let changedAssistantMessageCount = 0;
  let droppedAssistantMessageCount = 0;
  let extractedAssistantMessageCount = 0;
  let excludedAssistantMessageCount = 0;

  for (const message of normalizedMessages) {
    const isAssistant = message.role === "assistant";
    const originalContent = String(message.content || "");
    const operations = [];
    let nextContent = originalContent;

    if (isAssistant) {
      // Extraction runs first, so exclude rules see only the extracted text.
      const extractResult = applyFirstExtractRule(nextContent, extractRules);
      if (extractResult.changed) {
        nextContent = extractResult.output;
        extractedAssistantMessageCount += 1;
        operations.push(extractResult.operation);
      }

      const excludeResult = applyExcludeRules(nextContent, excludeRules);
      if (excludeResult.changed) {
        nextContent = excludeResult.output;
        excludedAssistantMessageCount += 1;
        operations.push(...excludeResult.operations);
      }
    }

    const normalizedContent = String(nextContent || "").trim();
    const wasModified =
      operations.length > 0 || normalizedContent !== originalContent.trim();
    if (wasModified) {
      if (isAssistant) {
        changedAssistantMessageCount += 1;
      }
      messageOperations.push({
        seq: message.seq,
        role: message.role,
        speaker: message.speaker,
        beforeLength: originalContent.length,
        afterLength: normalizedContent.length,
        operations,
      });
    }

    if (!normalizedContent) {
      // Count a drop only when filtering emptied an assistant message that
      // had content to begin with.
      if (isAssistant && originalContent.trim()) {
        droppedAssistantMessageCount += 1;
      }
      continue;
    }

    filteredMessages.push({
      ...message,
      content: normalizedContent,
      extractionFilterOperations: operations,
    });
  }

  const rawTranscript = formatExtractionTranscript(
    normalizedMessages.filter((message) => String(message.content || "").trim()),
  );
  const filteredTranscript = formatExtractionTranscript(filteredMessages);

  return {
    rawMessages: normalizedMessages,
    filteredMessages,
    rawTranscript,
    filteredTranscript,
    debug: {
      rawMessageCount: normalizedMessages.length,
      filteredMessageCount: filteredMessages.length,
      rawRoleCounts: countRoles(normalizedMessages),
      filteredRoleCounts: countRoles(filteredMessages),
      rawTranscriptLength: rawTranscript.length,
      filteredTranscriptLength: filteredTranscript.length,
      changedAssistantMessageCount,
      droppedAssistantMessageCount,
      extractedAssistantMessageCount,
      excludedAssistantMessageCount,
      assistantBoundaryConfig: {
        extractRuleCount: extractRules.length,
        excludeRuleCount: excludeRules.length,
        extractRules: extractRules.map((rule) => rule.label),
        excludeRules: excludeRules.map((rule) => rule.label),
      },
      rawMessages: normalizedMessages,
      filteredMessages,
      messageOperations,
    },
  };
}
|
||||
Reference in New Issue
Block a user