Files
ST-Bionic-Memory-Ecology/llm.js
2026-03-24 23:26:39 +08:00

525 lines
16 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// ST-BME: LLM call wrapper
// Wraps ST's sendOpenAIRequest; provides structured JSON output and a retry mechanism
import { extension_settings } from "../../../extensions.js";
import { chat_completion_sources, sendOpenAIRequest } from "../../../openai.js";
import { getRequestHeaders } from "../../../../script.js";
const MODULE_NAME = "st_bme"; // key under extension_settings for this extension
const LLM_REQUEST_TIMEOUT_MS = 60000; // hard per-request cap enforced by fetchWithTimeout
const DEFAULT_TEXT_COMPLETION_TOKENS = 1200; // token budget for plain-text completions
const DEFAULT_JSON_COMPLETION_TOKENS = 2200; // token budget for the first JSON attempt
const RETRY_JSON_COMPLETION_TOKENS = 3200; // larger budget for JSON retries (truncation recovery)
/**
 * Read the dedicated memory-LLM settings for this extension.
 * @returns {{apiUrl: string, apiKey: string, model: string}} normalized config values
 */
function getMemoryLLMConfig() {
  const { llmApiUrl, llmApiKey, llmModel } = extension_settings[MODULE_NAME] || {};
  return {
    apiUrl: normalizeOpenAICompatibleBaseUrl(llmApiUrl),
    apiKey: String(llmApiKey || '').trim(),
    model: String(llmModel || '').trim(),
  };
}
/**
 * Normalize a user-supplied OpenAI-compatible base URL: trims whitespace,
 * strips a trailing /chat/completions or /embeddings endpoint segment,
 * then removes trailing slashes.
 * @param {unknown} value - raw setting value (may be null/undefined)
 * @returns {string} normalized base URL ('' when empty)
 */
function normalizeOpenAICompatibleBaseUrl(value) {
  const text = String(value || '').trim();
  const withoutEndpoint = text.replace(/\/+(chat\/completions|embeddings)$/i, '');
  return withoutEndpoint.replace(/\/+$/, '');
}
/**
 * Whether a dedicated memory-LLM endpoint is usable.
 * @param {{apiUrl: string, model: string}} [config] - defaults to current settings
 * @returns {boolean} true when both an API URL and a model are configured
 */
function hasDedicatedLLMConfig(config = getMemoryLLMConfig()) {
  const { apiUrl, model } = config;
  return Boolean(apiUrl) && Boolean(model);
}
/**
 * Convert a raw model listing (strings or objects) into a deduplicated
 * array of { id, label } entries; non-array input yields [].
 * @param {Array<string|object>} [items] - raw list from the API
 * @returns {Array<{id: string, label: string}>}
 */
function normalizeModelList(items = []) {
  if (!Array.isArray(items)) return [];
  const seen = new Set();
  return items.reduce((models, item) => {
    let id = '';
    let label = '';
    if (typeof item === 'string') {
      id = item.trim();
      label = id;
    } else if (item && typeof item === 'object') {
      // Accept several common field names for the identifier / display name.
      id = String(item.id || item.name || item.value || item.slug || '').trim();
      label = String(item.name || item.id || item.value || item.slug || '').trim();
    }
    if (id && !seen.has(id)) {
      seen.add(id);
      models.push({ id, label: label || id });
    }
    return models;
  }, []);
}
/**
 * Pull the assistant text out of the many payload shapes an
 * OpenAI-compatible backend may return: a bare string, an array of
 * content parts, an OpenAI chat choice, or legacy text-completion fields.
 * @param {unknown} payload - raw response body
 * @returns {string} extracted text ('' when nothing usable is found)
 */
function extractContentFromResponsePayload(payload) {
  // Multi-part content arrays carry text under .text or .content per item.
  const joinParts = (parts) => parts
    .map((part) => part?.text || part?.content || '')
    .join('')
    .trim();
  if (typeof payload === 'string') return payload;
  if (Array.isArray(payload)) return joinParts(payload);
  if (!payload || typeof payload !== 'object') return '';
  const messageContent = payload?.choices?.[0]?.message?.content;
  if (typeof messageContent === 'string') return messageContent;
  if (Array.isArray(messageContent)) return joinParts(messageContent);
  // Legacy / text-completion style fallbacks, first non-nullish wins.
  const textContent =
    payload?.choices?.[0]?.text ??
    payload?.text ??
    payload?.message?.content ??
    payload?.content;
  if (typeof textContent === 'string') return textContent;
  if (Array.isArray(textContent)) return joinParts(textContent);
  return '';
}
/**
 * Wrap a raw LLM payload into a uniform shape:
 * { content, finishReason, reasoningContent, raw }.
 * @param {unknown} payload - raw response (string or parsed JSON object)
 * @returns {{content: string, finishReason: string, reasoningContent: string, raw: unknown}}
 */
function normalizeLLMResponsePayload(payload) {
  if (typeof payload === 'string') {
    return { content: payload.trim(), finishReason: '', reasoningContent: '', raw: payload };
  }
  const firstChoice = payload?.choices?.[0] || {};
  const message = firstChoice?.message || {};
  const reasoning = message?.reasoning_content;
  return {
    content: extractContentFromResponsePayload(payload).trim(),
    finishReason: String(firstChoice?.finish_reason || ''),
    reasoningContent: typeof reasoning === 'string' ? reasoning : '',
    raw: payload,
  };
}
/**
 * Build the permissive JSON schema sent with structured-output requests:
 * any well-formed JSON object is accepted (strict mode off).
 * @returns {{name: string, description: string, strict: boolean, value: object}}
 */
function createGenericJsonSchema() {
  const value = { type: 'object', additionalProperties: true };
  return {
    name: 'st_bme_json_response',
    description: 'A well-formed JSON object for programmatic parsing.',
    strict: false,
    value,
  };
}
/**
 * Heuristically detect truncated JSON output: unbalanced braces or
 * brackets, or a markdown code fence that was opened but never closed.
 * @param {unknown} text - raw model output
 * @returns {boolean} true when the text looks cut off
 */
function looksLikeTruncatedJson(text) {
  const trimmed = String(text || '').trim();
  if (!trimmed) return false;
  const count = (re) => (trimmed.match(re) || []).length;
  if (count(/\{/g) > count(/\}/g)) return true;
  if (count(/\[/g) > count(/\]/g)) return true;
  const opensFence = /```(?:json)?/i.test(trimmed);
  const endsWithFence = /```[\s]*$/i.test(trimmed);
  return opensFence && !endsWithFence;
}
/**
 * Build the system/user message pair for one JSON-output attempt.
 * Retries (attempt > 0) append the previous failure reason so the model
 * regenerates from scratch instead of continuing bad output.
 * @param {string} systemPrompt - base system instructions
 * @param {string} userPrompt - base user request
 * @param {number} attempt - zero-based attempt index
 * @param {string} [reason] - parse-failure reason from the previous attempt
 * @returns {Array<{role: string, content: string}>} two-message conversation
 */
function buildJsonAttemptMessages(systemPrompt, userPrompt, attempt, reason = '') {
  const system = [
    systemPrompt,
    '输出要求补充:只输出一个紧凑的 JSON 对象。',
    '禁止 markdown 代码块、禁止解释、禁止前后缀、禁止省略号。',
    '如果需要重新生成,请直接从头输出完整 JSON不要续写上一次内容。',
  ].join('\n\n');
  const user = [userPrompt];
  if (attempt > 0) {
    user.push(reason ? `上一次输出失败原因:${reason}` : '上一次输出未能被程序解析。');
    user.push('请重新输出一个完整、紧凑、可直接 JSON.parse 的 JSON 对象。');
  } else {
    user.push('请直接输出紧凑 JSON 对象,不要包含任何额外文本。');
  }
  return [
    { role: 'system', content: system },
    { role: 'user', content: user.join('\n\n') },
  ];
}
/**
 * fetch() with a hard timeout: aborts after timeoutMs while still
 * honoring any caller-supplied AbortSignal in options.signal.
 * @param {string} url - request URL
 * @param {object} [options] - fetch options (may include .signal)
 * @param {number} [timeoutMs] - timeout in milliseconds
 * @returns {Promise<Response>}
 */
async function fetchWithTimeout(url, options = {}, timeoutMs = LLM_REQUEST_TIMEOUT_MS) {
  const timeoutController = new AbortController();
  const timer = setTimeout(() => timeoutController.abort(), timeoutMs);
  const signal = options.signal
    ? createCombinedAbortSignal(options.signal, timeoutController.signal)
    : timeoutController.signal;
  try {
    return await fetch(url, { ...options, signal });
  } finally {
    // Always clear the timer so it cannot fire after completion.
    clearTimeout(timer);
  }
}
/**
 * Combine several AbortSignals into one that aborts as soon as any
 * source signal aborts. Uses AbortSignal.any when available; otherwise
 * falls back to a manual listener-based implementation.
 *
 * Fix: the manual fallback now forwards the originating signal's abort
 * reason (previously it called controller.abort() with no argument, so
 * consumers always saw a generic AbortError and lost e.g. timeout info).
 *
 * @param {...(AbortSignal|null|undefined)} signals - signals to merge (falsy entries ignored)
 * @returns {AbortSignal|undefined} combined signal, or undefined when none given
 */
function createCombinedAbortSignal(...signals) {
  const validSignals = signals.filter(Boolean);
  if (validSignals.length <= 1) {
    // Zero or one signal: nothing to combine.
    return validSignals[0] || undefined;
  }
  if (typeof AbortSignal !== 'undefined' && typeof AbortSignal.any === 'function') {
    return AbortSignal.any(validSignals);
  }
  const controller = new AbortController();
  for (const signal of validSignals) {
    if (signal.aborted) {
      // Already aborted: propagate immediately, no listeners needed.
      controller.abort(signal.reason);
      return controller.signal;
    }
    signal.addEventListener('abort', () => controller.abort(signal.reason), { once: true });
  }
  return controller.signal;
}
// Auto-detection: if the API rejects response_format (structured output),
// remember that in this module-level sticky flag and skip it on later requests.
let _jsonModeSupported = true;
/**
 * Core chat-completion call.
 * When a dedicated memory-LLM endpoint (apiUrl + model) is configured,
 * POSTs to SillyTavern's chat-completions proxy with that endpoint as a
 * reverse proxy; otherwise falls back to the current ST chat model via
 * sendOpenAIRequest('quiet', ...).
 *
 * @param {Array<{role: string, content: string}>} messages - conversation to send
 * @param {object} [options]
 * @param {AbortSignal} [options.signal] - caller cancellation signal
 * @param {boolean} [options.jsonMode=false] - request structured JSON output
 * @param {number|null} [options.maxCompletionTokens=null] - explicit token-budget override
 * @returns {Promise<{content: string, finishReason: string, reasoningContent: string, raw: unknown}>}
 * @throws {Error} when the backend errors or returns no usable content
 */
async function callDedicatedOpenAICompatible(
  messages,
  { signal, jsonMode = false, maxCompletionTokens = null } = {},
) {
  const config = getMemoryLLMConfig();
  if (!hasDedicatedLLMConfig(config)) {
    // Fallback path: no dedicated endpoint configured, use ST's active model quietly.
    const payload = await sendOpenAIRequest(
      'quiet',
      messages,
      signal,
      jsonMode ? { jsonSchema: createGenericJsonSchema() } : {},
    );
    const normalized = normalizeLLMResponsePayload(payload);
    if (typeof normalized.content === 'string' && normalized.content.trim().length > 0) {
      return normalized;
    }
    throw new Error('SillyTavern current model returned an unexpected response format');
  }
  // Token budget: explicit override wins; otherwise JSON calls get a larger
  // default than plain-text calls.
  const completionTokens = Number.isFinite(maxCompletionTokens)
    ? maxCompletionTokens
    : jsonMode
    ? DEFAULT_JSON_COMPLETION_TOKENS
    : DEFAULT_TEXT_COMPLETION_TOKENS;
  const body = {
    chat_completion_source: chat_completion_sources.OPENAI,
    reverse_proxy: config.apiUrl,
    proxy_password: config.apiKey || '',
    model: config.model,
    messages,
    temperature: jsonMode ? 0 : 0.2, // deterministic output when JSON is required
    max_tokens: completionTokens,
    max_completion_tokens: completionTokens, // newer field name; both sent for compatibility
    stream: false,
  };
  if (jsonMode) {
    body.json_schema = createGenericJsonSchema();
    body.reasoning_effort = 'low';
    body.verbosity = 'low';
  }
  const response = await fetchWithTimeout('/api/backends/chat-completions/generate', {
    method: 'POST',
    headers: getRequestHeaders(),
    body: JSON.stringify(body),
    signal,
  });
  // HTTP 400 with structured output requested may mean the API rejects
  // response_format: downgrade once (sticky via _jsonModeSupported) and
  // retry as a plain JSON-prompt request.
  if (!response.ok && response.status === 400 && jsonMode && _jsonModeSupported) {
    console.warn('[ST-BME] API 不支持 structured output降级为普通 JSON 提示模式');
    _jsonModeSupported = false;
    delete body.json_schema;
    delete body.reasoning_effort;
    delete body.verbosity;
    const retryResponse = await fetchWithTimeout('/api/backends/chat-completions/generate', {
      method: 'POST',
      headers: getRequestHeaders(),
      body: JSON.stringify(body),
      signal,
    });
    return await _parseResponse(retryResponse);
  }
  return await _parseResponse(response);
}
/**
 * Parse a proxy Response into a normalized LLM payload, surfacing HTTP
 * and API-level errors as exceptions.
 * @param {Response} response - fetch response from the ST proxy
 * @returns {Promise<{content: string, finishReason: string, reasoningContent: string, raw: unknown}>}
 * @throws {Error} on non-2xx status, API error payloads, or empty content
 */
async function _parseResponse(response) {
  const rawText = await response.text().catch(() => '');
  let data;
  try {
    data = rawText ? JSON.parse(rawText) : {};
  } catch {
    // Non-JSON body: treat the raw text (or status text) as the error message.
    data = { error: { message: rawText || response.statusText } };
  }
  const apiError = data?.error?.message;
  if (!response.ok) {
    throw new Error(`Memory LLM proxy error ${response.status}: ${apiError || response.statusText}`);
  }
  if (apiError) {
    throw new Error(`Memory LLM proxy error: ${apiError}`);
  }
  const normalized = normalizeLLMResponsePayload(data);
  const hasContent = typeof normalized.content === 'string' && normalized.content.length > 0;
  if (!hasContent) {
    throw new Error('Memory LLM API returned an unexpected response format');
  }
  return normalized;
}
/**
 * Call the LLM and expect a structured JSON object back.
 * On parse failure or truncation, retries with a larger token budget and
 * the failure reason appended to the prompt.
 *
 * @param {object} params
 * @param {string} params.systemPrompt - system prompt
 * @param {string} params.userPrompt - user prompt
 * @param {number} [params.maxRetries=2] - extra attempts after a parse failure
 * @returns {Promise<object|null>} parsed JSON object, or null when all attempts fail
 */
export async function callLLMForJSON({ systemPrompt, userPrompt, maxRetries = 2 }) {
  let lastFailureReason = '';
  for (let attempt = 0; attempt <= maxRetries; attempt++) {
    try {
      const messages = buildJsonAttemptMessages(
        systemPrompt,
        userPrompt,
        attempt,
        lastFailureReason,
      );
      // First attempt uses the default JSON budget; retries get more room
      // in case the previous output was truncated by the length limit.
      const response = await callDedicatedOpenAICompatible(messages, {
        jsonMode: true,
        maxCompletionTokens: attempt === 0
          ? DEFAULT_JSON_COMPLETION_TOKENS
          : RETRY_JSON_COMPLETION_TOKENS,
      });
      const responseText = response?.content || '';
      if (!responseText || typeof responseText !== 'string') {
        console.warn(`[ST-BME] LLM 返回空响应 (尝试 ${attempt + 1})`);
        lastFailureReason = '返回空响应';
        continue;
      }
      // Try to extract a parseable JSON value from the raw text.
      const parsed = extractJSON(responseText);
      if (parsed !== null) {
        return parsed;
      }
      // Distinguish truncation from malformed output so the retry prompt
      // can ask for a more compact regeneration.
      const truncated = response.finishReason === 'length' || looksLikeTruncatedJson(responseText);
      lastFailureReason = truncated
        ? '输出因长度限制被截断,请重新输出更紧凑的完整 JSON'
        : '输出不是有效 JSON请严格返回紧凑 JSON 对象';
      console.warn(
        `[ST-BME] LLM 响应无法解析为 JSON (尝试 ${attempt + 1}, finish=${response.finishReason || 'unknown'}):`,
        responseText.slice(0, 200),
      );
    } catch (e) {
      console.error(`[ST-BME] LLM 调用失败 (尝试 ${attempt + 1}):`, e);
      lastFailureReason = e?.message || String(e) || 'LLM 调用失败';
    }
  }
  return null;
}
/**
 * Plain-text LLM call (no JSON constraint).
 *
 * @param {string} systemPrompt
 * @param {string} userPrompt
 * @returns {Promise<string|null>} response text, or null on failure/empty output
 */
export async function callLLM(systemPrompt, userPrompt) {
  try {
    const response = await callDedicatedOpenAICompatible([
      { role: 'system', content: systemPrompt },
      { role: 'user', content: userPrompt },
    ]);
    return response?.content || null;
  } catch (e) {
    console.error('[ST-BME] LLM 调用失败:', e);
    return null;
  }
}
/**
 * Test memory-LLM connectivity with a trivial request.
 * When no dedicated memory LLM is configured, the current SillyTavern
 * chat model is tested instead.
 *
 * @returns {Promise<{success: boolean, mode: string, error: string}>}
 */
export async function testLLMConnection() {
  const config = getMemoryLLMConfig();
  const mode = hasDedicatedLLMConfig(config)
    ? `dedicated:${config.model}`
    : 'sillytavern-current-model';
  try {
    const reply = await callLLM(
      '你是一个连接测试助手。请只回答 OK。',
      '请只回复 OK',
    );
    const ok = typeof reply === 'string' && reply.trim().length > 0;
    return ok
      ? { success: true, mode, error: '' }
      : { success: false, mode, error: 'API 返回空结果' };
  } catch (e) {
    return { success: false, mode, error: String(e) };
  }
}
/**
 * Fetch the model list from the configured memory-LLM endpoint via
 * SillyTavern's chat-completions status proxy.
 * @returns {Promise<{success: boolean, models: Array<{id: string, label: string}>, error: string}>}
 */
export async function fetchMemoryLLMModels() {
  const config = getMemoryLLMConfig();
  if (!config.apiUrl) {
    return {
      success: false,
      models: [],
      error: "请先填写记忆 LLM API 地址",
    };
  }
  try {
    const requestBody = {
      chat_completion_source: chat_completion_sources.OPENAI,
      reverse_proxy: config.apiUrl,
      proxy_password: config.apiKey || "",
    };
    const response = await fetch("/api/backends/chat-completions/status", {
      method: "POST",
      headers: getRequestHeaders(),
      body: JSON.stringify(requestBody),
    });
    const payload = await response.json().catch(() => ({}));
    if (!response.ok) {
      const message = payload?.error || payload?.message || response.statusText;
      return { success: false, models: [], error: message || `HTTP ${response.status}` };
    }
    const models = normalizeModelList(payload?.data);
    if (models.length === 0) {
      return {
        success: false,
        models: [],
        error: "未拉取到可用模型,请检查接口是否支持 /models",
      };
    }
    return { success: true, models, error: "" };
  } catch (error) {
    return { success: false, models: [], error: String(error) };
  }
}
/**
 * Extract a JSON value from LLM response text.
 * Tries, in order: direct parse, fenced markdown code block, and the
 * widest { ... } / [ ... ] span found in the text.
 *
 * @param {string} text - raw model output
 * @returns {object|null} parsed JSON value, or null when nothing parses
 */
function extractJSON(text) {
  if (!text || typeof text !== 'string') return null;
  const trimmed = text.trim();
  const tryParse = (candidate) => {
    try {
      return JSON.parse(candidate);
    } catch {
      return null;
    }
  };
  // 1) The whole text may already be valid JSON.
  const direct = tryParse(trimmed);
  if (direct !== null) return direct;
  // 2) JSON inside a ``` / ```json fenced block.
  const fence = trimmed.match(/```(?:json)?\s*\n?([\s\S]*?)\n?\s*```/);
  if (fence) {
    const fenced = tryParse(fence[1].trim());
    if (fenced !== null) return fenced;
  }
  // 3) Widest span from the first { or [ to the last matching close char.
  const braceIdx = trimmed.indexOf('{');
  const bracketIdx = trimmed.indexOf('[');
  let start = -1;
  let closer = '';
  if (braceIdx >= 0 && (bracketIdx < 0 || braceIdx < bracketIdx)) {
    start = braceIdx;
    closer = '}';
  } else if (bracketIdx >= 0) {
    start = bracketIdx;
    closer = ']';
  }
  if (start >= 0) {
    const end = trimmed.lastIndexOf(closer);
    if (end > start) {
      const span = tryParse(trimmed.slice(start, end + 1));
      if (span !== null) return span;
    }
  }
  return null;
}