// ST-Bionic-Memory-Ecology/llm.js
// ST-BME: LLM call wrapper.
// Wraps SillyTavern's sendOpenAIRequest, providing structured JSON output and a retry mechanism.
import { getRequestHeaders } from "../../../../script.js";
import { extension_settings } from "../../../extensions.js";
import { chat_completion_sources, sendOpenAIRequest } from "../../../openai.js";
import { debugLog, debugWarn } from "./debug-logging.js";
import { resolveTaskGenerationOptions } from "./generation-options.js";
import { resolveLlmConfigSelection } from "./llm-preset-utils.js";
import { getActiveTaskProfile } from "./prompt-profiles.js";
import { resolveConfiguredTimeoutMs } from "./request-timeout.js";
import { applyTaskRegex } from "./task-regex.js";
const MODULE_NAME = "st_bme";
const LLM_REQUEST_TIMEOUT_MS = 300000;
const DEFAULT_TEXT_COMPLETION_TOKENS = 64000;
const DEFAULT_JSON_COMPLETION_TOKENS = 64000;
const STREAM_DEBUG_PREVIEW_MAX_CHARS = 1200;
const STREAM_DEBUG_UPDATE_INTERVAL_MS = 120;
const SENSITIVE_DEBUG_KEY_PATTERN =
/^(authorization|proxy_password|api[_-]?key|access[_-]?token|refresh[_-]?token|secret|password)$/i;
function cloneRuntimeDebugValue(value, fallback = null) {
if (value == null) {
return fallback;
}
try {
return JSON.parse(JSON.stringify(value));
} catch {
return fallback ?? value;
}
}
function redactSensitiveString(value) {
return String(value ?? "")
.replace(/(Bearer\s+)[^\s"'\r\n]+/gi, "$1[REDACTED]")
.replace(
/(Authorization\s*:\s*Bearer\s+)[^\s"'\r\n]+/gi,
"$1[REDACTED]",
)
.replace(/(proxy_password\s*:\s*)[^\r\n]+/gi, "$1[REDACTED]");
}
function redactSensitiveValue(value, currentKey = "") {
if (value == null) {
return value;
}
if (Array.isArray(value)) {
return value.map((item) => redactSensitiveValue(item, currentKey));
}
if (typeof value === "object") {
return Object.fromEntries(
Object.entries(value).map(([key, entryValue]) => [
key,
redactSensitiveValue(entryValue, key),
]),
);
}
if (typeof value === "string") {
if (SENSITIVE_DEBUG_KEY_PATTERN.test(String(currentKey || ""))) {
return value ? "[REDACTED]" : "";
}
return redactSensitiveString(value);
}
if (SENSITIVE_DEBUG_KEY_PATTERN.test(String(currentKey || ""))) {
return "[REDACTED]";
}
return value;
}
function sanitizeLlmDebugSnapshot(snapshot = {}) {
const cloned = cloneRuntimeDebugValue(snapshot, {});
const redacted = redactSensitiveValue(cloned);
if (redacted && typeof redacted === "object" && !Array.isArray(redacted)) {
redacted.redacted = true;
}
return redacted;
}
function nowIso() {
return new Date().toISOString();
}
function getRuntimeDebugState() {
const stateKey = "__stBmeRuntimeDebugState";
if (
!globalThis[stateKey] ||
typeof globalThis[stateKey] !== "object"
) {
globalThis[stateKey] = {
hostCapabilities: null,
taskPromptBuilds: {},
taskLlmRequests: {},
injections: {},
updatedAt: "",
};
}
return globalThis[stateKey];
}
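// The debug state lives on `globalThis.__stBmeRuntimeDebugState`, so the latest
// per-task prompt/request/stream snapshots can be inspected straight from the
// browser devtools console.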
function recordTaskLlmRequest(taskType, snapshot = {}, options = {}) {
const normalizedTaskType = String(taskType || "").trim() || "unknown";
const state = getRuntimeDebugState();
const shouldMerge = options?.merge === true;
const previousSnapshot = shouldMerge
? cloneRuntimeDebugValue(state.taskLlmRequests[normalizedTaskType], {})
: {};
state.taskLlmRequests[normalizedTaskType] = {
...previousSnapshot,
updatedAt: new Date().toISOString(),
...sanitizeLlmDebugSnapshot(snapshot),
};
state.updatedAt = new Date().toISOString();
}
function getLlmTestOverride(name) {
const override = globalThis.__stBmeTestOverrides?.llm?.[name];
return typeof override === "function" ? override : null;
}
function formatLlmConfigSourceLabel(source = "") {
switch (String(source || "").trim()) {
case "task-preset":
return "任务专用模板";
case "global-fallback-missing-task-preset":
return "任务模板缺失,已回退当前 API";
case "global-fallback-invalid-task-preset":
return "任务模板不完整,已回退当前 API";
case "global":
default:
return "跟随当前 API";
}
}
function getMemoryLLMConfig(taskType = "") {
const settings = extension_settings[MODULE_NAME] || {};
const normalizedTaskType = String(taskType || "").trim();
const activeProfile = normalizedTaskType
? getActiveTaskProfile(settings, normalizedTaskType)
: null;
const selectedPresetName =
typeof activeProfile?.generation?.llm_preset === "string"
? activeProfile.generation.llm_preset
: "";
const selection = resolveLlmConfigSelection(settings, selectedPresetName);
return {
apiUrl: normalizeOpenAICompatibleBaseUrl(selection.config?.llmApiUrl),
apiKey: String(selection.config?.llmApiKey || "").trim(),
model: String(selection.config?.llmModel || "").trim(),
timeoutMs: getConfiguredTimeoutMs(settings),
llmConfigSource: selection.source || "global",
llmConfigSourceLabel: formatLlmConfigSourceLabel(selection.source),
llmPresetName: selection.presetName || "",
requestedLlmPresetName: selection.requestedPresetName || "",
llmPresetFallbackReason: selection.fallbackReason || "",
};
}
function getConfiguredTimeoutMs(settings = {}) {
return typeof resolveConfiguredTimeoutMs === "function"
? resolveConfiguredTimeoutMs(settings, LLM_REQUEST_TIMEOUT_MS)
: (() => {
const timeoutMs = Number(settings?.timeoutMs);
return Number.isFinite(timeoutMs) && timeoutMs > 0
? timeoutMs
: LLM_REQUEST_TIMEOUT_MS;
})();
}
function normalizeRegexDebugEntries(debugCollector = null) {
if (!Array.isArray(debugCollector?.entries)) {
return [];
}
return debugCollector.entries.map((entry) => ({
taskType: String(entry?.taskType || ""),
stage: String(entry?.stage || ""),
enabled: entry?.enabled !== false,
appliedRules: Array.isArray(entry?.appliedRules)
? entry.appliedRules.map((rule) => ({
id: String(rule?.id || ""),
source: String(rule?.source || ""),
error: String(rule?.error || ""),
}))
: [],
sourceCount: {
tavern: Number(entry?.sourceCount?.tavern || 0),
local: Number(entry?.sourceCount?.local || 0),
},
}));
}
function applyTaskOutputRegexStages(taskType, text) {
const normalizedTaskType = String(taskType || "").trim();
const rawText = typeof text === "string" ? text : "";
if (!normalizedTaskType || !rawText) {
return {
cleanedText: rawText,
debug: {
changed: false,
applied: false,
stages: [],
rawLength: rawText.length,
cleanedLength: rawText.length,
},
};
}
const settings = extension_settings[MODULE_NAME] || {};
const regexDebug = { entries: [] };
const afterRawStage = applyTaskRegex(
settings,
normalizedTaskType,
"output.rawResponse",
rawText,
regexDebug,
"assistant",
);
const cleanedText = applyTaskRegex(
settings,
normalizedTaskType,
"output.beforeParse",
afterRawStage,
regexDebug,
"assistant",
);
const normalizedEntries = normalizeRegexDebugEntries(regexDebug);
const applied = normalizedEntries.some(
(entry) => entry.appliedRules.length > 0,
);
return {
cleanedText,
debug: {
changed: cleanedText !== rawText,
applied,
rawLength: rawText.length,
cleanedLength: cleanedText.length,
stages: normalizedEntries,
},
};
}
function applyTaskFinalInputRegex(taskType, messages = []) {
const normalizedMessages = (Array.isArray(messages) ? messages : [])
.map((message) => {
if (!message || typeof message !== "object") {
return null;
}
const role = String(message.role || "").trim().toLowerCase();
if (!["system", "user", "assistant"].includes(role)) {
return null;
}
return {
...message,
role,
content: String(message.content || ""),
};
})
.filter(Boolean);
const normalizedTaskType = String(taskType || "").trim();
if (!normalizedTaskType || normalizedMessages.length === 0) {
const cleanedMessages = normalizedMessages.filter((message) =>
String(message.content || "").trim(),
);
return {
messages: cleanedMessages,
debug: {
stage: "input.finalPrompt",
changed: cleanedMessages.length !== normalizedMessages.length,
applied: false,
rawMessageCount: normalizedMessages.length,
cleanedMessageCount: cleanedMessages.length,
droppedMessageCount: normalizedMessages.length - cleanedMessages.length,
stages: [],
},
};
}
const settings = extension_settings[MODULE_NAME] || {};
const regexDebug = { entries: [] };
let changed = false;
let droppedMessageCount = 0;
const cleanedMessages = normalizedMessages
.map((message) => {
const originalContent = String(message.content || "");
const cleanedContent = applyTaskRegex(
settings,
normalizedTaskType,
"input.finalPrompt",
originalContent,
regexDebug,
message.role,
);
if (cleanedContent !== originalContent) {
changed = true;
}
if (!String(cleanedContent || "").trim()) {
droppedMessageCount += 1;
return null;
}
return {
...message,
content: cleanedContent,
};
})
.filter(Boolean);
const normalizedEntries = normalizeRegexDebugEntries(regexDebug);
const applied = normalizedEntries.some(
(entry) => entry.appliedRules.length > 0,
);
return {
messages: cleanedMessages,
debug: {
stage: "input.finalPrompt",
changed: changed || droppedMessageCount > 0,
applied,
rawMessageCount: normalizedMessages.length,
cleanedMessageCount: cleanedMessages.length,
droppedMessageCount,
stages: normalizedEntries,
},
};
}
function attachRequestCleaningToPromptExecution(
promptExecutionSummary,
requestCleaning,
) {
const base =
promptExecutionSummary && typeof promptExecutionSummary === "object"
? cloneRuntimeDebugValue(promptExecutionSummary, {})
: {};
if (requestCleaning && typeof requestCleaning === "object") {
base.requestCleaning = cloneRuntimeDebugValue(requestCleaning, null);
}
return base;
}
function buildEffectiveLlmRoute(
hasDedicatedConfig,
privateRequestSource,
taskType = "",
) {
const dedicated = Boolean(hasDedicatedConfig);
return {
taskType: String(taskType || "").trim(),
requestSource: String(privateRequestSource || "").trim(),
llm: dedicated ? "dedicated-memory-llm" : "sillytavern-current-model",
transport: dedicated
? "dedicated-openai-compatible"
: "sillytavern-current-model",
};
}
function buildPromptExecutionSummary(debugContext = null) {
if (!debugContext || typeof debugContext !== "object") {
return null;
}
return {
promptAssembly:
debugContext.promptAssembly && typeof debugContext.promptAssembly === "object"
? cloneRuntimeDebugValue(debugContext.promptAssembly, {})
: null,
promptBuild:
debugContext.promptBuild && typeof debugContext.promptBuild === "object"
? cloneRuntimeDebugValue(debugContext.promptBuild, {})
: null,
effectiveDelivery:
debugContext.effectiveDelivery &&
typeof debugContext.effectiveDelivery === "object"
? cloneRuntimeDebugValue(debugContext.effectiveDelivery, {})
: null,
ejsRuntimeStatus: String(debugContext.ejsRuntimeStatus || ""),
worldInfo:
debugContext.worldInfo && typeof debugContext.worldInfo === "object"
? cloneRuntimeDebugValue(debugContext.worldInfo, {})
: null,
mvu:
debugContext.mvu && typeof debugContext.mvu === "object"
? cloneRuntimeDebugValue(debugContext.mvu, {})
: null,
regexInput: normalizeRegexDebugEntries(debugContext.regexInput),
};
}
function createStreamDebugState({
requested = false,
fallback = false,
fallbackReason = "",
fallbackSucceeded = false,
} = {}) {
return {
requested: Boolean(requested),
active: false,
completed: false,
fallback: Boolean(fallback),
fallbackReason: String(fallbackReason || ""),
fallbackSucceeded: Boolean(fallbackSucceeded),
startedAt: "",
finishedAt: "",
chunkCount: 0,
receivedChars: 0,
previewText: "",
finishReason: "",
lastEventAt: "",
lastDebugUpdateAt: 0,
};
}
function buildStreamDebugSnapshot(streamState = {}) {
return {
streamRequested: Boolean(streamState.requested),
streamActive: Boolean(streamState.active),
streamCompleted: Boolean(streamState.completed),
streamFallback: Boolean(streamState.fallback),
streamFallbackReason: String(streamState.fallbackReason || ""),
streamFallbackSucceeded: Boolean(streamState.fallbackSucceeded),
streamStartedAt: String(streamState.startedAt || ""),
streamFinishedAt: String(streamState.finishedAt || ""),
streamChunkCount: Number(streamState.chunkCount || 0),
streamReceivedChars: Number(streamState.receivedChars || 0),
streamPreviewText: String(streamState.previewText || ""),
streamFinishReason: String(streamState.finishReason || ""),
streamLastEventAt: String(streamState.lastEventAt || ""),
};
}
function recordTaskLlmStreamState(
taskKey,
streamState,
extraSnapshot = {},
{ force = false } = {},
) {
if (!taskKey || !streamState) return;
const now = Date.now();
if (
!force &&
streamState.lastDebugUpdateAt &&
now - streamState.lastDebugUpdateAt < STREAM_DEBUG_UPDATE_INTERVAL_MS
) {
return;
}
streamState.lastDebugUpdateAt = now;
recordTaskLlmRequest(
taskKey,
{
...buildStreamDebugSnapshot(streamState),
...extraSnapshot,
},
{
merge: true,
},
);
}
function appendStreamPreview(existingPreview = "", deltaText = "") {
const combined = `${String(existingPreview || "")}${String(deltaText || "")}`;
if (combined.length <= STREAM_DEBUG_PREVIEW_MAX_CHARS) {
return combined;
}
return combined.slice(-STREAM_DEBUG_PREVIEW_MAX_CHARS);
}
function extractTextLikeValue(value) {
if (value == null) return "";
if (typeof value === "string") return value;
if (typeof value === "number" || typeof value === "boolean") {
return String(value);
}
if (Array.isArray(value)) {
return value
.map((item) =>
extractTextLikeValue(item?.text ?? item?.content ?? item),
)
.join("");
}
if (typeof value === "object") {
return extractTextLikeValue(value.text ?? value.content ?? "");
}
return "";
}
function extractStreamingChoice(payload = {}) {
return payload?.choices?.[0] || {};
}
function extractStreamingContentDelta(payload = {}) {
const choice = extractStreamingChoice(payload);
return extractTextLikeValue(
choice?.delta?.content ??
choice?.message?.content ??
choice?.text ??
payload?.content ??
payload?.text ??
"",
);
}
function extractStreamingReasoningDelta(payload = {}) {
const choice = extractStreamingChoice(payload);
return extractTextLikeValue(
choice?.delta?.reasoning_content ??
choice?.delta?.reasoning ??
choice?.message?.reasoning_content ??
payload?.reasoning ??
"",
);
}
function extractStreamingFinishReason(payload = {}) {
const choice = extractStreamingChoice(payload);
return String(
choice?.finish_reason ??
payload?.finish_reason ??
payload?.stop_reason ??
"",
);
}
function extractErrorMessageFromPayload(payload = {}) {
if (typeof payload === "string") {
return payload;
}
return String(
payload?.error?.message ??
payload?.message ??
payload?.detail ??
payload?.error ??
"",
).trim();
}
function looksLikeJsonModeUnsupportedMessage(message = "") {
return /(response_format|json[_-\s]?mode|json[_-\s]?object|json schema|structured output)/i.test(
String(message || ""),
);
}
function looksLikeStreamUnsupportedMessage(message = "") {
return /(stream|streaming|sse|event[-\s]?stream|text\/event-stream)/i.test(
String(message || ""),
);
}
function createStreamHandlingError(
message,
code = "stream_error",
options = {},
) {
const error = new Error(String(message || "流式请求失败"));
error.name = "StreamHandlingError";
error.code = code;
error.fallbackable = options?.fallbackable !== false;
error.status = Number.isFinite(Number(options?.status))
? Number(options.status)
: 0;
return error;
}
function isStreamHandlingError(error) {
return error?.name === "StreamHandlingError";
}
function shouldFallbackToNonStream(error) {
return isStreamHandlingError(error) && error?.fallbackable !== false;
}
function buildResponseErrorMessage(response, responseText = "") {
const rawText = String(responseText || "").trim();
if (!rawText) {
return String(response?.statusText || "");
}
try {
const parsed = JSON.parse(rawText);
return extractErrorMessageFromPayload(parsed) || rawText;
} catch {
return rawText;
}
}
function normalizeOpenAICompatibleBaseUrl(value) {
return String(value || "")
.trim()
.replace(/\/+(chat\/completions|embeddings)$/i, "")
.replace(/\/+$/, "");
}
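// e.g. "https://api.example.com/v1/chat/completions/" -> "https://api.example.com/v1"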
function hasDedicatedLLMConfig(config = getMemoryLLMConfig()) {
return Boolean(config.apiUrl && config.model);
}
function normalizeModelList(items = []) {
if (!Array.isArray(items)) return [];
const seen = new Set();
const models = [];
for (const item of items) {
let id = "";
let label = "";
if (typeof item === "string") {
id = item.trim();
label = id;
} else if (item && typeof item === "object") {
id = String(
item.id || item.name || item.label || item.value || item.slug || "",
).trim();
label = String(
item.label || item.name || item.id || item.value || item.slug || "",
).trim();
}
if (!id || seen.has(id)) continue;
seen.add(id);
models.push({ id, label: label || id });
}
return models;
}
function extractModelListPayload(payload = {}) {
if (Array.isArray(payload)) {
return payload;
}
if (!payload || typeof payload !== "object") {
return [];
}
if (Array.isArray(payload.models)) {
return payload.models;
}
if (Array.isArray(payload.data)) {
return payload.data;
}
if (payload.data && typeof payload.data === "object") {
if (Array.isArray(payload.data.models)) {
return payload.data.models;
}
if (Array.isArray(payload.data.data)) {
return payload.data.data;
}
}
return [];
}
function buildDedicatedAuthHeaderString(apiKey = "") {
const normalized = String(apiKey || "").trim();
return normalized ? `Authorization: Bearer ${normalized}` : "";
}
function buildDedicatedStatusRequestVariants(config = getMemoryLLMConfig()) {
const customVariant = {
mode: "custom",
body: {
chat_completion_source: chat_completion_sources.CUSTOM,
custom_url: config.apiUrl,
custom_include_headers: buildDedicatedAuthHeaderString(config.apiKey),
reverse_proxy: config.apiUrl,
proxy_password: "",
},
};
const legacyOpenAiVariant = {
mode: "openai-reverse-proxy",
body: {
chat_completion_source: chat_completion_sources.OPENAI,
reverse_proxy: config.apiUrl,
proxy_password: config.apiKey || "",
},
};
return [customVariant, legacyOpenAiVariant];
}
async function requestDedicatedStatusModels(
variant,
{ timeoutMs = LLM_REQUEST_TIMEOUT_MS } = {},
) {
const response = await fetchWithTimeout(
"/api/backends/chat-completions/status",
{
method: "POST",
headers: getRequestHeaders(),
body: JSON.stringify(variant.body),
},
timeoutMs,
);
const rawText = await response.text().catch(() => "");
let payload = {};
try {
payload = rawText ? JSON.parse(rawText) : {};
} catch {
payload = {};
}
if (!response.ok || payload?.error) {
throw new Error(
extractErrorMessageFromPayload(payload) ||
rawText ||
response.statusText ||
`HTTP ${response.status}`,
);
}
return {
payload,
models: normalizeModelList(extractModelListPayload(payload)),
};
}
function extractContentFromResponsePayload(payload) {
if (typeof payload === "string") {
return payload;
}
if (Array.isArray(payload)) {
return payload
.map((item) => item?.text || item?.content || "")
.join("")
.trim();
}
if (!payload || typeof payload !== "object") {
return "";
}
const messageContent = payload?.choices?.[0]?.message?.content;
if (typeof messageContent === "string") {
return messageContent;
}
if (Array.isArray(messageContent)) {
return messageContent
.map((item) => item?.text || item?.content || "")
.join("")
.trim();
}
const textContent =
payload?.choices?.[0]?.text ??
payload?.text ??
payload?.message?.content ??
payload?.content;
if (typeof textContent === "string") {
return textContent;
}
if (Array.isArray(textContent)) {
return textContent
.map((item) => item?.text || item?.content || "")
.join("")
.trim();
}
return "";
}
function normalizeLLMResponsePayload(payload) {
if (typeof payload === "string") {
return {
content: payload.trim(),
finishReason: "",
reasoningContent: "",
raw: payload,
};
}
const choice = payload?.choices?.[0] || {};
const message = choice?.message || {};
return {
content: extractContentFromResponsePayload(payload).trim(),
finishReason: String(choice?.finish_reason || ""),
reasoningContent:
typeof message?.reasoning_content === "string"
? message.reasoning_content
: "",
raw: payload,
};
}
function createGenericJsonSchema() {
return {
name: "st_bme_json_response",
description: "A well-formed JSON object for programmatic parsing.",
strict: false,
value: {
type: "object",
additionalProperties: true,
},
};
}
function buildYamlObject(value, indent = 0) {
const pad = " ".repeat(indent);
if (Array.isArray(value)) {
return value
.map((item) => {
if (item && typeof item === "object") {
return `${pad}-\n${buildYamlObject(item, indent + 2)}`;
}
return `${pad}- ${JSON.stringify(item)}`;
})
.join("\n");
}
if (value && typeof value === "object") {
return Object.entries(value)
.map(([key, item]) => {
if (item && typeof item === "object") {
return `${pad}${key}:\n${buildYamlObject(item, indent + 2)}`;
}
return `${pad}${key}: ${JSON.stringify(item)}`;
})
.join("\n");
}
return `${pad}${JSON.stringify(value)}`;
}
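// Sketch of the output shape (the header value below is illustrative):
//   buildYamlObject({ Authorization: "Bearer sk-xxx" }) -> 'Authorization: "Bearer sk-xxx"'
// Nested objects/arrays are emitted as indented YAML blocks.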
function looksLikeTruncatedJson(text) {
const trimmed = String(text || "").trim();
if (!trimmed) return false;
const openBraces = (trimmed.match(/\{/g) || []).length;
const closeBraces = (trimmed.match(/\}/g) || []).length;
const openBrackets = (trimmed.match(/\[/g) || []).length;
const closeBrackets = (trimmed.match(/\]/g) || []).length;
if (openBraces > closeBraces || openBrackets > closeBrackets) {
return true;
}
if (/```(?:json)?/i.test(trimmed) && !/```[\s]*$/i.test(trimmed)) {
return true;
}
return false;
}
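// Assembles the message list for one JSON attempt. On retries (attempt > 0) the
// user message is augmented with the previous failure reason so the model
// regenerates a complete JSON object from scratch. When promptMessages is
// provided it takes precedence: the JSON instructions are merged into its first
// system message, and appended to its last user message only when no standalone
// userPrompt was given (otherwise they are pushed as a new user message).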
function buildJsonAttemptMessages(
systemPrompt,
userPrompt,
attempt,
reason = "",
additionalMessages = [],
promptMessages = [],
) {
const systemParts = [
"输出要求补充:只输出一个紧凑的 JSON 对象。",
"禁止 markdown 代码块、禁止解释、禁止前后缀、禁止省略号。",
"如果需要重新生成,请直接从头输出完整 JSON不要续写上一次内容。",
];
const userParts = [];
if (String(userPrompt || "").trim()) {
userParts.push(String(userPrompt || "").trim());
}
if (attempt > 0) {
userParts.push(
reason ? `上一次输出失败原因:${reason}` : "上一次输出未能被程序解析。",
);
userParts.push(
"请重新输出一个完整、紧凑、可直接 JSON.parse 的 JSON 对象。",
);
} else {
userParts.push("请直接输出紧凑 JSON 对象,不要包含任何额外文本。");
}
const normalizedPromptMessages = Array.isArray(promptMessages)
? promptMessages
.map((message) => {
if (!message || typeof message !== "object") return null;
const role = String(message.role || "").trim().toLowerCase();
const content = String(message.content || "").trim();
if (!["system", "user", "assistant"].includes(role) || !content) {
return null;
}
return { role, content };
})
.filter(Boolean)
: [];
const systemSupplement = [systemPrompt, ...systemParts]
.filter((part) => String(part || "").trim())
.join("\n\n")
.trim();
const userSupplement = userParts.join("\n\n").trim();
if (normalizedPromptMessages.length > 0) {
const messages = normalizedPromptMessages.map((message) => ({ ...message }));
const firstSystemIndex = messages.findIndex(
(message) => message.role === "system",
);
if (systemSupplement) {
if (firstSystemIndex >= 0) {
messages[firstSystemIndex] = {
...messages[firstSystemIndex],
content: [
messages[firstSystemIndex].content,
systemSupplement,
]
.filter((part) => String(part || "").trim())
.join("\n\n"),
};
} else {
messages.unshift({ role: "system", content: systemSupplement });
}
}
if (userSupplement) {
const hasFallbackUserPrompt = Boolean(String(userPrompt || "").trim());
const lastUserIndex = [...messages]
.reverse()
.findIndex((message) => message.role === "user");
const resolvedLastUserIndex =
lastUserIndex >= 0 ? messages.length - 1 - lastUserIndex : -1;
if (resolvedLastUserIndex >= 0 && !hasFallbackUserPrompt) {
messages[resolvedLastUserIndex] = {
...messages[resolvedLastUserIndex],
content: [
messages[resolvedLastUserIndex].content,
userSupplement,
]
.filter((part) => String(part || "").trim())
.join("\n\n"),
};
} else {
messages.push({ role: "user", content: userSupplement });
}
}
return messages;
}
const messages = [];
const normalizedSystemPrompt = [systemPrompt, ...systemParts]
.filter((part) => String(part || "").trim())
.join("\n\n")
.trim();
if (normalizedSystemPrompt) {
messages.push({ role: "system", content: normalizedSystemPrompt });
}
for (const message of additionalMessages || []) {
if (!message || typeof message !== "object") continue;
const role = String(message.role || "").trim().toLowerCase();
const content = String(message.content || "").trim();
if (!content) continue;
if (!["system", "user", "assistant"].includes(role)) continue;
messages.push({ role, content });
}
messages.push({ role: "user", content: userParts.join("\n\n") });
return messages;
}
function resolvePrivateRequestSource(
taskType = "",
requestSource = "",
{ allowAnonymous = false } = {},
) {
const normalizedRequestSource = String(requestSource || "").trim();
if (normalizedRequestSource) {
return normalizedRequestSource;
}
const normalizedTaskType = String(taskType || "").trim();
if (normalizedTaskType) {
return `task:${normalizedTaskType}`;
}
if (allowAnonymous) {
return "adhoc";
}
throw new Error(
"ST-BME private LLM requests require taskType or requestSource",
);
}
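// e.g. resolvePrivateRequestSource("summarize") -> "task:summarize";
//      resolvePrivateRequestSource("", "", { allowAnonymous: true }) -> "adhoc".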
async function fetchWithTimeout(
url,
options = {},
timeoutMs = LLM_REQUEST_TIMEOUT_MS,
) {
const controller = new AbortController();
const timeout = setTimeout(
() =>
controller.abort(
new DOMException(
`LLM 请求超时 (${Math.round(timeoutMs / 1000)}s)`,
"AbortError",
),
),
timeoutMs,
);
const signal = options.signal
? createCombinedAbortSignal(options.signal, controller.signal)
: controller.signal;
try {
return await fetch(url, {
...options,
signal,
});
} finally {
clearTimeout(timeout);
}
}
function createCombinedAbortSignal(...signals) {
const validSignals = signals.filter(Boolean);
if (validSignals.length <= 1) {
return validSignals[0] || undefined;
}
if (
typeof AbortSignal !== "undefined" &&
typeof AbortSignal.any === "function"
) {
return AbortSignal.any(validSignals);
}
const controller = new AbortController();
for (const signal of validSignals) {
if (signal.aborted) {
controller.abort(signal.reason);
return controller.signal;
}
signal.addEventListener("abort", () => controller.abort(signal.reason), {
once: true,
});
}
return controller.signal;
}
// Auto-detection: if the API does not support response_format, remember that and skip JSON mode afterwards
let _jsonModeSupported = true;
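// Module-scoped, so once a 400 response flags response_format as unsupported the
// downgrade sticks for the rest of the session (until the page is reloaded).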
function isAbortError(error) {
return error?.name === "AbortError";
}
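// Reads an OpenAI-compatible SSE body ("data: {...}" events separated by blank
// lines, terminated by "data: [DONE]"), accumulating content/reasoning deltas and
// mirroring progress into the runtime debug state via recordTaskLlmStreamState.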
async function parseDedicatedStreamingResponse(
response,
{ taskKey = "", streamState = null, onStreamProgress = null } = {},
) {
const reader = response?.body?.getReader?.();
if (!reader) {
throw createStreamHandlingError(
"专用 LLM 返回的响应体不可流式读取",
"missing_stream_body",
);
}
const decoder = new TextDecoder();
let buffer = "";
let content = "";
let reasoningContent = "";
let finishReason = "";
let sawStreamEvent = false;
streamState.active = true;
streamState.completed = false;
streamState.startedAt = streamState.startedAt || nowIso();
streamState.finishedAt = "";
recordTaskLlmStreamState(taskKey, streamState, {}, { force: true });
try {
while (true) {
const { done, value } = await reader.read();
if (done) {
break;
}
buffer += decoder.decode(value, { stream: true }).replace(/\r\n/g, "\n");
while (true) {
const boundaryIndex = buffer.indexOf("\n\n");
if (boundaryIndex < 0) {
break;
}
const eventBlock = buffer.slice(0, boundaryIndex).trim();
buffer = buffer.slice(boundaryIndex + 2);
if (!eventBlock) {
continue;
}
const dataLines = eventBlock
.split("\n")
.filter((line) => line.startsWith("data:"))
.map((line) => line.slice(5).trimStart());
if (!dataLines.length) {
continue;
}
const rawData = dataLines.join("\n").trim();
if (!rawData) {
continue;
}
if (rawData === "[DONE]") {
sawStreamEvent = true;
streamState.lastEventAt = nowIso();
break;
}
let parsed;
try {
parsed = JSON.parse(rawData);
} catch (error) {
throw createStreamHandlingError(
"专用 LLM 返回了无法解析的 SSE 数据块",
"invalid_sse_chunk",
{
fallbackable: true,
},
);
}
const payloadErrorMessage = extractErrorMessageFromPayload(parsed);
if (payloadErrorMessage) {
throw createStreamHandlingError(
payloadErrorMessage,
"stream_payload_error",
{
fallbackable:
looksLikeStreamUnsupportedMessage(payloadErrorMessage),
},
);
}
sawStreamEvent = true;
streamState.chunkCount += 1;
streamState.lastEventAt = nowIso();
const deltaText = extractStreamingContentDelta(parsed);
const reasoningDelta = extractStreamingReasoningDelta(parsed);
const nextFinishReason = extractStreamingFinishReason(parsed);
if (deltaText) {
content += deltaText;
streamState.receivedChars += deltaText.length;
streamState.previewText = appendStreamPreview(
streamState.previewText,
deltaText,
);
if (typeof onStreamProgress === "function") {
try {
onStreamProgress({
previewText: streamState.previewText,
chunkCount: streamState.chunkCount,
receivedChars: streamState.receivedChars,
});
} catch {}
}
}
if (reasoningDelta) {
reasoningContent += reasoningDelta;
}
if (nextFinishReason) {
finishReason = nextFinishReason;
streamState.finishReason = nextFinishReason;
}
recordTaskLlmStreamState(taskKey, streamState, {});
}
}
buffer += decoder.decode();
if (!sawStreamEvent) {
throw createStreamHandlingError(
"专用 LLM 未返回可识别的 SSE 事件流",
"invalid_sse_stream",
);
}
streamState.active = false;
streamState.completed = true;
streamState.finishedAt = nowIso();
if (finishReason) {
streamState.finishReason = finishReason;
}
recordTaskLlmStreamState(taskKey, streamState, {}, { force: true });
return {
content: String(content || "").trim(),
finishReason: String(finishReason || ""),
reasoningContent: String(reasoningContent || ""),
raw: {
mode: "stream",
chunkCount: streamState.chunkCount,
},
};
} catch (error) {
streamState.active = false;
streamState.completed = false;
streamState.finishedAt = nowIso();
if (isAbortError(error)) {
streamState.finishReason = "aborted";
}
recordTaskLlmStreamState(taskKey, streamState, {}, { force: true });
throw error;
} finally {
try {
reader.releaseLock?.();
} catch {
// ignore
}
}
}
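// Sends one request through SillyTavern's /api/backends/chat-completions/generate
// proxy. The while loop exists solely for the json-mode downgrade: a 400 response
// that looks like "response_format unsupported" strips custom_include_body and
// retries without it.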
async function executeDedicatedRequest(
body,
{
signal,
timeoutMs = LLM_REQUEST_TIMEOUT_MS,
jsonMode = false,
taskKey = "",
streamState = null,
onStreamProgress = null,
} = {},
) {
const requestBody = cloneRuntimeDebugValue(body, {}) || {};
while (true) {
recordTaskLlmRequest(
taskKey,
{
requestBody: requestBody,
},
{
merge: true,
},
);
const response = await fetchWithTimeout(
"/api/backends/chat-completions/generate",
{
method: "POST",
headers: getRequestHeaders(),
body: JSON.stringify(requestBody),
signal,
},
timeoutMs,
);
if (!response.ok) {
const responseText = await response.text().catch(() => "");
const message = buildResponseErrorMessage(response, responseText);
if (
jsonMode &&
_jsonModeSupported &&
response.status === 400 &&
looksLikeJsonModeUnsupportedMessage(message)
) {
console.warn("[ST-BME] API 不支持 json mode降级为普通 JSON 提示模式");
_jsonModeSupported = false;
delete requestBody.custom_include_body;
continue;
}
if (requestBody.stream === true && looksLikeStreamUnsupportedMessage(message)) {
throw createStreamHandlingError(
message || `Memory LLM proxy error ${response.status}`,
"stream_http_error",
{
status: response.status,
},
);
}
throw new Error(
`Memory LLM proxy error ${response.status}: ${message || response.statusText}`,
);
}
if (requestBody.stream === true) {
return await parseDedicatedStreamingResponse(response, {
taskKey,
streamState,
onStreamProgress,
});
}
return await _parseResponse(response);
}
}
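// Routes a chat-completion call: with a dedicated URL + model configured it goes
// through the CUSTOM chat-completion source above; otherwise it falls back to the
// current SillyTavern model via sendOpenAIRequest. Stream failures flagged as
// fallbackable are retried once in non-stream mode.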
async function callDedicatedOpenAICompatible(
messages,
{
signal,
jsonMode = false,
maxCompletionTokens = null,
taskType = "",
requestSource = "",
onStreamProgress = null,
} = {},
) {
const privateRequestSource = resolvePrivateRequestSource(
taskType,
requestSource,
);
const config = getMemoryLLMConfig(taskType);
const settings = extension_settings[MODULE_NAME] || {};
const hasDedicatedConfig = hasDedicatedLLMConfig(config);
if (taskType && config.llmPresetFallbackReason) {
debugWarn(
`[ST-BME] 任务 ${taskType} 指定的 API 模板不可用,已回退当前 API: ` +
`${config.requestedLlmPresetName || "(empty)"} / ${config.llmPresetFallbackReason}`,
);
}
const generationResolved = taskType
? resolveTaskGenerationOptions(settings, taskType, {
max_completion_tokens: Number.isFinite(maxCompletionTokens)
? maxCompletionTokens
: jsonMode
? DEFAULT_JSON_COMPLETION_TOKENS
: DEFAULT_TEXT_COMPLETION_TOKENS,
}, {
mode: hasDedicatedConfig
? "dedicated-openai-compatible"
: "sillytavern-current-model",
})
: {
filtered: {},
removed: [],
};
const taskKey = taskType || privateRequestSource;
const initialFilteredGeneration = generationResolved.filtered || {};
const streamRequested =
hasDedicatedConfig && initialFilteredGeneration.stream === true;
const streamState = createStreamDebugState({
requested: streamRequested,
});
recordTaskLlmRequest(taskType || privateRequestSource, {
requestSource: privateRequestSource,
taskType: String(taskType || "").trim(),
jsonMode,
dedicatedConfig: hasDedicatedConfig,
route: hasDedicatedConfig
? "dedicated-openai-compatible"
: "sillytavern-current-model",
model: hasDedicatedConfig ? config.model : "sillytavern-current-model",
apiUrl: hasDedicatedConfig ? config.apiUrl : "",
llmConfigSource: config.llmConfigSource || "global",
llmConfigSourceLabel: config.llmConfigSourceLabel || "",
llmPresetName: config.llmPresetName || "",
requestedLlmPresetName: config.requestedLlmPresetName || "",
llmPresetFallbackReason: config.llmPresetFallbackReason || "",
messages,
generation: generationResolved.generation || {},
filteredGeneration: generationResolved.filtered || {},
removedGeneration: generationResolved.removed || [],
capabilityMode: generationResolved.capabilityMode || "",
effectiveRoute: buildEffectiveLlmRoute(
hasDedicatedConfig,
privateRequestSource,
taskType,
),
maxCompletionTokens,
...buildStreamDebugSnapshot(streamState),
});
if (!hasDedicatedConfig) {
const payload = await sendOpenAIRequest(
"quiet",
messages,
signal,
jsonMode ? { jsonSchema: createGenericJsonSchema() } : {},
);
const normalized = normalizeLLMResponsePayload(payload);
if (
typeof normalized.content === "string" &&
normalized.content.trim().length > 0
) {
return normalized;
}
throw new Error(
`${privateRequestSource}: SillyTavern current model returned an unexpected response format`,
);
}
const completionTokens = Number.isFinite(maxCompletionTokens)
? maxCompletionTokens
: jsonMode
? DEFAULT_JSON_COMPLETION_TOKENS
: DEFAULT_TEXT_COMPLETION_TOKENS;
const filteredGeneration = generationResolved.filtered || {};
const resolvedCompletionTokens = Number.isFinite(
filteredGeneration.max_completion_tokens,
)
? filteredGeneration.max_completion_tokens
: completionTokens;
const body = {
chat_completion_source: chat_completion_sources.CUSTOM,
custom_url: config.apiUrl,
custom_include_headers: config.apiKey
? buildYamlObject({
Authorization: `Bearer ${config.apiKey}`,
})
: "",
model: config.model,
messages,
temperature: filteredGeneration.temperature ?? 1,
max_completion_tokens: resolvedCompletionTokens,
stream: filteredGeneration.stream ?? false,
frequency_penalty: filteredGeneration.frequency_penalty ?? 0,
presence_penalty: filteredGeneration.presence_penalty ?? 0,
top_p: filteredGeneration.top_p ?? 1,
};
const optionalGenerationFields = [
"top_p",
"top_k",
"top_a",
"min_p",
"seed",
"frequency_penalty",
"presence_penalty",
"repetition_penalty",
"squash_system_messages",
"reasoning_effort",
"request_thoughts",
"enable_function_calling",
"enable_web_search",
"wrap_user_messages_in_quotes",
"reply_count",
"max_context_tokens",
"character_name_prefix",
];
for (const field of optionalGenerationFields) {
if (!Object.prototype.hasOwnProperty.call(filteredGeneration, field)) continue;
body[field] = filteredGeneration[field];
}
if (jsonMode && _jsonModeSupported) {
body.custom_include_body = buildYamlObject({
response_format: {
type: "json_object",
},
});
}
recordTaskLlmRequest(taskKey, {
requestSource: privateRequestSource,
taskType: String(taskType || "").trim(),
jsonMode,
dedicatedConfig: true,
route: "dedicated-openai-compatible",
model: config.model,
apiUrl: config.apiUrl,
llmConfigSource: config.llmConfigSource || "global",
llmConfigSourceLabel: config.llmConfigSourceLabel || "",
llmPresetName: config.llmPresetName || "",
requestedLlmPresetName: config.requestedLlmPresetName || "",
llmPresetFallbackReason: config.llmPresetFallbackReason || "",
messages,
generation: generationResolved.generation || {},
filteredGeneration,
removedGeneration: generationResolved.removed || [],
capabilityMode: generationResolved.capabilityMode || "",
resolvedCompletionTokens,
effectiveRoute: buildEffectiveLlmRoute(
true,
privateRequestSource,
taskType,
),
requestBody: body,
...buildStreamDebugSnapshot(streamState),
});
try {
return await executeDedicatedRequest(body, {
signal,
timeoutMs: config.timeoutMs,
jsonMode,
taskKey,
streamState,
onStreamProgress,
});
} catch (error) {
if (
!streamRequested ||
!shouldFallbackToNonStream(error) ||
isAbortError(error)
) {
throw error;
}
streamState.active = false;
streamState.completed = false;
streamState.fallback = true;
streamState.fallbackReason = error?.message || String(error);
streamState.finishedAt = nowIso();
recordTaskLlmStreamState(taskKey, streamState, {}, { force: true });
console.warn(
`[ST-BME] 专用 LLM 流式不可用,已自动降级为非流式: ${streamState.fallbackReason}`,
);
const fallbackBody = {
...body,
stream: false,
};
const fallbackResponse = await executeDedicatedRequest(fallbackBody, {
signal,
timeoutMs: config.timeoutMs,
jsonMode,
taskKey,
streamState,
});
streamState.fallbackSucceeded = true;
recordTaskLlmStreamState(taskKey, streamState, {}, { force: true });
return fallbackResponse;
}
}
async function _parseResponse(response) {
const responseText = await response.text().catch(() => "");
let data;
try {
data = responseText ? JSON.parse(responseText) : {};
} catch {
data = { error: { message: responseText || response.statusText } };
}
if (!response.ok) {
const message = data?.error?.message || response.statusText;
throw new Error(`Memory LLM proxy error ${response.status}: ${message}`);
}
if (data?.error?.message) {
throw new Error(`Memory LLM proxy error: ${data.error.message}`);
}
const normalized = normalizeLLMResponsePayload(data);
if (typeof normalized.content === "string" && normalized.content.length > 0) {
return normalized;
}
throw new Error("Memory LLM API returned an unexpected response format");
}
/**
 * Call the LLM and expect structured JSON back.
 *
 * @param {object} params
 * @param {string} params.systemPrompt - System prompt
 * @param {string} params.userPrompt - User prompt
 * @param {number} [params.maxRetries=2] - Retry count when JSON parsing fails
 * @param {AbortSignal} [params.signal] - Optional abort signal
 * @param {string} [params.taskType] - Task type used to resolve presets and regex rules
 * @param {boolean} [params.returnFailureDetails=false] - Return a failure snapshot instead of null
 * @returns {Promise<object|null>} Parsed JSON object, or null on failure
 */
export async function callLLMForJSON({
systemPrompt,
userPrompt,
maxRetries = 2,
signal,
taskType = "",
requestSource = "",
additionalMessages = [],
promptMessages = [],
debugContext = null,
onStreamProgress = null,
maxCompletionTokens = null,
returnFailureDetails = false,
} = {}) {
const override = getLlmTestOverride("callLLMForJSON");
if (override) {
return await override({
systemPrompt,
userPrompt,
maxRetries,
signal,
taskType,
requestSource,
additionalMessages,
promptMessages,
debugContext,
onStreamProgress,
maxCompletionTokens,
returnFailureDetails,
});
}
const privateRequestSource = resolvePrivateRequestSource(
taskType,
requestSource,
);
let lastFailureReason = "";
let lastFailureType = "";
const promptExecutionSummary = buildPromptExecutionSummary(debugContext);
for (let attempt = 0; attempt <= maxRetries; attempt++) {
try {
const assembledMessages = buildJsonAttemptMessages(
systemPrompt,
userPrompt,
attempt,
lastFailureReason,
additionalMessages,
promptMessages,
);
{
const asmUser = assembledMessages.filter((m) => m?.role === "user");
debugLog(
`[ST-BME][prompt-diag] buildJsonAttemptMessages: ` +
`total=${assembledMessages.length}, user=${asmUser.length}, ` +
`roles=[${assembledMessages.map((m) => m?.role).join(",")}]`,
);
for (const m of asmUser) {
debugLog(
`[ST-BME][prompt-diag] assembled user: len=${String(m.content || "").length}, ` +
`preview="${String(m.content || "").slice(0, 80)}..."`,
);
}
}
const requestCleaning = applyTaskFinalInputRegex(
taskType,
assembledMessages,
);
{
const rcMsgs = Array.isArray(requestCleaning.messages) ? requestCleaning.messages : [];
const rcUser = rcMsgs.filter((m) => m?.role === "user");
const dbg = requestCleaning.debug || {};
debugLog(
`[ST-BME][prompt-diag] applyTaskFinalInputRegex: ` +
`total=${rcMsgs.length}, user=${rcUser.length}, ` +
`changed=${dbg.changed}, applied=${dbg.applied}, ` +
`roles=[${rcMsgs.map((m) => m?.role).join(",")}]`,
);
if (rcUser.length === 0 && assembledMessages.filter((m) => m?.role === "user").length > 0) {
debugWarn(
`[ST-BME][prompt-diag] *** USER MESSAGES LOST during applyTaskFinalInputRegex! ***`,
);
for (const stage of dbg.stages || []) {
for (const rule of stage.appliedRules || []) {
debugWarn(`[ST-BME][prompt-diag] applied rule: ${JSON.stringify(rule)}`);
}
}
}
}
const promptExecutionSnapshot = attachRequestCleaningToPromptExecution(
promptExecutionSummary,
requestCleaning.debug,
);
recordTaskLlmRequest(
taskType || privateRequestSource,
{
requestCleaning: requestCleaning.debug,
promptExecution: promptExecutionSnapshot,
},
{
merge: true,
},
);
const response = await callDedicatedOpenAICompatible(requestCleaning.messages, {
signal,
jsonMode: true,
taskType,
requestSource: privateRequestSource,
onStreamProgress,
maxCompletionTokens: Number.isFinite(maxCompletionTokens)
? maxCompletionTokens
: DEFAULT_JSON_COMPLETION_TOKENS,
});
const responseText = response?.content || "";
const outputCleanup = applyTaskOutputRegexStages(taskType, responseText);
recordTaskLlmRequest(
taskType || privateRequestSource,
{
requestCleaning: requestCleaning.debug,
responseCleaning: outputCleanup.debug,
promptExecution: promptExecutionSnapshot,
},
{
merge: true,
},
);
if (!responseText || typeof responseText !== "string") {
console.warn(`[ST-BME] LLM 返回空响应 (尝试 ${attempt + 1})`);
lastFailureReason = "返回空响应";
lastFailureType = "empty-response";
continue;
}
// Try to parse the response as JSON
const parsed = extractJSON(outputCleanup.cleanedText);
if (parsed !== null) {
return returnFailureDetails
? {
ok: true,
data: parsed,
attempts: attempt + 1,
errorType: "",
failureReason: "",
}
: parsed;
}
const truncated =
response.finishReason === "length" ||
looksLikeTruncatedJson(outputCleanup.cleanedText);
lastFailureType = truncated ? "truncated-json" : "invalid-json";
lastFailureReason = truncated
? "输出因长度限制被截断,请重新输出更紧凑的完整 JSON"
: "输出不是有效 JSON请严格返回紧凑 JSON 对象";
console.warn(
`[ST-BME] LLM 响应无法解析为 JSON (尝试 ${attempt + 1}, finish=${response.finishReason || "unknown"}):`,
responseText.slice(0, 200),
);
} catch (e) {
if (isAbortError(e)) {
const abortMessage = e?.message || String(e) || "LLM 调用已终止";
const isTimeoutAbort =
!signal?.aborted && /超时/i.test(String(abortMessage || ""));
if (!isTimeoutAbort) {
throw e;
}
console.error(`[ST-BME] LLM 调用超时 (尝试 ${attempt + 1}):`, e);
lastFailureReason = abortMessage;
lastFailureType = "timeout";
continue;
}
console.error(`[ST-BME] LLM 调用失败 (尝试 ${attempt + 1}):`, e);
lastFailureReason = e?.message || String(e) || "LLM 调用失败";
lastFailureType = "provider-error";
}
}
if (returnFailureDetails) {
const failureSnapshot = {
ok: false,
data: null,
attempts: maxRetries + 1,
errorType: lastFailureType || "unknown",
failureReason: lastFailureReason || "LLM 未返回可解析 JSON",
};
recordTaskLlmRequest(taskType || privateRequestSource, {
jsonFailure: failureSnapshot,
promptExecution: promptExecutionSummary,
}, {
merge: true,
});
return failureSnapshot;
}
return null;
}
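// Illustrative usage (the task name and prompts below are placeholders, not values
// defined by this module):
//
//   const result = await callLLMForJSON({
//     systemPrompt: "You are a memory summarizer.",
//     userPrompt: "Summarize the scene as JSON.",
//     taskType: "summarize",
//     returnFailureDetails: true,
//   });
//   if (result?.ok) console.log(result.data);
//   else console.warn(result?.errorType, result?.failureReason);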
/**
 * Call the LLM (plain-text response, JSON output not required).
 *
 * @param {string} systemPrompt
 * @param {string} userPrompt
 * @param {object} [options] - Optional: taskType, requestSource/source, signal, debugContext
 * @returns {Promise<string|null>}
 */
export async function callLLM(systemPrompt, userPrompt, options = {}) {
const override = getLlmTestOverride("callLLM");
if (override) {
return await override(systemPrompt, userPrompt, options);
}
const taskType = String(options.taskType || "").trim();
const privateRequestSource = resolvePrivateRequestSource(
taskType,
options.requestSource || options.source || "diagnostic:call-llm",
{ allowAnonymous: true },
);
const promptExecutionSummary = buildPromptExecutionSummary(
options.debugContext || null,
);
const assembledMessages = [
{ role: "system", content: systemPrompt },
{ role: "user", content: userPrompt },
];
const requestCleaning = applyTaskFinalInputRegex(taskType, assembledMessages);
const promptExecutionSnapshot = attachRequestCleaningToPromptExecution(
promptExecutionSummary,
requestCleaning.debug,
);
try {
recordTaskLlmRequest(taskType || privateRequestSource, {
requestCleaning: requestCleaning.debug,
promptExecution: promptExecutionSnapshot,
}, {
merge: true,
});
const response = await callDedicatedOpenAICompatible(requestCleaning.messages, {
signal: options.signal,
taskType,
requestSource: privateRequestSource,
});
const responseText =
typeof response?.content === "string" ? response.content : "";
const outputCleanup = applyTaskOutputRegexStages(taskType, responseText);
recordTaskLlmRequest(taskType || privateRequestSource, {
requestCleaning: requestCleaning.debug,
responseCleaning: outputCleanup.debug,
promptExecution: promptExecutionSnapshot,
}, {
merge: true,
});
return outputCleanup.cleanedText || null;
} catch (e) {
console.error("[ST-BME] LLM 调用失败:", e);
return null;
}
}
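// Illustrative usage (prompts are placeholders):
//
//   const text = await callLLM("You are a helper.", "Reply with one word.", {
//     requestSource: "diagnostic:example",
//   });
//   // -> cleaned response text, or null if the call failed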
/**
 * Test connectivity of the memory LLM.
 * If no dedicated memory LLM is configured, the current SillyTavern chat model is tested instead.
 *
 * @returns {Promise<{success: boolean, mode: string, error: string}>}
 */
export async function testLLMConnection() {
const config = getMemoryLLMConfig();
const mode = hasDedicatedLLMConfig(config)
? `dedicated:${config.model}`
: "sillytavern-current-model";
try {
const response = await callLLM(
"你是一个连接测试助手。请只回答 OK。",
"请只回复 OK",
{
requestSource: "diagnostic:test-connection",
},
);
if (typeof response === "string" && response.trim().length > 0) {
return { success: true, mode, error: "" };
}
return { success: false, mode, error: "API 返回空结果" };
} catch (e) {
return { success: false, mode, error: String(e) };
}
}
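// Illustrative usage:
//
//   const { success, mode, error } = await testLLMConnection();
//   // mode is "dedicated:<model>" when a dedicated memory LLM is configured,
//   // otherwise "sillytavern-current-model".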
export async function fetchMemoryLLMModels() {
const config = getMemoryLLMConfig();
if (!config.apiUrl) {
return {
success: false,
models: [],
error: "请先填写记忆 LLM API 地址",
};
}
const variants = buildDedicatedStatusRequestVariants(config);
const errors = [];
try {
for (const variant of variants) {
try {
const result = await requestDedicatedStatusModels(variant, {
timeoutMs: config.timeoutMs,
});
if (result.models.length > 0) {
return { success: true, models: result.models, error: "" };
}
errors.push(`${variant.mode}:empty`);
} catch (error) {
errors.push(`${variant.mode}:${String(error?.message || error)}`);
}
}
return {
success: false,
models: [],
error:
errors.length > 0
? `未拉取到可用模型。尝试结果: ${errors.join(" | ")}`
: "未拉取到可用模型,请检查接口是否支持模型列表接口",
};
} catch (error) {
return { success: false, models: [], error: String(error) };
}
}
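// Illustrative usage (populateModelDropdown is a hypothetical caller-side helper):
//
//   const { success, models, error } = await fetchMemoryLLMModels();
//   if (success) populateModelDropdown(models); // models: [{ id, label }, ...]
//   else console.warn(error);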
/**
 * Extract a JSON object from LLM response text.
 * Handles common formats: bare JSON, markdown code fences, JSON mixed with prose.
 *
 * @param {string} text
 * @returns {object|null}
 */
function extractJSON(text) {
if (!text || typeof text !== "string") return null;
const trimmed = text.trim();
// 1. Try parsing the text directly
try {
return JSON.parse(trimmed);
} catch {
/* continue */
}
// 2. Try extracting JSON from a markdown code fence
const codeBlockMatch = trimmed.match(/```(?:json)?\s*\n?([\s\S]*?)\n?\s*```/);
if (codeBlockMatch) {
try {
return JSON.parse(codeBlockMatch[1].trim());
} catch {
/* continue */
}
}
// 3. Try the JSON starting at the first { or [
const firstBrace = trimmed.indexOf("{");
const firstBracket = trimmed.indexOf("[");
let startIdx = -1;
let endChar = "";
if (firstBrace >= 0 && (firstBracket < 0 || firstBrace < firstBracket)) {
startIdx = firstBrace;
endChar = "}";
} else if (firstBracket >= 0) {
startIdx = firstBracket;
endChar = "]";
}
if (startIdx >= 0) {
// Search backwards for the matching closing character
const lastEnd = trimmed.lastIndexOf(endChar);
if (lastEnd > startIdx) {
try {
return JSON.parse(trimmed.slice(startIdx, lastEnd + 1));
} catch {
/* continue */
}
}
}
return null;
}
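// Illustrative inputs extractJSON accepts:
//   '{"a":1}'                        -> { a: 1 }
//   '```json\n{"a":1}\n```'          -> { a: 1 }
//   'Here you go: {"a":1} Enjoy!'    -> { a: 1 }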