diff --git a/maintenance/authority-consistency.js b/maintenance/authority-consistency.js index 3ba02a3..9ed4a67 100644 --- a/maintenance/authority-consistency.js +++ b/maintenance/authority-consistency.js @@ -96,6 +96,9 @@ export function buildAuthorityConsistencyRepairPlan(audit = null) { : []; const issueCodes = collectIssueCodes(source); const steps = []; + const sqlRevision = normalizeOptionalInteger(source?.sql?.revision); + const blobRevision = normalizeOptionalInteger(source?.blob?.revision); + const sqlNewerThanBlob = Number.isFinite(sqlRevision) && Number.isFinite(blobRevision) && sqlRevision > blobRevision; const addStep = (action, label, detail, codes = []) => { const normalizedAction = normalizeRepairAction(action); if (!normalizedAction || !actions.includes(normalizedAction)) { @@ -120,21 +123,23 @@ export function buildAuthorityConsistencyRepairPlan(audit = null) { addStep( "write-authority-checkpoint", - "写入当前 Checkpoint", - "Authority Blob 尚无 checkpoint,先把当前 runtime 图谱写成 checkpoint,再继续后续修复。", - ["blob-checkpoint-missing"], - ); - addStep( - "restore-from-authority-blob-checkpoint", - "从 Blob Checkpoint 恢复 SQL", - "检测到 runtime / SQL / Blob revision 漂移,可用 Blob checkpoint 回灌 Authority SQL。", - ["sql-runtime-revision-drift", "blob-runtime-revision-drift"], + "同步备份 Checkpoint", + "Authority Blob checkpoint 落后或缺失,应从当前权威图谱源同步一个新的备份 checkpoint。", + ["blob-checkpoint-missing", "blob-checkpoint-behind", "blob-runtime-revision-drift"], ); + if (!sqlNewerThanBlob) { + addStep( + "restore-from-authority-blob-checkpoint", + "灾难恢复:从 Blob Checkpoint 恢复 SQL", + "仅在 SQL 缺失、损坏或用户明确需要回滚时,才可用 Blob checkpoint 回灌 Authority SQL。", + ["sql-runtime-revision-drift", "blob-newer-than-sql", "blob-chat-mismatch"], + ); + } addStep( "rebuild-authority-trivium", - "重建 Authority Trivium", - "Trivium 与 SQL revision 不一致,或当前向量索引仍为 dirty,需要重建 Trivium。", - ["trivium-sql-revision-drift", "trivium-collection-mismatch", "vector-dirty"], + "同步向量/Trivium 副本", + "Trivium 向量副本落后、collection 不匹配,或当前向量索引为 dirty,需要从权威图谱源重建/同步。", + ["trivium-sql-revision-drift", "trivium-replica-behind", "trivium-collection-mismatch", "vector-dirty"], ); const blockedIssueCodes = (Array.isArray(source.issues) ? source.issues : []) @@ -145,7 +150,7 @@ export function buildAuthorityConsistencyRepairPlan(audit = null) { (action) => action !== "run-authority-consistency-audit" && !steps.some((step) => step.action === action), ); const detail = steps.length - ? `建议顺序:${steps.map((step) => step.label).join(" → ")}` + ? `建议同步:${steps.map((step) => step.label).join(" → ")}` : String(source?.summary?.detail || "当前审计未发现需要自动编排的修复步骤"); return { @@ -157,7 +162,7 @@ export function buildAuthorityConsistencyRepairPlan(audit = null) { unsupportedActions, summary: { level: steps.length > 0 ? "warning" : String(source?.summary?.level || "idle"), - label: steps.length > 0 ? `建议修复 ${steps.length} 步` : "当前无需编排修复", + label: steps.length > 0 ? `建议同步副本 ${steps.length} 步` : "当前无需编排修复", detail, }, }; @@ -436,10 +441,23 @@ export function buildAuthorityConsistencyAudit(input = {}) { runtimeVsBlobRevision: buildRevisionDelta(runtime.revision, blob.revision), sqlVsBlobRevision: buildRevisionDelta(sql.revision, blob.revision), triviumVsSqlRevision: buildRevisionDelta(trivium.revision, sql.revision), + sqlNewerThanBlob: + Number.isFinite(sql.revision) && Number.isFinite(blob.revision) && sql.revision > blob.revision, + blobNewerThanSql: + Number.isFinite(sql.revision) && Number.isFinite(blob.revision) && blob.revision > sql.revision, + sqlNewerThanTrivium: + Number.isFinite(sql.revision) && Number.isFinite(trivium.revision) && sql.revision > trivium.revision, collectionMatchesRuntime: !trivium.namespace || !runtime.collectionId || trivium.namespace === runtime.collectionId, checkpointRestorable: - blob.exists && blob.hasSerializedGraph && (!blob.chatId || !chatId || blob.chatId === chatId), + blob.exists && + blob.hasSerializedGraph && + (!blob.chatId || !chatId || blob.chatId === chatId) && + !( + Number.isFinite(sql.revision) && + Number.isFinite(blob.revision) && + sql.revision > blob.revision + ), }; const issues = []; @@ -473,11 +491,16 @@ export function buildAuthorityConsistencyAudit(input = {}) { Number.isFinite(runtime.revision) && blob.revision !== runtime.revision ) { + const code = Number.isFinite(sql.revision) && blob.revision < sql.revision + ? "blob-checkpoint-behind" + : "blob-runtime-revision-drift"; issues.push( normalizeIssue( "warning", - "blob-runtime-revision-drift", - `Blob checkpoint revision 与 runtime 不一致:${blob.revision} ≠ ${runtime.revision}`, + code, + code === "blob-checkpoint-behind" + ? `Blob checkpoint 落后于 Authority SQL:${blob.revision} < ${sql.revision}` + : `Blob checkpoint revision 与 runtime 不一致:${blob.revision} ≠ ${runtime.revision}`, ), ); } @@ -486,11 +509,16 @@ export function buildAuthorityConsistencyAudit(input = {}) { Number.isFinite(sql.revision) && trivium.revision !== sql.revision ) { + const code = trivium.revision < sql.revision + ? "trivium-replica-behind" + : "trivium-sql-revision-drift"; issues.push( normalizeIssue( "warning", - "trivium-sql-revision-drift", - `Trivium revision 与 SQL 不一致:${trivium.revision} ≠ ${sql.revision}`, + code, + code === "trivium-replica-behind" + ? `Trivium 向量副本落后于 Authority SQL:${trivium.revision} < ${sql.revision}` + : `Trivium revision 与 SQL 不一致:${trivium.revision} ≠ ${sql.revision}`, ), ); } @@ -511,11 +539,18 @@ export function buildAuthorityConsistencyAudit(input = {}) { } const actions = []; - if (drift.checkpointRestorable) actions.push("restore-from-authority-blob-checkpoint"); + const restoreRelevant = + drift.checkpointRestorable && + ( + sql.ok !== true || + drift.blobNewerThanSql || + issues.some((issue) => issue.code === "sql-probe-error") + ); + if (restoreRelevant) actions.push("restore-from-authority-blob-checkpoint"); if (runtime.vectorDirty || (Number.isFinite(drift.triviumVsSqlRevision) && drift.triviumVsSqlRevision < 0)) { actions.push("rebuild-authority-trivium"); } - if (!blob.exists && source.capability?.blobReady) { + if ((!blob.exists || drift.sqlNewerThanBlob) && source.capability?.blobReady) { actions.push("write-authority-checkpoint"); } if (issues.some((issue) => issue.code === "sql-runtime-revision-drift" || issue.code === "blob-runtime-revision-drift")) { @@ -533,13 +568,32 @@ export function buildAuthorityConsistencyAudit(input = {}) { level === "error" ? "存在阻塞性不一致" : level === "warning" - ? "存在待处理漂移" + ? sql.ok + ? "副本待同步" + : "存在待处理漂移" : level === "success" ? "Authority 工件已对齐" : "等待审计"; const detail = issues[0]?.message || (level === "success" ? "Authority SQL / Trivium / Blob 已达到当前可观测的一致状态" : "尚未运行审计"); + const replicaLag = issues.some((issue) => [ + "blob-checkpoint-missing", + "blob-checkpoint-behind", + "trivium-replica-behind", + "vector-dirty", + ].includes(issue.code)); + const runtimeAheadOfSql = + Number.isFinite(runtime.revision) && + Number.isFinite(sql.revision) && + runtime.revision > sql.revision; + const dataSafety = sql.ok + ? runtimeAheadOfSql + ? "runtime-ahead-of-sql" + : replicaLag + ? "saved-replicas-behind" + : "saved" + : (sql.available ? "unknown" : "unavailable"); return { updatedAt, @@ -557,6 +611,9 @@ export function buildAuthorityConsistencyAudit(input = {}) { label, detail, issueCount: issues.length, + dataSafety, + backupRedundancy: replicaLag ? "degraded" : (blob.exists ? "ok" : "unknown"), + searchQuality: runtime.vectorDirty || drift.sqlNewerThanTrivium ? "degraded" : "ok", }, }; } diff --git a/tests/authority-consistency.mjs b/tests/authority-consistency.mjs index e8f9de4..6a71f6e 100644 --- a/tests/authority-consistency.mjs +++ b/tests/authority-consistency.mjs @@ -108,7 +108,8 @@ const auditAligned = buildAuthorityConsistencyAudit({ assert.equal(auditAligned.summary.level, "success"); assert.equal(auditAligned.issues.length, 0); assert.equal(auditAligned.drift.checkpointRestorable, true); -assert.ok(auditAligned.actions.includes("restore-from-authority-blob-checkpoint")); +assert.equal(auditAligned.actions.includes("restore-from-authority-blob-checkpoint"), false); +assert.equal(auditAligned.summary.dataSafety, "saved"); const alignedRepairPlan = buildAuthorityConsistencyRepairPlan(auditAligned); assert.equal(alignedRepairPlan.ok, false); assert.equal(alignedRepairPlan.stepCount, 0); @@ -161,6 +162,67 @@ assert.deepEqual( ], ); +const auditSqlAheadReplicasBehind = buildAuthorityConsistencyAudit({ + chatId: "chat-a", + collectionId: "st-bme::chat-a", + capability: { + blobReady: true, + }, + runtimeGraph: { + meta: { revision: 2 }, + nodes: [{ id: "node-a" }], + edges: [], + vectorIndexState: { + collectionId: "st-bme::chat-a", + dirty: false, + }, + }, + graphPersistenceState: { + chatId: "chat-a", + revision: 2, + authorityBlobCheckpointPath: "user/files/checkpoint.json", + authorityBlobCheckpointRevision: 0, + }, + sqlSnapshot: { + meta: { revision: 2, nodeCount: 1, edgeCount: 0, tombstoneCount: 0 }, + }, + triviumStat: { + revision: 0, + namespace: "st-bme::chat-a", + }, + blobResult: { + ok: true, + exists: true, + path: "user/files/checkpoint.json", + checkpoint: { + chatId: "chat-a", + revision: 0, + serializedGraph: serializeGraph(createEmptyGraph()), + }, + }, +}); +assert.equal(auditSqlAheadReplicasBehind.summary.level, "warning"); +assert.equal(auditSqlAheadReplicasBehind.summary.label, "副本待同步"); +assert.equal(auditSqlAheadReplicasBehind.summary.dataSafety, "saved-replicas-behind"); +assert.equal(auditSqlAheadReplicasBehind.summary.backupRedundancy, "degraded"); +assert.equal(auditSqlAheadReplicasBehind.summary.searchQuality, "degraded"); +assert.ok(auditSqlAheadReplicasBehind.issues.some((issue) => issue.code === "blob-checkpoint-behind")); +assert.ok(auditSqlAheadReplicasBehind.issues.some((issue) => issue.code === "trivium-replica-behind")); +assert.ok(auditSqlAheadReplicasBehind.actions.includes("write-authority-checkpoint")); +assert.ok(auditSqlAheadReplicasBehind.actions.includes("rebuild-authority-trivium")); +assert.equal(auditSqlAheadReplicasBehind.actions.includes("restore-from-authority-blob-checkpoint"), false); +assert.equal(auditSqlAheadReplicasBehind.drift.checkpointRestorable, false); +const sqlAheadRepairPlan = buildAuthorityConsistencyRepairPlan(auditSqlAheadReplicasBehind); +assert.equal(sqlAheadRepairPlan.ok, true); +assert.equal(sqlAheadRepairPlan.requiresConfirmation, false); +assert.deepEqual( + sqlAheadRepairPlan.steps.map((step) => step.action), + [ + "write-authority-checkpoint", + "rebuild-authority-trivium", + ], +); + const restoreRepairPlan = buildAuthorityConsistencyRepairPlan({ issues: [ { @@ -182,4 +244,29 @@ assert.deepEqual( ["restore-from-authority-blob-checkpoint"], ); +const auditRuntimeAheadOfSql = buildAuthorityConsistencyAudit({ + chatId: "chat-a", + collectionId: "st-bme::chat-a", + runtimeGraph: { + meta: { revision: 4 }, + nodes: [{ id: "node-a" }], + edges: [], + vectorIndexState: { collectionId: "st-bme::chat-a", dirty: false }, + }, + graphPersistenceState: { + chatId: "chat-a", + revision: 4, + }, + sqlSnapshot: { + meta: { revision: 3, nodeCount: 1, edgeCount: 0, tombstoneCount: 0 }, + }, + triviumStat: { + revision: 3, + namespace: "st-bme::chat-a", + }, +}); +assert.equal(auditRuntimeAheadOfSql.summary.level, "warning"); +assert.equal(auditRuntimeAheadOfSql.summary.dataSafety, "runtime-ahead-of-sql"); +assert.equal(auditRuntimeAheadOfSql.actions.includes("restore-from-authority-blob-checkpoint"), false); + console.log("authority-consistency tests passed"); diff --git a/ui/panel.js b/ui/panel.js index 8e2b0d2..8c7d188 100644 --- a/ui/panel.js +++ b/ui/panel.js @@ -3113,7 +3113,12 @@ function _refreshTaskPersistence() { ? authorityAudit.issues.map((issue) => issue.message).filter(Boolean).join(" / ") : authorityAuditSummary.detail || "—"; const authorityAuditActionsLabel = Array.isArray(authorityAudit?.actions) && authorityAudit.actions.length - ? authorityAudit.actions.join(" · ") + ? authorityAudit.actions.map((action) => ({ + "write-authority-checkpoint": "同步备份 Checkpoint", + "rebuild-authority-trivium": "同步向量/Trivium 副本", + "run-authority-consistency-audit": "重新审计", + "restore-from-authority-blob-checkpoint": "灾难恢复:从 Checkpoint 覆盖 SQL", + }[action] || action)).join(" · ") : "—"; const authorityAuditUpdatedLabel = ps.authorityConsistencyUpdatedAt ? _formatTaskProfileTime(ps.authorityConsistencyUpdatedAt) @@ -3149,20 +3154,20 @@ function _refreshTaskPersistence() { ).trim(); const authorityRepairLabel = authorityRepairState === "success" - ? "修复完成" + ? "同步完成" : authorityRepairState === "error" - ? "修复失败" + ? "同步失败" : authorityRepairState === "running" ? authorityRepairResult?.handoffRequired ? "等待 Job 交接" - : "修复中" + : "同步中" : "未执行"; const authorityRepairUpdatedLabel = ps.authorityRepairUpdatedAt ? _formatTaskProfileTime(ps.authorityRepairUpdatedAt) : "—"; const authorityRepairPlanLabel = authorityRepairPlan.ok ? authorityRepairPlan.steps.map((step) => step.label).join(" → ") - : authorityRepairPlan.summary.label || "当前无需编排修复"; + : authorityRepairPlan.summary.label || "当前无需编排同步"; const authorityRepairResultLabel = authorityRepairResult?.steps?.length ? `${Number(authorityRepairResult.steps.length || 0)} 步${ authorityRepairResult?.handoffRequired @@ -3340,9 +3345,9 @@ function _refreshTaskPersistence() { ["Blob rev", authorityAuditBlobRevision], ["Blob path", authorityAuditBlobPath], ["建议动作", authorityAuditActionsLabel], - ["建议修复", authorityRepairPlanLabel], - ["修复状态", authorityRepairLabel], - ["修复结果", authorityRepairResultLabel], + ["建议同步", authorityRepairPlanLabel], + ["同步状态", authorityRepairLabel], + ["同步结果", authorityRepairResultLabel], ["最近审计", authorityAuditUpdatedLabel], ["最近修复", authorityRepairUpdatedLabel], ["恢复状态", authorityRestoreLabel], @@ -3381,21 +3386,30 @@ function _refreshTaskPersistence() { (!ps.authorityBlobCheckpointPath && ps.authorityBlobReady); const showAuthorityTriviumRebuildAction = authorityAuditActions.includes("rebuild-authority-trivium"); + const showAuthorityRestoreAction = Boolean( + authorityAudit?.drift?.checkpointRestorable && + ( + authorityAudit?.sql?.ok !== true || + authorityAudit?.drift?.blobNewerThanSql || + authorityAuditActions.includes("restore-from-authority-blob-checkpoint") || + authorityRestoreState !== "idle" + ), + ); const authorityActionButtons = [ typeof _actionHandlers.runAuthorityConsistencyAudit === "function" ? `` : "", showAuthorityRepairAction && typeof _actionHandlers.runAuthorityConsistencyRepairPlan === "function" - ? `` + ? `` : "", showAuthorityCheckpointWriteAction && typeof _actionHandlers.writeAuthorityCheckpoint === "function" - ? `` + ? `` : "", - typeof _actionHandlers.restoreAuthorityCheckpoint === "function" - ? `` + showAuthorityRestoreAction && typeof _actionHandlers.restoreAuthorityCheckpoint === "function" + ? `` : "", showAuthorityTriviumRebuildAction && typeof _actionHandlers.rebuildVectorIndex === "function" - ? `` + ? `` : "", typeof _actionHandlers.captureAuthorityPerformanceBaseline === "function" ? `` @@ -3470,9 +3484,9 @@ function _refreshTaskPersistence() {
-
Authority 一致性 / Checkpoint
+
Authority 副本健康 / 备份与向量同步
- 审计当前 chat 的 Authority SQL / Trivium / Blob checkpoint 是否同 revision 前进;restore 会把 Blob checkpoint 回灌到 Authority SQL,并在 Authority 主存储启用时触发当前聊天重载。 + 审计当前 chat 的 Authority SQL、Blob checkpoint 备份和 Trivium/vector 搜索副本。SQL 是主存储;Blob/Trivium 落后时优先同步副本,只有 SQL 缺失或需要回滚时才从 checkpoint 恢复。
${authorityActionButtons ? `
${authorityActionButtons}
` : ""} ${renderRowsTwoColumn(authorityRows)} @@ -3507,21 +3521,21 @@ function _refreshTaskPersistence() { if (typeof _actionHandlers.runAuthorityConsistencyRepairPlan !== "function") return; if (authorityRepairPlan.requiresConfirmation) { const confirmed = globalThis.confirm?.( - `建议修复将按以下顺序执行:\n${authorityRepairPlan.steps.map((step, index) => `${index + 1}. ${step.label}`).join("\n")}\n\n其中包含从 Blob Checkpoint 恢复 SQL,确定继续?`, + `副本同步计划将按以下顺序执行:\n${authorityRepairPlan.steps.map((step, index) => `${index + 1}. ${step.label}`).join("\n")}\n\n其中包含从 Blob Checkpoint 恢复 SQL。此操作只适合 SQL 缺失、损坏或需要回滚时使用,确定继续?`, ); if (!confirmed) return; } - toastr.info("Authority 建议修复执行中…", "ST-BME", { timeOut: 2000 }); + toastr.info("Authority 副本同步执行中…", "ST-BME", { timeOut: 2000 }); const result = await _actionHandlers.runAuthorityConsistencyRepairPlan(); if (result?.success) { const stepCount = Number(result?.repairResult?.steps?.length || result?.results?.length || 0); if (result?.handoffRequired || result?.repairResult?.handoffRequired) { - toastr.success(`Authority 建议修复已交接异步 Job${stepCount > 0 ? `(${stepCount} 步)` : ""}`, "ST-BME"); + toastr.success(`Authority 副本同步已交接异步 Job${stepCount > 0 ? `(${stepCount} 步)` : ""}`, "ST-BME"); } else { - toastr.success(`Authority 建议修复已完成${stepCount > 0 ? `(${stepCount} 步)` : ""}`, "ST-BME"); + toastr.success(`Authority 副本同步已完成${stepCount > 0 ? `(${stepCount} 步)` : ""}`, "ST-BME"); } } else { - toastr.warning(`Authority 建议修复失败:${result?.error || "unknown"}`, "ST-BME"); + toastr.warning(`Authority 副本同步失败:${result?.error || "unknown"}`, "ST-BME"); } } else if (action === "checkpoint") { if (typeof _actionHandlers.writeAuthorityCheckpoint !== "function") return; @@ -3534,6 +3548,10 @@ function _refreshTaskPersistence() { } } else if (action === "restore") { if (typeof _actionHandlers.restoreAuthorityCheckpoint !== "function") return; + const confirmed = globalThis.confirm?.( + `灾难恢复会用 Blob Checkpoint 覆盖 Authority SQL。\n\nSQL rev: ${authorityAuditSqlRevision}\nCheckpoint rev: ${authorityAuditBlobRevision}\n\n只有 SQL 缺失、损坏或明确需要回滚时才继续。确定执行?`, + ); + if (!confirmed) return; toastr.info("Authority Checkpoint 恢复中…", "ST-BME", { timeOut: 2000 }); const result = await _actionHandlers.restoreAuthorityCheckpoint(); if (result?.success) { @@ -3573,7 +3591,7 @@ function _refreshTaskPersistence() { action === "restore" ? `Authority Checkpoint 恢复失败: ${error?.message || error}` : action === "repair-plan" - ? `Authority 建议修复失败: ${error?.message || error}` + ? `Authority 副本同步失败: ${error?.message || error}` : action === "checkpoint" ? `Authority Checkpoint 写入失败: ${error?.message || error}` : action === "rebuild-trivium"