fix(authority): clarify replica consistency state

This commit is contained in:
OpenCode
2026-05-15 16:51:48 +00:00
parent 7c19a124b3
commit 09fc103f21
3 changed files with 206 additions and 44 deletions

View File

@@ -96,6 +96,9 @@ export function buildAuthorityConsistencyRepairPlan(audit = null) {
: [];
const issueCodes = collectIssueCodes(source);
const steps = [];
const sqlRevision = normalizeOptionalInteger(source?.sql?.revision);
const blobRevision = normalizeOptionalInteger(source?.blob?.revision);
const sqlNewerThanBlob = Number.isFinite(sqlRevision) && Number.isFinite(blobRevision) && sqlRevision > blobRevision;
const addStep = (action, label, detail, codes = []) => {
const normalizedAction = normalizeRepairAction(action);
if (!normalizedAction || !actions.includes(normalizedAction)) {
@@ -120,21 +123,23 @@ export function buildAuthorityConsistencyRepairPlan(audit = null) {
addStep(
"write-authority-checkpoint",
"写入当前 Checkpoint",
"Authority Blob 尚无 checkpoint,先把当前 runtime 图谱写成 checkpoint,再继续后续修复。",
["blob-checkpoint-missing"],
);
addStep(
"restore-from-authority-blob-checkpoint",
"从 Blob Checkpoint 恢复 SQL",
"检测到 runtime / SQL / Blob revision 漂移,可用 Blob checkpoint 回灌 Authority SQL。",
["sql-runtime-revision-drift", "blob-runtime-revision-drift"],
"同步备份 Checkpoint",
"Authority Blob checkpoint 落后或缺失,应从当前权威图谱源同步一个新的备份 checkpoint。",
["blob-checkpoint-missing", "blob-checkpoint-behind", "blob-runtime-revision-drift"],
);
if (!sqlNewerThanBlob) {
addStep(
"restore-from-authority-blob-checkpoint",
"灾难恢复:从 Blob Checkpoint 恢复 SQL",
"仅在 SQL 缺失、损坏或用户明确需要回滚时,才可用 Blob checkpoint 回灌 Authority SQL。",
["sql-runtime-revision-drift", "blob-newer-than-sql", "blob-chat-mismatch"],
);
}
addStep(
"rebuild-authority-trivium",
"重建 Authority Trivium",
"Trivium 与 SQL revision 不一致,或当前向量索引为 dirty需要重建 Trivium。",
["trivium-sql-revision-drift", "trivium-collection-mismatch", "vector-dirty"],
"同步向量/Trivium 副本",
"Trivium 向量副本落后、collection 不匹配,或当前向量索引为 dirty需要从权威图谱源重建/同步。",
["trivium-sql-revision-drift", "trivium-replica-behind", "trivium-collection-mismatch", "vector-dirty"],
);
const blockedIssueCodes = (Array.isArray(source.issues) ? source.issues : [])
@@ -145,7 +150,7 @@ export function buildAuthorityConsistencyRepairPlan(audit = null) {
(action) => action !== "run-authority-consistency-audit" && !steps.some((step) => step.action === action),
);
const detail = steps.length
? `建议顺序${steps.map((step) => step.label).join(" → ")}`
? `建议同步${steps.map((step) => step.label).join(" → ")}`
: String(source?.summary?.detail || "当前审计未发现需要自动编排的修复步骤");
return {
@@ -157,7 +162,7 @@ export function buildAuthorityConsistencyRepairPlan(audit = null) {
unsupportedActions,
summary: {
level: steps.length > 0 ? "warning" : String(source?.summary?.level || "idle"),
label: steps.length > 0 ? `建议修复 ${steps.length}` : "当前无需编排修复",
label: steps.length > 0 ? `建议同步副本 ${steps.length}` : "当前无需编排修复",
detail,
},
};
@@ -436,10 +441,23 @@ export function buildAuthorityConsistencyAudit(input = {}) {
runtimeVsBlobRevision: buildRevisionDelta(runtime.revision, blob.revision),
sqlVsBlobRevision: buildRevisionDelta(sql.revision, blob.revision),
triviumVsSqlRevision: buildRevisionDelta(trivium.revision, sql.revision),
sqlNewerThanBlob:
Number.isFinite(sql.revision) && Number.isFinite(blob.revision) && sql.revision > blob.revision,
blobNewerThanSql:
Number.isFinite(sql.revision) && Number.isFinite(blob.revision) && blob.revision > sql.revision,
sqlNewerThanTrivium:
Number.isFinite(sql.revision) && Number.isFinite(trivium.revision) && sql.revision > trivium.revision,
collectionMatchesRuntime:
!trivium.namespace || !runtime.collectionId || trivium.namespace === runtime.collectionId,
checkpointRestorable:
blob.exists && blob.hasSerializedGraph && (!blob.chatId || !chatId || blob.chatId === chatId),
blob.exists &&
blob.hasSerializedGraph &&
(!blob.chatId || !chatId || blob.chatId === chatId) &&
!(
Number.isFinite(sql.revision) &&
Number.isFinite(blob.revision) &&
sql.revision > blob.revision
),
};
const issues = [];
@@ -473,11 +491,16 @@ export function buildAuthorityConsistencyAudit(input = {}) {
Number.isFinite(runtime.revision) &&
blob.revision !== runtime.revision
) {
const code = Number.isFinite(sql.revision) && blob.revision < sql.revision
? "blob-checkpoint-behind"
: "blob-runtime-revision-drift";
issues.push(
normalizeIssue(
"warning",
"blob-runtime-revision-drift",
`Blob checkpoint revision 与 runtime 不一致:${blob.revision}${runtime.revision}`,
code,
code === "blob-checkpoint-behind"
? `Blob checkpoint 落后于 Authority SQL${blob.revision} < ${sql.revision}`
: `Blob checkpoint revision 与 runtime 不一致:${blob.revision}${runtime.revision}`,
),
);
}
@@ -486,11 +509,16 @@ export function buildAuthorityConsistencyAudit(input = {}) {
Number.isFinite(sql.revision) &&
trivium.revision !== sql.revision
) {
const code = trivium.revision < sql.revision
? "trivium-replica-behind"
: "trivium-sql-revision-drift";
issues.push(
normalizeIssue(
"warning",
"trivium-sql-revision-drift",
`Trivium revision 与 SQL 不一致:${trivium.revision}${sql.revision}`,
code,
code === "trivium-replica-behind"
? `Trivium 向量副本落后于 Authority SQL${trivium.revision} < ${sql.revision}`
: `Trivium revision 与 SQL 不一致:${trivium.revision}${sql.revision}`,
),
);
}
@@ -511,11 +539,18 @@ export function buildAuthorityConsistencyAudit(input = {}) {
}
const actions = [];
if (drift.checkpointRestorable) actions.push("restore-from-authority-blob-checkpoint");
const restoreRelevant =
drift.checkpointRestorable &&
(
sql.ok !== true ||
drift.blobNewerThanSql ||
issues.some((issue) => issue.code === "sql-probe-error")
);
if (restoreRelevant) actions.push("restore-from-authority-blob-checkpoint");
if (runtime.vectorDirty || (Number.isFinite(drift.triviumVsSqlRevision) && drift.triviumVsSqlRevision < 0)) {
actions.push("rebuild-authority-trivium");
}
if (!blob.exists && source.capability?.blobReady) {
if ((!blob.exists || drift.sqlNewerThanBlob) && source.capability?.blobReady) {
actions.push("write-authority-checkpoint");
}
if (issues.some((issue) => issue.code === "sql-runtime-revision-drift" || issue.code === "blob-runtime-revision-drift")) {
@@ -533,13 +568,32 @@ export function buildAuthorityConsistencyAudit(input = {}) {
level === "error"
? "存在阻塞性不一致"
: level === "warning"
? "存在待处理漂移"
? sql.ok
? "副本待同步"
: "存在待处理漂移"
: level === "success"
? "Authority 工件已对齐"
: "等待审计";
const detail = issues[0]?.message || (level === "success"
? "Authority SQL / Trivium / Blob 已达到当前可观测的一致状态"
: "尚未运行审计");
const replicaLag = issues.some((issue) => [
"blob-checkpoint-missing",
"blob-checkpoint-behind",
"trivium-replica-behind",
"vector-dirty",
].includes(issue.code));
const runtimeAheadOfSql =
Number.isFinite(runtime.revision) &&
Number.isFinite(sql.revision) &&
runtime.revision > sql.revision;
const dataSafety = sql.ok
? runtimeAheadOfSql
? "runtime-ahead-of-sql"
: replicaLag
? "saved-replicas-behind"
: "saved"
: (sql.available ? "unknown" : "unavailable");
return {
updatedAt,
@@ -557,6 +611,9 @@ export function buildAuthorityConsistencyAudit(input = {}) {
label,
detail,
issueCount: issues.length,
dataSafety,
backupRedundancy: replicaLag ? "degraded" : (blob.exists ? "ok" : "unknown"),
searchQuality: runtime.vectorDirty || drift.sqlNewerThanTrivium ? "degraded" : "ok",
},
};
}

View File

@@ -108,7 +108,8 @@ const auditAligned = buildAuthorityConsistencyAudit({
assert.equal(auditAligned.summary.level, "success");
assert.equal(auditAligned.issues.length, 0);
assert.equal(auditAligned.drift.checkpointRestorable, true);
assert.ok(auditAligned.actions.includes("restore-from-authority-blob-checkpoint"));
assert.equal(auditAligned.actions.includes("restore-from-authority-blob-checkpoint"), false);
assert.equal(auditAligned.summary.dataSafety, "saved");
const alignedRepairPlan = buildAuthorityConsistencyRepairPlan(auditAligned);
assert.equal(alignedRepairPlan.ok, false);
assert.equal(alignedRepairPlan.stepCount, 0);
@@ -161,6 +162,67 @@ assert.deepEqual(
],
);
// Scenario: the runtime graph and the SQL snapshot are both at revision 2,
// while the Blob checkpoint and the Trivium stat still report revision 0.
const auditSqlAheadReplicasBehind = buildAuthorityConsistencyAudit({
  chatId: "chat-a",
  collectionId: "st-bme::chat-a",
  capability: { blobReady: true },
  runtimeGraph: {
    meta: { revision: 2 },
    nodes: [{ id: "node-a" }],
    edges: [],
    vectorIndexState: { collectionId: "st-bme::chat-a", dirty: false },
  },
  graphPersistenceState: {
    chatId: "chat-a",
    revision: 2,
    authorityBlobCheckpointPath: "user/files/checkpoint.json",
    authorityBlobCheckpointRevision: 0,
  },
  sqlSnapshot: {
    meta: { revision: 2, nodeCount: 1, edgeCount: 0, tombstoneCount: 0 },
  },
  triviumStat: { revision: 0, namespace: "st-bme::chat-a" },
  blobResult: {
    ok: true,
    exists: true,
    path: "user/files/checkpoint.json",
    checkpoint: {
      chatId: "chat-a",
      revision: 0,
      serializedGraph: serializeGraph(createEmptyGraph()),
    },
  },
});
{
  // Block scope keeps the helper names below out of the test file's top level.
  const { summary, issues, actions, drift } = auditSqlAheadReplicasBehind;

  // The summary is expected to report lagging replicas rather than a blocking error.
  const expectedSummaryFields = [
    ["level", "warning"],
    ["label", "副本待同步"],
    ["dataSafety", "saved-replicas-behind"],
    ["backupRedundancy", "degraded"],
    ["searchQuality", "degraded"],
  ];
  for (const [field, expected] of expectedSummaryFields) {
    assert.equal(summary[field], expected);
  }

  // Both lagging replicas must be reported under their "behind" issue codes.
  for (const expectedCode of ["blob-checkpoint-behind", "trivium-replica-behind"]) {
    assert.ok(issues.some((issue) => issue.code === expectedCode));
  }

  // Sync actions are expected; restoring SQL from the checkpoint is not.
  for (const expectedAction of ["write-authority-checkpoint", "rebuild-authority-trivium"]) {
    assert.ok(actions.includes(expectedAction));
  }
  assert.equal(actions.includes("restore-from-authority-blob-checkpoint"), false);
  assert.equal(drift.checkpointRestorable, false);
}

// The repair plan built from that audit should hold only the two sync steps
// and should not ask for confirmation.
const sqlAheadRepairPlan = buildAuthorityConsistencyRepairPlan(auditSqlAheadReplicasBehind);
assert.equal(sqlAheadRepairPlan.ok, true);
assert.equal(sqlAheadRepairPlan.requiresConfirmation, false);
{
  const plannedActions = sqlAheadRepairPlan.steps.map(({ action }) => action);
  assert.deepEqual(plannedActions, [
    "write-authority-checkpoint",
    "rebuild-authority-trivium",
  ]);
}
const restoreRepairPlan = buildAuthorityConsistencyRepairPlan({
issues: [
{
@@ -182,4 +244,29 @@ assert.deepEqual(
["restore-from-authority-blob-checkpoint"],
);
// Scenario: the runtime graph is at revision 4 while the SQL snapshot and the
// Trivium stat are at revision 3; no blob result or capability is supplied.
const auditRuntimeAheadOfSql = buildAuthorityConsistencyAudit({
  chatId: "chat-a",
  collectionId: "st-bme::chat-a",
  runtimeGraph: {
    meta: { revision: 4 },
    nodes: [{ id: "node-a" }],
    edges: [],
    vectorIndexState: {
      collectionId: "st-bme::chat-a",
      dirty: false,
    },
  },
  graphPersistenceState: { chatId: "chat-a", revision: 4 },
  sqlSnapshot: {
    meta: {
      revision: 3,
      nodeCount: 1,
      edgeCount: 0,
      tombstoneCount: 0,
    },
  },
  triviumStat: { revision: 3, namespace: "st-bme::chat-a" },
});
{
  // Block scope keeps the destructured names out of the test file's top level.
  const { summary, actions } = auditRuntimeAheadOfSql;
  assert.equal(summary.level, "warning");
  assert.equal(summary.dataSafety, "runtime-ahead-of-sql");
  // Restoring SQL from a checkpoint must not be suggested in this scenario.
  const offersRestore = actions.includes("restore-from-authority-blob-checkpoint");
  assert.equal(offersRestore, false);
}

console.log("authority-consistency tests passed");

View File

@@ -3113,7 +3113,12 @@ function _refreshTaskPersistence() {
? authorityAudit.issues.map((issue) => issue.message).filter(Boolean).join(" / ")
: authorityAuditSummary.detail || "—";
const authorityAuditActionsLabel = Array.isArray(authorityAudit?.actions) && authorityAudit.actions.length
? authorityAudit.actions.join(" · ")
? authorityAudit.actions.map((action) => ({
"write-authority-checkpoint": "同步备份 Checkpoint",
"rebuild-authority-trivium": "同步向量/Trivium 副本",
"run-authority-consistency-audit": "重新审计",
"restore-from-authority-blob-checkpoint": "灾难恢复:从 Checkpoint 覆盖 SQL",
}[action] || action)).join(" · ")
: "—";
const authorityAuditUpdatedLabel = ps.authorityConsistencyUpdatedAt
? _formatTaskProfileTime(ps.authorityConsistencyUpdatedAt)
@@ -3149,20 +3154,20 @@ function _refreshTaskPersistence() {
).trim();
const authorityRepairLabel =
authorityRepairState === "success"
? "修复完成"
? "同步完成"
: authorityRepairState === "error"
? "修复失败"
? "同步失败"
: authorityRepairState === "running"
? authorityRepairResult?.handoffRequired
? "等待 Job 交接"
: "修复中"
: "同步中"
: "未执行";
const authorityRepairUpdatedLabel = ps.authorityRepairUpdatedAt
? _formatTaskProfileTime(ps.authorityRepairUpdatedAt)
: "—";
const authorityRepairPlanLabel = authorityRepairPlan.ok
? authorityRepairPlan.steps.map((step) => step.label).join(" → ")
: authorityRepairPlan.summary.label || "当前无需编排修复";
: authorityRepairPlan.summary.label || "当前无需编排同步";
const authorityRepairResultLabel = authorityRepairResult?.steps?.length
? `${Number(authorityRepairResult.steps.length || 0)}${
authorityRepairResult?.handoffRequired
@@ -3340,9 +3345,9 @@ function _refreshTaskPersistence() {
["Blob rev", authorityAuditBlobRevision],
["Blob path", authorityAuditBlobPath],
["建议动作", authorityAuditActionsLabel],
["建议修复", authorityRepairPlanLabel],
["修复状态", authorityRepairLabel],
["修复结果", authorityRepairResultLabel],
["建议同步", authorityRepairPlanLabel],
["同步状态", authorityRepairLabel],
["同步结果", authorityRepairResultLabel],
["最近审计", authorityAuditUpdatedLabel],
["最近修复", authorityRepairUpdatedLabel],
["恢复状态", authorityRestoreLabel],
@@ -3381,21 +3386,30 @@ function _refreshTaskPersistence() {
(!ps.authorityBlobCheckpointPath && ps.authorityBlobReady);
const showAuthorityTriviumRebuildAction =
authorityAuditActions.includes("rebuild-authority-trivium");
const showAuthorityRestoreAction = Boolean(
authorityAudit?.drift?.checkpointRestorable &&
(
authorityAudit?.sql?.ok !== true ||
authorityAudit?.drift?.blobNewerThanSql ||
authorityAuditActions.includes("restore-from-authority-blob-checkpoint") ||
authorityRestoreState !== "idle"
),
);
const authorityActionButtons = [
typeof _actionHandlers.runAuthorityConsistencyAudit === "function"
? `<button class="bme-config-secondary-btn" type="button" data-authority-persistence-action="audit">执行 Authority 审计</button>`
: "",
showAuthorityRepairAction && typeof _actionHandlers.runAuthorityConsistencyRepairPlan === "function"
? `<button class="bme-config-secondary-btn" type="button" data-authority-persistence-action="repair-plan">执行建议修复</button>`
? `<button class="bme-config-secondary-btn" type="button" data-authority-persistence-action="repair-plan">执行副本同步</button>`
: "",
showAuthorityCheckpointWriteAction && typeof _actionHandlers.writeAuthorityCheckpoint === "function"
? `<button class="bme-config-secondary-btn" type="button" data-authority-persistence-action="checkpoint">写入当前 Checkpoint</button>`
? `<button class="bme-config-secondary-btn" type="button" data-authority-persistence-action="checkpoint">同步 Checkpoint</button>`
: "",
typeof _actionHandlers.restoreAuthorityCheckpoint === "function"
? `<button class="bme-config-secondary-btn" type="button" data-authority-persistence-action="restore">Checkpoint 恢复</button>`
showAuthorityRestoreAction && typeof _actionHandlers.restoreAuthorityCheckpoint === "function"
? `<button class="bme-config-secondary-btn" type="button" data-authority-persistence-action="restore">灾难恢复:Checkpoint 覆盖 SQL</button>`
: "",
showAuthorityTriviumRebuildAction && typeof _actionHandlers.rebuildVectorIndex === "function"
? `<button class="bme-config-secondary-btn" type="button" data-authority-persistence-action="rebuild-trivium">重建 Authority Trivium</button>`
? `<button class="bme-config-secondary-btn" type="button" data-authority-persistence-action="rebuild-trivium">同步 Authority Trivium</button>`
: "",
typeof _actionHandlers.captureAuthorityPerformanceBaseline === "function"
? `<button class="bme-config-secondary-btn" type="button" data-authority-persistence-action="baseline">捕获 Perf Baseline</button>`
@@ -3470,9 +3484,9 @@ function _refreshTaskPersistence() {
</div>
</div>
<div class="bme-persist-kv" style="margin-top:12px">
<div style="font-size:12px;font-weight:700;color:var(--bme-on-surface);margin-bottom:10px"><i class="fa-solid fa-shield-halved" style="margin-right:6px;color:var(--bme-primary)"></i>Authority 一致性 / Checkpoint</div>
<div style="font-size:12px;font-weight:700;color:var(--bme-on-surface);margin-bottom:10px"><i class="fa-solid fa-shield-halved" style="margin-right:6px;color:var(--bme-primary)"></i>Authority 副本健康 / 备份与向量同步</div>
<div class="bme-config-help" style="margin-bottom:12px">
审计当前 chat 的 Authority SQL / Trivium / Blob checkpoint 是否同 revision 前进restore 会把 Blob checkpoint 回灌到 Authority SQL并在 Authority 主存储启用时触发当前聊天重载
审计当前 chat 的 Authority SQLBlob checkpoint 备份和 Trivium/vector 搜索副本。SQL 是主存储Blob/Trivium 落后时优先同步副本,只有 SQL 缺失或需要回滚时才从 checkpoint 恢复
</div>
${authorityActionButtons ? `<div style="display:flex;gap:8px;flex-wrap:wrap;margin-bottom:12px">${authorityActionButtons}</div>` : ""}
${renderRowsTwoColumn(authorityRows)}
@@ -3507,21 +3521,21 @@ function _refreshTaskPersistence() {
if (typeof _actionHandlers.runAuthorityConsistencyRepairPlan !== "function") return;
if (authorityRepairPlan.requiresConfirmation) {
const confirmed = globalThis.confirm?.(
`建议修复将按以下顺序执行:\n${authorityRepairPlan.steps.map((step, index) => `${index + 1}. ${step.label}`).join("\n")}\n\n其中包含从 Blob Checkpoint 恢复 SQL确定继续`,
`副本同步计划将按以下顺序执行:\n${authorityRepairPlan.steps.map((step, index) => `${index + 1}. ${step.label}`).join("\n")}\n\n其中包含从 Blob Checkpoint 恢复 SQL。此操作只适合 SQL 缺失、损坏或需要回滚时使用,确定继续?`,
);
if (!confirmed) return;
}
toastr.info("Authority 建议修复执行中…", "ST-BME", { timeOut: 2000 });
toastr.info("Authority 副本同步执行中…", "ST-BME", { timeOut: 2000 });
const result = await _actionHandlers.runAuthorityConsistencyRepairPlan();
if (result?.success) {
const stepCount = Number(result?.repairResult?.steps?.length || result?.results?.length || 0);
if (result?.handoffRequired || result?.repairResult?.handoffRequired) {
toastr.success(`Authority 建议修复已交接异步 Job${stepCount > 0 ? `${stepCount} 步)` : ""}`, "ST-BME");
toastr.success(`Authority 副本同步已交接异步 Job${stepCount > 0 ? `${stepCount} 步)` : ""}`, "ST-BME");
} else {
toastr.success(`Authority 建议修复已完成${stepCount > 0 ? `${stepCount} 步)` : ""}`, "ST-BME");
toastr.success(`Authority 副本同步已完成${stepCount > 0 ? `${stepCount} 步)` : ""}`, "ST-BME");
}
} else {
toastr.warning(`Authority 建议修复失败:${result?.error || "unknown"}`, "ST-BME");
toastr.warning(`Authority 副本同步失败:${result?.error || "unknown"}`, "ST-BME");
}
} else if (action === "checkpoint") {
if (typeof _actionHandlers.writeAuthorityCheckpoint !== "function") return;
@@ -3534,6 +3548,10 @@ function _refreshTaskPersistence() {
}
} else if (action === "restore") {
if (typeof _actionHandlers.restoreAuthorityCheckpoint !== "function") return;
const confirmed = globalThis.confirm?.(
`灾难恢复会用 Blob Checkpoint 覆盖 Authority SQL。\n\nSQL rev: ${authorityAuditSqlRevision}\nCheckpoint rev: ${authorityAuditBlobRevision}\n\n只有 SQL 缺失、损坏或明确需要回滚时才继续。确定执行?`,
);
if (!confirmed) return;
toastr.info("Authority Checkpoint 恢复中…", "ST-BME", { timeOut: 2000 });
const result = await _actionHandlers.restoreAuthorityCheckpoint();
if (result?.success) {
@@ -3573,7 +3591,7 @@ function _refreshTaskPersistence() {
action === "restore"
? `Authority Checkpoint 恢复失败: ${error?.message || error}`
: action === "repair-plan"
? `Authority 建议修复失败: ${error?.message || error}`
? `Authority 副本同步失败: ${error?.message || error}`
: action === "checkpoint"
? `Authority Checkpoint 写入失败: ${error?.message || error}`
: action === "rebuild-trivium"