fix(sync): defer cleanup of stale remote chunks

This commit is contained in:
youzini
2026-06-06 09:36:41 +00:00
parent 017e801767
commit dba53cc21c
2 changed files with 430 additions and 0 deletions

View File

@@ -204,6 +204,43 @@ function createMockFetchEnvironment() {
};
}
/**
 * Creates an in-memory stand-in for an authority blob adapter.
 *
 * Blobs are kept in a Map keyed by path, and each adapter call increments a
 * counter so a test can assert how often the store was read, written or
 * deleted from.
 *
 * @returns {{
 *   blobs: Map<string, any>,
 *   logs: { reads: number, writes: number, deletes: number },
 *   adapter: {
 *     readJson: (path: string) => Promise<object>,
 *     writeJson: (path: string, payload: any) => Promise<object>,
 *     writeText: (path: string, payload: string) => Promise<object>,
 *     delete: (path: string) => Promise<object>,
 *   },
 * }} The backing map, the call counters, and the adapter itself.
 */
function createMockAuthorityBlobAdapter() {
  // JSON round-trip: stored values never share references with the caller's objects.
  const cloneJson = (value) => JSON.parse(JSON.stringify(value));

  const blobs = new Map();
  const logs = { reads: 0, writes: 0, deletes: 0 };

  const adapter = {
    // Resolves with a copy of the stored blob, or an `exists: false` marker.
    readJson: async (path) => {
      logs.reads += 1;
      return blobs.has(path)
        ? { exists: true, ok: true, path, payload: cloneJson(blobs.get(path)) }
        : { exists: false, ok: true, path };
    },

    // Stores a copy of `payload` under `path`.
    writeJson: async (path, payload) => {
      logs.writes += 1;
      blobs.set(path, cloneJson(payload));
      return { ok: true, path };
    },

    // Text payloads are parsed as JSON before being stored, so readJson can return them.
    writeText: async (path, payload) => {
      logs.writes += 1;
      blobs.set(path, JSON.parse(payload));
      return { ok: true, path };
    },

    // `deleted` reports whether a blob was actually present at `path`.
    delete: async (path) => {
      logs.deletes += 1;
      const deleted = blobs.delete(path);
      return { ok: true, deleted, path };
    },
  };

  return { blobs, logs, adapter };
}
function buildRuntimeOptions({ dbByChatId, fetch }) {
return {
fetch,
@@ -332,6 +369,198 @@ async function testUploadSanitizesIllegalChatIdFilename() {
assert.match(logs.uploadedPayloads[0].name, /^[A-Za-z0-9._~-]+$/);
}
/**
 * Drives three uploads of the same chat to check deferred cleanup of remote
 * chunk files:
 *
 * 1. the first upload has nothing to clean and records no pending entries;
 * 2. the second upload (changed edges and state, clock at 2_000 with a 5_000
 *    grace window) must keep the superseded chunk files and list them under
 *    `chunkGc.pending` without issuing any delete;
 * 3. the third upload (clock at 8_000) must delete exactly those files while
 *    every chunk named by the current manifest stays in place.
 */
async function testUploadDefersAndThenCleansStaleRemoteChunks() {
  const chatId = "chat-chunk-gc";
  const { fetch, remoteFiles, logs } = createMockFetchEnvironment();
  const dbByChatId = new Map();
  const db = new FakeDb(chatId, {
    meta: {
      schemaVersion: 1,
      chatId,
      deviceId: "",
      revision: 1,
      lastModified: 100,
      nodeCount: 1,
      edgeCount: 1,
      tombstoneCount: 0,
    },
    nodes: [{ id: "n1", updatedAt: 100, name: "node" }],
    edges: [{ id: "e1", fromId: "n1", toId: "n2", updatedAt: 100 }],
    tombstones: [],
    state: { lastProcessedFloor: 1, extractionCount: 1 },
  });
  dbByChatId.set(chatId, db);
  const runtime = buildRuntimeOptions({ dbByChatId, fetch });

  // Every upload in this test shares the grace window; only the clock moves.
  const uploadAt = (nowMs) =>
    upload(chatId, { ...runtime, nowMs, remoteSyncChunkGcGraceMs: 5_000 });
  const chunkFilenamesOf = (manifest) => manifest.chunks.map((chunk) => chunk.filename);
  const assertRemotePresence = (filenames, expected, describe) => {
    for (const filename of filenames) {
      assert.equal(remoteFiles.has(filename), expected, describe(filename));
    }
  };

  // Upload 1: baseline, nothing to clean.
  const firstUpload = await uploadAt(1_000);
  assert.equal(firstUpload.uploaded, true);
  const manifestName = firstUpload.filename;
  const firstManifest = remoteFiles.get(manifestName);
  const firstChunks = new Set(chunkFilenamesOf(firstManifest));
  assert.ok(firstChunks.size >= 3, "v2 upload should create node, edge, and runtime-meta chunks");
  assert.equal(firstUpload.cleanup?.attempted, 0, "first upload has no previous manifest to clean");
  assert.deepEqual(firstManifest.chunkGc?.pending || [], []);

  // Change edges and runtime state while keeping the node record identical.
  const snapshotCopy = JSON.parse(JSON.stringify(db.snapshot));
  db.snapshot = {
    ...snapshotCopy,
    meta: {
      ...db.snapshot.meta,
      revision: 2,
      lastModified: 200,
    },
    nodes: [{ id: "n1", updatedAt: 100, name: "node" }],
    edges: [{ id: "e2", fromId: "n1", toId: "n3", updatedAt: 200 }],
    state: { lastProcessedFloor: 2, extractionCount: 2 },
  };

  // Upload 2: still inside the grace window, so stale chunks are only recorded.
  const secondUpload = await uploadAt(2_000);
  assert.equal(secondUpload.uploaded, true);
  const secondManifest = remoteFiles.get(manifestName);
  const secondChunks = new Set(chunkFilenamesOf(secondManifest));
  const staleChunks = [];
  const sharedChunks = [];
  for (const filename of firstChunks) {
    const bucket = secondChunks.has(filename) ? sharedChunks : staleChunks;
    bucket.push(filename);
  }
  assert.ok(staleChunks.length > 0, "changed edge/runtime metadata should create stale chunk files");
  assert.ok(sharedChunks.length > 0, "unchanged nodes should keep at least one shared chunk");
  assertRemotePresence(
    staleChunks,
    true,
    (filename) => `stale chunk remains during grace period: ${filename}`,
  );
  assertRemotePresence(
    sharedChunks,
    true,
    (filename) => `shared chunk should remain: ${filename}`,
  );
  assertRemotePresence(
    secondChunks,
    true,
    (filename) => `current chunk should remain: ${filename}`,
  );
  const pendingEntries = secondManifest.chunkGc?.pending || [];
  const pendingFilenames = new Set(pendingEntries.map((entry) => entry.filename));
  assert.deepEqual(pendingFilenames, new Set(staleChunks));
  const deferredCleanup = secondUpload.cleanup;
  assert.equal(deferredCleanup.attempted, 0);
  assert.equal(deferredCleanup.deleted, 0);
  assert.equal(deferredCleanup.failed, 0);
  assert.equal(logs.deleteCalls, 0);
  const secondTimings = secondUpload.timings;
  assert.equal(Number.isFinite(secondTimings?.previousManifestReadMs), true);
  assert.equal(Number.isFinite(secondTimings?.chunkCleanupMs), true);

  // Upload 3: the clock is past the grace window, so the stale chunks must go.
  const thirdUpload = await uploadAt(8_000);
  assert.equal(thirdUpload.uploaded, true);
  const thirdManifest = remoteFiles.get(manifestName);
  assertRemotePresence(
    staleChunks,
    false,
    (filename) => `eligible stale chunk should be deleted: ${filename}`,
  );
  assertRemotePresence(
    chunkFilenamesOf(thirdManifest),
    true,
    (filename) => `current chunk should remain after GC: ${filename}`,
  );
  const finalCleanup = thirdUpload.cleanup;
  const expectedDeletions = staleChunks.length;
  assert.equal(finalCleanup.attempted, expectedDeletions);
  assert.equal(finalCleanup.deleted, expectedDeletions);
  assert.equal(finalCleanup.failed, 0);
}
/**
 * Checks that an upload deletes nothing when the manifest already present on
 * the remote carries no chunk list.
 *
 * The remote is seeded with a manifest that embeds nodes/edges directly (no
 * `chunks`, no `formatVersion`) plus a chunk-named file that no manifest
 * references. After the upload, no cleanup may have been attempted, no delete
 * call may have been made, and the unreferenced file must still exist.
 */
async function testUploadSkipsChunkCleanupWhenPreviousManifestUnavailable() {
  const chatId = "chat-chunk-gc-legacy";
  const legacyManifestName = "ST-BME_sync_chat-chunk-gc-legacy.json";
  const unrelatedOrphanChunk = "ST-BME_sync_chat-chunk-gc-legacy.__edges.000.orphan.json";

  const { fetch, remoteFiles, logs } = createMockFetchEnvironment();
  const dbByChatId = new Map();
  const localDb = new FakeDb(chatId, {
    meta: {
      schemaVersion: 1,
      chatId,
      deviceId: "",
      revision: 3,
      lastModified: 300,
      nodeCount: 1,
      edgeCount: 0,
      tombstoneCount: 0,
    },
    nodes: [{ id: "n1", updatedAt: 300 }],
    edges: [],
    tombstones: [],
    state: { lastProcessedFloor: 3, extractionCount: 1 },
  });
  dbByChatId.set(chatId, localDb);

  // Seed the remote: a manifest without a chunk list, and a file nothing points at.
  const legacyManifest = {
    meta: { chatId, revision: 1 },
    nodes: [],
    edges: [],
    tombstones: [],
    state: { lastProcessedFloor: 0, extractionCount: 0 },
  };
  remoteFiles.set(legacyManifestName, legacyManifest);
  remoteFiles.set(unrelatedOrphanChunk, { kind: "edges", records: [{ id: "old" }] });

  const runtime = buildRuntimeOptions({ dbByChatId, fetch });
  const result = await upload(chatId, runtime);

  assert.equal(result.uploaded, true);
  assert.equal(result.cleanup?.attempted, 0);
  assert.equal(logs.deleteCalls, 0, "non-v2 previous manifest must not trigger speculative deletion");
  const orphanStillPresent = remoteFiles.has(unrelatedOrphanChunk);
  assert.equal(orphanStillPresent, true, "orphan chunk cannot be deleted without manifest evidence");
}
/**
 * Checks that an upload routed through an authority blob adapter leaves the
 * files held by the mock fetch environment alone.
 *
 * The fetch-backed remote is seeded with a `formatVersion: 2` manifest and the
 * single chunk it references. The upload runs with the mock authority adapter,
 * `authorityBlobFailOpen: true` and a zero grace window. Afterwards the cleanup
 * reason must be "authority-blob-skip", neither store may have seen a delete,
 * and both seeded files must still be present.
 */
async function testAuthorityBlobUploadDoesNotDeleteUserFilesFallbackChunks() {
  const chatId = "chat-authority-gc";
  const fallbackManifest = "ST-BME_sync_chat-authority-gc.json";
  const fallbackChunk = "ST-BME_sync_chat-authority-gc.__nodes.000.fallback.json";

  const { fetch, remoteFiles, logs } = createMockFetchEnvironment();
  const authority = createMockAuthorityBlobAdapter();

  const localDb = new FakeDb(chatId, {
    meta: {
      schemaVersion: 1,
      chatId,
      deviceId: "",
      revision: 1,
      lastModified: 100,
      nodeCount: 1,
      edgeCount: 0,
      tombstoneCount: 0,
    },
    nodes: [{ id: "n1", updatedAt: 100 }],
    edges: [],
    tombstones: [],
    state: { lastProcessedFloor: 1, extractionCount: 1 },
  });
  const dbByChatId = new Map();
  dbByChatId.set(chatId, localDb);

  // Seed the fetch-backed remote with a chunked manifest and the chunk it names.
  const seededManifest = {
    kind: "st-bme-sync",
    formatVersion: 2,
    chatId,
    meta: { chatId, revision: 0, lastModified: 1, nodeCount: 1, edgeCount: 0, tombstoneCount: 0, schemaVersion: 1 },
    state: { lastProcessedFloor: 0, extractionCount: 0 },
    chunks: [{ kind: "nodes", index: 0, count: 1, filename: fallbackChunk }],
  };
  remoteFiles.set(fallbackManifest, seededManifest);
  remoteFiles.set(fallbackChunk, { kind: "nodes", index: 0, records: [{ id: "fallback" }] });

  const uploadOptions = {
    ...buildRuntimeOptions({ dbByChatId, fetch }),
    authorityBlobAdapter: authority.adapter,
    authorityBlobFailOpen: true,
    nowMs: 10_000,
    remoteSyncChunkGcGraceMs: 0,
  };
  const result = await upload(chatId, uploadOptions);

  assert.equal(result.uploaded, true);
  assert.equal(result.cleanup?.reason, "authority-blob-skip");
  assert.equal(logs.deleteCalls, 0, "authority upload must not cross-delete user-files fallback chunks");
  assert.equal(authority.logs.deletes, 0, "authority upload should skip chunk GC by default");
  for (const seededName of [fallbackManifest, fallbackChunk]) {
    assert.equal(remoteFiles.has(seededName), true);
  }
}
async function testDownloadImport() {
const { fetch, remoteFiles } = createMockFetchEnvironment();
const dbByChatId = new Map();
@@ -1439,6 +1668,9 @@ async function main() {
await testRemoteStatusMissing();
await testUploadPayloadMetaFirstAndDebounce();
await testUploadSanitizesIllegalChatIdFilename();
await testUploadDefersAndThenCleansStaleRemoteChunks();
await testUploadSkipsChunkCleanupWhenPreviousManifestUnavailable();
await testAuthorityBlobUploadDoesNotDeleteUserFilesFallbackChunks();
await testDownloadImport();
await testLegacyRemoteFilenameFallbackAndReuse();
await testMergeRules();