bookworm-smart-assistant/scripts/manifest-compact.js

133 lines
5.3 KiB
JavaScript
Raw Normal View History

#!/usr/bin/env node
/**
* manifest-compact.js · Phase α 冲刺 3 · 2026-04-25
* Weekly maintenance CLI (not a hook).
* node scripts/manifest-compact.js # dry-run
* node scripts/manifest-compact.js --execute # apply
*/
'use strict';
const fs = require('fs');
const path = require('path');
// Project root: the parent of the directory that contains this script.
const ROOT = path.resolve(__dirname, '..');
// Base directory holding the manifest and every tree this tool touches.
const PIPELINE_DIR = path.join(ROOT, 'ai-delivery-pipeline');
// JSON-lines manifest (one JSON value per line) that is read and rewritten.
const MANIFEST = path.join(PIPELINE_DIR, 'manifest.jsonl');
// Tree scanned for orphan files, i.e. files whose hash segment matches no manifest key.
const STAGING_DIR = path.join(PIPELINE_DIR, 'staging');
// Tree from which files with an mtime older than TTL_DAYS are removed.
const QUARANTINE_DIR = path.join(PIPELINE_DIR, 'quarantine');
// Parent of ARCHIVE_DIR; not used directly anywhere else in this file.
const DELIVERY_DIR = path.join(PIPELINE_DIR, 'delivery');
// Destination directory for the dated `manifest-YYYY-MM-DD.jsonl` archive files.
const ARCHIVE_DIR = path.join(DELIVERY_DIR, 'archive');
// Age threshold in days, shared by manifest archiving and quarantine cleanup.
const TTL_DAYS = 7;
// True when `--execute` is on the command line; otherwise the run is a dry-run.
const EXECUTE = process.argv.includes('--execute');
/**
 * Read the manifest file and parse it as JSON lines.
 *
 * Blank (empty or whitespace-only) lines are ignored. A line is counted as
 * corrupt when it is not valid JSON, or when it parses to anything other than
 * a plain object (`null`, numbers, strings, arrays). Rejecting those here keeps
 * callers that read properties off each entry from throwing on a stray `null`.
 *
 * @returns {{ parsed: object[], corrupt: number, total: number }}
 *   `parsed` holds the accepted entries in file order, `corrupt` the number of
 *   rejected lines, and `total` the number of non-blank lines. All three are
 *   empty/zero when the manifest file does not exist.
 */
function readManifestLines() {
  if (!fs.existsSync(MANIFEST)) return { parsed: [], corrupt: 0, total: 0 };
  const raw = fs.readFileSync(MANIFEST, 'utf8');
  const lines = raw.split('\n').filter((line) => line.trim() !== '');
  const parsed = [];
  let corrupt = 0;
  for (const line of lines) {
    let entry;
    try {
      entry = JSON.parse(line);
    } catch {
      corrupt++;
      continue;
    }
    // Valid JSON is not enough: a manifest entry must be an object.
    if (entry !== null && typeof entry === 'object' && !Array.isArray(entry)) {
      parsed.push(entry);
    } else {
      corrupt++;
    }
  }
  return { parsed, corrupt, total: lines.length };
}
/**
 * Group manifest entries by `sessionId|hash|originalPath`.
 *
 * Entries with a falsy `hash` or `originalPath` are skipped. A falsy
 * `sessionId` is replaced by '-' in the key.
 *
 * @param {Iterable<object>} entries Manifest entries, in chronological file order.
 * @returns {Map<string, { events: object[], final: object }>}
 *   One bucket per key: `events` lists every entry seen for the key in input
 *   order, `final` is the last of them.
 */
function aggregate(entries) {
  const byKey = new Map();
  for (const entry of entries) {
    const { sessionId, hash, originalPath } = entry;
    if (hash && originalPath) {
      const key = `${sessionId || '-'}|${hash}|${originalPath}`;
      let bucket = byKey.get(key);
      if (!bucket) {
        bucket = { events: [], final: null };
        byKey.set(key, bucket);
      }
      bucket.events.push(entry);
      bucket.final = entry;
    }
  }
  return byKey;
}
/**
 * Split entries into those newer and those older than TTL_DAYS.
 *
 * An entry's age comes from `new Date(entry.ts)`. Entries with a falsy `ts`
 * are stamped with the current time, and an unparseable `ts` yields NaN,
 * which never compares below the cutoff; both therefore land in `recent`.
 *
 * @param {Iterable<object>} entries Manifest entries.
 * @returns {{ recent: object[], old: object[] }} Both lists keep input order.
 */
function partitionByAge(entries) {
  const ttlMs = TTL_DAYS * 86400 * 1000;
  const cutoff = Date.now() - ttlMs;
  const timestampOf = (entry) => (entry.ts ? new Date(entry.ts).getTime() : Date.now());
  const recent = [];
  const old = [];
  for (const entry of entries) {
    if (timestampOf(entry) < cutoff) {
      old.push(entry);
    } else {
      recent.push(entry);
    }
  }
  return { recent, old };
}
/**
 * Recursively list every non-directory entry below `dir`.
 *
 * Entries are inspected with `lstat`, so symbolic links are never followed:
 * a link (to a file or to a directory) is reported as a single entry with the
 * link's own mtime and size. This matters because callers delete what is
 * returned here; following a directory link would expose files outside `dir`.
 *
 * @param {string} dir Directory to scan.
 * @returns {{ path: string, mtime: number, size: number }[]}
 *   One record per entry (`mtime` in milliseconds). Empty when `dir` does not
 *   exist. Entries that cannot be stat'ed (e.g. removed mid-scan) are skipped.
 */
function walkDir(dir) {
  if (!fs.existsSync(dir)) return [];
  const out = [];
  const walk = (current) => {
    for (const name of fs.readdirSync(current)) {
      const p = path.join(current, name);
      let info;
      try {
        info = fs.lstatSync(p);
      } catch {
        continue;
      }
      if (info.isDirectory()) {
        walk(p);
      } else {
        out.push({ path: p, mtime: info.mtimeMs, size: info.size });
      }
    }
  };
  walk(dir);
  return out;
}
/**
 * Select (and optionally delete) quarantine files older than TTL_DAYS.
 *
 * Deletion is best-effort: a failed unlink is ignored, and the returned
 * numbers describe the files selected, not the files actually removed.
 *
 * @param {boolean} dryRun When true, nothing is deleted.
 * @returns {{ count: number, bytes: number }} Number and total size of the selected files.
 */
function cleanOldQuarantine(dryRun) {
  const threshold = Date.now() - TTL_DAYS * 86400 * 1000;
  const expired = [];
  let bytes = 0;
  for (const file of walkDir(QUARANTINE_DIR)) {
    if (file.mtime < threshold) {
      expired.push(file);
      bytes += file.size;
    }
  }
  if (!dryRun) {
    expired.forEach((file) => {
      try {
        fs.unlinkSync(file.path);
      } catch (_) {
        // Best-effort removal: ignore files that cannot be deleted.
      }
    });
  }
  return { count: expired.length, bytes };
}
/**
 * Select (and optionally delete) staging files that no manifest key refers to.
 *
 * For each file, the second path segment below STAGING_DIR is taken as its
 * hash (layout presumably `<session>/<hash>/<file...>` - confirm against the
 * writer). Files with fewer than three segments are left alone. A file is an
 * orphan when no key in `validKeys` contains `|<hash>|`.
 *
 * Deletion is best-effort: a failed unlink is ignored, and the returned
 * numbers describe the files selected, not the files actually removed.
 *
 * @param {Iterable<string>} validKeys Keys of the form `session|hash|originalPath`.
 * @param {boolean} dryRun When true, nothing is deleted.
 * @returns {{ count: number, bytes: number }} Number and total size of the orphans.
 */
function cleanOrphanStaging(validKeys, dryRun) {
  // Materialise the keys once instead of once per file.
  const keys = [...validKeys];
  // Many files share a hash directory, so remember the verdict per hash.
  const verdictByHash = new Map();
  const isReferenced = (hash) => {
    let verdict = verdictByHash.get(hash);
    if (verdict === undefined) {
      const needle = '|' + hash + '|';
      verdict = keys.some((k) => k.includes(needle));
      verdictByHash.set(hash, verdict);
    }
    return verdict;
  };

  const victims = [];
  for (const f of walkDir(STAGING_DIR)) {
    const parts = f.path.replace(STAGING_DIR, '').split(/[\\\/]/).filter(Boolean);
    if (parts.length < 3) continue;
    if (!isReferenced(parts[1])) victims.push(f);
  }

  const totalSize = victims.reduce((sum, f) => sum + f.size, 0);
  if (!dryRun) {
    for (const v of victims) {
      try {
        fs.unlinkSync(v.path);
      } catch (_) {
        // Best-effort removal: ignore files that cannot be deleted.
      }
    }
  }
  return { count: victims.length, bytes: totalSize };
}
/**
 * Append expired manifest entries to today's archive file.
 *
 * The archive is `ARCHIVE_DIR/manifest-YYYY-MM-DD.jsonl` (UTC date), written
 * as one JSON document per line. Entries are appended, so several runs on the
 * same day accumulate in one file. In dry-run mode nothing is touched on disk,
 * including the archive directory itself.
 *
 * @param {object[]} oldEntries Entries to archive.
 * @param {boolean} dryRun When true, only report what would be written.
 * @returns {{ archived: number, path: (string|null) }}
 *   Number of entries and the target archive path; `path` is null when there
 *   is nothing to archive.
 */
function archiveOld(oldEntries, dryRun) {
  if (oldEntries.length === 0) return { archived: 0, path: null };
  const today = new Date().toISOString().slice(0, 10);
  const archivePath = path.join(ARCHIVE_DIR, 'manifest-' + today + '.jsonl');
  if (!dryRun) {
    // Create the directory only when actually writing, so a dry-run has no side effects.
    fs.mkdirSync(ARCHIVE_DIR, { recursive: true });
    const body = oldEntries.map((e) => JSON.stringify(e)).join('\n') + '\n';
    fs.appendFileSync(archivePath, body, 'utf8');
  }
  return { archived: oldEntries.length, path: archivePath };
}
/**
 * Replace the manifest with the given entries, one JSON document per line.
 *
 * The new content is first written to `<MANIFEST>.tmp.<pid>` and then renamed
 * over the manifest, so the manifest is swapped in one step rather than being
 * truncated and rewritten in place. An empty entry list produces an empty file.
 *
 * @param {object[]} recentEntries Entries to keep.
 * @param {boolean} dryRun When true, the function does nothing.
 * @returns {void}
 */
function rewriteManifest(recentEntries, dryRun) {
  if (!dryRun) {
    const scratchPath = `${MANIFEST}.tmp.${process.pid}`;
    const lines = recentEntries.map((entry) => JSON.stringify(entry));
    const body = lines.length > 0 ? `${lines.join('\n')}\n` : '';
    fs.writeFileSync(scratchPath, body, 'utf8');
    fs.renameSync(scratchPath, MANIFEST);
  }
}
/**
 * Format a byte count for display.
 *
 * Values below 1024 are printed as-is with a `B` suffix; values below
 * 1024 * 1024 are shown in `KB`, everything else in `MB`, both with one
 * decimal place (1024-based units).
 *
 * @param {number} b Byte count.
 * @returns {string} For example `512 B`, `1.5 KB` or `3.0 MB`.
 */
function fmtBytes(b) {
  const KIB = 1024;
  if (b < KIB) return `${b} B`;
  const [value, unit] = b < KIB * KIB ? [b / KIB, 'KB'] : [b / KIB / KIB, 'MB'];
  return `${value.toFixed(1)} ${unit}`;
}
/**
 * CLI entry point: compact the manifest and clean the pipeline directories.
 *
 * Steps, in order: read the manifest, split entries by age, group them to
 * derive the set of valid keys, clean old quarantine files, clean orphan
 * staging files, archive the old entries, print a summary, and - only with
 * `--execute` - rewrite the manifest with the recent entries. Without
 * `--execute` every step runs in dry-run mode.
 *
 * Exits the process with status 0 when no manifest exists.
 */
function main() {
  if (!fs.existsSync(MANIFEST)) {
    console.log('[manifest-compact] no manifest, skip');
    process.exit(0);
  }

  const manifest = readManifestLines();
  console.log(
    '[manifest-compact] entries:',
    manifest.parsed.length,
    '(total',
    `${manifest.total}, corrupt`,
    `${manifest.corrupt})`
  );

  const byAge = partitionByAge(manifest.parsed);
  const groups = aggregate(manifest.parsed);
  const validKeys = new Set(groups.keys());

  const dryRun = !EXECUTE;
  const quarantine = cleanOldQuarantine(dryRun);
  const staging = cleanOrphanStaging(validKeys, dryRun);
  const archive = archiveOld(byAge.old, dryRun);

  const mode = EXECUTE ? 'EXECUTE' : 'DRY-RUN';
  console.log(`\n[${mode}] summary:`);
  console.log(' groups:', groups.size);
  console.log(' recent kept:', byAge.recent.length);
  console.log(' archived:', archive.archived, '->', archive.path || '(none)');
  console.log(' quarantine cleaned:', quarantine.count, `(${fmtBytes(quarantine.bytes)})`);
  console.log(' staging orphans cleaned:', staging.count, `(${fmtBytes(staging.bytes)})`);

  if (!EXECUTE) {
    console.log('\n[DRY-RUN] add --execute to apply');
    return;
  }
  // The manifest is replaced last, after the old entries have been archived.
  rewriteManifest(byAge.recent, false);
  console.log(`\n[EXECUTE] manifest.jsonl rewritten (${byAge.recent.length} entries)`);
}
// Run the CLI; on any thrown error, print its message to stderr and exit with status 1.
try { main(); } catch (e) { console.error('[manifest-compact] crash:', e.message); process.exit(1); }