bookworm-smart-assistant/scripts/patches/patch-x04-handoff-stream-scan.js

172 lines
6.5 KiB
JavaScript
Raw Normal View History

#!/usr/bin/env node
// patch-x04-handoff-stream-scan.js
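// P1: scanToolOutputTiers() reads the whole transcript with readFileSync; long sessions (>100MB) can OOM.
// Fix: transcripts >20MB are no longer read whole; they are scanned in 4MB chunks via fs.readSync and
//      split on newlines (the readline module is not used). Transcripts >50MB are skipped entirely.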
'use strict';
const fs = require('fs');
const path = require('path');
// Marker written into the patched function's header line; finding it in the
// target means this patch has already been applied.
const SENTINEL = '// [PATCH-X04-STREAM-SCAN]';

// File to patch, resolved relative to this script's own directory.
const target = path.join(__dirname, '..', '..', 'hooks', 'pre-compact-handoff.js');

// Nothing to do when the hook file is absent: report and exit successfully.
const targetExists = fs.existsSync(target);
if (targetExists === false) {
  process.stdout.write('[SKIP] target not found\n');
  process.exit(0);
}

// Keep the untouched bytes for the backup, and work on an LF-only copy so the
// search text matches regardless of the file's line endings. `useCRLF` records
// whether CRLF has to be restored when the result is written back.
const contentRaw = fs.readFileSync(target, 'utf8');
const useCRLF = contentRaw.includes('\r\n');
let content = contentRaw;
if (useCRLF) {
  content = contentRaw.split('\r\n').join('\n');
}

// Idempotence guard: a second run must not modify the file again.
const alreadyPatched = content.includes(SENTINEL);
if (alreadyPatched) {
  process.stdout.write('[SKIP] patch-x04 already applied\n');
  process.exit(0);
}

// One-time backup of the original file; an existing backup is never overwritten.
const bak = `${target}.bak.x04`;
if (!fs.existsSync(bak)) {
  fs.writeFileSync(bak, contentRaw);
}
// Exact text of the pre-patch scanToolOutputTiers() block that this script
// searches for in the (LF-normalized) target and replaces with NEW_FN.
// The match is a literal substring comparison, so every character inside this
// template literal — including its comments and whitespace — is part of the
// search key and must not be edited, reformatted, or translated.
// `\\n` below is written with a doubled backslash so that the searched text
// contains the two-character escape sequence \n, as it appears in source code.
const OLD_FN = `// === TOOL_OUTPUT_TIER_V1 ===
// 扫描 transcript JSONL, 按工具类型分级保留大输出, 输出 TOP-10 摘要
function scanToolOutputTiers(transcriptPath) {
if (!transcriptPath || !fs.existsSync(transcriptPath)) {
return { applied: false, reason: 'no transcript_path' };
}
try {
const raw = fs.readFileSync(transcriptPath, 'utf8');
const lines = raw.split('\\n').filter(Boolean);
const items = [];
for (const line of lines) {
let obj;
try { obj = JSON.parse(line); } catch { continue; }
// 只关注 tool_result 类型 (含工具调用响应)
const content = obj?.message?.content || obj?.content;
if (!Array.isArray(content)) continue;
for (const part of content) {
if (part?.type !== 'tool_result') continue;
const text = typeof part.content === 'string'
? part.content
: Array.isArray(part.content) ? part.content.map(c => c?.text || '').join('') : '';
const size = Buffer.byteLength(text, 'utf8');
if (size < 500) continue;
// R2-INPUT-CAP-V2: 单条 tool_result > 5MB 截断, 防 tierize 正则扫描超时
const MAX_ITEM_BYTES = 5 * 1024 * 1024;
const safeText = size > MAX_ITEM_BYTES ? text.slice(0, MAX_ITEM_BYTES) : text;
items.push({ size, text: safeText, tool_use_id: part.tool_use_id, capped: size > MAX_ITEM_BYTES });
}
}
items.sort((a, b) => b.size - a.size);
const top = items.slice(0, 10).map(it => tierize(it));
const totalBytes = items.reduce((s, it) => s + it.size, 0);
return {
applied: true,
total_tool_results_scanned: items.length,
total_bytes: totalBytes,
top_offenders: top
};
} catch (e) {
return { applied: false, reason: 'scan_error: ' + (e.message || e) };
}
}`;
// Replacement text written into the target in place of OLD_FN.
//
// The emitted code defines three functions in the target file:
//   - scanToolOutputTiers(transcriptPath): same entry point and result shape as
//     before, plus a `mode` field ('sync' | 'stream'); refuses files > 50MB and
//     switches to chunked reading above 20MB.
//   - forEachLineChunked(filePath, chunkBytes, onLine): synchronous chunked
//     line reader used by the 'stream' mode.
//   - processLine(line, items, MAX_ITEM_BYTES): parses one JSONL line and
//     appends its large tool_result parts to `items`.
//
// Fixes relative to the previous replacement text:
//   - processLine skips a small (<500 byte) tool_result with `continue`, as the
//     replaced code did, instead of `return`, which dropped every later
//     tool_result in the same message.
//   - Chunks are split at the newline byte before being decoded, so a
//     multi-byte UTF-8 character straddling a chunk boundary is no longer
//     decoded in two halves.
//   - The file descriptor is closed in `finally`, so a read error cannot leak it.
//
// The header line must keep ${SENTINEL}, and the text must keep the substrings
// "processLine(" and "STREAM_THRESHOLD": the script's idempotence check and
// post-write verification look for them. `\\n` is doubled so the emitted code
// contains the escape sequence \n.
const NEW_FN = `// === TOOL_OUTPUT_TIER_V1 === ${SENTINEL}
// 扫描 transcript JSONL, 按工具类型分级保留大输出, 输出 TOP-10 摘要
// X04: large transcripts are scanned in chunks instead of being read whole (avoids OOM)
function scanToolOutputTiers(transcriptPath) {
  if (!transcriptPath || !fs.existsSync(transcriptPath)) {
    return { applied: false, reason: 'no transcript_path' };
  }
  try {
    const stat = fs.statSync(transcriptPath);
    const MAX_FILE = 50 * 1024 * 1024; // hard upper limit: 50MB
    if (stat.size > MAX_FILE) {
      return { applied: false, reason: 'transcript_too_large: ' + (stat.size / 1024 / 1024).toFixed(1) + 'MB (limit 50MB)' };
    }
    const items = [];
    const MAX_ITEM_BYTES = 5 * 1024 * 1024;
    // <= 20MB: one synchronous read (fast path)
    // >  20MB: chunked line-by-line read (bounded memory)
    const STREAM_THRESHOLD = 20 * 1024 * 1024;
    const mode = stat.size > STREAM_THRESHOLD ? 'stream' : 'sync';
    if (mode === 'sync') {
      const raw = fs.readFileSync(transcriptPath, 'utf8');
      for (const line of raw.split('\\n')) {
        if (line) processLine(line, items, MAX_ITEM_BYTES);
      }
    } else {
      const CHUNK = 4 * 1024 * 1024; // 4MB per read
      forEachLineChunked(transcriptPath, CHUNK, (line) => processLine(line, items, MAX_ITEM_BYTES));
    }
    items.sort((a, b) => b.size - a.size);
    const top = items.slice(0, 10).map(it => tierize(it));
    const totalBytes = items.reduce((s, it) => s + it.size, 0);
    return {
      applied: true,
      total_tool_results_scanned: items.length,
      total_bytes: totalBytes,
      top_offenders: top,
      mode: mode
    };
  } catch (e) {
    return { applied: false, reason: 'scan_error: ' + (e.message || e) };
  }
}
// Reads filePath synchronously in chunkBytes-sized reads and calls onLine(line)
// for every non-empty newline-delimited line, including a final line that has
// no trailing newline. Bytes are split at 0x0A before decoding: that byte never
// occurs inside a multi-byte UTF-8 sequence, so no character is decoded in halves.
function forEachLineChunked(filePath, chunkBytes, onLine) {
  const fd = fs.openSync(filePath, 'r');
  try {
    const buf = Buffer.alloc(chunkBytes);
    let carry = null; // bytes of a line whose terminating newline has not been read yet
    let pos = 0;
    for (;;) {
      const n = fs.readSync(fd, buf, 0, chunkBytes, pos);
      if (n <= 0) break;
      pos += n;
      const lastNl = buf.lastIndexOf(0x0a, n - 1);
      if (lastNl === -1) {
        // No newline in this chunk: all of it extends the unfinished line.
        // Buffer.concat / Buffer.from copy, so reusing buf on the next read is safe.
        carry = carry ? Buffer.concat([carry, buf.subarray(0, n)]) : Buffer.from(buf.subarray(0, n));
        continue;
      }
      const head = buf.subarray(0, lastNl);
      const text = (carry ? Buffer.concat([carry, head]) : head).toString('utf8');
      carry = lastNl + 1 < n ? Buffer.from(buf.subarray(lastNl + 1, n)) : null;
      for (const line of text.split('\\n')) {
        if (line) onLine(line);
      }
    }
    if (carry && carry.length > 0) onLine(carry.toString('utf8'));
  } finally {
    fs.closeSync(fd);
  }
}
// Parses one JSONL line and pushes an entry onto items for every tool_result
// part whose text is at least 500 bytes. Unparseable lines are ignored.
function processLine(line, items, MAX_ITEM_BYTES) {
  let obj;
  try { obj = JSON.parse(line); } catch { return; }
  const content = obj?.message?.content || obj?.content;
  if (!Array.isArray(content)) return;
  for (const part of content) {
    if (part?.type !== 'tool_result') continue;
    const text = typeof part.content === 'string'
      ? part.content
      : Array.isArray(part.content) ? part.content.map(c => c?.text || '').join('') : '';
    const size = Buffer.byteLength(text, 'utf8');
    // Skip only this part; later parts of the same message may still be large.
    if (size < 500) continue;
    // Oversized results are truncated before being stored; size keeps the real byte count.
    const safeText = size > MAX_ITEM_BYTES ? text.slice(0, MAX_ITEM_BYTES) : text;
    items.push({ size, text: safeText, tool_use_id: part.tool_use_id, capped: size > MAX_ITEM_BYTES });
  }
}`;
// Refuse to modify the target unless the exact pre-patch text is present;
// a partial or fuzzy match could corrupt a file already changed by another patch.
if (!content.includes(OLD_FN)) {
  process.stdout.write('[ERROR] old function not found — file may have been modified by another patch\n');
  // Diagnostic only: the whole-file read is still there, so the function exists
  // but its surrounding text differs (the original note attributes this to the
  // X03 patch renaming the handoff variable to heartbeatPath).
  if (content.includes('const raw = fs.readFileSync(transcriptPath,')) {
    process.stdout.write('[HINT] readFileSync pattern found but surrounding context differs\n');
  }
  process.exit(1);
}
// Insert NEW_FN through a replacer function so it is written literally. With a
// replacement *string*, String.prototype.replace would expand "$&", "$'", "$$"
// and similar sequences if the emitted code ever contained them.
content = content.replace(OLD_FN, () => NEW_FN);
// Restore CRLF line endings when the target used them before normalization.
const finalContent = useCRLF ? content.replace(/\n/g, '\r\n') : content;
fs.writeFileSync(target, finalContent, 'utf8');
// Verify by re-reading the file: the sentinel and both new identifiers must be present.
const verify = fs.readFileSync(target, 'utf8');
const ok = verify.includes(SENTINEL) && verify.includes('processLine(') && verify.includes('STREAM_THRESHOLD');
process.stdout.write(ok ? '[DONE] patch-x04 applied: stream scan for large transcripts\n' : '[ERROR] verification failed\n');
process.exit(ok ? 0 : 1);