172 lines
6.5 KiB
JavaScript
172 lines
6.5 KiB
JavaScript
|
|
#!/usr/bin/env node
|
||
|
|
// patch-x04-handoff-stream-scan.js
// P1: scanToolOutputTiers() loads the whole transcript with readFileSync; a long
//     session (>100MB) can exhaust memory.
// Fix: the replacement function refuses transcripts larger than 50MB, reads files
//      up to 20MB in one go, and scans anything in between in 4MB chunks that are
//      split on newlines.
'use strict';

const fs = require('fs');
const path = require('path');

// Marker written into the patched function; its presence means "already applied".
const SENTINEL = '// [PATCH-X04-STREAM-SCAN]';
const target = path.join(__dirname, '..', '..', 'hooks', 'pre-compact-handoff.js');

/**
 * Escape every regular-expression metacharacter in `text` so it matches literally.
 * @param {string} text
 * @returns {string}
 */
function escapeRegExp(text) {
  return text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}

/**
 * Build a pattern that matches `snippet` line by line while ignoring leading and
 * trailing spaces/tabs on each line. An exact occurrence of `snippet` still
 * matches; the tolerance only keeps a re-indented target from being rejected.
 * @param {string} snippet - LF-separated source text to look for.
 * @returns {RegExp}
 */
function buildIndentTolerantPattern(snippet) {
  const source = snippet
    .split('\n')
    .map((line) => `[ \\t]*${escapeRegExp(line.trim())}[ \\t]*`)
    .join('\\n');
  return new RegExp(source);
}

if (!fs.existsSync(target)) {
  process.stdout.write('[SKIP] target not found\n');
  process.exit(0);
}

// Work on LF-normalised text and restore CRLF on write so matching is
// independent of the target's line endings.
const contentRaw = fs.readFileSync(target, 'utf8');
const useCRLF = contentRaw.includes('\r\n');
let content = useCRLF ? contentRaw.replace(/\r\n/g, '\n') : contentRaw;

if (content.includes(SENTINEL)) {
  process.stdout.write('[SKIP] patch-x04 already applied\n');
  process.exit(0);
}

// Keep the first pristine copy only; never overwrite an existing backup.
const bak = target + '.bak.x04';
if (!fs.existsSync(bak)) fs.writeFileSync(bak, contentRaw);

// Text of the function being replaced. This is runtime data (it is matched
// against the target file), so its content must not be edited.
const OLD_FN = `// === TOOL_OUTPUT_TIER_V1 ===
// 扫描 transcript JSONL, 按工具类型分级保留大输出, 输出 TOP-10 摘要
function scanToolOutputTiers(transcriptPath) {
  if (!transcriptPath || !fs.existsSync(transcriptPath)) {
    return { applied: false, reason: 'no transcript_path' };
  }
  try {
    const raw = fs.readFileSync(transcriptPath, 'utf8');
    const lines = raw.split('\\n').filter(Boolean);
    const items = [];
    for (const line of lines) {
      let obj;
      try { obj = JSON.parse(line); } catch { continue; }
      // 只关注 tool_result 类型 (含工具调用响应)
      const content = obj?.message?.content || obj?.content;
      if (!Array.isArray(content)) continue;
      for (const part of content) {
        if (part?.type !== 'tool_result') continue;
        const text = typeof part.content === 'string'
          ? part.content
          : Array.isArray(part.content) ? part.content.map(c => c?.text || '').join('') : '';
        const size = Buffer.byteLength(text, 'utf8');
        if (size < 500) continue;
        // R2-INPUT-CAP-V2: 单条 tool_result > 5MB 截断, 防 tierize 正则扫描超时
        const MAX_ITEM_BYTES = 5 * 1024 * 1024;
        const safeText = size > MAX_ITEM_BYTES ? text.slice(0, MAX_ITEM_BYTES) : text;
        items.push({ size, text: safeText, tool_use_id: part.tool_use_id, capped: size > MAX_ITEM_BYTES });
      }
    }
    items.sort((a, b) => b.size - a.size);
    const top = items.slice(0, 10).map(it => tierize(it));
    const totalBytes = items.reduce((s, it) => s + it.size, 0);
    return {
      applied: true,
      total_tool_results_scanned: items.length,
      total_bytes: totalBytes,
      top_offenders: top
    };
  } catch (e) {
    return { applied: false, reason: 'scan_error: ' + (e.message || e) };
  }
}`;

// Replacement text written into the target. Differences from the first draft of
// this patch: small tool_results are skipped with `continue` (as in OLD_FN)
// instead of aborting the whole message, chunk tails are carried as bytes so
// multi-byte characters are never split, and the descriptor is closed in `finally`.
const NEW_FN = `// === TOOL_OUTPUT_TIER_V1 === ${SENTINEL}
// 扫描 transcript JSONL, 按工具类型分级保留大输出, 输出 TOP-10 摘要
// X04: 流式逐行扫描, 避免大文件 OOM
function scanToolOutputTiers(transcriptPath) {
  if (!transcriptPath || !fs.existsSync(transcriptPath)) {
    return { applied: false, reason: 'no transcript_path' };
  }
  try {
    const stat = fs.statSync(transcriptPath);
    const MAX_FILE = 50 * 1024 * 1024; // 50MB 硬上限
    if (stat.size > MAX_FILE) {
      return { applied: false, reason: 'transcript_too_large: ' + (stat.size / 1024 / 1024).toFixed(1) + 'MB (limit 50MB)' };
    }

    const items = [];
    const MAX_ITEM_BYTES = 5 * 1024 * 1024;

    // 20MB 以下: 同步读取 (性能优先)
    // 20MB 以上: 逐行流式读取 (内存安全)
    const STREAM_THRESHOLD = 20 * 1024 * 1024;

    if (stat.size <= STREAM_THRESHOLD) {
      const raw = fs.readFileSync(transcriptPath, 'utf8');
      const lines = raw.split('\\n').filter(Boolean);
      for (const line of lines) {
        processLine(line, items, MAX_ITEM_BYTES);
      }
    } else {
      // 流式: 逐块读取, 按换行切割
      const fd = fs.openSync(transcriptPath, 'r');
      try {
        const CHUNK = 4 * 1024 * 1024; // 4MB 块
        const buf = Buffer.alloc(CHUNK);
        // The unterminated tail is kept as raw bytes: decoding each chunk on its
        // own would corrupt a multi-byte UTF-8 character cut by a chunk boundary.
        let pending = Buffer.alloc(0);
        let pos = 0;
        while (pos < stat.size) {
          const n = fs.readSync(fd, buf, 0, CHUNK, pos);
          if (n <= 0) break;
          pos += n;
          const data = Buffer.concat([pending, buf.subarray(0, n)]);
          const lastNewline = data.lastIndexOf(0x0a);
          if (lastNewline === -1) {
            // No complete line yet; keep accumulating.
            pending = data;
            continue;
          }
          pending = Buffer.from(data.subarray(lastNewline + 1));
          const parts = data.toString('utf8', 0, lastNewline).split('\\n');
          for (const line of parts) {
            if (!line) continue;
            processLine(line, items, MAX_ITEM_BYTES);
          }
        }
        if (pending.length > 0) processLine(pending.toString('utf8'), items, MAX_ITEM_BYTES);
      } finally {
        // Always release the descriptor, even when reading or parsing throws.
        fs.closeSync(fd);
      }
    }

    items.sort((a, b) => b.size - a.size);
    const top = items.slice(0, 10).map(it => tierize(it));
    const totalBytes = items.reduce((s, it) => s + it.size, 0);
    return {
      applied: true,
      total_tool_results_scanned: items.length,
      total_bytes: totalBytes,
      top_offenders: top,
      mode: stat.size > STREAM_THRESHOLD ? 'stream' : 'sync'
    };
  } catch (e) {
    return { applied: false, reason: 'scan_error: ' + (e.message || e) };
  }
}

function processLine(line, items, MAX_ITEM_BYTES) {
  let obj;
  try { obj = JSON.parse(line); } catch { return; }
  const content = obj?.message?.content || obj?.content;
  if (!Array.isArray(content)) return;
  for (const part of content) {
    if (part?.type !== 'tool_result') continue;
    const text = typeof part.content === 'string'
      ? part.content
      : Array.isArray(part.content) ? part.content.map(c => c?.text || '').join('') : '';
    const size = Buffer.byteLength(text, 'utf8');
    // Skip only this part; later parts of the same message must still be scanned.
    if (size < 500) continue;
    const safeText = size > MAX_ITEM_BYTES ? text.slice(0, MAX_ITEM_BYTES) : text;
    items.push({ size, text: safeText, tool_use_id: part.tool_use_id, capped: size > MAX_ITEM_BYTES });
  }
}`;

// Locate the old function without depending on its exact indentation.
const oldFnMatch = buildIndentTolerantPattern(OLD_FN).exec(content);
if (!oldFnMatch) {
  process.stdout.write('[ERROR] old function not found — file may have been modified by another patch\n');
  // Try to detect a version already modified by X03 (handoff variable renamed to heartbeatPath).
  if (content.includes('const raw = fs.readFileSync(transcriptPath,')) {
    process.stdout.write('[HINT] readFileSync pattern found but surrounding context differs\n');
  }
  process.exit(1);
}

// Splice by index rather than String.prototype.replace so that "$" sequences in
// NEW_FN can never be interpreted as replacement patterns.
content =
  content.slice(0, oldFnMatch.index) +
  NEW_FN +
  content.slice(oldFnMatch.index + oldFnMatch[0].length);
const finalContent = useCRLF ? content.replace(/\n/g, '\r\n') : content;
fs.writeFileSync(target, finalContent, 'utf8');

// Verify: re-read the file and confirm the patch markers actually landed on disk.
const verify = fs.readFileSync(target, 'utf8');
const ok = verify.includes(SENTINEL) && verify.includes('processLine(') && verify.includes('STREAM_THRESHOLD');
process.stdout.write(ok ? '[DONE] patch-x04 applied: stream scan for large transcripts\n' : '[ERROR] verification failed\n');
process.exit(ok ? 0 : 1);
|