#!/usr/bin/env node
// patch-x04-handoff-stream-scan.js
// P1: scanToolOutputTiers() loads the whole transcript with readFileSync, so a
//     long session (>100MB) can run out of memory.
// Fix: transcripts above 50MB are refused, and anything above 20MB is read in
//      fixed-size chunks and scanned line by line instead of in one go.
'use strict';

const fs = require('fs');
const path = require('path');

// Marker written into the patched function; its presence means "already applied".
const SENTINEL = '// [PATCH-X04-STREAM-SCAN]';
const target = path.join(__dirname, '..', '..', 'hooks', 'pre-compact-handoff.js');

/**
 * Build a RegExp that matches `snippet` literally, except that every run of
 * whitespace in the snippet matches any non-empty run of whitespace in the
 * subject. This keeps the patch applicable when the target differs from the
 * snippet only in indentation or line breaks.
 *
 * @param {string} snippet - Source text to locate.
 * @returns {RegExp} Non-global pattern for the first occurrence.
 */
function buildWhitespaceTolerantPattern(snippet) {
  const tokens = snippet
    .trim()
    .split(/\s+/)
    .map((token) => token.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'));
  return new RegExp(tokens.join('\\s+'));
}

if (!fs.existsSync(target)) {
  process.stdout.write('[SKIP] target not found\n');
  process.exit(0);
}

const contentRaw = fs.readFileSync(target, 'utf8');
// Work on LF internally and restore CRLF on write, so the snippets below only
// ever have to deal with one line-ending style.
const useCRLF = contentRaw.includes('\r\n');
let content = useCRLF ? contentRaw.replace(/\r\n/g, '\n') : contentRaw;

if (content.includes(SENTINEL)) {
  process.stdout.write('[SKIP] patch-x04 already applied\n');
  process.exit(0);
}

// Keep the first backup only: a re-run must not overwrite the pristine copy.
const bak = target + '.bak.x04';
if (!fs.existsSync(bak)) fs.writeFileSync(bak, contentRaw);

// Function body to be replaced. Layout inside this snippet is not significant:
// it is located with buildWhitespaceTolerantPattern().
const OLD_FN = `// === TOOL_OUTPUT_TIER_V1 ===
// 扫描 transcript JSONL, 按工具类型分级保留大输出, 输出 TOP-10 摘要
function scanToolOutputTiers(transcriptPath) {
  if (!transcriptPath || !fs.existsSync(transcriptPath)) {
    return { applied: false, reason: 'no transcript_path' };
  }
  try {
    const raw = fs.readFileSync(transcriptPath, 'utf8');
    const lines = raw.split('\\n').filter(Boolean);
    const items = [];
    for (const line of lines) {
      let obj;
      try { obj = JSON.parse(line); } catch { continue; }
      // 只关注 tool_result 类型 (含工具调用响应)
      const content = obj?.message?.content || obj?.content;
      if (!Array.isArray(content)) continue;
      for (const part of content) {
        if (part?.type !== 'tool_result') continue;
        const text = typeof part.content === 'string'
          ? part.content
          : Array.isArray(part.content)
          ? part.content.map(c => c?.text || '').join('')
          : '';
        const size = Buffer.byteLength(text, 'utf8');
        if (size < 500) continue;
        // R2-INPUT-CAP-V2: 单条 tool_result > 5MB 截断, 防 tierize 正则扫描超时
        const MAX_ITEM_BYTES = 5 * 1024 * 1024;
        const safeText = size > MAX_ITEM_BYTES ? text.slice(0, MAX_ITEM_BYTES) : text;
        items.push({ size, text: safeText, tool_use_id: part.tool_use_id, capped: size > MAX_ITEM_BYTES });
      }
    }
    items.sort((a, b) => b.size - a.size);
    const top = items.slice(0, 10).map(it => tierize(it));
    const totalBytes = items.reduce((s, it) => s + it.size, 0);
    return { applied: true, total_tool_results_scanned: items.length, total_bytes: totalBytes, top_offenders: top };
  } catch (e) {
    return { applied: false, reason: 'scan_error: ' + (e.message || e) };
  }
}`;

// Replacement. Differences from the previous revision of this patch:
//  - processLine() skips a small tool_result with `continue`, as the replaced
//    code did, instead of abandoning the rest of the message with `return`;
//  - the chunked reader closes its descriptor in `finally`;
//  - chunks are split on the newline byte before decoding, so a multi-byte
//    UTF-8 character straddling a chunk boundary is no longer corrupted.
const NEW_FN = `// === TOOL_OUTPUT_TIER_V1 === ${SENTINEL}
// 扫描 transcript JSONL, 按工具类型分级保留大输出, 输出 TOP-10 摘要
// X04: 流式逐行扫描, 避免大文件 OOM
function scanToolOutputTiers(transcriptPath) {
  if (!transcriptPath || !fs.existsSync(transcriptPath)) {
    return { applied: false, reason: 'no transcript_path' };
  }
  try {
    const stat = fs.statSync(transcriptPath);
    const MAX_FILE = 50 * 1024 * 1024; // 50MB 硬上限
    if (stat.size > MAX_FILE) {
      return { applied: false, reason: 'transcript_too_large: ' + (stat.size / 1024 / 1024).toFixed(1) + 'MB (limit 50MB)' };
    }
    const items = [];
    const MAX_ITEM_BYTES = 5 * 1024 * 1024;
    // 20MB 以下: 同步读取 (性能优先)
    // 20MB 以上: 逐行流式读取 (内存安全)
    const STREAM_THRESHOLD = 20 * 1024 * 1024;
    if (stat.size <= STREAM_THRESHOLD) {
      const raw = fs.readFileSync(transcriptPath, 'utf8');
      const lines = raw.split('\\n').filter(Boolean);
      for (const line of lines) {
        processLine(line, items, MAX_ITEM_BYTES);
      }
    } else {
      // 流式: 逐块读取, 按换行切割
      const fd = fs.openSync(transcriptPath, 'r');
      try {
        const CHUNK = 4 * 1024 * 1024; // 4MB 块
        const buf = Buffer.alloc(CHUNK);
        // Bytes of the trailing line that has not been terminated yet. Kept as
        // bytes so a UTF-8 sequence cut by a chunk boundary is decoded whole.
        let pending = Buffer.alloc(0);
        let pos = 0;
        while (pos < stat.size) {
          const n = fs.readSync(fd, buf, 0, CHUNK, pos);
          if (n <= 0) break;
          pos += n;
          const view = buf.subarray(0, n);
          const lastNl = view.lastIndexOf(0x0a);
          if (lastNl === -1) {
            // No line ends inside this chunk; keep accumulating.
            pending = Buffer.concat([pending, view]);
            continue;
          }
          const complete = Buffer.concat([pending, view.subarray(0, lastNl)]).toString('utf8');
          // Copy the tail: buf is overwritten by the next readSync.
          pending = Buffer.from(view.subarray(lastNl + 1));
          for (const line of complete.split('\\n')) {
            if (!line) continue;
            processLine(line, items, MAX_ITEM_BYTES);
          }
        }
        if (pending.length > 0) processLine(pending.toString('utf8'), items, MAX_ITEM_BYTES);
      } finally {
        fs.closeSync(fd);
      }
    }
    items.sort((a, b) => b.size - a.size);
    const top = items.slice(0, 10).map(it => tierize(it));
    const totalBytes = items.reduce((s, it) => s + it.size, 0);
    return { applied: true, total_tool_results_scanned: items.length, total_bytes: totalBytes, top_offenders: top, mode: stat.size > STREAM_THRESHOLD ? 'stream' : 'sync' };
  } catch (e) {
    return { applied: false, reason: 'scan_error: ' + (e.message || e) };
  }
}

// Parse one transcript line and append every tool_result of at least 500 bytes to items.
function processLine(line, items, MAX_ITEM_BYTES) {
  let obj;
  try { obj = JSON.parse(line); } catch { return; }
  const content = obj?.message?.content || obj?.content;
  if (!Array.isArray(content)) return;
  for (const part of content) {
    if (part?.type !== 'tool_result') continue;
    const text = typeof part.content === 'string'
      ? part.content
      : Array.isArray(part.content)
      ? part.content.map(c => c?.text || '').join('')
      : '';
    const size = Buffer.byteLength(text, 'utf8');
    if (size < 500) continue;
    const safeText = size > MAX_ITEM_BYTES ? text.slice(0, MAX_ITEM_BYTES) : text;
    items.push({ size, text: safeText, tool_use_id: part.tool_use_id, capped: size > MAX_ITEM_BYTES });
  }
}`;

const oldFnMatch = buildWhitespaceTolerantPattern(OLD_FN).exec(content);
if (!oldFnMatch) {
  process.stdout.write('[ERROR] old function not found — file may have been modified by another patch\n');
  // Try to detect the variant already modified by X03 (handoff variable renamed to heartbeatPath).
  if (content.includes('const raw = fs.readFileSync(transcriptPath,')) {
    process.stdout.write('[HINT] readFileSync pattern found but surrounding context differs\n');
  }
  process.exit(1);
}

// Splice by index instead of String.prototype.replace(string, string), which
// would interpret `$&`, `$1`, ... sequences inside the replacement text.
content =
  content.slice(0, oldFnMatch.index) +
  NEW_FN +
  content.slice(oldFnMatch.index + oldFnMatch[0].length);

const finalContent = useCRLF ? content.replace(/\n/g, '\r\n') : content;
fs.writeFileSync(target, finalContent, 'utf8');

// Verify by re-reading the file from disk rather than trusting the in-memory copy.
const verify = fs.readFileSync(target, 'utf8');
const ok = verify.includes(SENTINEL) && verify.includes('processLine(') && verify.includes('STREAM_THRESHOLD');
process.stdout.write(ok ? '[DONE] patch-x04 applied: stream scan for large transcripts\n' : '[ERROR] verification failed\n');
process.exit(ok ? 0 : 1);