bookworm-smart-assistant/scripts/patches/patch-r2-precompact-tier-output.js

154 lines
6.3 KiB
JavaScript
Raw Normal View History

#!/usr/bin/env node
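/**
 * patch-r2-precompact-tier-output.js · 2026-04-26
 *
 * R2: add a tiered summary of tool outputs to pre-compact-handoff.js.
 * Before compaction the patched hook scans the transcript_path JSONL, picks the
 * TOP-N largest tool results, keeps a different amount of each depending on the
 * tool type, and writes the result to handoff.json.tool_output_tiers.
 *
 * Tiering rules (design targets):
 * - Bash output >2000B: keep the first 1500B + the last 500B + the truncated amount
 * - Read result >3000B: keep path + line range + a 200B excerpt
 * - Write/Edit result >500B: keep only the path + line-count confirmation
 * - Agent/Task result >2000B: keep the first 1000B + a note that the full Agent result was cut
 * - Other tools >5000B: first 2000B + last 500B
 *
 * Note: the injected helper considers every tool result of 500 bytes or more,
 * guesses the tool type heuristically from the output text (the transcript part
 * carries no tool name), and reports the truncated amount in bytes, not lines.
 *
 * Idempotent: sentinel "TOOL_OUTPUT_TIER_V1"
 */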
'use strict';
const fs = require('fs');
const path = require('path');
// Path of the hook file this script patches: two directories above this
// script's directory, then hooks/pre-compact-handoff.js.
const TARGET = path.join(__dirname, '..', '..', 'hooks', 'pre-compact-handoff.js');
// Marker text carried by the injected code; main() skips the target when the
// marker is already present, which makes the patch idempotent.
const SENTINEL = 'TOOL_OUTPUT_TIER_V1';
// Anchor text: the handoff-construction block expected in the target file.
// main() requires a verbatim match (after converting line endings to the
// target's convention) and replaces it with NEW_BLOCK, so every character of
// this literal, including whitespace, is significant.
const OLD_BLOCK = ` // 构造 handoff 数据
const handoff = {
timestamp: new Date().toISOString(),
session_id: hookData.session_id || \`session-\${Date.now()}\`,
context_hint: '会话因上下文压缩中断,以下是压缩前的状态摘要',
conversation_summary: hookData.transcript_summary || '(由 PreCompact hook 自动捕获)',
tool_call_count: hookData.tool_call_count || 'unknown',
working_directory: process.cwd(),
note: '此文件由 pre-compact-handoff.js 自动生成SessionStart 时自动读取并注入恢复上下文'
};`;
// Replacement for OLD_BLOCK: the same handoff object plus a call to
// scanToolOutputTiers(hookData.transcript_path), whose result is stored under
// the tool_output_tiers key. The leading comment carries the sentinel text that
// main() checks to detect an already-patched file.
const NEW_BLOCK = ` // TOOL_OUTPUT_TIER_V1 - 扫描 transcript 提取大工具输出分级摘要
const toolOutputTiers = scanToolOutputTiers(hookData.transcript_path);
// 构造 handoff 数据
const handoff = {
timestamp: new Date().toISOString(),
session_id: hookData.session_id || \`session-\${Date.now()}\`,
context_hint: '会话因上下文压缩中断,以下是压缩前的状态摘要',
conversation_summary: hookData.transcript_summary || '(由 PreCompact hook 自动捕获)',
tool_call_count: hookData.tool_call_count || 'unknown',
working_directory: process.cwd(),
tool_output_tiers: toolOutputTiers,
note: '此文件由 pre-compact-handoff.js 自动生成SessionStart 时自动读取并注入恢复上下文'
};`;
// Source text of the helper functions appended to the target hook file.
//
// - scanToolOutputTiers(transcriptPath): reads the JSONL transcript, collects
//   every tool_result part of 500 bytes or more, and returns the ten largest
//   (each summarized by tierize) together with the count and total byte size.
//   Returns { applied: false, reason } when the path is missing or on error.
// - tierize(item): guesses the tool kind from the output text and builds a
//   kind-specific summary.
//
// Escapes are doubled (\\n, \\d, ...) because this is a template literal whose
// VALUE is JavaScript source; the emitted code contains the single-backslash
// forms. The sentinel comment on the first line must stay for idempotency.
//
// Fix relative to the earlier revision: the head+tail summaries (bash / other)
// return the text unchanged when it already fits within head+tail characters.
// Previously the two slices overlapped for such text, so the "summary"
// duplicated content and was longer than the original output.
const HELPER_FN = `
// === TOOL_OUTPUT_TIER_V1 ===
// 扫描 transcript JSONL, 按工具类型分级保留大输出, 输出 TOP-10 摘要
function scanToolOutputTiers(transcriptPath) {
if (!transcriptPath || !fs.existsSync(transcriptPath)) {
return { applied: false, reason: 'no transcript_path' };
}
try {
const raw = fs.readFileSync(transcriptPath, 'utf8');
const lines = raw.split('\\n').filter(Boolean);
const items = [];
for (const line of lines) {
let obj;
try { obj = JSON.parse(line); } catch { continue; }
// 只关注 tool_result 类型 (含工具调用响应)
const content = obj?.message?.content || obj?.content;
if (!Array.isArray(content)) continue;
for (const part of content) {
if (part?.type !== 'tool_result') continue;
const text = typeof part.content === 'string'
? part.content
: Array.isArray(part.content) ? part.content.map(c => c?.text || '').join('') : '';
const size = Buffer.byteLength(text, 'utf8');
if (size < 500) continue;
items.push({ size, text, tool_use_id: part.tool_use_id });
}
}
items.sort((a, b) => b.size - a.size);
const top = items.slice(0, 10).map(it => tierize(it));
const totalBytes = items.reduce((s, it) => s + it.size, 0);
return {
applied: true,
total_tool_results_scanned: items.length,
total_bytes: totalBytes,
top_offenders: top
};
} catch (e) {
return { applied: false, reason: 'scan_error: ' + (e.message || e) };
}
}
function tierize(item) {
const { size, text, tool_use_id } = item;
// Head + tail clip. Text that already fits in head + tail is kept whole so the two slices never overlap.
const clip = (headLen, tailLen, label) => {
if (text.length <= headLen + tailLen) return text;
return text.slice(0, headLen) + '\\n... [' + label + Math.max(0, size - headLen - tailLen) + ' bytes] ...\\n' + text.slice(-tailLen);
};
// 启发式工具类型判定 (transcript 不直接含工具名, 用文本特征)
let kind = 'other';
if (/^(File created successfully|Wrote \\d+ lines|The file .* has been (created|updated))/m.test(text)) kind = 'write';
else if (/^\\s*\\d+→/m.test(text) || text.startsWith(' 1\\t')) kind = 'read';
else if (/<tool_use_error>|^bash:|stderr:/m.test(text) || /\\$ /m.test(text.slice(0, 100))) kind = 'bash';
else if (/^(Found \\d+ files?|^[a-zA-Z]:\\\\.*\\.(ts|js|md|json))/m.test(text)) kind = 'glob_grep';
else if (size > 3000 && text.includes('agent')) kind = 'agent';
let summary;
switch (kind) {
case 'write':
summary = text.split('\\n').slice(0, 2).join(' | ').slice(0, 200);
break;
case 'read':
summary = '[Read] ' + text.slice(0, 200) + ' ... [+' + (size - 200) + ' bytes]';
break;
case 'bash':
summary = clip(1500, 500, 'truncated ');
break;
case 'agent':
summary = text.slice(0, 1000) + '\\n... [Agent 完整结果已截断 ' + (size - 1000) + ' bytes]';
break;
case 'glob_grep':
summary = '[Glob/Grep] ' + text.split('\\n').slice(0, 8).join(' | ').slice(0, 400);
break;
default:
summary = clip(2000, 500, '+');
}
return { tool_use_id, kind, original_bytes: size, summary };
}
`;
/**
 * Apply the R2 patch to the hook file at TARGET.
 *
 * Steps:
 *  1. Skip when the file already contains SENTINEL (idempotent re-run).
 *  2. Replace OLD_BLOCK with NEW_BLOCK, after converting both to the target
 *     file's line-ending convention.
 *  3. Append HELPER_FN after the trailing "})();" at the end of the file.
 *  4. Copy the original to a timestamped .bak file, write the patched text to
 *     a temp file, and rename it over TARGET.
 *
 * Exits the process with code 1, before anything is written, when either the
 * OLD_BLOCK anchor or the trailing "})();" cannot be found. Without the second
 * check the file would be written calling scanToolOutputTiers without its
 * definition, and the sentinel would make later runs skip the broken file.
 */
function main() {
  const srcRaw = fs.readFileSync(TARGET, 'utf8');
  if (srcRaw.includes(SENTINEL)) {
    console.log('[r2] already applied, skip');
    return;
  }

  // CRLF tolerance: detect the target's line ending and convert the blocks to match.
  const eol = srcRaw.includes('\r\n') ? '\r\n' : '\n';
  const toTargetEol = (text) => text.replace(/\r?\n/g, eol);
  const oldNorm = toTargetEol(OLD_BLOCK);
  const newNorm = toTargetEol(NEW_BLOCK);
  const helperNorm = toTargetEol(HELPER_FN);

  if (!srcRaw.includes(oldNorm)) {
    console.error('[r2] anchor block not found, manual review needed (eol=' + JSON.stringify(eol) + ')');
    process.exit(1);
  }

  // Replacer functions are used instead of replacement strings so that "$"
  // sequences inside the injected code are never treated as replacement patterns.
  const withHandoff = srcRaw.replace(oldNorm, () => newNorm);

  // The helper goes after the IIFE terminator "})();" at the very end of the file.
  const iifeTail = /(\}\)\(\);\s*)$/;
  if (!iifeTail.test(withHandoff)) {
    console.error('[r2] IIFE tail "})();" not found at end of file, helper not inserted, manual review needed');
    process.exit(1);
  }
  const next = withHandoff.replace(iifeTail, (tail) => `${tail}${eol}${helperNorm}`);

  // Back up the original, then write via temp file + rename so TARGET is
  // replaced in one step rather than partially overwritten.
  const bak = TARGET + '.bak.r2.' + Date.now();
  fs.copyFileSync(TARGET, bak);
  const tmp = TARGET + '.tmp.' + process.pid;
  fs.writeFileSync(tmp, next, 'utf8');
  fs.renameSync(tmp, TARGET);
  console.log('[r2] OK, bak:', path.basename(bak), 'eol=', JSON.stringify(eol));
}
main();