bookworm-smart-assistant/hooks/build-outcome-tracker.js

505 lines
17 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env node
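/**
 * PostToolUse Hook: build outcome tracker (Phase 3)
 * Matcher: Bash
 *
 * Trigger: the executed command matches a build/test command pattern
 * Function: record the build result in debug/outcome-YYYY-MM-DD.jsonl
 *
 * Log format: { ts, command, outcome, pipelineMode, errorHint, sessionId, skill, traceId }
 * Phase 2: errorHint extraction + success-rate aggregation (outcome-aggregation.json)
 * Phase 3: skill-outcome correlation + cross-hook session tracing
 *
 * stdin: { tool_name: "Bash", tool_input: { command }, tool_result: { stdout, stderr, exitCode } }
 * Exit code: 0 (always lets the tool call through; PostToolUse never blocks)
 *
 * Fail-open: any exception → exit(0)
 */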
const fs = require('fs');
const path = require('path');
const { safeAppendJsonl } = require('./lib/safe-append.js');
const readStdin = require('./lib/read-stdin.js');
// ─── Path resolution ─────────────────────────────────
// Directory that receives every file this hook writes. The value exported as
// PATHS.debugDir by ../scripts/paths.config.js is used when that module loads
// and exposes PATHS; otherwise fall back to the `debug` folder that sits next
// to this hook's parent directory.
let debugDir;
try {
  debugDir = require('../scripts/paths.config.js').PATHS.debugDir;
} catch {
  debugDir = path.resolve(__dirname, '..', 'debug');
}
// ─── Feature flag check ──────────────────────────────
// The tracker only runs while the 'build-outcome-tracker' flag is enabled.
// If the flag module cannot be loaded (or the lookup throws) the flag is
// treated as disabled. In both cases the process ends with exit code 0.
try {
  const { isEnabled } = require('../scripts/feature-flags.js');
  const enabled = isEnabled('build-outcome-tracker');
  if (enabled) {
    // Flag is on: continue loading the hook.
  } else {
    process.exit(0);
  }
} catch {
  process.exit(0);
}
// ─── User override check ─────────────────────────────
// When isChecksDisabled() from ../scripts/user-overrides.js returns a truthy
// value the hook exits immediately with code 0. A missing module or a
// throwing lookup is ignored, and the hook keeps going.
try {
  const { isChecksDisabled } = require('../scripts/user-overrides.js');
  const skipTracking = isChecksDisabled();
  if (skipTracking) process.exit(0);
} catch {
  // Deliberately ignored: overrides are optional.
}
// ─── Build/test command patterns ─────────────────────
/**
 * Regular expressions that mark a shell command as a build or test
 * invocation. A command qualifies when at least one pattern matches anywhere
 * in the command string.
 */
const BUILD_TEST_PATTERNS = [
  /\bnpm\s+run\s+(build|test)\b/,
  /\bnpm\s+test\b/,
  /\bnpx\s+.*(?:build|test)\b/,
  /\byarn\s+(?:build|test)\b/,
  /\bpnpm\s+(?:run\s+)?(?:build|test)\b/,
  /\btsc\b/,
  /\bmake\b/,
  /\bcargo\s+(?:build|test)\b/,
  /\bgo\s+(?:build|test)\b/,
  /\bgcc\b/,
  // No trailing `\b` here: `+` is not a word character, so `\b` after `g++`
  // only matches when a word character follows directly. That made the
  // usual `g++ main.cpp` (space or end of string after `++`) unmatchable.
  /\bg\+\+/,
  /\bjavac\b/,
  /\bjest\b/,
  /\bvitest\b/,
  /\bmocha\b/,
  /\bpytest\b/,
  /\bdotnet\s+(?:build|test)\b/,
  /\bgradlew?\s+(?:build|test)\b/,
  /\bmvn\s+(?:compile|test|package|install)\b/,
  /\bwebpack\b/,
  /\bvite\s+build\b/,
  /\besbuild\b/,
];

/**
 * Decide whether a shell command is a build or test invocation.
 * @param {*} command - the command line; anything that is not a non-empty
 *   string yields false
 * @returns {boolean} true when any BUILD_TEST_PATTERNS entry matches
 */
function isBuildOrTestCommand(command) {
  if (!command || typeof command !== 'string') return false;
  return BUILD_TEST_PATTERNS.some(p => p.test(command));
}
// ─── T02: pipeline detection + known-test-runner fallback ───
// Test runners recognised by detectPipeline; matched against the part of the
// command that precedes the first `|`.
const KNOWN_TEST_RUNNERS = [
  /\bvitest\b/,
  /\bjest\b/,
  /\bpytest\b/,
  /\bmocha\b/,
  /\bcargo\s+test\b/,
  /\bgo\s+test\b/,
  /\bdotnet\s+test\b/,
];

/**
 * Report whether a command contains a `|` and which command comes first.
 *
 * Any `|` character counts, so `a || b` is reported as a pipe as well. For a
 * falsy command the input is echoed back unchanged as `baseCommand`.
 *
 * @param {string} command - the full command line
 * @returns {{ isPipe: boolean, baseCommand: string, isKnownTestRunner: boolean }}
 *   `baseCommand` is the trimmed text before the first `|` (or the whole
 *   command when there is none); `isKnownTestRunner` tells whether that base
 *   command matches one of KNOWN_TEST_RUNNERS.
 */
function detectPipeline(command) {
  if (!command) {
    return { isPipe: false, baseCommand: command, isKnownTestRunner: false };
  }

  const isPipe = /\|/.test(command);

  let baseCommand = command;
  if (isPipe) {
    const [firstSegment] = command.split('|');
    baseCommand = firstSegment.trim();
  }

  let isKnownTestRunner = false;
  for (const runner of KNOWN_TEST_RUNNERS) {
    if (runner.test(baseCommand)) {
      isKnownTestRunner = true;
      break;
    }
  }

  return { isPipe, baseCommand, isKnownTestRunner };
}
// ─── Outcome inference ───────────────────────────────
// Generic failure keywords. inferOutcome tests each pattern against the
// combined stdout/stderr/content text; one match is enough to flag a failure
// signal. The case-sensitive ERROR/FAILED entries are already covered by the
// case-insensitive entries above them.
const FAILURE_PATTERNS = [
/\berror\b/i,
/\bfailed\b/i,
/\bfailure\b/i,
/\bERROR\b/,
/\bFAILED\b/,
/\bfatal\b/i,
/\bexception\b/i,
/\bsegfault\b/i,
/exit\s+code\s+[1-9]/i,
/\bnot\s+found\b/i,
/\bcommand\s+failed\b/i,
];
// Generic success keywords, consulted by inferOutcome the same way. When both
// a failure and a success pattern match, inferOutcome reports failure.
const SUCCESS_PATTERNS = [
/\bsuccess\b/i,
/\bpassed\b/i,
/\bcompleted?\b/i,
/\bbuilt?\s+successfully\b/i,
/\bdone\b/i,
/\ball\s+tests?\s+passed\b/i,
/\b0\s+errors?\b/i,
];
// ─── P1: test-framework summary-line detection (takes priority over
//     exitCode, so it is not affected by pipes) ───────────────────────
/**
 * Summary-line patterns of common test/build tools.
 *
 * Each entry is either a fixed verdict (`success: true|false`) or carries a
 * `successFn(match)` that derives the verdict from the captured groups.
 */
const FRAMEWORK_RESULT_PATTERNS = [
  // vitest/jest/pytest: a non-zero "N failed" count. A plain word boundary is
  // used after "failed" so that "1 failed, 3 passed", "1 failed;" and a
  // trailing "1 failed" are recognised, not only "1 failed |".
  { pattern: /\b([1-9]\d*)\s+failed\b/i, success: false },
  { pattern: /Tests?\s+(\d+)\s+passed/i, success: true },
  // "X passed, Y failed": success only when Y is zero
  { pattern: /(\d+)\s+passed,\s*(\d+)\s+failed/i, successFn: (m) => Number.parseInt(m[2], 10) === 0 },
  // pytest: "X passed" or "X passed in 1.23s" at end of line
  { pattern: /(\d+)\s+passed(?:\s+in\s+[\d.]+s)?$/m, success: true },
  // cargo test
  { pattern: /test result:\s*ok/i, success: true },
  { pattern: /test result:\s*FAILED/i, success: false },
  // go test
  { pattern: /^PASS$/m, success: true },
  { pattern: /^FAIL\b/m, success: false },
  // tsc/build
  { pattern: /compiled?\s+successfully/i, success: true },
  { pattern: /build\s+succeeded/i, success: true },
  { pattern: /build\s+failed/i, success: false },
];

/**
 * Look for a test-framework summary line in the last 30 lines of output.
 *
 * Failure indicators win over success indicators: output from several
 * packages or test binaries may contain both (e.g. go `PASS` for one package
 * and `FAIL` for another, or cargo `test result: ok` followed by
 * `test result: FAILED`), and such a run must not be reported as a success
 * just because the success pattern is listed first.
 *
 * @param {string} text - the combined output text
 * @returns {'success'|'failure'|null} null when no summary line is found
 */
function detectFrameworkResult(text) {
  if (!text) return null;
  const lastLines = text.split('\n').slice(-30).join('\n');
  let sawSuccess = false;
  for (const { pattern, success, successFn } of FRAMEWORK_RESULT_PATTERNS) {
    const match = lastLines.match(pattern);
    if (!match) continue;
    const passed = successFn ? successFn(match) : success;
    // Any failure indicator is decisive, so stop at the first one.
    if (!passed) return 'failure';
    sawSuccess = true;
  }
  return sawSuccess ? 'success' : null;
}
/**
 * Classify a tool result as 'success', 'failure' or 'unknown'.
 *
 * Evidence is consulted in this order:
 *   1. a test-framework summary line (detectFrameworkResult), checked first
 *      so a pipe cannot mask the verdict through its exit code;
 *   2. a non-zero exit code (numeric, or a string parsed as base-10);
 *   3. generic failure/success keywords, where failure wins when both occur;
 *   4. exit code 0 with no keyword signal counts as success.
 *
 * @param {Object} toolResult - object with optional stdout, stderr, content
 *   and exitCode fields
 * @returns {'success'|'failure'|'unknown'}
 */
function inferOutcome(toolResult) {
  if (!toolResult) return 'unknown';

  // Combined output text
  const { stdout, stderr, content } = toolResult;
  const text = [stdout || '', stderr || '', content || ''].join('\n');

  const summaryVerdict = detectFrameworkResult(text);
  if (summaryVerdict) return summaryVerdict;

  // The exit code may arrive as a number or as a string.
  const rawExitCode = toolResult.exitCode;
  let exitCode = null;
  if (typeof rawExitCode === 'number') {
    exitCode = rawExitCode;
  } else if (typeof rawExitCode === 'string') {
    exitCode = parseInt(rawExitCode, 10);
  }

  // A usable non-zero exit code is a reliable failure signal.
  const exitCodeUsable = exitCode !== null && !isNaN(exitCode);
  if (exitCodeUsable && exitCode !== 0) return 'failure';

  // Generic keyword matching: any failure keyword outranks success keywords.
  const matchesAny = (patterns) => patterns.some((re) => re.test(text));
  if (matchesAny(FAILURE_PATTERNS)) return 'failure';
  if (matchesAny(SUCCESS_PATTERNS)) return 'success';

  // Exit code 0 and no keyword signal → success; otherwise undecidable.
  return exitCode === 0 ? 'success' : 'unknown';
}
// ─── Phase 2: errorHint extraction ───────────────────
/**
 * Return the first output line that looks like an error message.
 *
 * stderr is scanned before stdout and content. A line qualifies when it
 * contains a word ending in "error" (so `error`, `ERROR` and compound names
 * such as `TypeError` all count), a form of "fail" (`fail`, `fails`,
 * `failed`, `failing`, `failure`, `failures`) or `fatal`. Earlier versions
 * required the exact words `error`/`fail`, which skipped very common lines
 * such as `FAILED tests/test_x.py` or `TypeError: ...`.
 *
 * @param {Object} toolResult - object with optional stderr, stdout, content
 * @returns {string} the trimmed line, cut to at most 150 characters, or an
 *   empty string when nothing matches
 */
function extractErrorHint(toolResult) {
  if (!toolResult) return '';
  const text = [toolResult.stderr || '', toolResult.stdout || '', toolResult.content || ''].join('\n');
  const errorLine = /(?:error|\bfail(?:ed|ing|ures?|s)?|\bfatal)\b/i;
  for (const line of text.split('\n')) {
    // A match implies the line has non-whitespace content.
    if (errorLine.test(line)) {
      return line.trim().slice(0, 150);
    }
  }
  return '';
}
// ─── Phase 2: success-rate aggregation ───────────────
// JSON file inside debugDir that holds the per-command outcome counters;
// updateAggregation reads and rewrites it.
const AGGREGATION_FILE_PATH = path.join(debugDir, 'outcome-aggregation.json');
// Upper bound on the number of command keys kept in that file; beyond it
// updateAggregation drops the entries with the oldest lastUpdated value.
const MAX_AGGREGATION_COMMANDS = 50;
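/**
 * Update the success-rate aggregation data.
 * (Describes updateAggregation, which is defined after normalizeCommand.)
 * @param {string} command - the build command
 * @param {string} outcome - success|failure|unknown
 */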
/**
 * P2-FIX: command normalisation, used to build aggregation keys.
 *
 * Removes differences that would otherwise create near-duplicate keys:
 *   - a piped `tail`/`head` line limit, in both the `-N` and the `-n N`
 *     spelling (`| tail -5`, `| tail -n 15`, `| head -n5`);
 *   - path separator style: runs of backslashes become `/`, and any run of
 *     several slashes collapses to a single `/`.
 * The result is trimmed and cut to 150 characters.
 *
 * @param {string} cmd - raw command line; a falsy value yields ''
 * @returns {string} the normalised command
 */
function normalizeCommand(cmd) {
  return (cmd || '')
    .replace(/\|\s*(?:tail|head)\s+-(?:n\s*)?\d+/g, '') // drop tail/head line limits
    .replace(/\\+/g, '/') // unify path separators
    .replace(/\/{2,}/g, '/') // collapse repeated slashes ("///" included)
    .trim()
    .slice(0, 150); // cap the key length
}
/**
 * Record one outcome in the per-command aggregation file
 * (AGGREGATION_FILE_PATH).
 *
 * The read-modify-write cycle is guarded by an exclusive lock file
 * (`<file>.lock`, opened with the `wx` flag). If the lock is held by someone
 * else the update is skipped. Every error is swallowed: this is best-effort
 * bookkeeping and must never break the hook.
 *
 * Differences from a naive implementation, all of them deliberate:
 *   - the directory is created BEFORE the lock is opened, since otherwise
 *     the very first run fails with ENOENT and never writes anything;
 *   - a lock file older than 30 seconds is treated as left behind by a killed
 *     process and is replaced, since otherwise one crash would disable
 *     aggregation for good;
 *   - an unreadable or malformed aggregation file is replaced by a fresh one
 *     instead of blocking every later update;
 *   - the lock is released in `finally`, and a leftover temp file is removed
 *     when writing fails.
 *
 * @param {string} command - the build command (normalised into the key)
 * @param {string} outcome - success|failure|unknown
 */
function updateAggregation(command, outcome) {
  const lockFile = AGGREGATION_FILE_PATH + '.lock';
  const tmpFile = AGGREGATION_FILE_PATH + '.tmp.' + process.pid;
  const staleLockMs = 30 * 1000;

  // H9: O_EXCL lock file protects the read-modify-write cycle.
  let lockFd;
  try {
    fs.mkdirSync(debugDir, { recursive: true });
    lockFd = fs.openSync(lockFile, 'wx');
  } catch (err) {
    if (!err || err.code !== 'EEXIST') return;
    try {
      // A fresh lock means another process is writing: skip this update.
      if (Date.now() - fs.statSync(lockFile).mtimeMs < staleLockMs) return;
      // Stale lock: take it over. Two processes doing this at the same
      // instant can still race; the worst case is one lost counter update.
      fs.unlinkSync(lockFile);
      lockFd = fs.openSync(lockFile, 'wx');
    } catch {
      return;
    }
  }

  try {
    let agg = {};
    try {
      if (fs.existsSync(AGGREGATION_FILE_PATH)) {
        const parsed = JSON.parse(fs.readFileSync(AGGREGATION_FILE_PATH, 'utf8'));
        if (parsed && typeof parsed === 'object' && !Array.isArray(parsed)) agg = parsed;
      }
    } catch {
      agg = {}; // corrupt file: start over
    }

    const cmdKey = normalizeCommand(command);
    // Own-property lookup so keys such as "constructor" are not mistaken for
    // existing entries.
    const existing = Object.prototype.hasOwnProperty.call(agg, cmdKey) ? agg[cmdKey] : null;
    const stats = existing && typeof existing === 'object'
      ? existing
      : { total: 0, success: 0, failure: 0, unknown: 0, lastUpdated: '' };
    agg[cmdKey] = stats;

    stats.total++;
    if (outcome === 'success') stats.success++;
    else if (outcome === 'failure') stats.failure++;
    else stats.unknown++;
    stats.lastUpdated = new Date().toISOString();

    // LRU eviction: above the cap, drop the least recently updated entries.
    const keys = Object.keys(agg);
    if (keys.length > MAX_AGGREGATION_COMMANDS) {
      const stamp = (k) => String(agg[k]?.lastUpdated || '');
      keys.sort((a, b) => stamp(a).localeCompare(stamp(b)));
      for (const k of keys.slice(0, keys.length - MAX_AGGREGATION_COMMANDS)) {
        delete agg[k];
      }
    }

    // P2: write to a temp file, then rename, so readers never see a
    // half-written file.
    fs.writeFileSync(tmpFile, JSON.stringify(agg, null, 2) + '\n');
    fs.renameSync(tmpFile, AGGREGATION_FILE_PATH);
  } catch {
    try { fs.unlinkSync(tmpFile); } catch {}
  } finally {
    // H9: release the lock; each step on its own so a failing close cannot
    // leave the lock file behind.
    try { fs.closeSync(lockFd); } catch {}
    try { fs.unlinkSync(lockFile); } catch {}
  }
}
// ─── Phase 1: skill attribution ──────────────────────
/**
 * Read the currently routed skill from `route-state-current.json` in
 * debugDir.
 *
 * @returns {*} `routing.primary` when truthy, else `skill` when truthy, else
 *   'unknown'; also 'unknown' when the file is missing or cannot be read or
 *   parsed
 */
function getRouteSkill() {
  const fallback = 'unknown';
  try {
    const routeStateFile = path.join(debugDir, 'route-state-current.json');
    if (!fs.existsSync(routeStateFile)) return fallback;

    const raw = fs.readFileSync(routeStateFile, 'utf8');
    const state = JSON.parse(raw);
    const primary = state.routing && state.routing.primary;
    return primary || state.skill || fallback;
  } catch {
    return fallback;
  }
}
/**
 * Build a fallback trace id of the form `<timestamp>-<random>`.
 *
 * Both parts are base-36: the current epoch milliseconds, then up to six
 * characters taken from a Math.random() fraction. Not cryptographically
 * secure.
 *
 * @returns {string}
 */
function generateTraceId() {
  const timePart = Date.now().toString(36);
  const randomPart = Math.random().toString(36).substring(2, 8);
  return [timePart, randomPart].join('-');
}
// ─── Phase 3: skill-outcome correlation (D3) ─────────
// JSON file inside debugDir holding per-skill outcome statistics; written by
// updateSkillCorrelation and read by getSkillSuccessRate.
const SKILL_CORRELATION_FILE = path.join(debugDir, 'skill-outcome-correlation.json');
// Upper bound on the number of skills kept in that file; beyond it the
// entries with the oldest lastUpdated value are dropped.
const MAX_SKILLS = 30;
// Number of most recent outcomes kept per skill (the `recentWindow` array
// that computeTrend evaluates).
const RECENT_WINDOW_SIZE = 20;
/**
 * Compare the success rate of the older half of a window with that of the
 * newer half.
 *
 * The window is split at floor(length / 2); with an odd length the newer
 * half holds the extra element. Anything other than 'success' counts as a
 * non-success. A change of more than 0.15 in either direction is reported as
 * a trend.
 *
 * @param {Array<string>} recentWindow - recent outcomes, oldest first
 * @returns {'improving'|'worsening'|'stable'|'insufficient'} 'insufficient'
 *   when fewer than 6 outcomes are available
 */
function computeTrend(recentWindow) {
  if (!recentWindow || recentWindow.length < 6) return 'insufficient';

  const splitAt = Math.floor(recentWindow.length / 2);
  const successShare = (outcomes) => {
    const hits = outcomes.reduce((count, o) => (o === 'success' ? count + 1 : count), 0);
    return hits / outcomes.length;
  };

  const olderShare = successShare(recentWindow.slice(0, splitAt));
  const newerShare = successShare(recentWindow.slice(splitAt));
  const delta = newerShare - olderShare;

  if (delta > 0.15) {
    return 'improving';
  }
  if (delta < -0.15) {
    return 'worsening';
  }
  return 'stable';
}
/**
 * Record one outcome in the per-skill statistics file
 * (SKILL_CORRELATION_FILE).
 *
 * For the given skill this bumps the total and per-outcome counters,
 * recomputes `successRate` (rounded to three decimals), refreshes
 * `lastUpdated` and appends the outcome to `recentWindow` (capped at
 * RECENT_WINDOW_SIZE). Above MAX_SKILLS the least recently updated skills are
 * dropped. The file is written via temp file + rename. Every error is
 * swallowed (best-effort bookkeeping).
 *
 * Robustness notes:
 *   - a file that is unreadable, unparsable, or valid JSON of the wrong shape
 *     (`null`, an array, a missing `skills` object) is replaced by a fresh
 *     structure instead of making every later update fail;
 *   - entries are looked up as own properties, so a skill named like an
 *     inherited member ("constructor", "toString") is not mistaken for an
 *     existing entry;
 *   - a leftover temp file is removed when writing fails.
 *
 * @param {string} skill - skill name; empty, non-string and 'unknown' values
 *   are ignored
 * @param {string} outcome - success|failure|unknown
 */
function updateSkillCorrelation(skill, outcome) {
  const tmpFile = SKILL_CORRELATION_FILE + '.tmp.' + process.pid;
  const isRecord = (value) => value !== null && typeof value === 'object' && !Array.isArray(value);
  try {
    if (!skill || typeof skill !== 'string' || skill === 'unknown') return;
    if (!fs.existsSync(debugDir)) fs.mkdirSync(debugDir, { recursive: true });

    let data = null;
    try {
      if (fs.existsSync(SKILL_CORRELATION_FILE)) {
        data = JSON.parse(fs.readFileSync(SKILL_CORRELATION_FILE, 'utf8'));
      }
    } catch {
      data = null;
    }
    if (!isRecord(data)) data = { skills: {}, generatedAt: '' };
    if (!isRecord(data.skills)) data.skills = {};

    const stored = Object.prototype.hasOwnProperty.call(data.skills, skill) ? data.skills[skill] : null;
    const entry = isRecord(stored)
      ? stored
      : { total: 0, success: 0, failure: 0, unknown: 0, successRate: 0, lastUpdated: '', recentWindow: [] };
    data.skills[skill] = entry;

    entry.total++;
    if (outcome === 'success') entry.success++;
    else if (outcome === 'failure') entry.failure++;
    else entry.unknown++;
    entry.successRate = entry.total > 0 ? Math.round((entry.success / entry.total) * 1000) / 1000 : 0;
    entry.lastUpdated = new Date().toISOString();

    // recentWindow: keep only the latest RECENT_WINDOW_SIZE outcomes.
    if (!Array.isArray(entry.recentWindow)) entry.recentWindow = [];
    entry.recentWindow.push(outcome);
    if (entry.recentWindow.length > RECENT_WINDOW_SIZE) {
      entry.recentWindow = entry.recentWindow.slice(-RECENT_WINDOW_SIZE);
    }

    // LRU eviction: above MAX_SKILLS, drop the least recently updated skills.
    const skillKeys = Object.keys(data.skills);
    if (skillKeys.length > MAX_SKILLS) {
      const stamp = (k) => String(data.skills[k]?.lastUpdated || '');
      skillKeys.sort((a, b) => stamp(a).localeCompare(stamp(b)));
      for (const k of skillKeys.slice(0, skillKeys.length - MAX_SKILLS)) {
        delete data.skills[k];
      }
    }

    data.generatedAt = new Date().toISOString();
    // P2: temp file + rename so readers never see a half-written file.
    fs.writeFileSync(tmpFile, JSON.stringify(data, null, 2) + '\n');
    fs.renameSync(tmpFile, SKILL_CORRELATION_FILE);
  } catch {
    try { fs.unlinkSync(tmpFile); } catch {}
  }
}
/**
 * Look up the recorded success rate and trend of a skill (exported for
 * external consumers).
 *
 * @param {string} skill - skill name
 * @returns {{ successRate: number, total: number, trend: string }|null} null
 *   when the statistics file is missing or unreadable, when the skill has no
 *   entry, or when its total is below 3
 */
function getSkillSuccessRate(skill) {
  try {
    if (!fs.existsSync(SKILL_CORRELATION_FILE)) {
      return null;
    }
    const raw = fs.readFileSync(SKILL_CORRELATION_FILE, 'utf8');
    const parsed = JSON.parse(raw);
    const stats = parsed.skills && parsed.skills[skill];
    if (!stats || stats.total < 3) {
      return null;
    }
    const { successRate, total } = stats;
    const trend = computeTrend(stats.recentWindow);
    return { successRate, total, trend };
  } catch {
    return null;
  }
}
// ─── Log writing ─────────────────────────────────────
/**
 * Append one entry to today's outcome log, `outcome-YYYY-MM-DD.jsonl` in
 * debugDir. The date is taken from the UTC ISO timestamp. Errors are
 * swallowed so that logging can never break the hook.
 *
 * @param {Object} entry - the record handed to safeAppendJsonl
 */
function logOutcome(entry) {
  try {
    const day = new Date().toISOString().substring(0, 10);
    const target = path.join(debugDir, `outcome-${day}.jsonl`);
    safeAppendJsonl(target, entry);
  } catch {
    // Best effort only.
  }
}
// ─── Main flow ───────────────────────────────────────
/**
 * Hook entry point: read the hook payload from stdin, and if the executed
 * command is a build/test command, record its outcome.
 *
 * Steps: infer the outcome → (for known test runners) resolve an 'unknown'
 * verdict → extract an error hint on failure → attribute the skill → append
 * the log entry → update the per-command and per-skill statistics → emit a
 * session-trace event. The process always exits with code 0; any error
 * (including a malformed payload) also ends in exit(0).
 *
 * The tool output is read from `tool_result`, falling back to
 * `tool_response` when `tool_result` is absent, because hook hosts differ in
 * which field carries the output. NOTE(review): confirm the field name used
 * by the host this hook runs under.
 */
function main() {
  readStdin({ maxSize: 512 * 1024 }).then(input => {
    const command = input.tool_input?.command;
    // Not a build/test command → nothing to record.
    if (!isBuildOrTestCommand(command)) {
      process.exit(0);
      return;
    }

    const toolResult = input.tool_result ?? input.tool_response;
    // T02: pipeline detection (computed once, used for the fallback verdict
    // and for the log entry).
    const pipeline = detectPipeline(command);

    // Infer the outcome
    let outcome = inferOutcome(toolResult);

    // T02: second opinion for known test runners. An 'unknown' verdict is
    // upgraded to 'success' when the output carries no failure keyword. This
    // needs an actual result object: with no output at all there is no
    // evidence, and the verdict stays 'unknown'.
    if (outcome === 'unknown' && toolResult && pipeline.isKnownTestRunner) {
      const text = [
        (toolResult.stdout || ''),
        (toolResult.stderr || ''),
        (toolResult.content || '')
      ].join('\n');
      const hasFail = /\b(?:fail|error)/i.test(text);
      if (!hasFail) {
        outcome = 'success';
      }
    }

    // Phase 2: errorHint extraction (failures only)
    const errorHint = outcome === 'failure' ? extractErrorHint(toolResult) : '';

    // Phase 1: skill attribution
    const skill = getRouteSkill();

    // Phase 3: use the shared traceId (D5); fall back to a locally generated
    // id when the session-trace module is unavailable or throws.
    let traceId;
    try {
      const { getSessionTrace } = require('../scripts/session-trace.js');
      traceId = getSessionTrace().traceId;
    } catch {
      traceId = generateTraceId();
    }

    // Record (Phase 2: errorHint field added). The command is capped at 200
    // characters.
    const cmdNormalized = (command || '').slice(0, 200);
    logOutcome({
      ts: new Date().toISOString(),
      command: cmdNormalized,
      outcome,
      pipelineMode: pipeline.isPipe,
      errorHint,
      sessionId: input.session_id || 'unknown',
      skill,
      traceId,
    });

    // Phase 2: update the aggregation data
    updateAggregation(cmdNormalized, outcome);

    // Phase 3: skill-outcome correlation (D3)
    try { updateSkillCorrelation(skill, outcome); } catch {}

    // Phase 3: cross-hook session tracing (D5)
    try {
      const { appendTraceEvent } = require('../scripts/session-trace.js');
      appendTraceEvent('build-outcome-tracker', 'outcome', {
        command: cmdNormalized,
        outcome,
        skill,
        errorHint: errorHint ? errorHint.slice(0, 80) : '',
      });
    } catch {}

    process.exit(0);
  }).catch(() => process.exit(0));
}
// 模块导出 (供测试)
if (typeof module !== 'undefined') {
module.exports = { isBuildOrTestCommand, inferOutcome, detectFrameworkResult, detectPipeline, extractErrorHint, updateAggregation, getRouteSkill, generateTraceId, AGGREGATION_FILE_PATH, updateSkillCorrelation, getSkillSuccessRate, computeTrend };
}
if (require.main === module) {
main();
}