#!/usr/bin/env node /** * 行为基线异常检测引擎 * * 从 activity-*.jsonl 学习正常行为统计, 生成 baseline.json。 * --check 模式: 对当日事件做异常检测 (3-sigma 规则)。 * * 用法: * node scripts/behavior-baseline.js # 生成/更新基线 * node scripts/behavior-baseline.js --check # 检查当日异常 * node scripts/behavior-baseline.js --json # JSON 输出 * node scripts/behavior-baseline.js --days 30 # 使用最近 N 天数据 */ const fs = require('fs'); const path = require('path'); const detectClaudeRoot = () => require('./paths.config.js').PATHS.root; const CLAUDE_ROOT = detectClaudeRoot(); const DEBUG_DIR = path.join(CLAUDE_ROOT, 'debug'); const BASELINE_FILE = path.join(CLAUDE_ROOT, 'debug', 'baseline.json'); const CHECK_MODE = process.argv.includes('--check'); const JSON_MODE = process.argv.includes('--json'); const daysArg = process.argv.indexOf('--days'); const DAYS = daysArg >= 0 ? parseInt(process.argv[daysArg + 1]) || 30 : 30; // === 日志加载 === function loadActivityLogs(maxDays) { const files = []; try { const entries = fs.readdirSync(DEBUG_DIR); for (const f of entries) { if (f.startsWith('activity-') && f.endsWith('.jsonl')) { files.push(path.join(DEBUG_DIR, f)); } } } catch { return []; } files.sort(); // 过滤日期范围 const cutoff = new Date(); cutoff.setDate(cutoff.getDate() - maxDays); const cutoffStr = cutoff.toISOString().slice(0, 10); const events = []; for (const file of files) { const dateMatch = path.basename(file).match(/activity-(\d{4}-\d{2}-\d{2})/); if (dateMatch && dateMatch[1] < cutoffStr) continue; try { const lines = fs.readFileSync(file, 'utf8').trim().split('\n'); for (const line of lines) { try { events.push(JSON.parse(line)); } catch {} } } catch {} } return events; } // === 基线统计计算 === function computeBaseline(events) { // 按日分组 const dailyStats = {}; for (const evt of events) { const date = (evt.ts || '').slice(0, 10); if (!date) continue; if (!dailyStats[date]) { dailyStats[date] = { totalEvents: 0, byEvent: {}, byTool: {}, bashCmdLengths: [], uniqueSkills: new Set(), hourDistribution: new Array(24).fill(0), }; } const day = dailyStats[date]; day.totalEvents++; // 按事件类型 const evtType = evt.event || 'unknown'; day.byEvent[evtType] = (day.byEvent[evtType] || 0) + 1; // 按工具 const tool = evt.tool || 'unknown'; day.byTool[tool] = (day.byTool[tool] || 0) + 1; // Bash 命令长度 if (evtType === 'bash' && evt.detail) { day.bashCmdLengths.push(evt.detail.length); } // 技能追踪 if (evtType === 'skill' && evt.detail) { day.uniqueSkills.add(evt.detail); } // 小时分布 const hour = parseInt((evt.ts || '').slice(11, 13)); if (!isNaN(hour)) { day.hourDistribution[hour]++; } } // 聚合统计 (均值 + 标准差) const dates = Object.keys(dailyStats).sort(); if (dates.length === 0) return null; const metrics = { totalEventsPerDay: [], bashEventsPerDay: [], writeEventsPerDay: [], skillEventsPerDay: [], avgBashCmdLength: [], uniqueSkillsPerDay: [], }; for (const date of dates) { const day = dailyStats[date]; metrics.totalEventsPerDay.push(day.totalEvents); metrics.bashEventsPerDay.push(day.byEvent.bash || 0); metrics.writeEventsPerDay.push(day.byEvent.write || 0); metrics.skillEventsPerDay.push(day.byEvent.skill || 0); metrics.uniqueSkillsPerDay.push(day.uniqueSkills.size); if (day.bashCmdLengths.length > 0) { const avg = day.bashCmdLengths.reduce((a, b) => a + b, 0) / day.bashCmdLengths.length; metrics.avgBashCmdLength.push(Math.round(avg)); } } // 计算均值和标准差 function stats(arr) { if (arr.length === 0) return { mean: 0, stddev: 0, min: 0, max: 0, n: 0 }; const n = arr.length; const mean = arr.reduce((a, b) => a + b, 0) / n; const variance = arr.reduce((sum, v) => sum + (v - mean) ** 2, 0) / n; const stddev = Math.sqrt(variance); return { mean: Math.round(mean * 100) / 100, stddev: Math.round(stddev * 100) / 100, min: Math.min(...arr), max: Math.max(...arr), n, }; } const baseline = { generated: new Date().toISOString(), dataRange: { from: dates[0], to: dates[dates.length - 1], days: dates.length }, metrics: { totalEventsPerDay: stats(metrics.totalEventsPerDay), bashEventsPerDay: stats(metrics.bashEventsPerDay), writeEventsPerDay: stats(metrics.writeEventsPerDay), skillEventsPerDay: stats(metrics.skillEventsPerDay), avgBashCmdLength: stats(metrics.avgBashCmdLength), uniqueSkillsPerDay: stats(metrics.uniqueSkillsPerDay), }, // 3-sigma 上下界 thresholds: {}, }; // 计算 3-sigma 阈值 for (const [key, s] of Object.entries(baseline.metrics)) { baseline.thresholds[key] = { upper: Math.round((s.mean + 3 * s.stddev) * 100) / 100, lower: Math.max(0, Math.round((s.mean - 3 * s.stddev) * 100) / 100), }; } // v5.1: IQR 统计 baseline.iqr = {}; for (const [key, values] of Object.entries(metrics)) { const iqrResult = computeIQR(values); if (iqrResult) baseline.iqr[key] = iqrResult; } // v5.1: 动态阈值 (EWMA) baseline.dynamicThresholds = {}; for (const [key, values] of Object.entries(metrics)) { const dt = computeDynamicThresholds(values); if (dt) baseline.dynamicThresholds[key] = dt; } return baseline; } // === IQR 计算 (v5.1) === function computeIQR(arr) { if (arr.length < 4) return null; const sorted = [...arr].sort((a, b) => a - b); const n = sorted.length; const q1Idx = Math.floor(n * 0.25); const q3Idx = Math.floor(n * 0.75); const Q1 = sorted[q1Idx]; const Q3 = sorted[q3Idx]; const IQR = Q3 - Q1; return { Q1, Q3, IQR, lowerFence: Q1 - 1.5 * IQR, upperFence: Q3 + 1.5 * IQR, }; } // === 混合异常评分 (v5.1) === /** * Z-score + IQR 双重验证 * 双异常 = CRITICAL, 单一 = WARNING, 无异常 = OK * @param {number} value - 当前值 * @param {Object} stats - { mean, stddev } * @param {Object} iqr - computeIQR 的结果 * @returns {{ severity: string, zAnomaly: boolean, iqrAnomaly: boolean }} */ function hybridAnomalyScore(value, stats, iqr) { // Z-score 检测 (3-sigma) const zScore = stats.stddev > 0 ? Math.abs(value - stats.mean) / stats.stddev : 0; const zAnomaly = zScore > 3; // IQR 检测 let iqrAnomaly = false; if (iqr) { iqrAnomaly = value < iqr.lowerFence || value > iqr.upperFence; } let severity = 'OK'; if (zAnomaly && iqrAnomaly) { severity = 'CRITICAL'; } else if (zAnomaly || iqrAnomaly) { severity = 'WARNING'; } return { severity, zAnomaly, iqrAnomaly, zScore: Math.round(zScore * 100) / 100 }; } // === 动态阈值 EWMA (v5.1) === /** * 指数加权移动平均计算动态阈值 * @param {number[]} dailyValues - 按时间序列排列的日值 * @param {number} alpha - 平滑系数 (0~1, 默认 0.3) * @returns {{ ewma: number, ewmaStd: number, upper: number, lower: number }} */ function computeDynamicThresholds(dailyValues, alpha = 0.3) { if (dailyValues.length === 0) return null; let ewma = dailyValues[0]; let ewmaVar = 0; for (let i = 1; i < dailyValues.length; i++) { const diff = dailyValues[i] - ewma; ewma = alpha * dailyValues[i] + (1 - alpha) * ewma; ewmaVar = alpha * diff * diff + (1 - alpha) * ewmaVar; } const ewmaStd = Math.sqrt(ewmaVar); return { ewma: Math.round(ewma * 100) / 100, ewmaStd: Math.round(ewmaStd * 100) / 100, upper: Math.round((ewma + 3 * ewmaStd) * 100) / 100, lower: Math.max(0, Math.round((ewma - 3 * ewmaStd) * 100) / 100), }; } // === 异常检查 === function checkAnomalies(baseline) { const today = new Date().toISOString().slice(0, 10); const todayFile = path.join(DEBUG_DIR, `activity-${today}.jsonl`); if (!fs.existsSync(todayFile)) { return { date: today, status: 'no_data', anomalies: [] }; } const events = []; try { const lines = fs.readFileSync(todayFile, 'utf8').trim().split('\n'); for (const line of lines) { try { events.push(JSON.parse(line)); } catch {} } } catch { return { date: today, status: 'read_error', anomalies: [] }; } // 当日统计 const todayStats = { totalEvents: events.length, bashEvents: events.filter(e => e.event === 'bash').length, writeEvents: events.filter(e => e.event === 'write').length, skillEvents: events.filter(e => e.event === 'skill').length, uniqueSkills: new Set(events.filter(e => e.event === 'skill').map(e => e.detail)).size, }; // Bash 命令平均长度 const bashCmds = events.filter(e => e.event === 'bash' && e.detail); todayStats.avgBashCmdLength = bashCmds.length > 0 ? Math.round(bashCmds.reduce((s, e) => s + e.detail.length, 0) / bashCmds.length) : 0; const anomalies = []; const checks = [ ['totalEventsPerDay', todayStats.totalEvents], ['bashEventsPerDay', todayStats.bashEvents], ['writeEventsPerDay', todayStats.writeEvents], ['skillEventsPerDay', todayStats.skillEvents], ['avgBashCmdLength', todayStats.avgBashCmdLength], ['uniqueSkillsPerDay', todayStats.uniqueSkills], ]; for (const [metric, value] of checks) { const threshold = baseline.thresholds[metric]; const stats = baseline.metrics[metric]; if (!threshold || !stats || stats.n < 2) continue; if (value > threshold.upper) { anomalies.push({ metric, value, expected: `${stats.mean} +/- ${stats.stddev}`, threshold: threshold.upper, severity: value > stats.mean + 5 * stats.stddev ? 'CRITICAL' : 'WARNING', message: `${metric} = ${value} 超过上界 ${threshold.upper} (均值 ${stats.mean})`, }); } if (value < threshold.lower && threshold.lower > 0) { anomalies.push({ metric, value, expected: `${stats.mean} +/- ${stats.stddev}`, threshold: threshold.lower, severity: 'INFO', message: `${metric} = ${value} 低于下界 ${threshold.lower}`, }); } } return { date: today, status: 'checked', todayStats, anomalies }; } // === 主流程 === function main() { const events = loadActivityLogs(DAYS); if (events.length === 0) { if (JSON_MODE) { console.log(JSON.stringify({ error: 'no_data', message: '无活动日志可分析' })); } else { console.log('无活动日志可分析。请先产生一些操作记录。'); } return; } const baseline = computeBaseline(events); if (!baseline) { console.log('数据不足以生成基线。'); return; } // 保存基线 fs.writeFileSync(BASELINE_FILE, JSON.stringify(baseline, null, 2) + '\n'); if (CHECK_MODE) { const result = checkAnomalies(baseline); if (JSON_MODE) { console.log(JSON.stringify({ baseline: baseline.dataRange, check: result }, null, 2)); return; } console.log('=== 行为基线异常检测 ==='); console.log(`基线范围: ${baseline.dataRange.from} ~ ${baseline.dataRange.to} (${baseline.dataRange.days} 天)`); console.log(`检查日期: ${result.date}`); console.log(''); if (result.todayStats) { console.log('当日统计:'); console.log(` 总事件: ${result.todayStats.totalEvents}`); console.log(` Bash: ${result.todayStats.bashEvents} Write: ${result.todayStats.writeEvents} Skill: ${result.todayStats.skillEvents}`); console.log(` Bash 命令平均长度: ${result.todayStats.avgBashCmdLength} 字符`); console.log(''); } if (result.anomalies.length === 0) { console.log('检测结果: 正常 (无异常)'); } else { console.log(`检测结果: ${result.anomalies.length} 个异常`); for (const a of result.anomalies) { console.log(` [${a.severity}] ${a.message}`); } } return; } // 默认: 显示基线摘要 if (JSON_MODE) { console.log(JSON.stringify(baseline, null, 2)); return; } console.log('=== 行为基线已生成 ==='); console.log(`数据范围: ${baseline.dataRange.from} ~ ${baseline.dataRange.to} (${baseline.dataRange.days} 天, ${events.length} 事件)`); console.log(''); console.log('指标统计:'); for (const [key, s] of Object.entries(baseline.metrics)) { const t = baseline.thresholds[key]; console.log(` ${key.padEnd(25)} mean=${String(s.mean).padStart(7)} std=${String(s.stddev).padStart(7)} range=[${t.lower}, ${t.upper}]`); } console.log(''); console.log(`基线文件: ${BASELINE_FILE}`); console.log('运行 --check 检查当日异常。'); } // 模块导出 (供测试使用) if (typeof module !== 'undefined') { module.exports = { loadActivityLogs, computeBaseline, checkAnomalies, computeIQR, hybridAnomalyScore, computeDynamicThresholds, main, }; } if (require.main === module) { main(); }