bookworm-smart-assistant/scripts/behavior-baseline.js

435 lines
13 KiB
JavaScript
Raw Permalink Normal View History

#!/usr/bin/env node
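/**
 * Behavior-baseline anomaly detection engine
 *
 * Learns normal-behavior statistics from activity-*.jsonl and writes baseline.json
 * --check mode: runs anomaly detection on today's events (3-sigma rule)
 *
 * Usage:
 *   node scripts/behavior-baseline.js             # generate/update the baseline
 *   node scripts/behavior-baseline.js --check     # check today's events for anomalies
 *   node scripts/behavior-baseline.js --json      # JSON output
 *   node scripts/behavior-baseline.js --days 30   # use the most recent N days of data
 */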
const fs = require('fs');
const path = require('path');
// Project root as published by the shared path configuration module
// (`PATHS.root` exported from ./paths.config.js).
const detectClaudeRoot = () => require('./paths.config.js').PATHS.root;
const CLAUDE_ROOT = detectClaudeRoot();

// Activity logs are read from, and the baseline file is written to, <root>/debug.
const DEBUG_DIR = path.join(CLAUDE_ROOT, 'debug');
const BASELINE_FILE = path.join(DEBUG_DIR, 'baseline.json');

// Command-line flags.
const CHECK_MODE = process.argv.includes('--check');
const JSON_MODE = process.argv.includes('--json');

// `--days N`: look-back window in days. Parsed as base-10; a missing,
// non-numeric, zero or negative value falls back to 30, because a negative
// window would move the cutoff into the future and filter out every dated log.
const daysArg = process.argv.indexOf('--days');
const requestedDays = daysArg >= 0 ? Number.parseInt(process.argv[daysArg + 1], 10) : NaN;
const DAYS = Number.isFinite(requestedDays) && requestedDays > 0 ? requestedDays : 30;
// === Log loading ===
/**
 * Load activity events from the `activity-*.jsonl` files in DEBUG_DIR.
 *
 * A file whose name carries a date (`activity-YYYY-MM-DD`) earlier than
 * `maxDays` days before now is skipped; a matching file without a recognizable
 * date is always read. Loading is best-effort: an unreadable directory yields
 * an empty list, and unreadable files or unparseable lines are skipped.
 *
 * Only JSON values that are objects are returned. Lines such as `null` or `42`
 * are valid JSON but are not events, and would make consumers that read
 * `evt.ts` throw.
 *
 * @param {number} maxDays - Look-back window in days. A value that does not
 *   convert to a finite number (e.g. `undefined`) falls back to 30.
 * @returns {Object[]} Parsed events, in file-name order then line order.
 */
function loadActivityLogs(maxDays) {
  let names;
  try {
    names = fs.readdirSync(DEBUG_DIR);
  } catch {
    return [];
  }

  const files = names
    .filter((name) => name.startsWith('activity-') && name.endsWith('.jsonl'))
    .sort()
    .map((name) => path.join(DEBUG_DIR, name));

  // Date-range cutoff, compared as a YYYY-MM-DD string against the file name.
  const numericDays = Number(maxDays);
  const windowDays = Number.isFinite(numericDays) ? numericDays : 30;
  const cutoff = new Date();
  cutoff.setDate(cutoff.getDate() - windowDays);
  // An out-of-range window makes the Date invalid and toISOString() would
  // throw; in that case apply no cutoff (every string is >= '').
  const cutoffStr = Number.isNaN(cutoff.getTime()) ? '' : cutoff.toISOString().slice(0, 10);

  const events = [];
  for (const file of files) {
    const dateMatch = path.basename(file).match(/activity-(\d{4}-\d{2}-\d{2})/);
    if (dateMatch && dateMatch[1] < cutoffStr) continue;

    let content;
    try {
      content = fs.readFileSync(file, 'utf8');
    } catch {
      continue; // best-effort: skip files that cannot be read
    }

    for (const line of content.trim().split('\n')) {
      let parsed;
      try {
        parsed = JSON.parse(line);
      } catch {
        continue; // best-effort: skip lines that are not valid JSON
      }
      if (parsed !== null && typeof parsed === 'object') {
        events.push(parsed);
      }
    }
  }
  return events;
}
// === Baseline statistics ===
/**
 * Build the behavior baseline from raw activity events.
 *
 * Events are grouped per day using the first 10 characters of `evt.ts`. For
 * each day the function counts total / bash / write / skill events, the number
 * of distinct skill `detail` values, and the rounded mean length of bash
 * `detail` strings (days without any bash detail contribute no sample to that
 * series). Each per-day series is then summarized (mean, population standard
 * deviation, min, max, n) and turned into 3-sigma thresholds, IQR fences
 * (computeIQR) and EWMA thresholds (computeDynamicThresholds).
 *
 * Malformed records (non-objects, or a `ts` that is not a non-empty string)
 * are ignored instead of throwing.
 *
 * @param {Object[]} events - Activity events (`ts`, `event`, `detail`).
 * @returns {Object|null} Baseline `{ generated, dataRange, metrics,
 *   thresholds, iqr, dynamicThresholds }`, or null when no event carries a
 *   usable timestamp.
 */
function computeBaseline(events) {
  // A Map rather than a plain object: a garbled `ts` beginning with
  // "__proto__" must not resolve to Object.prototype and get counters written
  // onto it.
  const dailyStats = new Map();

  for (const evt of events) {
    if (evt === null || typeof evt !== 'object' || typeof evt.ts !== 'string') continue;
    const date = evt.ts.slice(0, 10);
    if (!date) continue;

    let day = dailyStats.get(date);
    if (!day) {
      day = {
        totalEvents: 0,
        bashEvents: 0,
        writeEvents: 0,
        skillEvents: 0,
        bashCmdLengths: [],
        uniqueSkills: new Set(),
      };
      dailyStats.set(date, day);
    }

    day.totalEvents++;
    switch (evt.event) {
      case 'bash':
        day.bashEvents++;
        // Only string details have a meaningful command length; anything else
        // would feed `undefined` into the average and turn it into NaN.
        if (typeof evt.detail === 'string' && evt.detail) {
          day.bashCmdLengths.push(evt.detail.length);
        }
        break;
      case 'write':
        day.writeEvents++;
        break;
      case 'skill':
        day.skillEvents++;
        if (evt.detail) day.uniqueSkills.add(evt.detail);
        break;
      default:
        break;
    }
  }

  const dates = [...dailyStats.keys()].sort();
  if (dates.length === 0) return null;

  // One sample per day, in chronological order (the EWMA relies on the order).
  const metrics = {
    totalEventsPerDay: [],
    bashEventsPerDay: [],
    writeEventsPerDay: [],
    skillEventsPerDay: [],
    avgBashCmdLength: [],
    uniqueSkillsPerDay: [],
  };
  for (const date of dates) {
    const day = dailyStats.get(date);
    metrics.totalEventsPerDay.push(day.totalEvents);
    metrics.bashEventsPerDay.push(day.bashEvents);
    metrics.writeEventsPerDay.push(day.writeEvents);
    metrics.skillEventsPerDay.push(day.skillEvents);
    metrics.uniqueSkillsPerDay.push(day.uniqueSkills.size);
    if (day.bashCmdLengths.length > 0) {
      const total = day.bashCmdLengths.reduce((a, b) => a + b, 0);
      metrics.avgBashCmdLength.push(Math.round(total / day.bashCmdLengths.length));
    }
  }

  const round2 = (x) => Math.round(x * 100) / 100;

  // Mean and population standard deviation (divides by n), rounded to 2 decimals.
  const summarize = (arr) => {
    if (arr.length === 0) return { mean: 0, stddev: 0, min: 0, max: 0, n: 0 };
    const n = arr.length;
    const mean = arr.reduce((a, b) => a + b, 0) / n;
    const variance = arr.reduce((sum, v) => sum + (v - mean) ** 2, 0) / n;
    return {
      mean: round2(mean),
      stddev: round2(Math.sqrt(variance)),
      min: Math.min(...arr),
      max: Math.max(...arr),
      n,
    };
  };

  const baseline = {
    generated: new Date().toISOString(),
    dataRange: { from: dates[0], to: dates[dates.length - 1], days: dates.length },
    metrics: {
      totalEventsPerDay: summarize(metrics.totalEventsPerDay),
      bashEventsPerDay: summarize(metrics.bashEventsPerDay),
      writeEventsPerDay: summarize(metrics.writeEventsPerDay),
      skillEventsPerDay: summarize(metrics.skillEventsPerDay),
      avgBashCmdLength: summarize(metrics.avgBashCmdLength),
      uniqueSkillsPerDay: summarize(metrics.uniqueSkillsPerDay),
    },
    // 3-sigma upper/lower bounds, filled in below
    thresholds: {},
  };

  // 3-sigma thresholds from the (already rounded) summary; lower bound clamped at 0.
  for (const [key, s] of Object.entries(baseline.metrics)) {
    baseline.thresholds[key] = {
      upper: round2(s.mean + 3 * s.stddev),
      lower: Math.max(0, round2(s.mean - 3 * s.stddev)),
    };
  }

  // v5.1: IQR statistics and EWMA-based dynamic thresholds, per raw series.
  // A metric is omitted when its helper returns null.
  baseline.iqr = {};
  baseline.dynamicThresholds = {};
  for (const [key, values] of Object.entries(metrics)) {
    const iqrResult = computeIQR(values);
    if (iqrResult) baseline.iqr[key] = iqrResult;
    const dt = computeDynamicThresholds(values);
    if (dt) baseline.dynamicThresholds[key] = dt;
  }

  return baseline;
}
// === IQR computation (v5.1) ===
/**
 * Quartiles and 1.5 * IQR outlier fences for a numeric sample.
 *
 * Quartiles are picked by index from an ascending copy of the input
 * (positions floor(n * 0.25) and floor(n * 0.75)); no interpolation is done.
 * The input array is not modified.
 *
 * @param {number[]} arr - Sample values.
 * @returns {{ Q1: number, Q3: number, IQR: number, lowerFence: number, upperFence: number }|null}
 *   null when fewer than 4 values are supplied.
 */
function computeIQR(arr) {
  const count = arr.length;
  if (count < 4) {
    return null;
  }

  const ascending = Array.from(arr);
  ascending.sort((x, y) => x - y);
  const valueAt = (fraction) => ascending[Math.floor(count * fraction)];

  const Q1 = valueAt(0.25);
  const Q3 = valueAt(0.75);
  const IQR = Q3 - Q1;
  const reach = 1.5 * IQR;

  return { Q1, Q3, IQR, lowerFence: Q1 - reach, upperFence: Q3 + reach };
}
// === Hybrid anomaly scoring (v5.1) ===
/**
 * Cross-check a value with both a z-score test and an IQR fence test.
 *
 * Both tests firing = CRITICAL, exactly one = WARNING, none = OK.
 * The z-score is treated as 0 when `stats.stddev` is not greater than 0, and
 * the IQR test is skipped when `iqr` is falsy.
 *
 * @param {number} value - Current value.
 * @param {Object} stats - `{ mean, stddev }`.
 * @param {Object} iqr - Result of computeIQR (`lowerFence` / `upperFence`), or a falsy value.
 * @returns {{ severity: string, zAnomaly: boolean, iqrAnomaly: boolean, zScore: number }}
 *   `zScore` is the absolute z-score rounded to 2 decimals.
 */
function hybridAnomalyScore(value, stats, iqr) {
  // Z-score test (3-sigma)
  let deviation = 0;
  if (stats.stddev > 0) {
    deviation = Math.abs(value - stats.mean) / stats.stddev;
  }
  const zAnomaly = deviation > 3;

  // IQR fence test
  const iqrAnomaly = iqr ? (value < iqr.lowerFence || value > iqr.upperFence) : false;

  // Number of detectors that fired selects the severity label.
  const firedCount = Number(zAnomaly) + Number(iqrAnomaly);
  const severity = ['OK', 'WARNING', 'CRITICAL'][firedCount];

  return {
    severity,
    zAnomaly,
    iqrAnomaly,
    zScore: Math.round(deviation * 100) / 100,
  };
}
// === Dynamic thresholds via EWMA (v5.1) ===
/**
 * Derive dynamic thresholds from an exponentially weighted moving average.
 *
 * The first value seeds the average with zero variance; every later value
 * updates the average and an exponentially weighted variance of its deviation
 * from the previous average. Thresholds are average +/- 3 standard deviations,
 * rounded to 2 decimals, with the lower bound clamped at 0.
 *
 * @param {number[]} dailyValues - Daily values in chronological order.
 * @param {number} alpha - Smoothing factor (0~1, default 0.3).
 * @returns {{ ewma: number, ewmaStd: number, upper: number, lower: number }|null}
 *   null when `dailyValues` is empty.
 */
function computeDynamicThresholds(dailyValues, alpha = 0.3) {
  if (dailyValues.length === 0) {
    return null;
  }

  const round2 = (x) => Math.round(x * 100) / 100;
  const [seed, ...later] = dailyValues;

  const { level, variance } = later.reduce(
    (state, sample) => {
      const diff = sample - state.level;
      return {
        level: alpha * sample + (1 - alpha) * state.level,
        variance: alpha * diff * diff + (1 - alpha) * state.variance,
      };
    },
    { level: seed, variance: 0 }
  );

  const spread = Math.sqrt(variance);
  return {
    ewma: round2(level),
    ewmaStd: round2(spread),
    upper: round2(level + 3 * spread),
    lower: Math.max(0, round2(level - 3 * spread)),
  };
}
// === Anomaly check ===
/**
 * Compare today's activity log with a baseline's 3-sigma thresholds.
 *
 * Reads `activity-<today>.jsonl` from DEBUG_DIR, where <today> is the current
 * UTC date as produced by `toISOString()`. Lines that are not valid JSON, or
 * that parse to a non-object, are skipped. Today's counters follow the same
 * definitions computeBaseline uses: distinct skills ignore events without a
 * `detail`, and the bash command length is averaged over string details only.
 *
 * A metric is skipped when the baseline has no threshold or summary for it, or
 * fewer than 2 samples. The average bash command length is also skipped when
 * today has no bash command at all: the baseline series leaves such days out,
 * so reporting 0 as "below the lower bound" would be a false positive.
 *
 * @param {Object} baseline - Baseline with `thresholds` and `metrics` keyed by metric name.
 * @returns {{ date: string, status: string, todayStats?: Object, anomalies: Object[] }}
 *   `status` is 'no_data' (no file for today), 'read_error', or 'checked'.
 */
function checkAnomalies(baseline) {
  const today = new Date().toISOString().slice(0, 10);
  const todayFile = path.join(DEBUG_DIR, `activity-${today}.jsonl`);
  if (!fs.existsSync(todayFile)) {
    return { date: today, status: 'no_data', anomalies: [] };
  }

  let raw;
  try {
    raw = fs.readFileSync(todayFile, 'utf8');
  } catch {
    return { date: today, status: 'read_error', anomalies: [] };
  }

  const events = [];
  for (const line of raw.trim().split('\n')) {
    let parsed;
    try {
      parsed = JSON.parse(line);
    } catch {
      continue; // best-effort: skip lines that are not valid JSON
    }
    if (parsed !== null && typeof parsed === 'object') {
      events.push(parsed);
    }
  }

  // Today's counters, gathered in a single pass.
  let bashEvents = 0;
  let writeEvents = 0;
  let skillEvents = 0;
  let bashCmdCount = 0;
  let bashCmdChars = 0;
  const skills = new Set();
  for (const evt of events) {
    switch (evt.event) {
      case 'bash':
        bashEvents++;
        if (typeof evt.detail === 'string' && evt.detail) {
          bashCmdCount++;
          bashCmdChars += evt.detail.length;
        }
        break;
      case 'write':
        writeEvents++;
        break;
      case 'skill':
        skillEvents++;
        if (evt.detail) skills.add(evt.detail);
        break;
      default:
        break;
    }
  }

  const todayStats = {
    totalEvents: events.length,
    bashEvents,
    writeEvents,
    skillEvents,
    uniqueSkills: skills.size,
    avgBashCmdLength: bashCmdCount > 0 ? Math.round(bashCmdChars / bashCmdCount) : 0,
  };

  const checks = [
    ['totalEventsPerDay', todayStats.totalEvents],
    ['bashEventsPerDay', todayStats.bashEvents],
    ['writeEventsPerDay', todayStats.writeEvents],
    ['skillEventsPerDay', todayStats.skillEvents],
    ['uniqueSkillsPerDay', todayStats.uniqueSkills],
  ];
  if (bashCmdCount > 0) {
    // Keep the original report order: command length comes before unique skills.
    checks.splice(4, 0, ['avgBashCmdLength', todayStats.avgBashCmdLength]);
  }

  // Tolerate a baseline that lacks either section instead of throwing.
  const allThresholds = (baseline && baseline.thresholds) || {};
  const allMetrics = (baseline && baseline.metrics) || {};

  const anomalies = [];
  for (const [metric, value] of checks) {
    const threshold = allThresholds[metric];
    const stats = allMetrics[metric];
    if (!threshold || !stats || stats.n < 2) continue;

    if (value > threshold.upper) {
      anomalies.push({
        metric,
        value,
        expected: `${stats.mean} +/- ${stats.stddev}`,
        threshold: threshold.upper,
        // Beyond 5 sigma is escalated from WARNING to CRITICAL.
        severity: value > stats.mean + 5 * stats.stddev ? 'CRITICAL' : 'WARNING',
        message: `${metric} = ${value} 超过上界 ${threshold.upper} (均值 ${stats.mean})`,
      });
    }
    // A lower bound of 0 can never be undercut meaningfully, so it is ignored.
    if (value < threshold.lower && threshold.lower > 0) {
      anomalies.push({
        metric,
        value,
        expected: `${stats.mean} +/- ${stats.stddev}`,
        threshold: threshold.lower,
        severity: 'INFO',
        message: `${metric} = ${value} 低于下界 ${threshold.lower}`,
      });
    }
  }

  return { date: today, status: 'checked', todayStats, anomalies };
}
// === Main flow ===
/**
 * CLI entry point.
 *
 * Loads the activity logs of the last DAYS days, computes the baseline and
 * writes it to BASELINE_FILE. It then either prints a baseline summary
 * (default) or, with --check, compares today's events against the baseline.
 * With --json every outcome, including the "no data" and "insufficient data"
 * cases, is emitted as JSON so that consumers can always parse stdout.
 *
 * In --check mode the reference baseline is computed from the loaded events
 * with today's events left out, so the day under test does not shift its own
 * thresholds. If no earlier day is available, the full baseline is used.
 * NOTE(review): "today" is the UTC date from toISOString(), matching how
 * checkAnomalies picks the log file; confirm event `ts` values are UTC too.
 *
 * @returns {void}
 */
function main() {
  const events = loadActivityLogs(DAYS);
  if (events.length === 0) {
    if (JSON_MODE) {
      console.log(JSON.stringify({ error: 'no_data', message: '无活动日志可分析' }));
    } else {
      console.log('无活动日志可分析。请先产生一些操作记录。');
    }
    return;
  }

  const baseline = computeBaseline(events);
  if (!baseline) {
    if (JSON_MODE) {
      console.log(JSON.stringify({ error: 'insufficient_data', message: '数据不足以生成基线' }));
    } else {
      console.log('数据不足以生成基线。');
    }
    return;
  }

  // Persist the baseline
  fs.writeFileSync(BASELINE_FILE, JSON.stringify(baseline, null, 2) + '\n');

  if (CHECK_MODE) {
    const today = new Date().toISOString().slice(0, 10);
    const history = events.filter(
      (evt) => !(evt && typeof evt.ts === 'string' && evt.ts.slice(0, 10) === today)
    );
    const reference = computeBaseline(history) || baseline;
    const result = checkAnomalies(reference);

    if (JSON_MODE) {
      console.log(JSON.stringify({ baseline: reference.dataRange, check: result }, null, 2));
      return;
    }

    console.log('=== 行为基线异常检测 ===');
    console.log(`基线范围: ${reference.dataRange.from} ~ ${reference.dataRange.to} (${reference.dataRange.days} 天)`);
    console.log(`检查日期: ${result.date}`);
    console.log('');

    if (result.todayStats) {
      console.log('当日统计:');
      console.log(` 总事件: ${result.todayStats.totalEvents}`);
      console.log(` Bash: ${result.todayStats.bashEvents} Write: ${result.todayStats.writeEvents} Skill: ${result.todayStats.skillEvents}`);
      console.log(` Bash 命令平均长度: ${result.todayStats.avgBashCmdLength} 字符`);
      console.log('');
    }

    // Nothing was actually checked in these two cases, so do not report "normal".
    if (result.status === 'no_data') {
      console.log('检测结果: 当日无活动日志, 未执行检查');
    } else if (result.status === 'read_error') {
      console.log('检测结果: 当日日志读取失败, 未执行检查');
    } else if (result.anomalies.length === 0) {
      console.log('检测结果: 正常 (无异常)');
    } else {
      console.log(`检测结果: ${result.anomalies.length} 个异常`);
      for (const a of result.anomalies) {
        console.log(` [${a.severity}] ${a.message}`);
      }
    }
    return;
  }

  // Default: show the baseline summary
  if (JSON_MODE) {
    console.log(JSON.stringify(baseline, null, 2));
    return;
  }

  console.log('=== 行为基线已生成 ===');
  console.log(`数据范围: ${baseline.dataRange.from} ~ ${baseline.dataRange.to} (${baseline.dataRange.days} 天, ${events.length} 事件)`);
  console.log('');
  console.log('指标统计:');
  for (const [key, s] of Object.entries(baseline.metrics)) {
    const t = baseline.thresholds[key];
    console.log(` ${key.padEnd(25)} mean=${String(s.mean).padStart(7)} std=${String(s.stddev).padStart(7)} range=[${t.lower}, ${t.upper}]`);
  }
  console.log('');
  console.log(`基线文件: ${BASELINE_FILE}`);
  console.log('运行 --check 检查当日异常。');
}
// Module exports (for use by tests).
// Guarded so the assignment is skipped when no `module` object exists.
if (typeof module !== 'undefined') {
module.exports = {
loadActivityLogs,
computeBaseline,
checkAnomalies,
computeIQR,
hybridAnomalyScore,
computeDynamicThresholds,
main,
};
}
// Run the CLI only when this file is executed directly, not when it is
// loaded through require() by another module.
if (require.main === module) {
main();
}