#!/usr/bin/env node
/**
 * Behavior baseline anomaly detection engine.
 *
 * Learns normal-behavior statistics from activity-*.jsonl and generates baseline.json.
 * --check mode: runs anomaly detection on the current day's events (3-sigma rule).
 *
 * Usage:
 *   node scripts/behavior-baseline.js            # generate/update the baseline
 *   node scripts/behavior-baseline.js --check    # check today's anomalies
 *   node scripts/behavior-baseline.js --json     # JSON output
 *   node scripts/behavior-baseline.js --days 30  # use the most recent N days of data
 */

const fs = require('fs');
const path = require('path');

// Reads the root directory from the project-local path configuration module.
const detectClaudeRoot = () => require('./paths.config.js').PATHS.root;

const CLAUDE_ROOT = detectClaudeRoot();
// Directory that is scanned for activity-*.jsonl logs.
const DEBUG_DIR = path.join(CLAUDE_ROOT, 'debug');
// Destination of the generated baseline.
const BASELINE_FILE = path.join(CLAUDE_ROOT, 'debug', 'baseline.json');

// Command-line flags.
const CHECK_MODE = process.argv.includes('--check');
const JSON_MODE = process.argv.includes('--json');
const daysArg = process.argv.indexOf('--days');
// Parse as base 10 and accept only positive values. A missing, non-numeric,
// zero or negative argument falls back to 30 days; a negative window would
// move the cutoff into the future and silently discard every log file.
const parsedDays = daysArg >= 0 ? Number.parseInt(process.argv[daysArg + 1], 10) : NaN;
const DAYS = parsedDays > 0 ? parsedDays : 30;

// === Log loading ===
/**
 * Loads events from the activity-*.jsonl files found in DEBUG_DIR.
 *
 * Files whose name carries a YYYY-MM-DD date older than `maxDays` days are
 * skipped; files without a recognizable date in their name are always read.
 * Each line is parsed as JSON. Lines that fail to parse, and lines that parse
 * to something other than a plain object (e.g. `null`, numbers, arrays), are
 * dropped so that callers can safely read properties on every event.
 *
 * @param {number} maxDays - Size of the look-back window in days.
 * @returns {Object[]} Parsed events in file-name order; empty when the
 *   directory cannot be read.
 */
function loadActivityLogs(maxDays) {
  let entries;
  try {
    entries = fs.readdirSync(DEBUG_DIR);
  } catch {
    // Best effort: a missing or unreadable log directory means "no data".
    return [];
  }

  const files = entries
    .filter((name) => name.startsWith('activity-') && name.endsWith('.jsonl'))
    .sort()
    .map((name) => path.join(DEBUG_DIR, name));

  // Oldest date (inclusive) that is still part of the window.
  const cutoff = new Date();
  cutoff.setDate(cutoff.getDate() - maxDays);
  const cutoffStr = cutoff.toISOString().slice(0, 10);

  const events = [];
  for (const file of files) {
    const dateMatch = path.basename(file).match(/activity-(\d{4}-\d{2}-\d{2})/);
    // ISO dates compare correctly as plain strings.
    if (dateMatch && dateMatch[1] < cutoffStr) continue;

    let content;
    try {
      content = fs.readFileSync(file, 'utf8');
    } catch {
      // Best effort: an unreadable file is skipped, the others still count.
      continue;
    }

    for (const line of content.trim().split('\n')) {
      let parsed;
      try {
        parsed = JSON.parse(line);
      } catch {
        // Blank or corrupt line: ignore it.
        continue;
      }
      // `null`, numbers, strings and arrays are valid JSON but not events.
      if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) {
        events.push(parsed);
      }
    }
  }

  return events;
}

// === Baseline statistics ===
/**
 * Builds the behavior baseline from a list of activity events.
 *
 * Events are grouped by the first 10 characters of `evt.ts` (the date part),
 * per-day metrics are collected, and for every metric the mean, population
 * standard deviation, min, max and sample count are computed together with
 * 3-sigma thresholds, IQR statistics (computeIQR) and EWMA based dynamic
 * thresholds (computeDynamicThresholds).
 *
 * Events that are not objects, or whose `ts` is missing or not a string, are
 * skipped instead of raising a TypeError.
 *
 * @param {Object[]} events - Events with optional `ts`, `event`, `tool`, `detail`.
 * @returns {Object|null} The baseline, or null when no event carries a usable date.
 */
function computeBaseline(events) {
  // Prototype-less dictionaries: keys come from log data, so names such as
  // "__proto__" or "constructor" must not reach Object.prototype.
  const dailyStats = Object.create(null);

  for (const evt of events) {
    if (evt === null || typeof evt !== 'object' || typeof evt.ts !== 'string') continue;

    const date = evt.ts.slice(0, 10);
    if (!date) continue;

    if (!dailyStats[date]) {
      dailyStats[date] = {
        totalEvents: 0,
        byEvent: Object.create(null),
        byTool: Object.create(null),
        bashCmdLengths: [],
        uniqueSkills: new Set(),
        hourDistribution: new Array(24).fill(0),
      };
    }

    const day = dailyStats[date];
    day.totalEvents++;

    // Per event type.
    const evtType = evt.event || 'unknown';
    day.byEvent[evtType] = (day.byEvent[evtType] || 0) + 1;

    // Per tool.
    const tool = evt.tool || 'unknown';
    day.byTool[tool] = (day.byTool[tool] || 0) + 1;

    // Bash command length.
    if (evtType === 'bash' && evt.detail) {
      day.bashCmdLengths.push(evt.detail.length);
    }

    // Skill tracking.
    if (evtType === 'skill' && evt.detail) {
      day.uniqueSkills.add(evt.detail);
    }

    // Hour-of-day distribution; characters 11-12 of the timestamp are read as
    // the hour. Values outside 0..23 (or NaN) would write past the 24 slots,
    // so they are ignored.
    const hour = Number.parseInt(evt.ts.slice(11, 13), 10);
    if (hour >= 0 && hour <= 23) {
      day.hourDistribution[hour]++;
    }
  }

  // Aggregate per-day values into one series per metric.
  const dates = Object.keys(dailyStats).sort();
  if (dates.length === 0) return null;

  const metrics = {
    totalEventsPerDay: [],
    bashEventsPerDay: [],
    writeEventsPerDay: [],
    skillEventsPerDay: [],
    avgBashCmdLength: [],
    uniqueSkillsPerDay: [],
  };

  for (const date of dates) {
    const day = dailyStats[date];
    metrics.totalEventsPerDay.push(day.totalEvents);
    metrics.bashEventsPerDay.push(day.byEvent.bash || 0);
    metrics.writeEventsPerDay.push(day.byEvent.write || 0);
    metrics.skillEventsPerDay.push(day.byEvent.skill || 0);
    metrics.uniqueSkillsPerDay.push(day.uniqueSkills.size);

    // Days without any bash command contribute no sample to this metric.
    if (day.bashCmdLengths.length > 0) {
      const avg = day.bashCmdLengths.reduce((a, b) => a + b, 0) / day.bashCmdLengths.length;
      metrics.avgBashCmdLength.push(Math.round(avg));
    }
  }

  const round2 = (x) => Math.round(x * 100) / 100;

  // Mean and population standard deviation, rounded to two decimals.
  function stats(arr) {
    if (arr.length === 0) return { mean: 0, stddev: 0, min: 0, max: 0, n: 0 };
    const n = arr.length;
    const mean = arr.reduce((a, b) => a + b, 0) / n;
    const variance = arr.reduce((sum, v) => sum + (v - mean) ** 2, 0) / n;
    const stddev = Math.sqrt(variance);
    return {
      mean: round2(mean),
      stddev: round2(stddev),
      min: Math.min(...arr),
      max: Math.max(...arr),
      n,
    };
  }

  const baseline = {
    generated: new Date().toISOString(),
    dataRange: { from: dates[0], to: dates[dates.length - 1], days: dates.length },
    metrics: {
      totalEventsPerDay: stats(metrics.totalEventsPerDay),
      bashEventsPerDay: stats(metrics.bashEventsPerDay),
      writeEventsPerDay: stats(metrics.writeEventsPerDay),
      skillEventsPerDay: stats(metrics.skillEventsPerDay),
      avgBashCmdLength: stats(metrics.avgBashCmdLength),
      uniqueSkillsPerDay: stats(metrics.uniqueSkillsPerDay),
    },
    // 3-sigma upper/lower bounds, filled in below.
    thresholds: {},
  };

  // 3-sigma thresholds; the lower bound is clamped at 0 because all metrics
  // are counts or lengths.
  for (const [key, s] of Object.entries(baseline.metrics)) {
    baseline.thresholds[key] = {
      upper: round2(s.mean + 3 * s.stddev),
      lower: Math.max(0, round2(s.mean - 3 * s.stddev)),
    };
  }

  // v5.1: IQR statistics (omitted for metrics where computeIQR returns null).
  baseline.iqr = {};
  for (const [key, values] of Object.entries(metrics)) {
    const iqrResult = computeIQR(values);
    if (iqrResult) baseline.iqr[key] = iqrResult;
  }

  // v5.1: dynamic thresholds (EWMA), omitted when the helper returns null.
  baseline.dynamicThresholds = {};
  for (const [key, values] of Object.entries(metrics)) {
    const dt = computeDynamicThresholds(values);
    if (dt) baseline.dynamicThresholds[key] = dt;
  }

  return baseline;
}

// === IQR computation (v5.1) ===
/**
 * Computes quartiles and the 1.5 * IQR outlier fences of a numeric series.
 *
 * Quartiles are picked by index (floor(n * 0.25) and floor(n * 0.75)) from an
 * ascending copy of the input; no interpolation is performed and the input is
 * left untouched.
 *
 * @param {number[]} arr - Sample values.
 * @returns {{ Q1: number, Q3: number, IQR: number, lowerFence: number, upperFence: number }|null}
 *   null when fewer than four samples are available.
 */
function computeIQR(arr) {
  const count = arr.length;
  if (count < 4) {
    return null;
  }

  const ascending = Array.from(arr).sort((x, y) => x - y);
  const valueAt = (fraction) => ascending[Math.floor(count * fraction)];

  const Q1 = valueAt(0.25);
  const Q3 = valueAt(0.75);
  const IQR = Q3 - Q1;
  const reach = 1.5 * IQR;

  return { Q1, Q3, IQR, lowerFence: Q1 - reach, upperFence: Q3 + reach };
}

// === Hybrid anomaly scoring (v5.1) ===
/**
 * Cross-checks a value with a z-score test and an IQR fence test.
 *
 * Both tests firing yields CRITICAL, exactly one yields WARNING, none yields OK.
 * The z-score is treated as 0 when the standard deviation is not positive, and
 * the IQR test is skipped when `iqr` is falsy.
 *
 * @param {number} value - Current value.
 * @param {Object} stats - { mean, stddev }
 * @param {Object} iqr - Result of computeIQR (may be null).
 * @returns {{ severity: string, zAnomaly: boolean, iqrAnomaly: boolean, zScore: number }}
 *   `zScore` is rounded to two decimals.
 */
function hybridAnomalyScore(value, stats, iqr) {
  // Z-score test (3-sigma).
  let zScore = 0;
  if (stats.stddev > 0) {
    zScore = Math.abs(value - stats.mean) / stats.stddev;
  }
  const zAnomaly = zScore > 3;

  // IQR fence test.
  const iqrAnomaly = Boolean(iqr) && (value < iqr.lowerFence || value > iqr.upperFence);

  // Number of detectors that fired selects the severity label.
  const SEVERITY_BY_HITS = ['OK', 'WARNING', 'CRITICAL'];
  const severity = SEVERITY_BY_HITS[Number(zAnomaly) + Number(iqrAnomaly)];

  return { severity, zAnomaly, iqrAnomaly, zScore: Math.round(zScore * 100) / 100 };
}

// === Dynamic thresholds via EWMA (v5.1) ===
/**
 * Derives dynamic thresholds from an exponentially weighted moving average.
 *
 * The first value seeds the average; every later value updates both the
 * average and an exponentially weighted variance (using the deviation from
 * the average before the update). Bounds are average +/- 3 standard
 * deviations, with the lower bound clamped at 0.
 *
 * @param {number[]} dailyValues - Daily values in chronological order.
 * @param {number} alpha - Smoothing factor (0~1, default 0.3).
 * @returns {{ ewma: number, ewmaStd: number, upper: number, lower: number }|null}
 *   Values rounded to two decimals; null for an empty series.
 */
function computeDynamicThresholds(dailyValues, alpha = 0.3) {
  if (dailyValues.length === 0) {
    return null;
  }

  const toTwoDecimals = (x) => Math.round(x * 100) / 100;
  const keep = 1 - alpha;

  let level = dailyValues[0];
  let spread = 0;
  for (const sample of dailyValues.slice(1)) {
    const deviation = sample - level;
    level = alpha * sample + keep * level;
    spread = alpha * deviation * deviation + keep * spread;
  }

  const deviationStd = Math.sqrt(spread);
  const band = 3 * deviationStd;

  return {
    ewma: toTwoDecimals(level),
    ewmaStd: toTwoDecimals(deviationStd),
    upper: toTwoDecimals(level + band),
    lower: Math.max(0, toTwoDecimals(level - band)),
  };
}

// === Anomaly check ===
/**
 * Compares today's activity (DEBUG_DIR/activity-<UTC date>.jsonl) with the
 * 3-sigma thresholds of a baseline.
 *
 * A metric is only checked when the baseline has thresholds for it and its
 * statistics rest on at least two samples. Values above the upper bound are
 * reported as WARNING, or CRITICAL beyond mean + 5 * stddev; values below a
 * positive lower bound are reported as INFO.
 *
 * Log lines that are not JSON objects are ignored, and the average bash
 * command length is only checked when today actually contains bash commands
 * (the 0 reported otherwise is a placeholder, not a measurement, and the
 * baseline for that metric is built only from days that had commands).
 *
 * @param {Object} baseline - Baseline with `metrics` and `thresholds` maps.
 * @returns {{ date: string, status: string, todayStats?: Object, anomalies: Object[] }}
 *   `status` is 'no_data', 'read_error' or 'checked'.
 */
function checkAnomalies(baseline) {
  const today = new Date().toISOString().slice(0, 10);
  const todayFile = path.join(DEBUG_DIR, `activity-${today}.jsonl`);

  if (!fs.existsSync(todayFile)) {
    return { date: today, status: 'no_data', anomalies: [] };
  }

  let content;
  try {
    content = fs.readFileSync(todayFile, 'utf8');
  } catch {
    return { date: today, status: 'read_error', anomalies: [] };
  }

  const events = [];
  for (const line of content.trim().split('\n')) {
    let parsed;
    try {
      parsed = JSON.parse(line);
    } catch {
      // Blank or corrupt line: ignore it.
      continue;
    }
    // `null`, numbers, strings and arrays are valid JSON but not events;
    // keeping them would make the property reads below throw.
    if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) {
      events.push(parsed);
    }
  }

  // Today's statistics.
  const countOf = (type) => events.filter((e) => e.event === type).length;
  const todayStats = {
    totalEvents: events.length,
    bashEvents: countOf('bash'),
    writeEvents: countOf('write'),
    skillEvents: countOf('skill'),
    uniqueSkills: new Set(events.filter((e) => e.event === 'skill').map((e) => e.detail)).size,
  };

  // Average bash command length (0 when there is nothing to average).
  const bashCmds = events.filter((e) => e.event === 'bash' && e.detail);
  todayStats.avgBashCmdLength = bashCmds.length > 0
    ? Math.round(bashCmds.reduce((s, e) => s + e.detail.length, 0) / bashCmds.length)
    : 0;

  // [metric name, today's value, whether the value is an actual measurement]
  const checks = [
    ['totalEventsPerDay', todayStats.totalEvents, true],
    ['bashEventsPerDay', todayStats.bashEvents, true],
    ['writeEventsPerDay', todayStats.writeEvents, true],
    ['skillEventsPerDay', todayStats.skillEvents, true],
    ['avgBashCmdLength', todayStats.avgBashCmdLength, bashCmds.length > 0],
    ['uniqueSkillsPerDay', todayStats.uniqueSkills, true],
  ];

  const anomalies = [];
  for (const [metric, value, measured] of checks) {
    if (!measured) continue;

    const threshold = baseline.thresholds[metric];
    const stats = baseline.metrics[metric];
    // A single sample has no meaningful spread, so such metrics are skipped.
    if (!threshold || !stats || stats.n < 2) continue;

    if (value > threshold.upper) {
      anomalies.push({
        metric,
        value,
        expected: `${stats.mean} +/- ${stats.stddev}`,
        threshold: threshold.upper,
        severity: value > stats.mean + 5 * stats.stddev ? 'CRITICAL' : 'WARNING',
        message: `${metric} = ${value} 超过上界 ${threshold.upper} (均值 ${stats.mean})`,
      });
    }
    // A lower bound of 0 cannot be undercut by these non-negative metrics.
    if (value < threshold.lower && threshold.lower > 0) {
      anomalies.push({
        metric,
        value,
        expected: `${stats.mean} +/- ${stats.stddev}`,
        threshold: threshold.lower,
        severity: 'INFO',
        message: `${metric} = ${value} 低于下界 ${threshold.lower}`,
      });
    }
  }

  return { date: today, status: 'checked', todayStats, anomalies };
}

// === Main flow ===
/**
 * CLI entry point: loads the activity logs of the last DAYS days, computes
 * the baseline, writes it to BASELINE_FILE and prints either the anomaly
 * check for today (--check) or a summary of the baseline. With --json every
 * outcome, including the "not enough data" case, is printed as JSON so that
 * consumers never have to parse free text.
 */
function main() {
  const events = loadActivityLogs(DAYS);

  if (events.length === 0) {
    if (JSON_MODE) {
      console.log(JSON.stringify({ error: 'no_data', message: '无活动日志可分析' }));
    } else {
      console.log('无活动日志可分析。请先产生一些操作记录。');
    }
    return;
  }

  const baseline = computeBaseline(events);
  if (!baseline) {
    // Events exist but none carries a usable timestamp.
    if (JSON_MODE) {
      console.log(JSON.stringify({ error: 'insufficient_data', message: '数据不足以生成基线。' }));
    } else {
      console.log('数据不足以生成基线。');
    }
    return;
  }

  // Persist the baseline.
  fs.writeFileSync(BASELINE_FILE, JSON.stringify(baseline, null, 2) + '\n');

  if (CHECK_MODE) {
    // NOTE(review): the baseline above is computed from every loaded event,
    // which appears to include today's log; today is therefore compared
    // against statistics it contributed to. Confirm whether that is intended.
    const result = checkAnomalies(baseline);

    if (JSON_MODE) {
      console.log(JSON.stringify({ baseline: baseline.dataRange, check: result }, null, 2));
      return;
    }

    console.log('=== 行为基线异常检测 ===');
    console.log(`基线范围: ${baseline.dataRange.from} ~ ${baseline.dataRange.to} (${baseline.dataRange.days} 天)`);
    console.log(`检查日期: ${result.date}`);
    console.log('');

    // todayStats is absent when today's log is missing or unreadable.
    if (result.todayStats) {
      console.log('当日统计:');
      console.log(` 总事件: ${result.todayStats.totalEvents}`);
      console.log(` Bash: ${result.todayStats.bashEvents} Write: ${result.todayStats.writeEvents} Skill: ${result.todayStats.skillEvents}`);
      console.log(` Bash 命令平均长度: ${result.todayStats.avgBashCmdLength} 字符`);
      console.log('');
    }

    if (result.anomalies.length === 0) {
      console.log('检测结果: 正常 (无异常)');
    } else {
      console.log(`检测结果: ${result.anomalies.length} 个异常`);
      for (const a of result.anomalies) {
        console.log(` [${a.severity}] ${a.message}`);
      }
    }
    return;
  }

  // Default: show a summary of the baseline.
  if (JSON_MODE) {
    console.log(JSON.stringify(baseline, null, 2));
    return;
  }

  console.log('=== 行为基线已生成 ===');
  console.log(`数据范围: ${baseline.dataRange.from} ~ ${baseline.dataRange.to} (${baseline.dataRange.days} 天, ${events.length} 事件)`);
  console.log('');
  console.log('指标统计:');
  for (const [key, s] of Object.entries(baseline.metrics)) {
    const t = baseline.thresholds[key];
    console.log(` ${key.padEnd(25)} mean=${String(s.mean).padStart(7)} std=${String(s.stddev).padStart(7)} range=[${t.lower}, ${t.upper}]`);
  }
  console.log('');
  console.log(`基线文件: ${BASELINE_FILE}`);
  console.log('运行 --check 检查当日异常。');
}

// Module exports (used by tests). The guard keeps the file loadable where no
// CommonJS `module` object exists.
if (typeof module !== 'undefined') {
  const publicApi = {
    loadActivityLogs,
    computeBaseline,
    checkAnomalies,
    computeIQR,
    hybridAnomalyScore,
    computeDynamicThresholds,
    main,
  };
  module.exports = publicApi;
}

// Run the CLI only when this file is executed directly, not when it is required.
if (require.main === module) {
  main();
}