bookworm-smart-assistant/scripts/quality-analyzer.js

459 lines
15 KiB
JavaScript
Raw Normal View History

#!/usr/bin/env node
/**
* 质量评分引擎 (v5.3+)
* 分析 Skill / Agent / MCP 的生产质量输出评分报告
*
* 数据源:
* - activity-*.jsonl (工具调用事件)
* - compliance-*.jsonl (合规审计)
* - route-feedback.jsonl (路由反馈)
* - session-memory.json (会话技能追踪)
*
* 质量维度:
* Q1 使用频率 被调用次数 (热度)
* Q2 成功率 success=true 占比
* Q3 路由命中 被路由推荐 vs 被纠正的比率
* Q4 用户留存 同会话内是否快速切换走 (满意度代理)
* Q5 合规率 合规校验通过率
*
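* Usage:
* node quality-analyzer.js            # print a human-readable quality report to stdout
* node quality-analyzer.js --json     # print the report as JSON
* node quality-analyzer.js --save     # save the report to debug/quality-report.json (the JSON is also printed)
* node quality-analyzer.js --days=7   # only read dated log files from the last N days (default: 30)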
*/
const fs = require('fs');
const path = require('path');
// Path configuration.
// The root directory is read from `PATHS.root` exported by the sibling ./paths.config.js module.
const detectClaudeRoot = () => require('./paths.config.js').PATHS.root;
// Resolved once, when this module is loaded.
const CLAUDE_ROOT = detectClaudeRoot();
// Directory this script reads the *.jsonl logs and session-memory.json from,
// and writes quality-report.json to.
const DEBUG_DIR = path.join(CLAUDE_ROOT, 'debug');
// === Data loading helpers ===
/**
 * Read a JSON Lines file and return its parsed records.
 *
 * Best-effort by design: a missing file, an unreadable path, or any other
 * I/O error yields an empty array; lines that are blank or not valid JSON
 * are skipped, as are lines whose parsed value is falsy (`null`, `0`,
 * `false`, `""`).
 *
 * @param {string} filePath - Path of the .jsonl file.
 * @returns {Array<*>} Parsed records in file order.
 */
function parseJsonl(filePath) {
  const records = [];
  try {
    if (!fs.existsSync(filePath)) {
      return records;
    }
    const rawText = fs.readFileSync(filePath, 'utf8');
    for (const row of rawText.trim().split('\n')) {
      if (row === '') {
        continue;
      }
      let parsed;
      try {
        parsed = JSON.parse(row);
      } catch {
        // A corrupt line must not invalidate the rest of the file.
        parsed = null;
      }
      if (parsed) {
        records.push(parsed);
      }
    }
    return records;
  } catch {
    return [];
  }
}
/**
 * Load and concatenate every `<prefix>*.jsonl` file found in DEBUG_DIR.
 *
 * Files are processed in lexicographic name order. When a file name contains
 * a `YYYY-MM-DD` date, the file is skipped if that date is more than `days`
 * days in the past; files without a date in their name are always included.
 *
 * Best-effort: if the directory cannot be listed, whatever has been
 * collected so far (possibly nothing) is returned.
 *
 * @param {string} prefix - File-name prefix to match, e.g. `'activity-'`.
 * @param {number} [days=30] - Maximum age, in days, of dated files to include.
 * @returns {Array<*>} All parsed records from the matching files.
 */
function loadAllJsonl(prefix, days = 30) {
  const results = [];
  const now = Date.now();
  // Loop-invariant age limit in milliseconds.
  const maxAgeMs = days * 24 * 60 * 60 * 1000;
  try {
    const files = fs.readdirSync(DEBUG_DIR)
      .filter(f => f.startsWith(prefix) && f.endsWith('.jsonl'))
      .sort();
    for (const file of files) {
      const dateMatch = file.match(/(\d{4}-\d{2}-\d{2})/);
      if (dateMatch) {
        const fileDate = new Date(dateMatch[1]).getTime();
        if (now - fileDate > maxAgeMs) continue;
      }
      // Append one record at a time. `results.push(...records)` passes every
      // record as a call argument, which throws RangeError on large logs; the
      // catch below would then silently drop that file and all later ones.
      for (const record of parseJsonl(path.join(DEBUG_DIR, file))) {
        results.push(record);
      }
    }
  } catch {
    // Deliberately best-effort: a missing or unreadable debug directory
    // simply produces no data.
  }
  return results;
}
/**
 * Load `session-memory.json` from DEBUG_DIR.
 *
 * @returns {*} The parsed JSON content, or `null` when the file is missing,
 *   unreadable, or not valid JSON.
 */
function loadSessionMemory() {
  try {
    const memoryPath = path.join(DEBUG_DIR, 'session-memory.json');
    return fs.existsSync(memoryPath)
      ? JSON.parse(fs.readFileSync(memoryPath, 'utf8'))
      : null;
  } catch {
    return null;
  }
}
// === Quality score calculation ===
/**
 * Q1: usage-frequency score, i.e. `count` expressed as a rounded percentage
 * of `maxCount`.
 *
 * @param {number} count - Call count of the component being scored.
 * @param {number} maxCount - Call count used as the 100% reference.
 * @returns {number} Rounded percentage; 0 when `maxCount` is 0.
 */
function scoreUsageFrequency(count, maxCount) {
  return maxCount === 0 ? 0 : Math.round((count / maxCount) * 100);
}
/**
 * Q2: success-rate score, i.e. `successes` as a rounded percentage of `total`.
 *
 * @param {number} successes - Number of successful outcomes.
 * @param {number} total - Number of outcomes with a known result.
 * @returns {number|null} Rounded percentage, or `null` when `total` is 0
 *   (no data).
 */
function scoreSuccessRate(successes, total) {
  if (total !== 0) {
    return Math.round((successes / total) * 100);
  }
  return null;
}
/**
 * Q3: route hit rate, i.e. the share of routing decisions that were kept
 * (recommended and not corrected away).
 *
 * @param {number} recommended - Times the route was kept.
 * @param {number} correctedAway - Times the route was corrected to another target.
 * @returns {number|null} Rounded percentage, or `null` when both counts sum to 0.
 */
function scoreRouteHitRate(recommended, correctedAway) {
  const decisions = recommended + correctedAway;
  return decisions === 0 ? null : Math.round((recommended / decisions) * 100);
}
/**
 * Q4: retention score, i.e. how rarely a skill is switched away from.
 *
 * For every session in which the skill has a non-zero entry in
 * `skillCounts`, the use count is accumulated together with the counts of
 * all `pairs` entries whose key starts with `<skillName>→`.
 * Retention is `1 - switchedAway / used`, floored at 0.
 *
 * @param {string} skillName - Skill to score.
 * @param {object|null} sessionData - Session-memory content; only
 *   `sessions[*].skillCounts` and `sessions[*].pairs` are read.
 * @returns {number|null} Rounded percentage, or `null` when there is no
 *   session data or the skill was never used.
 */
function scoreRetention(skillName, sessionData) {
  if (!(sessionData && sessionData.sessions)) {
    return null;
  }
  const departurePrefix = skillName + '→';
  let uses = 0;
  let departures = 0;
  Object.values(sessionData.sessions).forEach((session) => {
    const usesInSession = (session.skillCounts || {})[skillName] || 0;
    if (usesInSession === 0) {
      // Sessions that never used the skill contribute nothing, not even pairs.
      return;
    }
    uses += usesInSession;
    const transitions = session.pairs || {};
    Object.keys(transitions).forEach((transition) => {
      if (transition.startsWith(departurePrefix)) {
        departures += transitions[transition];
      }
    });
  });
  if (uses === 0) {
    return null;
  }
  const keptShare = Math.max(0, 1 - (departures / uses));
  return Math.round(keptShare * 100);
}
/**
 * Q5: compliance rate, i.e. the share of compliance checks that passed.
 *
 * @param {number} passed - Number of passed checks.
 * @param {number} blocked - Number of blocked checks.
 * @returns {number|null} Rounded percentage, or `null` when both counts sum to 0.
 */
function scoreComplianceRate(passed, blocked) {
  const checks = passed + blocked;
  if (checks === 0) {
    return null;
  }
  const passedShare = passed / checks;
  return Math.round(passedShare * 100);
}
/**
 * Overall quality score: weighted average of the available dimension scores.
 *
 * Dimensions whose score is `null` or `undefined` are left out and the
 * remaining weights are renormalised, so missing data does not pull the
 * score towards zero.
 *
 * @param {{usage?: ?number, success?: ?number, routeHit?: ?number,
 *          retention?: ?number, compliance?: ?number}} scores - Per-dimension scores.
 * @returns {number|null} Rounded weighted average, or `null` when no
 *   dimension has a score.
 */
function computeOverallScore(scores) {
  const dimensionWeights = [
    ['usage', 0.15],
    ['success', 0.30],
    ['routeHit', 0.20],
    ['retention', 0.25],
    ['compliance', 0.10],
  ];
  let appliedWeight = 0;
  let accumulated = 0;
  for (const [dimension, weight] of dimensionWeights) {
    const score = scores[dimension];
    if (score === null || score === undefined) {
      continue;
    }
    accumulated += score * weight;
    appliedWeight += weight;
  }
  if (appliedWeight > 0) {
    return Math.round(accumulated / appliedWeight);
  }
  return null;
}
// === Analyzers ===
/**
 * Aggregate per-skill statistics from all data sources and score each skill.
 *
 * Sources, merged in this order:
 *  1. activity logs   - entries with `event === 'skill'`; `detail` is the
 *                       skill name, `success` (strictly true/false) feeds the
 *                       success/failure counters.
 *  2. compliance logs - entries carrying `skill` or `actualSkill`; feed the
 *                       gate passed/blocked counters, and raise `calls` to at
 *                       least the number of passed gates.
 *  3. session memory  - `sessions[*].skillCounts`; raises `calls` to at least
 *                       the largest single-session count.
 *  4. route feedback  - `routedTo`/`routed` vs `correctedTo`; feeds the
 *                       route-hit score.
 *
 * @param {Array<object>} activityLogs - Parsed activity log entries.
 * @param {Array<object>} complianceLogs - Parsed compliance log entries.
 * @param {Array<object>} feedbackLogs - Parsed route-feedback entries.
 * @param {object|null} sessionData - Session-memory content, if any.
 * @returns {Object<string, {calls: number, successes: number, failures: number,
 *           scores: object, overall: ?number}>} Scored statistics keyed by skill name.
 */
function analyzeSkills(activityLogs, complianceLogs, feedbackLogs, sessionData) {
  const skills = {};

  // Return the stats record of a skill, creating a zeroed one on first sight.
  const ensureSkill = (skillName) => {
    if (!skills[skillName]) {
      skills[skillName] = { calls: 0, successes: 0, failures: 0 };
    }
    return skills[skillName];
  };

  // 1. Skill events from the activity logs.
  for (const entry of activityLogs) {
    if (entry.event !== 'skill') {
      continue;
    }
    const record = ensureSkill(entry.detail || 'unknown');
    record.calls += 1;
    if (entry.success === true) {
      record.successes += 1;
    } else if (entry.success === false) {
      record.failures += 1;
    }
  }

  // 2. Gate results from the compliance logs.
  for (const entry of complianceLogs) {
    const skillName = entry.skill || entry.actualSkill;
    if (!skillName) {
      continue;
    }
    const record = ensureSkill(skillName);
    if (!record.gatePassed) {
      record.gatePassed = 0;
    }
    if (!record.gateBlocked) {
      record.gateBlocked = 0;
    }
    if (entry.event === 'gate-pass' || entry.compliant === true) {
      record.gatePassed += 1;
      // A passed gate implies a call even if no activity event was logged.
      record.calls = Math.max(record.calls, record.gatePassed);
    }
    if (entry.event === 'gate-block' || entry.compliant === false) {
      record.gateBlocked += 1;
    }
  }

  // 3. Usage counts from session memory.
  const sessions = sessionData && sessionData.sessions
    ? Object.values(sessionData.sessions)
    : [];
  for (const session of sessions) {
    for (const [skillName, count] of Object.entries(session.skillCounts || {})) {
      const record = ensureSkill(skillName);
      record.calls = Math.max(record.calls, count);
    }
  }

  // 4. Routing outcomes from the route-feedback log.
  const routeStats = {};
  for (const entry of feedbackLogs) {
    const routed = entry.routedTo || entry.routed;
    if (!routed) {
      continue;
    }
    if (!routeStats[routed]) {
      routeStats[routed] = { recommended: 0, correctedAway: 0 };
    }
    const corrected = entry.correctedTo;
    // Kept when explicitly confirmed, not corrected, or "corrected" to itself.
    const wasKept = entry.type === 'confirm' || !corrected || corrected === routed;
    if (wasKept) {
      routeStats[routed].recommended += 1;
    } else {
      routeStats[routed].correctedAway += 1;
    }
  }

  // Score every skill; usage is normalised against the busiest skill (min 1).
  const maxCalls = Object.values(skills)
    .reduce((peak, record) => Math.max(peak, record.calls), 1);
  const results = {};
  for (const [name, record] of Object.entries(skills)) {
    const route = routeStats[name] || { recommended: 0, correctedAway: 0 };
    const { calls, successes, failures } = record;
    const scores = {
      usage: scoreUsageFrequency(calls, maxCalls),
      success: scoreSuccessRate(successes, successes + failures),
      routeHit: scoreRouteHitRate(route.recommended, route.correctedAway),
      retention: scoreRetention(name, sessionData),
      compliance: scoreComplianceRate(record.gatePassed || 0, record.gateBlocked || 0),
    };
    results[name] = {
      calls,
      successes,
      failures,
      scores,
      overall: computeOverallScore(scores),
    };
  }
  return results;
}
/**
 * Count agent task lifecycle events found in the activity logs.
 *
 * Only entries with `event === 'agent'` are considered:
 *  - `tool === 'TaskCreate'`: `detail` is used as the key and `created` is
 *    incremented.
 *  - `tool === 'TaskUpdate'`: `detail` is split on ':'; the first part is
 *    the key (falling back to 'unknown'), the second part is the status.
 *    `completed` and `failed` statuses increment the matching counter;
 *    any other status only ensures the record exists.
 *
 * NOTE(review): 'failed' is assumed to be the status string the activity
 * logger emits for failed tasks — confirm against the logger.
 *
 * @param {Array<object>} activityLogs - Parsed activity log entries.
 * @returns {Object<string, {created: number, completed: number, failed: number}>}
 *   Counters keyed by agent type (TaskCreate) or task id (TaskUpdate).
 */
function analyzeAgents(activityLogs) {
  const agents = {};

  // Return the counters for a key, creating a zeroed record on first sight.
  const ensureAgent = (key) => {
    if (!agents[key]) {
      agents[key] = { created: 0, completed: 0, failed: 0 };
    }
    return agents[key];
  };

  for (const entry of activityLogs) {
    if (entry.event !== 'agent') continue;
    const tool = entry.tool;
    // Coerce to string so a non-string `detail` cannot crash `split` below.
    const detail = String(entry.detail || '');
    if (tool === 'TaskCreate') {
      ensureAgent(detail).created++;
    } else if (tool === 'TaskUpdate') {
      const parts = detail.split(':');
      const taskId = parts[0] || 'unknown';
      const status = parts[1] || '';
      const record = ensureAgent(taskId);
      if (status === 'completed') {
        record.completed++;
      } else if (status === 'failed') {
        // The `failed` counter was previously initialised but never updated.
        record.failed++;
      }
    }
  }
  return agents;
}
/**
 * Aggregate and score MCP calls found in the activity logs.
 *
 * Only entries with `event === 'mcp'` are considered; they are keyed by
 * `detail`, then `tool`, then the literal 'unknown'. Each record receives a
 * usage score (relative to the busiest MCP) and a success score; `overall`
 * is the success score when one exists, otherwise the usage score.
 *
 * @param {Array<object>} activityLogs - Parsed activity log entries.
 * @returns {Object<string, {calls: number, successes: number, failures: number,
 *           scores: {usage: number, success: ?number}, overall: number}>}
 *   Scored statistics keyed by MCP name.
 */
function analyzeMcp(activityLogs) {
  const mcps = {};
  for (const entry of activityLogs) {
    if (entry.event !== 'mcp') {
      continue;
    }
    const key = entry.detail || entry.tool || 'unknown';
    if (!mcps[key]) {
      mcps[key] = { calls: 0, successes: 0, failures: 0 };
    }
    const stats = mcps[key];
    stats.calls += 1;
    if (entry.success === true) {
      stats.successes += 1;
    } else if (entry.success === false) {
      stats.failures += 1;
    }
  }

  // Usage is normalised against the busiest MCP (never less than 1 call).
  const maxCalls = Object.values(mcps)
    .reduce((peak, stats) => Math.max(peak, stats.calls), 1);
  for (const stats of Object.values(mcps)) {
    const usage = scoreUsageFrequency(stats.calls, maxCalls);
    const success = scoreSuccessRate(stats.successes, stats.successes + stats.failures);
    stats.scores = { usage, success };
    stats.overall = success !== null ? success : usage;
  }
  return mcps;
}
// === Ranking and recommendations ===
/**
 * Build improvement recommendations for low-scoring skills.
 *
 * Every skill with an overall score below 50 gets an entry, ordered from
 * lowest to highest score. Scores below 30 are marked for retirement, the
 * rest for optimisation. Each entry lists the dimensions that fall under
 * their threshold (success < 70, routeHit < 60, retention < 50, usage < 10).
 *
 * @param {Object<string, {overall: ?number, scores: object}>} skillScores -
 *   Scored skills, as produced by `analyzeSkills`.
 * @returns {Array<{skill: string, overall: number, action: string, issues: string[]}>}
 *   Recommendations, worst skill first.
 */
function generateRecommendations(skillScores) {
  const worstFirst = Object.entries(skillScores)
    .filter(([, entry]) => entry.overall !== null)
    .sort(([, left], [, right]) => (left.overall || 0) - (right.overall || 0));

  return worstFirst
    .filter(([, entry]) => entry.overall < 50)
    .map(([skill, entry]) => {
      const { success, routeHit, retention, usage } = entry.scores;
      // [triggered?, label] pairs, in the order the issues are reported.
      const checks = [
        [success !== null && success < 70, '成功率低'],
        [routeHit !== null && routeHit < 60, '路由命中率低'],
        [retention !== null && retention < 50, '用户快速切换走'],
        [usage < 10, '几乎未使用'],
      ];
      const issues = checks
        .filter(([triggered]) => triggered)
        .map(([, label]) => label);
      return {
        skill,
        overall: entry.overall,
        action: entry.overall < 30 ? '建议淘汰' : '需要优化',
        issues,
      };
    });
}
// === Main entry point ===
/**
 * Load all data sources, score skills / agents / MCPs and assemble the report.
 *
 * @param {{days?: number}} [options] - `days` is the look-back window for
 *   the dated activity/compliance logs; falsy values fall back to 30.
 * @returns {object} Report with `timestamp`, `period`, `summary`, `skills`,
 *   `agents`, `mcp`, `recommendations`, `topSkills` (best 10) and
 *   `bottomSkills` (worst 5 among those scoring below 50).
 */
function analyze(options = {}) {
  const days = options.days || 30;

  // Load the raw data. The route-feedback log is a single, undated file.
  const activityLogs = loadAllJsonl('activity-', days);
  const complianceLogs = loadAllJsonl('compliance-', days);
  const feedbackPath = path.join(DEBUG_DIR, 'route-feedback.jsonl');
  const feedbackLogs = parseJsonl(feedbackPath);
  const sessionData = loadSessionMemory();

  // Analyse each component family.
  const skillScores = analyzeSkills(activityLogs, complianceLogs, feedbackLogs, sessionData);
  const agentStats = analyzeAgents(activityLogs);
  const mcpScores = analyzeMcp(activityLogs);
  const recommendations = generateRecommendations(skillScores);

  // Summary figures.
  const skillEntries = Object.entries(skillScores);
  let activeSkills = 0;
  let overallSum = 0;
  for (const [, skill] of skillEntries) {
    if (skill.calls > 0) {
      activeSkills += 1;
    }
    // Skills without an overall score count as 0 in the average.
    overallSum += skill.overall || 0;
  }
  const avgScore = skillEntries.length > 0
    ? Math.round(overallSum / skillEntries.length)
    : 0;

  // Ranking helpers shared by the top and bottom lists.
  const byOverallAscending = (a, b) => (a[1].overall || 0) - (b[1].overall || 0);
  const byOverallDescending = (a, b) => (b[1].overall || 0) - (a[1].overall || 0);
  const toRankingRow = ([name, data]) => ({ name, overall: data.overall, calls: data.calls });
  const scoredSkills = skillEntries.filter(([, skill]) => skill.overall !== null);

  const topSkills = [...scoredSkills]
    .sort(byOverallDescending)
    .slice(0, 10)
    .map(toRankingRow);
  const bottomSkills = scoredSkills
    .filter(([, skill]) => skill.overall < 50)
    .sort(byOverallAscending)
    .slice(0, 5)
    .map(toRankingRow);

  return {
    timestamp: new Date().toISOString(),
    period: `${days}d`,
    summary: {
      totalSkillsTracked: skillEntries.length,
      activeSkills,
      avgQualityScore: avgScore,
      lowQualityCount: recommendations.length,
      totalEvents: activityLogs.length,
    },
    skills: skillScores,
    agents: agentStats,
    mcp: mcpScores,
    recommendations,
    topSkills,
    bottomSkills,
  };
}
// === CLI ===
/**
 * Command-line entry point.
 *
 * Flags:
 *   --json      print the report as JSON on stdout
 *   --save      write the report to <DEBUG_DIR>/quality-report.json and also
 *               print it as JSON on stdout
 *   --days=N    look-back window in days (default 30; a non-numeric value
 *               falls back to the default inside `analyze`)
 *
 * Without --json/--save a human-readable report is printed.
 */
function main() {
  const args = process.argv.slice(2);
  const isJson = args.includes('--json');
  const isSave = args.includes('--save');
  const daysArg = args.find(a => a.startsWith('--days='))?.split('=')[1] || '30';
  // Explicit radix: never let the input's prefix choose the number base.
  const days = Number.parseInt(daysArg, 10);
  const report = analyze({ days });

  if (isSave) {
    const outFile = path.join(DEBUG_DIR, 'quality-report.json');
    // The debug directory may not exist yet on a fresh installation.
    fs.mkdirSync(DEBUG_DIR, { recursive: true });
    fs.writeFileSync(outFile, JSON.stringify(report, null, 2));
    // Status goes to stderr so that stdout stays valid JSON for piping.
    console.error('质量报告已保存:', outFile);
  }
  if (isJson || isSave) {
    console.log(JSON.stringify(report, null, 2));
    return;
  }

  // Human-readable output.
  console.log('═══════════════════════════════════════════');
  console.log(' Bookworm 组件质量评分报告');
  console.log(` 时间范围: ${report.period} | 总事件: ${report.summary.totalEvents}`);
  console.log('═══════════════════════════════════════════\n');
  console.log(`活跃技能: ${report.summary.activeSkills} | 平均质量分: ${report.summary.avgQualityScore}/100\n`);

  if (report.topSkills.length > 0) {
    console.log('── Top Skills ──');
    for (const s of report.topSkills) {
      // 20-cell bar, one cell per 5 points; clamped so an out-of-range score
      // cannot make String.prototype.repeat throw.
      const filled = Math.min(20, Math.max(0, Math.round(s.overall / 5)));
      const bar = '█'.repeat(filled) + '░'.repeat(20 - filled);
      console.log(` ${s.name.padEnd(25)} ${bar} ${s.overall}/100 (${s.calls} calls)`);
    }
    console.log();
  }

  if (report.bottomSkills.length > 0) {
    console.log('── 需关注 ──');
    for (const s of report.bottomSkills) {
      // Leading space keeps these rows aligned with the Top Skills rows.
      console.log(` ${s.name.padEnd(25)} ${s.overall}/100 (${s.calls} calls)`);
    }
    console.log();
  }

  if (report.recommendations.length > 0) {
    console.log('── 建议 ──');
    for (const r of report.recommendations) {
      const marker = r.action === '建议淘汰' ? '✗' : '!';
      // Separate the action from the issue list; previously they were
      // concatenated into one unreadable word.
      const issueSuffix = r.issues.length > 0 ? ` (${r.issues.join(', ')})` : '';
      console.log(` ${marker} ${r.skill}: ${r.action}${issueSuffix}`);
    }
    console.log();
  }

  const mcpEntries = Object.entries(report.mcp);
  if (mcpEntries.length > 0) {
    console.log('── MCP 质量 ──');
    for (const [name, data] of mcpEntries.sort((a, b) => b[1].calls - a[1].calls)) {
      const successStr = data.scores.success !== null ? `${data.scores.success}%` : 'N/A';
      console.log(` ${name.padEnd(35)} ${data.calls} calls | 成功率: ${successStr}`);
    }
  }
  console.log('\n═══════════════════════════════════════════');
}
// Module exports: expose the analysis entry point, the individual analyzers
// and the scoring helpers so they can be used programmatically.
// The guard skips the assignment when no `module` binding exists.
if (typeof module !== 'undefined') {
module.exports = {
analyze,
analyzeSkills,
analyzeAgents,
analyzeMcp,
scoreUsageFrequency,
scoreSuccessRate,
scoreRouteHitRate,
scoreRetention,
scoreComplianceRate,
computeOverallScore,
generateRecommendations,
};
}
// Run the CLI only when this file is executed directly, not when it is required.
if (require.main === module) {
main();
}