bookworm-smart-assistant/scripts/ab-backtest.js

142 lines
5.2 KiB
JavaScript

#!/usr/bin/env node
/**
* A/B 回测脚本: 用历史纠正数据验证消歧规则效果
*
* 读取 route-feedback.jsonl 的纠正记录,
* 对每个 query 分别用 "原始 BM25" 和 "BM25 + 消歧" 评分,
* 对比两者的路由准确率变化。
*/
const fs = require('fs');
const path = require('path');
// Resolve the project root from the paths config that sits next to this script.
const detectRoot = () => require('./paths.config.js').PATHS.root;
const ROOT = detectRoot();
// Load the scoring helpers exported by route-analyzer.
const analyzer = require(path.join(ROOT, 'scripts', 'route-analyzer.js'));
const { tokenize, scoreSkill, buildBM25Params, normalizeScores, applyDisambiguation } = analyzer;
// Load skills-index.json.
const indexFile = path.join(ROOT, 'skills-index.json');
const index = JSON.parse(fs.readFileSync(indexFile, 'utf8'));
// Build the global BM25 parameters from the index.
const bm25Params = buildBM25Params(index);
// Load the feedback log: one JSON record per line, blank lines ignored.
const feedbackFile = path.join(ROOT, 'debug', 'route-feedback.jsonl');
const lines = fs.readFileSync(feedbackFile, 'utf8')
  .split('\n')
  .filter(line => line.trim() !== '');
const entries = [];
let skippedLines = 0;
for (const line of lines) {
  let record = null;
  try {
    record = JSON.parse(line);
  } catch {
    record = null;
  }
  // Only object records can carry routedTo/correctedTo. A bare number or string
  // would otherwise compare undefined === undefined and be counted as a confirm.
  if (record !== null && typeof record === 'object') {
    entries.push(record);
  } else {
    skippedLines++;
  }
}
// Parsing stays best-effort, but dropped lines are reported instead of vanishing silently.
if (skippedLines > 0) {
  console.warn(`[WARN] 跳过 ${skippedLines} 行无法解析的反馈记录: ${feedbackFile}`);
}
// Split into corrections (routing was changed) and confirmations (routing was kept).
const corrections = entries.filter(e => e.routedTo !== e.correctedTo);
const confirms = entries.filter(e => e.routedTo === e.correctedTo);
/**
 * Run the scoring pipeline for one query and return the ranked results.
 *
 * Every skill in the index is scored against the tokenized query, scores are
 * rounded to two decimals, and the list is sorted by score in descending order.
 * The list is then optionally passed through applyDisambiguation and finally
 * through normalizeScores.
 *
 * @param {string} query - Query text to route.
 * @param {boolean} [withDisambig=true] - Whether to apply the disambiguation rules.
 * @returns {*} The value produced by normalizeScores for the ranked list.
 */
function scoreQuery(query, withDisambig = true) {
  const queryTokens = tokenize(query);

  // Score every skill with BM25.
  const ranked = [];
  for (const skill of index.skills) {
    const scored = scoreSkill(skill, queryTokens, bm25Params);
    ranked.push({
      name: skill.name,
      score: Math.round(scored.totalScore * 100) / 100,
      matchedKeywords: scored.matchedKeywords,
    });
  }
  ranked.sort((left, right) => right.score - left.score);

  // Baseline run: skip disambiguation entirely.
  if (!withDisambig) {
    return normalizeScores(ranked);
  }
  return normalizeScores(applyDisambiguation(ranked, query, index));
}
/**
 * Format `count / total` as a percentage string with one decimal place.
 * Returns '0.0' when `total` is 0, so an empty feedback log does not print
 * "NaN%" or put NaN (serialized as null) into the JSON report.
 * @param {number} count
 * @param {number} total
 * @returns {string}
 */
function toPercent(count, total) {
  return total > 0 ? ((count / total) * 100).toFixed(1) : '0.0';
}

/**
 * Name of the top-ranked skill in a scoreQuery result, or '(none)' when the
 * result is empty or its first entry has no name.
 * @param {Array<{name?: string}>} ranked
 * @returns {string}
 */
function topName(ranked) {
  return ranked[0]?.name || '(none)';
}

console.log('=== A/B 回测: 消歧规则效果验证 ===\n');
console.log(`反馈总数: ${entries.length} (确认 ${confirms.length}, 纠正 ${corrections.length})`);
// Historical accuracy: share of feedback entries whose original routing was confirmed.
const oldAccuracy = toPercent(confirms.length, entries.length);
console.log(`原始准确率: ${oldAccuracy}%\n`);

// Replay every correction case without and with disambiguation.
let fixedByDisambig = 0;
let stillWrong = 0;
let confirmStillCorrect = 0;
let confirmRegressed = 0;
let alreadyCorrectWithout = 0;
console.log('--- 纠正 case 回测 ---');
for (const c of corrections) {
  const oldTop = topName(scoreQuery(c.query, false));
  const newTop = topName(scoreQuery(c.query, true));
  const expected = c.correctedTo;
  if (oldTop === expected) {
    // The ranking without disambiguation already puts the expected skill first.
    // NOTE(review): newTop is not checked in this branch, so a rule that breaks
    // such a case would go unnoticed — confirm whether that is intended.
    alreadyCorrectWithout++;
    console.log(`  [ALREADY] "${c.query}" → ${oldTop} (BM25 已修复,可能因关键词降级)`);
  } else if (newTop === expected) {
    fixedByDisambig++;
    console.log(`  [FIXED] "${c.query}"`);
    console.log(` BM25: ${oldTop} → 消歧: ${newTop} (正确)`);
  } else {
    stillWrong++;
    console.log(`  [MISS] "${c.query}"`);
    console.log(` BM25: ${oldTop}, 消歧: ${newTop}, 期望: ${expected}`);
  }
}

// Replay every confirmed case with disambiguation to detect regressions.
console.log('\n--- 确认 case 回归检查 ---');
for (const c of confirms) {
  const newTop = topName(scoreQuery(c.query, true));
  if (newTop === c.correctedTo) {
    confirmStillCorrect++;
  } else {
    confirmRegressed++;
    console.log(`  [REGRESS] "${c.query}"`);
    console.log(` 原: ${c.correctedTo} → 新: ${newTop}`);
  }
}

// Summary. ALREADY cases are reported but, as before, not added to the new
// accuracy figure; only confirms that still hold and corrections fixed by
// disambiguation count as correct.
const newCorrectCount = confirms.length - confirmRegressed + fixedByDisambig;
const newAccuracy = toPercent(newCorrectCount, entries.length);
// The delta is taken from the rounded figures so it matches the printed numbers.
const deltaText = (Number(newAccuracy) - Number(oldAccuracy)).toFixed(1);
// Prefix "+" only for non-negative deltas; a regression used to print "+-1.2%".
const signedDelta = deltaText.startsWith('-') ? deltaText : `+${deltaText}`;
console.log('\n=== 汇总 ===');
console.log(`纠正修复: ${fixedByDisambig}/${corrections.length}`);
console.log(`仍然错误: ${stillWrong}/${corrections.length}`);
console.log(`BM25 已正确: ${alreadyCorrectWithout}/${corrections.length}`);
console.log(`确认保持: ${confirmStillCorrect}/${confirms.length}`);
console.log(`确认回归: ${confirmRegressed}/${confirms.length}`);
console.log(`\n准确率: ${oldAccuracy}% → ${newAccuracy}% (${newCorrectCount}/${entries.length})`);
console.log(`提升: ${signedDelta}%`);
if (confirmRegressed > 0) {
  console.log(`\n[WARN] 发现 ${confirmRegressed} 个回归,需要检查消歧规则`);
}

// Machine-readable report. `alreadyCorrect` is an added field so that
// fixed + missed + alreadyCorrect equals the corrections total.
const report = {
  ts: new Date().toISOString(),
  totalEntries: entries.length,
  originalAccuracy: parseFloat(oldAccuracy),
  newAccuracy: parseFloat(newAccuracy),
  improvement: parseFloat(deltaText),
  corrections: {
    total: corrections.length,
    fixed: fixedByDisambig,
    missed: stillWrong,
    alreadyCorrect: alreadyCorrectWithout,
  },
  confirms: { total: confirms.length, kept: confirmStillCorrect, regressed: confirmRegressed },
};
// Save the report next to the feedback log.
const reportFile = path.join(ROOT, 'debug', 'ab-backtest-report.json');
fs.writeFileSync(reportFile, JSON.stringify(report, null, 2) + '\n');
console.log(`\n报告已保存: ${reportFile}`);