#!/usr/bin/env node
/**
 * A/B backtest script: checks the effect of the disambiguation rules against
 * historical routing feedback.
 *
 * Reads the records in debug/route-feedback.jsonl, scores every query once
 * with plain BM25 and once with BM25 + disambiguation, and compares how the
 * routing accuracy changes between the two.
 *
 * Output: a human-readable summary on stdout and a JSON report written to
 * debug/ab-backtest-report.json under the project root.
 */

const fs = require('fs');
const path = require('path');

const detectRoot = () => require('./paths.config.js').PATHS.root;
const ROOT = detectRoot();

// Load the functions exported by route-analyzer.
const analyzer = require(path.join(ROOT, 'scripts', 'route-analyzer.js'));
const { tokenize, scoreSkill, buildBM25Params, normalizeScores, applyDisambiguation } = analyzer;

// Load skills-index.json.
const indexFile = path.join(ROOT, 'skills-index.json');
const index = JSON.parse(fs.readFileSync(indexFile, 'utf8'));

// Build the global BM25 parameters once; they are reused for every query.
const bm25Params = buildBM25Params(index);

// Load the feedback data (one JSON document per line).
const feedbackFile = path.join(ROOT, 'debug', 'route-feedback.jsonl');
const lines = fs.readFileSync(feedbackFile, 'utf8').trim().split('\n');

// Parse line by line. Blank lines are ignored; lines that are not valid JSON,
// or that do not hold a record with a string `query`, are counted and reported
// instead of being dropped silently, so a corrupted feedback file is visible.
let skippedLines = 0;
const entries = [];
for (const line of lines) {
  if (line.trim() === '') {
    continue;
  }
  let parsed;
  try {
    parsed = JSON.parse(line);
  } catch {
    skippedLines++;
    continue;
  }
  const isRecord = parsed !== null && typeof parsed === 'object' && typeof parsed.query === 'string';
  if (!isRecord) {
    skippedLines++;
    continue;
  }
  entries.push(parsed);
}

// Split into corrections (routedTo differs from correctedTo) and confirmations.
const corrections = entries.filter(e => e.routedTo !== e.correctedTo);
const confirms = entries.filter(e => e.routedTo === e.correctedTo);

/**
 * Run the scoring pipeline for a query and return the ranked results.
 *
 * Every skill in the index is scored with `scoreSkill`, the scores are rounded
 * to two decimals and sorted in descending order, the disambiguation step is
 * optionally applied, and the list is finally passed through `normalizeScores`.
 *
 * @param {string} query - The user query to route.
 * @param {boolean} withDisambig - Whether to apply the disambiguation rules.
 * @returns {*} Whatever `normalizeScores` returns; the callers below read
 *   `[0]?.name` from it as the top-ranked skill.
 */
function scoreQuery(query, withDisambig = true) {
  const queryTokens = tokenize(query);

  // BM25-score all skills.
  const results = index.skills.map(skill => {
    const { totalScore, matchedKeywords } = scoreSkill(skill, queryTokens, bm25Params);
    return {
      name: skill.name,
      score: Math.round(totalScore * 100) / 100,
      matchedKeywords,
    };
  }).sort((a, b) => b.score - a.score);

  // Optional: disambiguation.
  const final = withDisambig ? applyDisambiguation(results, query, index) : results;
  return normalizeScores(final);
}

/**
 * Percentage of `count` in `total`; 0 when `total` is 0 so that an empty
 * feedback file does not produce NaN in the console output or the report.
 *
 * @param {number} count
 * @param {number} total
 * @returns {number}
 */
function toPercent(count, total) {
  return total === 0 ? 0 : (count / total) * 100;
}

console.log('=== A/B 回测: 消歧规则效果验证 ===\n');
console.log(`反馈总数: ${entries.length} (确认 ${confirms.length}, 纠正 ${corrections.length})`);
console.log(`原始准确率: ${toPercent(confirms.length, entries.length).toFixed(1)}%\n`);
if (skippedLines > 0) {
  console.log(`[WARN] 跳过 ${skippedLines} 行无效反馈记录\n`);
}
if (entries.length === 0) {
  console.log('[WARN] 没有可用的反馈记录,准确率按 0 计\n');
}

// Test every correction case both without and with disambiguation.
let fixedByDisambig = 0;
let stillWrong = 0;
let confirmStillCorrect = 0;
let confirmRegressed = 0;
let alreadyCorrectWithout = 0;
// Corrections that raw BM25 already gets right but disambiguation then breaks.
let brokenByDisambig = 0;

console.log('--- 纠正 case 回测 ---');
for (const c of corrections) {
  const withoutD = scoreQuery(c.query, false);
  const withD = scoreQuery(c.query, true);
  const oldTop = withoutD[0]?.name || '(none)';
  const newTop = withD[0]?.name || '(none)';
  const expected = c.correctedTo;

  if (oldTop === expected) {
    alreadyCorrectWithout++;
    console.log(` [ALREADY] "${c.query}" → ${oldTop} (BM25 已修复,可能因关键词降级)`);
    if (newTop !== expected) {
      // Raw BM25 is right but the disambiguation step moved the top result
      // away from the expected skill: surface it instead of hiding it.
      brokenByDisambig++;
      console.log(` [BROKEN] 消歧后变为 ${newTop},期望: ${expected}`);
    }
  } else if (newTop === expected) {
    fixedByDisambig++;
    console.log(` [FIXED] "${c.query}"`);
    console.log(` BM25: ${oldTop} → 消歧: ${newTop} (正确)`);
  } else {
    stillWrong++;
    console.log(` [MISS] "${c.query}"`);
    console.log(` BM25: ${oldTop}, 消歧: ${newTop}, 期望: ${expected}`);
  }
}

// Verify that the confirmed cases do not regress.
console.log('\n--- 确认 case 回归检查 ---');
for (const c of confirms) {
  const withD = scoreQuery(c.query, true);
  const newTop = withD[0]?.name || '(none)';
  if (newTop === c.correctedTo) {
    confirmStillCorrect++;
  } else {
    confirmRegressed++;
    console.log(` [REGRESS] "${c.query}"`);
    console.log(` 原: ${c.correctedTo} → 新: ${newTop}`);
  }
}

// Summary. Accuracies are rounded to one decimal first and the improvement is
// the difference of the rounded values, so the printed numbers add up.
const newCorrectCount = confirms.length - confirmRegressed + fixedByDisambig;
const newAccuracyText = toPercent(newCorrectCount, entries.length).toFixed(1);
const oldAccuracyText = toPercent(confirms.length, entries.length).toFixed(1);
const newAccuracy = Number(newAccuracyText);
const oldAccuracy = Number(oldAccuracyText);
const improvementText = (newAccuracy - oldAccuracy).toFixed(1);
// Only prefix "+" for non-negative changes; a negative value carries its own sign.
const signedImprovement = improvementText.startsWith('-') ? improvementText : `+${improvementText}`;

console.log('\n=== 汇总 ===');
console.log(`纠正修复: ${fixedByDisambig}/${corrections.length}`);
console.log(`仍然错误: ${stillWrong}/${corrections.length}`);
console.log(`BM25 已正确: ${alreadyCorrectWithout}/${corrections.length}`);
console.log(`确认保持: ${confirmStillCorrect}/${confirms.length}`);
console.log(`确认回归: ${confirmRegressed}/${confirms.length}`);
console.log(`\n准确率: ${oldAccuracyText}% → ${newAccuracyText}% (${newCorrectCount}/${entries.length})`);
console.log(`提升: ${signedImprovement}%`);

if (confirmRegressed > 0) {
  console.log(`\n[WARN] 发现 ${confirmRegressed} 个回归,需要检查消歧规则`);
}
if (brokenByDisambig > 0) {
  console.log(`\n[WARN] 发现 ${brokenByDisambig} 个纠正 case 被消歧破坏,需要检查消歧规则`);
}

// JSON output. `skippedLines`, `alreadyCorrect` and `brokenByDisambig` are
// additive fields; all other keys keep their original names and meaning.
const report = {
  ts: new Date().toISOString(),
  totalEntries: entries.length,
  skippedLines,
  originalAccuracy: oldAccuracy,
  newAccuracy,
  improvement: Number(improvementText),
  corrections: {
    total: corrections.length,
    fixed: fixedByDisambig,
    missed: stillWrong,
    alreadyCorrect: alreadyCorrectWithout,
    brokenByDisambig,
  },
  confirms: { total: confirms.length, kept: confirmStillCorrect, regressed: confirmRegressed },
};

// Save the report.
const reportFile = path.join(ROOT, 'debug', 'ab-backtest-report.json');
fs.writeFileSync(reportFile, JSON.stringify(report, null, 2) + '\n');
console.log(`\n报告已保存: ${reportFile}`);