#!/usr/bin/env node
'use strict';
/**
 * v6.6.1 routing-accuracy regression test — 5 test cases.
 *
 * Calls the route engine and the intent classifier directly and prints a
 * PASS/FAIL report. The process exits with status 1 when any check fails and
 * 0 otherwise.
 *
 * NOTE(review): the original header also mentions "disambiguation", but no
 * disambiguation module is loaded by this script — presumably it runs inside
 * the route engine; confirm.
 */

const path = require('path');
const fs = require('fs');

const ROOT = path.join(__dirname, '..', '..');

// Load the core modules under test.
const routeEngine = require(path.join(ROOT, 'scripts', 'route-engine.js'));
const intentClassifier = require(path.join(ROOT, 'scripts', 'intent-classifier.js'));

const cwd = process.cwd();

// Thresholds used by the checks below. The same numbers also appear verbatim
// inside some log messages ("cap@0.65", "gap >= 0.15"); keep them in sync.
const COLD_START_CAP = 0.65;
const COLD_START_GAP = 0.15;
const INHERIT_CONFIDENCE_FACTOR = 0.7;

let passed = 0;
let failed = 0;

/**
 * Turn a thrown value into printable text, preferring the stack trace.
 *
 * @param {unknown} err - Whatever was thrown.
 * @returns {string} Stack trace when available, otherwise the stringified value.
 */
function describeError(err) {
  return err instanceof Error && err.stack ? err.stack : String(err);
}

/**
 * Route one prompt, print the diagnostic report and tell whether it hit.
 *
 * @param {string} name - Human-readable test-case label.
 * @param {string} prompt - Prompt handed to the classifier and the route engine.
 * @param {string} expectPrimary - Skill name expected as the primary route.
 * @param {{allowTop3?: boolean, checkCap?: boolean}} opts - See `test`.
 * @returns {boolean} True when the expectation was met.
 */
function evaluateCase(name, prompt, expectPrimary, opts) {
  // Fall back to a neutral intent when the classifier exposes no classify().
  const intent = intentClassifier.classify
    ? intentClassifier.classify(prompt)
    : { intents: [], entities: [], modifiers: [], complexity: 'medium' };
  const result = routeEngine.runRouteEngine(prompt, cwd, intent);

  const primary = result.primary;
  const confidence = result.confidence;
  const coldStart = result._coldStartApplied || false;
  const firedRules = (result._firedRules || [])
    .map((r) => r.id || r.rule || '')
    .filter(Boolean);

  // A case hits when the expected skill is the primary route or, if the
  // caller allows it, one of the first three candidates.
  const top3Names = (result.candidates || []).slice(0, 3).map((c) => c.name);
  const isPrimaryHit = primary === expectPrimary;
  const isTop3Hit = top3Names.includes(expectPrimary);
  const hit = isPrimaryHit || (opts.allowTop3 && isTop3Hit);
  const status = hit ? 'PASS' : 'FAIL';

  console.log(`\n[${status}] ${name}`);
  console.log(` prompt: "${prompt}"`);
  console.log(` expect: ${expectPrimary}`);
  console.log(` got: ${primary} (cf: ${confidence})`);
  console.log(` top-3: ${top3Names.join(', ')}`);
  console.log(` rules: ${firedRules.length > 0 ? firedRules.join(', ') : '(none)'}`);
  console.log(` coldStart: ${coldStart}`);

  // Informational only: the cap line is printed but does not affect PASS/FAIL.
  if (opts.checkCap && coldStart) {
    const capApplied = confidence <= COLD_START_CAP;
    console.log(` cap@0.65: ${capApplied ? 'YES' : 'NO (BUG!)'}`);
  }

  if (!hit) {
    console.log(` ** MISMATCH: expected ${expectPrimary}, got ${primary}`);
    if (isTop3Hit) console.log(` ** (但在 top-3 候选中)`);
  }

  return Boolean(hit);
}

/**
 * Run one routing test case and update the global pass/fail counters.
 *
 * An exception thrown by the classifier or the route engine is reported and
 * counted as a failure instead of aborting the whole run, so the remaining
 * cases and the final summary are still produced.
 *
 * @param {string} name - Human-readable test-case label.
 * @param {string} prompt - Prompt to route.
 * @param {string} expectPrimary - Skill name expected as the primary route.
 * @param {{allowTop3?: boolean, checkCap?: boolean}} [opts] - `allowTop3`
 *   also accepts the expected skill among the first three candidates;
 *   `checkCap` prints whether the confidence is at or below 0.65 when the
 *   result reports a cold start.
 * @returns {void}
 */
function test(name, prompt, expectPrimary, opts = {}) {
  try {
    if (evaluateCase(name, prompt, expectPrimary, opts)) {
      passed++;
    } else {
      failed++;
    }
  } catch (err) {
    failed++;
    console.log(`\n[FAIL] ${name}`);
    console.log(` prompt: "${prompt}"`);
    console.log(` expect: ${expectPrimary}`);
    console.log(` ** ERROR: ${describeError(err)}`);
  }
}

/**
 * Supplementary check: route a query intended to trigger the cold-start path
 * and inspect the confidence cap.
 *
 * PASS when the gap between the two best candidates is below 0.15 and the
 * confidence is at most 0.65; SKIP (not counted) when the gap is 0.15 or
 * more; FAIL otherwise. When the result does not report a cold start, or has
 * fewer than two candidates, the check is skipped without counting.
 *
 * @returns {void}
 */
function verifyColdStartCap() {
  const csResult = routeEngine.runRouteEngine('帮我检查一下系统健康状态', cwd, {
    intents: ['general'],
    entities: [],
    modifiers: [],
    complexity: 'medium',
  });
  const csApplied = csResult._coldStartApplied || false;
  const csConf = csResult.confidence;

  if (!(csApplied && csResult.candidates && csResult.candidates.length >= 2)) {
    console.log(` coldStart: ${csApplied}, confidence: ${csConf} — cap 验证跳过`);
    return;
  }

  const gap =
    (csResult.candidates[0]?.confidence || 0) - (csResult.candidates[1]?.confidence || 0);
  console.log(` coldStart: true, gap: ${gap.toFixed(3)}, confidence: ${csConf}`);

  if (gap < COLD_START_GAP && csConf <= COLD_START_CAP) {
    console.log(' [PASS] cap 在 route-engine 层生效');
    passed++;
  } else if (gap >= COLD_START_GAP) {
    console.log(' [SKIP] gap >= 0.15, cap 不需要触发');
  } else {
    console.log(` [FAIL] cap 应为 0.65 但实际为 ${csConf}`);
    failed++;
  }
}

console.log('=== Bookworm v6.6.1 Route Regression Test ===\n');

// TC1: R90 sre-expert
test('TC1: SLI 监控告警 → sre-expert (R90)', 'SLI 监控告警配置', 'sre-expert');

// TC2: R91 impact-analyst
test('TC2: 函数影响分析 → impact-analyst (R91)', '改这个函数会影响哪些模块', 'impact-analyst');

// TC3: R92 data-analyst-expert
test(
  'TC3: Google Sheets 数据分析 → data-analyst-expert (R92)',
  '从 Google Sheets 分析销售数据',
  'data-analyst-expert'
);

// TC4: confirmation word "执行" — at the route-engine layer the outcome should
// be low confidence / none (inheritance is handled in the bundle layer).
// This verifies the engine does not confidently route to an unrelated skill;
// returning none or a low confidence for such a short prompt is correct.
test('TC4: 确认词 "执行" (路由引擎层)', '执行', 'none', { allowTop3: true });

// TC5: image query — inheritance is handled in the bundle layer; here the
// call expects 'debugger-expert' as primary or within the top 3, because the
// text accompanying the image may produce a semantic hit.
test('TC5: 图片查询 (路由引擎层)', '[Image #1] 看看这个报错', 'debugger-expert', {
  allowTop3: true,
});

// === Supplement: cold-start cap verification ===
// Run a query meant to trigger the cold start and check that the cap applies.
console.log('\n--- 补充: 冷启动 cap 机制验证 ---');
try {
  verifyColdStartCap();
} catch (err) {
  // Keep going so the inheritance checks and the summary are still printed.
  failed++;
  console.log(` [FAIL] cold-start cap check threw: ${describeError(err)}`);
}

// === TC4/TC5 inheritance-logic verification (simulating the bundle layer) ===
console.log('\n--- 补充: 继承逻辑模拟验证 ---');

// Simulates a route-state-current.json that holds a valid previous-turn route.
const mockPrevState = {
  ts: new Date().toISOString(),
  routing: {
    primary: 'debugger-expert',
    candidates: [{ name: 'debugger-expert', confidence: 0.85 }],
    confidence: 0.85,
    chain: [],
    lastValidPrimary: 'debugger-expert',
  },
  lastValidPrimary: 'debugger-expert',
};

// TC4-inherit: confirmation-word inheritance.
// NOTE(review): matching is by substring, so any prompt merely containing one
// of these words (e.g. '行' or 'ok') counts as a confirmation — confirm this
// mirrors the real bundle-layer check.
const confirmWords = ['执行', '开始', '继续', '确认', '好的', '行', '可以', 'go', 'yes', 'proceed', 'ok'];
const tc4prompt = '执行';
const isConfirm = confirmWords.some((w) => tc4prompt.includes(w));
if (isConfirm) {
  const inheritedConfidence = (
    mockPrevState.routing.confidence * INHERIT_CONFIDENCE_FACTOR
  ).toFixed(2);
  console.log(` [PASS] TC4-inherit: "${tc4prompt}" 匹配确认词列表, bundle 层会触发 tryInherit()`);
  console.log(` → 继承结果: ${mockPrevState.routing.primary} (cf: ${inheritedConfidence})`);
  passed++;
} else {
  console.log(` [FAIL] TC4-inherit: "${tc4prompt}" 未匹配确认词`);
  failed++;
}

// TC5-inherit: image inheritance via lastValidPrimary.
const tc5prompt = '[Image #1] 看看这个报错';
const isImage = /\[Image\s*#?\d+\]/.test(tc5prompt);
if (isImage) {
  // Prefer the top-level field, then fall back to the one nested in routing.
  const lvp =
    mockPrevState.lastValidPrimary ||
    (mockPrevState.routing && mockPrevState.routing.lastValidPrimary);
  if (lvp && lvp !== 'none') {
    console.log(` [PASS] TC5-inherit: 图片检测 + lastValidPrimary="${lvp}" → 继承成功`);
    passed++;
  } else {
    console.log(` [FAIL] TC5-inherit: 图片检测成功但 lastValidPrimary 为空`);
    failed++;
  }
} else {
  console.log(` [FAIL] TC5-inherit: 未检测到图片模式`);
  failed++;
}

// === Summary ===
console.log(`\n${'='.repeat(50)}`);
console.log(`TOTAL: ${passed + failed} tests, ${passed} PASS, ${failed} FAIL`);
console.log(`VERDICT: ${failed === 0 ? 'ALL PASS ✓' : `${failed} FAILURES ✗`}`);
process.exit(failed > 0 ? 1 : 0);