bookworm-smart-assistant/scripts/patches/test-route-regression-0427.js

#!/usr/bin/env node
'use strict';
/**
 * v6.6.1 路由精度回归测试 — 5 测试用例
 * 直接调用 route-engine + intent-classifier + disambiguation 验证
 */
const path = require('path');
const fs = require('fs');
const ROOT = path.join(__dirname, '..', '..');

// 加载核心模块
const routeEngine = require(path.join(ROOT, 'scripts', 'route-engine.js'));
const intentClassifier = require(path.join(ROOT, 'scripts', 'intent-classifier.js'));

const cwd = process.cwd();
let passed = 0, failed = 0;

function test(name, prompt, expectPrimary, opts = {}) {
  const intent = intentClassifier.classify ? intentClassifier.classify(prompt) : { intents: [], entities: [], modifiers: [], complexity: 'medium' };
  const result = routeEngine.runRouteEngine(prompt, cwd, intent);

  const primary = result.primary;
  const confidence = result.confidence;
  const candidates = (result.candidates || []).slice(0, 5);
  const coldStart = result._coldStartApplied || false;
  const firedRules = (result._firedRules || []).map(r => r.id || r.rule || '').filter(Boolean);

  // 检查是否命中期望 skill (主路由或 top-3 候选)
  const top3Names = candidates.slice(0, 3).map(c => c.name);
  const isPrimaryHit = primary === expectPrimary;
  const isTop3Hit = top3Names.includes(expectPrimary);
  const hit = isPrimaryHit || (opts.allowTop3 && isTop3Hit);

  const status = hit ? 'PASS' : 'FAIL';
  if (hit) passed++; else failed++;

  console.log(`\n[${status}] ${name}`);
  console.log(`  prompt:     "${prompt}"`);
  console.log(`  expect:     ${expectPrimary}`);
  console.log(`  got:        ${primary} (cf: ${confidence})`);
  console.log(`  top-3:      ${top3Names.join(', ')}`);
  console.log(`  rules:      ${firedRules.length > 0 ? firedRules.join(', ') : '(none)'}`);
  console.log(`  coldStart:  ${coldStart}`);
  if (opts.checkCap && coldStart) {
    const capApplied = confidence <= 0.65;
    console.log(`  cap@0.65:   ${capApplied ? 'YES' : 'NO (BUG!)'}`);
  }
  if (!hit) {
    console.log(`  ** MISMATCH: expected ${expectPrimary}, got ${primary}`);
    if (isTop3Hit) console.log(`  ** (但在 top-3 候选中)`);
  }
}

console.log('=== Bookworm v6.6.1 Route Regression Test ===\n');

// TC1: R90 sre-expert
test('TC1: SLI 监控告警 → sre-expert (R90)',
  'SLI 监控告警配置',
  'sre-expert');

// TC2: R91 impact-analyst
test('TC2: 函数影响分析 → impact-analyst (R91)',
  '改这个函数会影响哪些模块',
  'impact-analyst');

// TC3: R92 data-analyst-expert
test('TC3: Google Sheets 数据分析 → data-analyst-expert (R92)',
  '从 Google Sheets 分析销售数据',
  'data-analyst-expert');

// TC4: 确认词 "执行" — 路由引擎层面应该是低置信度/none (继承在 bundle 层处理)
// 这里验证路由引擎不会错误地高置信度命中无关 skill
test('TC4: 确认词 "执行" (路由引擎层)',
  '执行',
  'none',
  { allowTop3: true }); // 路由引擎对单字返回 none 或低置信度是正确行为

// TC5: 图片查询 — 路由引擎层面应该返回 none (继承在 bundle 层处理)
test('TC5: 图片查询 (路由引擎层)',
  '[Image #1] 看看这个报错',
  'debugger-expert',
  { allowTop3: true }); // 图片+附带文字可能有语义命中

// === 补充: 冷启动 cap 验证 ===
// 运行一个会触发冷启动的查询，检查 cap 是否生效
console.log('\n--- 补充: 冷启动 cap 机制验证 ---');
const csResult = routeEngine.runRouteEngine('帮我检查一下系统健康状态', cwd,
  { intents: ['general'], entities: [], modifiers: [], complexity: 'medium' });
const csApplied = csResult._coldStartApplied || false;
const csConf = csResult.confidence;
if (csApplied && csResult.candidates && csResult.candidates.length >= 2) {
  const gap = (csResult.candidates[0]?.confidence || 0) - (csResult.candidates[1]?.confidence || 0);
  console.log(`  coldStart: true, gap: ${gap.toFixed(3)}, confidence: ${csConf}`);
  if (gap < 0.15 && csConf <= 0.65) {
    console.log('  [PASS] cap 在 route-engine 层生效');
    passed++;
  } else if (gap >= 0.15) {
    console.log('  [SKIP] gap >= 0.15, cap 不需要触发');
  } else {
    console.log('  [FAIL] cap 应为 0.65 但实际为 ' + csConf);
    failed++;
  }
} else {
  console.log(`  coldStart: ${csApplied}, confidence: ${csConf} — cap 验证跳过`);
}

// === TC4/TC5 继承逻辑验证 (模拟 bundle 层) ===
console.log('\n--- 补充: 继承逻辑模拟验证 ---');

// 模拟 route-state-current.json 中有有效上一轮路由
const mockPrevState = {
  ts: new Date().toISOString(),
  routing: {
    primary: 'debugger-expert',
    candidates: [{ name: 'debugger-expert', confidence: 0.85 }],
    confidence: 0.85,
    chain: [],
    lastValidPrimary: 'debugger-expert',
  },
  lastValidPrimary: 'debugger-expert',
};

// TC4-inherit: 确认词继承
const confirmWords = ['执行', '开始', '继续', '确认', '好的', '行', '可以', 'go', 'yes', 'proceed', 'ok'];
const tc4prompt = '执行';
const isConfirm = confirmWords.some(w => tc4prompt.includes(w));
if (isConfirm) {
  console.log(`  [PASS] TC4-inherit: "${tc4prompt}" 匹配确认词列表, bundle 层会触发 tryInherit()`);
  console.log(`    → 继承结果: ${mockPrevState.routing.primary} (cf: ${(mockPrevState.routing.confidence * 0.7).toFixed(2)})`);
  passed++;
} else {
  console.log(`  [FAIL] TC4-inherit: "${tc4prompt}" 未匹配确认词`);
  failed++;
}

// TC5-inherit: 图片继承 via lastValidPrimary
const tc5prompt = '[Image #1] 看看这个报错';
const isImage = /\[Image\s*#?\d+\]/.test(tc5prompt);
if (isImage) {
  const lvp = mockPrevState.lastValidPrimary || (mockPrevState.routing && mockPrevState.routing.lastValidPrimary);
  if (lvp && lvp !== 'none') {
    console.log(`  [PASS] TC5-inherit: 图片检测 + lastValidPrimary="${lvp}" → 继承成功`);
    passed++;
  } else {
    console.log(`  [FAIL] TC5-inherit: 图片检测成功但 lastValidPrimary 为空`);
    failed++;
  }
} else {
  console.log(`  [FAIL] TC5-inherit: 未检测到图片模式`);
  failed++;
}

// === 总结 ===
console.log(`\n${'='.repeat(50)}`);
console.log(`TOTAL: ${passed + failed} tests, ${passed} PASS, ${failed} FAIL`);
console.log(`VERDICT: ${failed === 0 ? 'ALL PASS ✓' : `${failed} FAILURES ✗`}`);
process.exit(failed > 0 ? 1 : 0);
fix: strip session-continuity-mcp hooks from Portable template export.mjs now removes hooks referencing npm packages not included in the Portable distribution (session-continuity-mcp). Eliminates MODULE_NOT_FOUND errors on Portable installations. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> 2026-04-27 22:15:39 +08:00			`#!/usr/bin/env node`
			`'use strict';`
			`/**`
			`* v6.6.1 路由精度回归测试 — 5 测试用例`
			`* 直接调用 route-engine + intent-classifier + disambiguation 验证`
			`*/`
			`const path = require('path');`
			`const fs = require('fs');`
			`const ROOT = path.join(__dirname, '..', '..');`

			`// 加载核心模块`
			`const routeEngine = require(path.join(ROOT, 'scripts', 'route-engine.js'));`
			`const intentClassifier = require(path.join(ROOT, 'scripts', 'intent-classifier.js'));`

			`const cwd = process.cwd();`
			`let passed = 0, failed = 0;`

			`function test(name, prompt, expectPrimary, opts = {}) {`
			`const intent = intentClassifier.classify ? intentClassifier.classify(prompt) : { intents: [], entities: [], modifiers: [], complexity: 'medium' };`
			`const result = routeEngine.runRouteEngine(prompt, cwd, intent);`

			`const primary = result.primary;`
			`const confidence = result.confidence;`
			`const candidates = (result.candidates \|\| []).slice(0, 5);`
			`const coldStart = result._coldStartApplied \|\| false;`
			`const firedRules = (result._firedRules \|\| []).map(r => r.id \|\| r.rule \|\| '').filter(Boolean);`

			`// 检查是否命中期望 skill (主路由或 top-3 候选)`
			`const top3Names = candidates.slice(0, 3).map(c => c.name);`
			`const isPrimaryHit = primary === expectPrimary;`
			`const isTop3Hit = top3Names.includes(expectPrimary);`
			`const hit = isPrimaryHit \|\| (opts.allowTop3 && isTop3Hit);`

			`const status = hit ? 'PASS' : 'FAIL';`
			`if (hit) passed++; else failed++;`

			console.log(`\n[${status}] ${name}`);
			console.log(` prompt: "${prompt}"`);
			console.log(` expect: ${expectPrimary}`);
			console.log(` got: ${primary} (cf: ${confidence})`);
			console.log(` top-3: ${top3Names.join(', ')}`);
			console.log(` rules: ${firedRules.length > 0 ? firedRules.join(', ') : '(none)'}`);
			console.log(` coldStart: ${coldStart}`);
			`if (opts.checkCap && coldStart) {`
			`const capApplied = confidence <= 0.65;`
			console.log(` cap@0.65: ${capApplied ? 'YES' : 'NO (BUG!)'}`);
			`}`
			`if (!hit) {`
			console.log(` ** MISMATCH: expected ${expectPrimary}, got ${primary}`);
			if (isTop3Hit) console.log(` ** (但在 top-3 候选中)`);
			`}`
			`}`

			`console.log('=== Bookworm v6.6.1 Route Regression Test ===\n');`

			`// TC1: R90 sre-expert`
			`test('TC1: SLI 监控告警 → sre-expert (R90)',`
			`'SLI 监控告警配置',`
			`'sre-expert');`

			`// TC2: R91 impact-analyst`
			`test('TC2: 函数影响分析 → impact-analyst (R91)',`
			`'改这个函数会影响哪些模块',`
			`'impact-analyst');`

			`// TC3: R92 data-analyst-expert`
			`test('TC3: Google Sheets 数据分析 → data-analyst-expert (R92)',`
			`'从 Google Sheets 分析销售数据',`
			`'data-analyst-expert');`

			`// TC4: 确认词 "执行" — 路由引擎层面应该是低置信度/none (继承在 bundle 层处理)`
			`// 这里验证路由引擎不会错误地高置信度命中无关 skill`
			`test('TC4: 确认词 "执行" (路由引擎层)',`
			`'执行',`
			`'none',`
			`{ allowTop3: true }); // 路由引擎对单字返回 none 或低置信度是正确行为`

			`// TC5: 图片查询 — 路由引擎层面应该返回 none (继承在 bundle 层处理)`
			`test('TC5: 图片查询 (路由引擎层)',`
			`'[Image #1] 看看这个报错',`
			`'debugger-expert',`
			`{ allowTop3: true }); // 图片+附带文字可能有语义命中`

			`// === 补充: 冷启动 cap 验证 ===`
			`// 运行一个会触发冷启动的查询，检查 cap 是否生效`
			`console.log('\n--- 补充: 冷启动 cap 机制验证 ---');`
			`const csResult = routeEngine.runRouteEngine('帮我检查一下系统健康状态', cwd,`
			`{ intents: ['general'], entities: [], modifiers: [], complexity: 'medium' });`
			`const csApplied = csResult._coldStartApplied \|\| false;`
			`const csConf = csResult.confidence;`
			`if (csApplied && csResult.candidates && csResult.candidates.length >= 2) {`
			`const gap = (csResult.candidates[0]?.confidence \|\| 0) - (csResult.candidates[1]?.confidence \|\| 0);`
			console.log(` coldStart: true, gap: ${gap.toFixed(3)}, confidence: ${csConf}`);
			`if (gap < 0.15 && csConf <= 0.65) {`
			`console.log(' [PASS] cap 在 route-engine 层生效');`
			`passed++;`
			`} else if (gap >= 0.15) {`
			`console.log(' [SKIP] gap >= 0.15, cap 不需要触发');`
			`} else {`
			`console.log(' [FAIL] cap 应为 0.65 但实际为 ' + csConf);`
			`failed++;`
			`}`
			`} else {`
			console.log(` coldStart: ${csApplied}, confidence: ${csConf} — cap 验证跳过`);
			`}`

			`// === TC4/TC5 继承逻辑验证 (模拟 bundle 层) ===`
			`console.log('\n--- 补充: 继承逻辑模拟验证 ---');`

			`// 模拟 route-state-current.json 中有有效上一轮路由`
			`const mockPrevState = {`
			`ts: new Date().toISOString(),`
			`routing: {`
			`primary: 'debugger-expert',`
			`candidates: [{ name: 'debugger-expert', confidence: 0.85 }],`
			`confidence: 0.85,`
			`chain: [],`
			`lastValidPrimary: 'debugger-expert',`
			`},`
			`lastValidPrimary: 'debugger-expert',`
			`};`

			`// TC4-inherit: 确认词继承`
			`const confirmWords = ['执行', '开始', '继续', '确认', '好的', '行', '可以', 'go', 'yes', 'proceed', 'ok'];`
			`const tc4prompt = '执行';`
			`const isConfirm = confirmWords.some(w => tc4prompt.includes(w));`
			`if (isConfirm) {`
			console.log(` [PASS] TC4-inherit: "${tc4prompt}" 匹配确认词列表, bundle 层会触发 tryInherit()`);
			console.log(` → 继承结果: ${mockPrevState.routing.primary} (cf: ${(mockPrevState.routing.confidence * 0.7).toFixed(2)})`);
			`passed++;`
			`} else {`
			console.log(` [FAIL] TC4-inherit: "${tc4prompt}" 未匹配确认词`);
			`failed++;`
			`}`

			`// TC5-inherit: 图片继承 via lastValidPrimary`
			`const tc5prompt = '[Image #1] 看看这个报错';`
			`const isImage = /\[Image\s*#?\d+\]/.test(tc5prompt);`
			`if (isImage) {`
			`const lvp = mockPrevState.lastValidPrimary \|\| (mockPrevState.routing && mockPrevState.routing.lastValidPrimary);`
			`if (lvp && lvp !== 'none') {`
			console.log(` [PASS] TC5-inherit: 图片检测 + lastValidPrimary="${lvp}" → 继承成功`);
			`passed++;`
			`} else {`
			console.log(` [FAIL] TC5-inherit: 图片检测成功但 lastValidPrimary 为空`);
			`failed++;`
			`}`
			`} else {`
			console.log(` [FAIL] TC5-inherit: 未检测到图片模式`);
			`failed++;`
			`}`

			`// === 总结 ===`
			console.log(`\n${'='.repeat(50)}`);
			console.log(`TOTAL: ${passed + failed} tests, ${passed} PASS, ${failed} FAIL`);
			console.log(`VERDICT: ${failed === 0 ? 'ALL PASS ✓' : `${failed} FAILURES ✗`}`);
			`process.exit(failed > 0 ? 1 : 0);`