bookworm-smart-assistant/scripts/route-analyzer.js

1152 lines
42 KiB
JavaScript
Raw Permalink Normal View History

#!/usr/bin/env node
/**
* 路由置信度引擎
*
* 基于 skills-index.json 对输入文本做语义匹配
* 返回 top-K 技能及置信度分数
*
* 用法:
* node scripts/route-analyzer.js "React 组件性能优化"
* node scripts/route-analyzer.js --top 10 "部署到 k8s"
* node scripts/route-analyzer.js --json "写一个 REST API"
* node scripts/route-analyzer.js --log "调试内存泄漏"
*
* 置信度阈值:
* >= 0.8 高置信度 - 直接路由
* 0.5-0.8 中置信度 - 推荐候选
* < 0.5 低置信度 - fallback developer-expert
*/
const fs = require('fs');
const path = require('path');
// Resolve the root directory from the project-local ./paths.config.js (PATHS.root).
const detectClaudeRoot = () => require('./paths.config.js').PATHS.root;
const CLAUDE_ROOT = detectClaudeRoot();
// Skills index read by loadIndex().
const INDEX_FILE = path.join(CLAUDE_ROOT, 'skills-index.json');
// Directory that receives the route audit logs and holds the learned-weights file.
const DEBUG_DIR = path.join(CLAUDE_ROOT, 'debug');
// Learned keyword-weight deltas read by loadLearnedWeights().
const WEIGHTS_FILE = path.join(DEBUG_DIR, 'route-weights.json');
// === Argument parsing (deferred to main(); only validated when the script is run directly) ===
let args, jsonMode, logMode, topK, query;

/**
 * Parse process.argv into the module-level CLI state.
 *
 * Sets:
 *   args     - raw arguments after the script path
 *   jsonMode - true when `--json` is present
 *   logMode  - true when `--log` is present
 *   topK     - positive integer following `--top`; 5 when absent or invalid
 *   query    - remaining non-flag arguments joined with single spaces
 *
 * The argument directly after `--top` is always consumed as its value (by
 * position), so a query word that happens to equal that value is kept.
 */
function parseArgs() {
  args = process.argv.slice(2);
  jsonMode = args.includes('--json');
  logMode = args.includes('--log');

  const topIdx = args.indexOf('--top');
  const topValueIdx = topIdx >= 0 ? topIdx + 1 : -1;
  const requestedTop = topValueIdx >= 0 ? Number.parseInt(args[topValueIdx], 10) : NaN;
  // Reject NaN, zero and negatives: a negative topK would make slice(0, topK)
  // silently drop results from the end instead of limiting them.
  topK = Number.isInteger(requestedTop) && requestedTop > 0 ? requestedTop : 5;

  query = args
    .filter((arg, i) => i !== topValueIdx && !arg.startsWith('--'))
    .join(' ');
}
// === Index loading ===
/**
 * Read and parse the skills index stored at INDEX_FILE.
 * When the file does not exist, prints a regeneration hint to stderr and
 * exits the process with status 1.
 *
 * @returns {Object} parsed JSON content of the index file
 */
function loadIndex() {
  const indexPresent = fs.existsSync(INDEX_FILE);
  if (!indexPresent) {
    console.error('skills-index.json not found. Run: node scripts/generate-skill-index.js');
    process.exit(1);
  }
  const rawJson = fs.readFileSync(INDEX_FILE, 'utf8');
  return JSON.parse(rawJson);
}
// === Text normalisation ===
// P0-FIX: build the set of known Chinese keywords from the lite skills index
let _knownCnKwCache = null;

/**
 * Return the cached set of lowercase keywords containing at least one CJK
 * ideograph (U+4E00–U+9FFF), read from skills-index-lite.json under
 * CLAUDE_ROOT. Keyword entries may be plain strings or objects with a
 * `keyword` field. Any read/parse failure is swallowed and leaves the set
 * with whatever was collected so far (possibly empty).
 *
 * @returns {Set<string>} the shared cache instance
 */
function _getKnownCnKeywords() {
  if (_knownCnKwCache) return _knownCnKwCache;

  const collected = new Set();
  _knownCnKwCache = collected;
  const containsCjk = /[\u4e00-\u9fff]/;

  try {
    const litePath = path.join(CLAUDE_ROOT, 'skills-index-lite.json');
    const liteIndex = JSON.parse(fs.readFileSync(litePath, 'utf8'));
    const skillList = liteIndex.skills || [];
    for (const entry of skillList) {
      const keywordList = entry.keywords || [];
      for (const item of keywordList) {
        const lowered = (item.keyword || item).toLowerCase();
        if (containsCjk.test(lowered)) {
          collected.add(lowered);
        }
      }
    }
  } catch {}

  return _knownCnKwCache;
}
// P1-FIX: negation detection — the 1-3 words following a negation word are marked as negated
const NEGATION_WORDS = new Set([
  '不用', '不要', '不是', '别用', '除了', '排除', '去掉', '不需要', '不使用',
  'without', 'except', 'exclude', 'no', 'not', 'dont', "don\'t", 'remove'
]);

// High-frequency compound words matched as whole tokens (reduces sliding-window noise).
// Hoisted to module level so the set is built once instead of on every tokenize() call.
const TOKENIZER_COMPOUND_WORDS = new Set([
  "数据库", "服务器", "微服务", "框架设计", "项目管理",
  "接口设计", "单元测试", "集成测试", "性能优化", "代码审查",
  "架构设计", "版本控制", "持续集成", "持续部署", "负载均衡",
  "消息队列", "缓存策略", "安全审计", "权限管理", "日志分析",
  "容器化", "虚拟化", "自动化", "可视化", "模块化",
  "数据分析", "机器学习", "深度学习", "自然语言", "搜索引擎",
  "前端开发", "后端开发", "全栈开发", "移动开发", "跨平台",
  "状态管理", "路由设计", "组件开发", "响应式", "渐进式",
  "大语言模型", "向量数据库", "知识图谱", "推荐系统",
]);

/**
 * Turn a free-text query into a set of lowercase match tokens.
 *
 * Token sources:
 *   - known compound words contained in the text;
 *   - every 2-4 character window of each run of CJK ideographs;
 *   - English phrases of up to three words plus their individual words
 *     (length >= 2), split on whitespace, '.' and '-'.
 *
 * The set is then passed through `expandSynonymsWeighted` from
 * ./synonym-expander.js when that module can be loaded; otherwise the raw
 * token set is returned.
 *
 * Two non-enumerable properties are attached to the returned set:
 *   - `_negatedTokens`  {Set<string>} words found within three positions after
 *     a negation word (text split on whitespace and ','). Each word is stored
 *     both verbatim and with leading/trailing punctuation stripped, so that
 *     "without docker." marks the keyword "docker" as negated.
 *   - `_synonymWeights` the weights object returned by the expander, or null.
 *
 * @param {string} text - query text; falsy values are treated as ''. Only the
 *   first 2000 characters are considered.
 * @returns {Set<string>} token set (possibly synonym-expanded)
 */
function tokenize(text) {
  // P2: cap very long input to avoid performance degradation
  text = (text || '').slice(0, 2000);
  const tokens = new Set();
  const textLower = text.toLowerCase();

  for (const compound of TOKENIZER_COMPOUND_WORDS) {
    if (textLower.includes(compound)) tokens.add(compound);
  }

  // Chinese: 2-4 character fragments (sliding window)
  const cnChunks = text.match(/[\u4e00-\u9fff]+/g) || [];
  for (const chunk of cnChunks) {
    for (let len = 2; len <= Math.min(4, chunk.length); len++) {
      for (let i = 0; i <= chunk.length - len; i++) {
        tokens.add(chunk.slice(i, i + len).toLowerCase());
      }
    }
  }

  // English: whole phrases (up to 3 words) plus their individual words
  const enPhrases = text.match(/[A-Za-z][\w.-]*(?:\s+[A-Za-z][\w.-]*){0,2}/g) || [];
  for (const phrase of enPhrases) {
    tokens.add(phrase.toLowerCase().trim());
    for (const single of phrase.split(/[\s.-]+/)) {
      if (single.length >= 2) tokens.add(single.toLowerCase());
    }
  }

  // v4.9→v6.4: weighted synonym expansion (best effort: fall back to the raw
  // tokens when the expander module is unavailable or throws)
  let expanded;
  let synonymWeights = null;
  try {
    const { expandSynonymsWeighted } = require('./synonym-expander.js');
    const result = expandSynonymsWeighted(tokens);
    expanded = result.expanded;
    synonymWeights = result.weights;
  } catch {
    expanded = tokens;
  }

  // Negation detection: mark up to three words following a negation word.
  const negatedTokens = new Set();
  const edgePunctuation = /^[^\p{L}\p{N}]+|[^\p{L}\p{N}]+$/gu;
  const words = textLower.split(/[\s,]+/);
  for (let i = 0; i < words.length; i++) {
    if (!NEGATION_WORDS.has(words[i])) continue;
    for (let j = 1; j <= 3 && i + j < words.length; j++) {
      const word = words[i + j];
      if (word.length >= 2) negatedTokens.add(word);
      // Also record the word without surrounding punctuation; otherwise
      // "docker." or "(redux)" would never equal a skill keyword.
      const stripped = word.replace(edgePunctuation, '');
      if (stripped.length >= 2) negatedTokens.add(stripped);
    }
  }

  Object.defineProperty(expanded, '_negatedTokens', {
    value: negatedTokens, enumerable: false, configurable: true
  });
  // v6.4: attach the synonym weight mapping for scoreSkill
  Object.defineProperty(expanded, '_synonymWeights', {
    value: synonymWeights, enumerable: false, configurable: true
  });
  return expanded;
}
// === BM25 parameter construction (v5.8 P1-A: per-domain IDF normalisation) ===
const IDF_FLOOR = Math.log(2); // ≈0.693; lower bound applied to IDF values so they do not approach zero
// Load the domain mapping (reverse index: skill name → domain name)
let _domainMap = null;

/**
 * Return the cached skill → domain lookup built from
 * scripts/skill-domain-map.json under CLAUDE_ROOT. The file is expected to
 * carry a `domains` object whose values are lists of skill names. A missing
 * file or any read/parse error yields an empty map.
 *
 * @returns {Map<string, string>} skill name → domain name
 */
function loadDomainMap() {
  if (_domainMap) return _domainMap;

  let reverseIndex = null;
  try {
    const mapFile = path.join(CLAUDE_ROOT, 'scripts', 'skill-domain-map.json');
    if (fs.existsSync(mapFile)) {
      const parsed = JSON.parse(fs.readFileSync(mapFile, 'utf8'));
      const building = new Map();
      const domainEntries = Object.entries(parsed.domains || {});
      for (const [domainName, members] of domainEntries) {
        for (const member of members) {
          building.set(member, domainName);
        }
      }
      reverseIndex = building;
    }
  } catch {}

  _domainMap = reverseIndex === null ? new Map() : reverseIndex;
  return _domainMap;
}
/**
 * Derive the corpus statistics used by BM25 scoring from the skills index.
 * Each skill is treated as a document whose length is its keyword count.
 *
 * @param {Object} index - skills index; `index.skills` entries expose `name`
 *   and `keywords` (objects with a `keyword` string)
 * @returns {{N: number, avgdl: number, idf: Map, df: Map, domainIdf: Map, domainMap: Map, domainGroups: Map}}
 *   N            number of skills
 *   avgdl        average keyword count per skill (1 when there are no skills)
 *   df / idf     global document frequency / floored IDF per lowercase keyword
 *   domainIdf    domain → (keyword → floored IDF computed inside that domain)
 *   domainMap    skill name → domain, as returned by loadDomainMap()
 *   domainGroups domain → skills; skills without a mapping go to '_global'
 */
function buildBM25Params(index) {
  const skills = index.skills || [];
  const N = skills.length;

  // Count, per lowercase keyword, how many documents contain it at least once.
  const countDocFreq = (docs) => {
    const freq = new Map();
    for (const doc of docs) {
      const seenInDoc = new Set();
      for (const { keyword } of (doc.keywords || [])) {
        const kw = keyword.toLowerCase();
        if (seenInDoc.has(kw)) continue;
        seenInDoc.add(kw);
        freq.set(kw, (freq.get(kw) || 0) + 1);
      }
    }
    return freq;
  };

  // Convert a DF map into an IDF map, clamped from below by IDF_FLOOR.
  const toIdf = (freq, docCount) => {
    const out = new Map();
    for (const [kw, docFreq] of freq) {
      const rawIdf = Math.log((docCount - docFreq + 0.5) / (docFreq + 0.5) + 1);
      out.set(kw, Math.max(rawIdf, IDF_FLOOR));
    }
    return out;
  };

  // Average document length (keyword count)
  let totalDl = 0;
  for (const skill of skills) {
    totalDl += (skill.keywords || []).length;
  }
  const avgdl = N > 0 ? totalDl / N : 1;

  // Group skills by domain
  const domainMap = loadDomainMap();
  const domainGroups = new Map();
  for (const skill of skills) {
    const domain = domainMap.get(skill.name) || '_global';
    const members = domainGroups.get(domain);
    if (members) {
      members.push(skill);
    } else {
      domainGroups.set(domain, [skill]);
    }
  }

  // Global statistics
  const df = countDocFreq(skills);
  const idf = toIdf(df, N);

  // Domain-local statistics
  const domainIdf = new Map();
  for (const [domain, members] of domainGroups) {
    domainIdf.set(domain, toIdf(countDocFreq(members), members.length));
  }

  return { N, avgdl, idf, df, domainIdf, domainMap, domainGroups };
}
/**
 * BM25 score of a single matched term.
 *
 *   score = idf * tf * (k1 + 1) / (tf + k1 * (1 - b + b * dl / avgdl))
 *
 * @param {number} tf - term frequency (here: the match weight)
 * @param {number} idf - inverse document frequency
 * @param {number} dl - document length (keyword count of the skill)
 * @param {number} avgdl - average document length; when it is not a positive
 *   number, length normalisation is neutralised (treated as dl === avgdl)
 * @param {number} [k1=1.5] - term-frequency saturation parameter
 * @param {number} [b=0.75] - length-normalisation parameter
 * @returns {number} BM25 score
 */
function computeBM25Score(tf, idf, dl, avgdl, k1 = 1.5, b = 0.75) {
  // With avgdl <= 0 (e.g. every skill has no keywords) dl / avgdl would be
  // NaN or Infinity and poison the total; use the neutral ratio 1 instead.
  const lengthNorm = avgdl > 0 ? b * dl / avgdl : b;
  const numerator = tf * (k1 + 1);
  const denominator = tf + k1 * (1 - b + lengthNorm);
  return idf * numerator / denominator;
}
// === Cold-start protection (v5.8 P0-A) ===
const COLD_START_THRESHOLD = 30; // skills routed fewer times than this count as cold-start
const COLD_START_MAX_BOOST = 0.08; // largest cold-start boost (kept low to avoid rank flips)
const EPSILON_EXPLORE = 0.10; // exploration probability for epsilon-greedy selection

/**
 * Give rarely-routed skills a linearly decaying score boost.
 *
 *   boost = COLD_START_MAX_BOOST * (1 - routeCount / COLD_START_THRESHOLD)
 *
 * The boost is scaled by the score currently held by results[0] and added to
 * the skill's score. Entries with score <= 0 or with routeCount at or above
 * the threshold are left alone. Two caps apply:
 *   1. a non-disambiguated skill may not exceed the first disambiguated entry
 *      (it is set to 98% of that entry's score);
 *   2. otherwise, a skill other than the incoming top-1 may not exceed it
 *      (it is set to 99% of the top-1 score).
 * Entries are mutated in place and the array is re-sorted by descending score.
 *
 * @param {Array} results - scored results, sorted by descending score
 * @param {Map<string, number>} routeStats - skill name → number of routes
 * @returns {{ results: Array, boostedSkills: string[] }}
 */
function applyColdStartBoost(results, routeStats) {
  const boostedSkills = [];
  // Protection baselines: the disambiguation winner when present, and the incoming top-1
  const disambTop = results.find((entry) => entry.disambiguated);
  const originalTop = results[0];

  for (let idx = 0; idx < results.length; idx++) {
    const entry = results[idx];
    if (entry.score <= 0) continue; // only boost skills that already have a base score

    const routed = routeStats.get(entry.name) || 0;
    if (!(routed < COLD_START_THRESHOLD)) continue;

    const boost = COLD_START_MAX_BOOST * (1 - routed / COLD_START_THRESHOLD);
    const topScore = results[0]?.score || 1;
    const boostedScore = entry.score + boost * topScore;

    const overDisambTop = Boolean(disambTop) && !entry.disambiguated && boostedScore > disambTop.score;
    const overOriginalTop = !overDisambTop
      && Boolean(originalTop) && entry !== originalTop && boostedScore > originalTop.score;

    if (overDisambTop) {
      entry.score = disambTop.score * 0.98;
      entry.coldStartCapped = true;
    } else if (overOriginalTop) {
      entry.score = originalTop.score * 0.99;
      entry.coldStartCapped = true;
    } else {
      entry.score = boostedScore;
    }
    entry.coldStartBoost = boost;
    boostedSkills.push(entry.name);
  }

  results.sort((left, right) => right.score - left.score);
  return { results, boostedSkills };
}
/**
 * Epsilon-greedy exploration over a ranked candidate list.
 *
 * With probability `epsilon` a random candidate from positions 2..5 (never
 * the top-1) is returned; otherwise the top-1 is returned. Lists with fewer
 * than two entries always yield their first entry (or null).
 *
 * Per the original notes this complements route-ab-test.js (Thompson
 * Sampling), which only triggers when the top-2 confidence gap is small.
 *
 * @param {Array} candidates - ranked top-K candidates
 * @param {number} epsilon - exploration probability
 * @returns {{ selected: Object, explored: boolean }}
 */
function epsilonGreedySelect(candidates, epsilon = EPSILON_EXPLORE) {
  const tooFew = !candidates || candidates.length < 2;
  if (tooFew) {
    const only = candidates?.[0];
    return { selected: only || null, explored: false };
  }

  const shouldExplore = Math.random() < epsilon;
  if (shouldExplore) {
    // Pick uniformly among the runners-up inside the top five
    const upperBound = Math.min(5, candidates.length);
    const pool = candidates.slice(1, upperBound);
    if (pool.length > 0) {
      const pick = Math.floor(Math.random() * pool.length);
      return { selected: pool[pick], explored: true };
    }
  }

  return { selected: candidates[0], explored: false };
}
// === Learned weights loading ===
let _learnedWeights = null;

/**
 * Return the learned routing weights parsed from WEIGHTS_FILE, caching the
 * result for later calls. When the file is missing, unreadable or not valid
 * JSON, an empty object is cached and returned instead.
 *
 * @returns {Object} parsed weights (callers read its optional `deltas` field)
 */
function loadLearnedWeights() {
  if (_learnedWeights !== null) return _learnedWeights;

  let parsed;
  let loadedFromDisk = false;
  try {
    if (fs.existsSync(WEIGHTS_FILE)) {
      const rawJson = fs.readFileSync(WEIGHTS_FILE, 'utf8');
      parsed = JSON.parse(rawJson);
      loadedFromDisk = true;
    }
  } catch {}

  _learnedWeights = loadedFromDisk ? parsed : {};
  return _learnedWeights;
}
// === Legacy scoring (backward compatibility) ===
/**
 * Pre-BM25 scoring: sums keyword weights for keywords matched by the query.
 *
 * Each keyword weight is shifted by the learned delta for this skill (if any)
 * and floored at 0.1. An exact token match contributes the full weight; a
 * containment match (a token of length >= 3 inside the keyword, or a keyword
 * of length >= 3 inside a token) contributes 60% of it, counted once per
 * keyword.
 *
 * @param {Object} skill - skill entry with `name` and `keywords` ({keyword, weight})
 * @param {Set<string>} queryTokens - lowercase query tokens
 * @returns {{ totalScore: number, matchedKeywords: Array }}
 */
function legacyScoreSkill(skill, queryTokens) {
  const matchedKeywords = [];
  const learned = loadLearnedWeights();
  const skillDeltas = (learned.deltas || {})[skill.name] || {};
  let totalScore = 0;

  for (const { keyword, weight } of skill.keywords) {
    const kwLower = keyword.toLowerCase();
    const adjustedWeight = Math.max(0.1, weight + (skillDeltas[kwLower] || 0));

    if (queryTokens.has(kwLower)) {
      totalScore += adjustedWeight;
      matchedKeywords.push({ keyword, weight: adjustedWeight, matchType: 'exact' });
      continue;
    }

    for (const token of queryTokens) {
      const tokenInsideKeyword = token.length >= 3 && kwLower.includes(token);
      const keywordInsideToken = kwLower.length >= 3 && token.includes(kwLower);
      if (tokenInsideKeyword || keywordInsideToken) {
        totalScore += adjustedWeight * 0.6;
        matchedKeywords.push({ keyword, weight: adjustedWeight * 0.6, matchType: 'partial' });
        break;
      }
    }
  }

  return { totalScore, matchedKeywords };
}
// === BM25 match scoring (v5.8 P1-A: blended IDF) ===
const IDF_BLEND_GLOBAL = 0.4; // weight of the global IDF
const IDF_BLEND_DOMAIN = 0.6; // weight of the domain-local IDF

/**
 * Score one skill against the query tokens with BM25.
 *
 * Per keyword:
 *   - term weight = (tfidfWeight || weight) + learned delta, floored at 0.1;
 *   - IDF: when the keyword carries a precomputed `tfidfWeight`, a correction
 *     factor blended/global (floored at 0.5) is used in place of the IDF;
 *     otherwise the blended IDF itself (floored at IDF_FLOOR);
 *   - an exact token match scores the full weight; a containment match (token
 *     of length >= 3 inside the keyword, or keyword of length >= 3 inside a
 *     token) scores 60% of it, once per keyword;
 *   - the match is scaled by the synonym weight of the matched token, if any;
 *   - a negated match is multiplied by -0.3 and reported as 'negated'.
 *
 * Skill-level adjustments, applied only to positive totals: `skill.coldPenalty`
 * multiplier, and an 80/dl multiplier (floor 0.1) for skills with
 * maturity 'unknown' and more than 80 keywords.
 *
 * @param {Object} skill - skill entry (`name`, `keywords`, optional `coldPenalty`, `maturity`)
 * @param {Set<string>} queryTokens - tokens from tokenize(); `_negatedTokens`
 *   and `_synonymWeights` are read when present
 * @param {Object} [bm25Params] - result of buildBM25Params(); when falsy the
 *   legacy scorer is used
 * @param {Map} [invertedIndex] - accepted for call compatibility; not used for
 *   scoring (the set previously derived from it was never read)
 * @returns {{ totalScore: number, matchedKeywords: Array }}
 */
function scoreSkill(skill, queryTokens, bm25Params, invertedIndex) {
  // Backward compatibility: fall back to the legacy logic without BM25 parameters
  if (!bm25Params) return legacyScoreSkill(skill, queryTokens);

  let totalScore = 0;
  const matchedKeywords = [];
  // v6.4 MEDIUM-3: negated tokens get a reverse penalty
  const negatedTokens = queryTokens._negatedTokens || new Set();
  // v6.4 MEDIUM-4: synonym matches are attenuated by their weight
  const synonymWeights = queryTokens._synonymWeights || null;
  const weights = loadLearnedWeights();
  const skillDeltas = (weights.deltas || {})[skill.name] || {};
  // Guard once so iteration agrees with the document-length computation
  const keywords = skill.keywords || [];
  const dl = keywords.length;
  const { avgdl, idf: idfMap, domainIdf, domainMap } = bm25Params;

  // Local IDF table of the domain this skill belongs to
  const skillDomain = domainMap ? (domainMap.get(skill.name) || '_global') : '_global';
  const localIdfMap = (domainIdf && domainIdf.get(skillDomain)) || null;

  // Shared by exact and containment matches: BM25 plus synonym/negation scaling
  const scoreMatch = (tf, kwIDF, matchedToken) => {
    let bm25 = computeBM25Score(tf, kwIDF, dl, avgdl);
    if (synonymWeights && synonymWeights.has(matchedToken)) {
      bm25 *= synonymWeights.get(matchedToken);
    }
    if (negatedTokens.has(matchedToken)) {
      bm25 *= -0.3;
    }
    return bm25;
  };

  for (const kwEntry of keywords) {
    const kwLower = kwEntry.keyword.toLowerCase();
    const delta = skillDeltas[kwLower] || 0;
    // Prefer tfidfWeight when available, otherwise fall back to weight
    const baseWeight = kwEntry.tfidfWeight || kwEntry.weight;
    const adjustedWeight = Math.max(0.1, baseWeight + delta);

    // v5.8: blended IDF — global × 0.4 + domain-local × 0.6
    let kwIDF;
    if (kwEntry.tfidfWeight) {
      // tfidfWeight already contains a build-time IDF, so apply a correction
      // factor (blended / global) rather than multiplying by the IDF again.
      const globalIdf = idfMap.get(kwLower) || IDF_FLOOR;
      const localIdf = (localIdfMap && localIdfMap.get(kwLower)) || globalIdf;
      const blendedIdf = globalIdf * IDF_BLEND_GLOBAL + localIdf * IDF_BLEND_DOMAIN;
      const correction = globalIdf > 0 ? blendedIdf / globalIdf : 1;
      // V10 fix: allow correction factors below 1 (floor 0.5)
      kwIDF = Math.max(correction, 0.5);
    } else {
      // No precomputed weight: use the blended IDF directly
      const globalIdf = idfMap.get(kwLower) || 0;
      const localIdf = (localIdfMap && localIdfMap.get(kwLower)) || globalIdf;
      kwIDF = globalIdf * IDF_BLEND_GLOBAL + localIdf * IDF_BLEND_DOMAIN;
      kwIDF = Math.max(kwIDF, IDF_FLOOR);
    }

    // Exact match
    if (queryTokens.has(kwLower)) {
      const bm25 = scoreMatch(adjustedWeight, kwIDF, kwLower);
      totalScore += bm25;
      matchedKeywords.push({
        keyword: kwEntry.keyword,
        weight: bm25,
        matchType: negatedTokens.has(kwLower) ? 'negated' : 'exact',
      });
      continue;
    }

    // Containment match (discount 0.6); the first matching token wins
    for (const token of queryTokens) {
      const tokenInsideKeyword = token.length >= 3 && kwLower.includes(token);
      const keywordInsideToken = kwLower.length >= 3 && token.includes(kwLower);
      if (!tokenInsideKeyword && !keywordInsideToken) continue;

      const bm25 = scoreMatch(adjustedWeight * 0.6, kwIDF, token);
      totalScore += bm25;
      matchedKeywords.push({
        keyword: kwEntry.keyword,
        weight: bm25,
        matchType: negatedTokens.has(token) ? 'negated' : 'partial',
      });
      break;
    }
  }

  // v5.9.1: down-weight long-dormant skills (coldPenalty is set by generate-skill-index)
  if (skill.coldPenalty && totalScore > 0) {
    totalScore *= skill.coldPenalty;
  }

  // v6.3: down-weight skills with oversized keyword lists so that generic
  // keywords do not drown out specialised skills.
  if (skill.maturity === 'unknown' && dl > 80 && totalScore > 0) {
    // Inverse-proportional decay: 80 kw → 1.0, 120 kw → 0.67, 200 kw → 0.4, floor 0.1
    const penalty = Math.max(0.1, 80 / dl);
    totalScore *= penalty;
  }

  return { totalScore, matchedKeywords };
}
// === Context-aware fusion scoring (v5.0) ===
/**
 * Linear fusion of the BM25 score with context, project and workflow signals.
 *
 * Weights: BM25 0.6, context 0.2, project 0.1, workflow 0.1. The three
 * non-BM25 signals are first scaled by a fixed base of 5.0 so that they
 * contribute independently of the BM25 magnitude.
 *
 * @deprecated CLI fallback only. Use route-interceptor-bundle fusion weights.
 * @param {number} bm25Score - raw BM25 score
 * @param {number} contextScore - context score (0~1)
 * @param {number} projectBoost - project-type boost (>= 0)
 * @param {number} workflowScore - workflow prediction score (0~1)
 * @returns {number} fused score
 */
function contextAwareScore(bm25Score, contextScore, projectBoost, workflowScore) {
  const CTX_BASE = 5.0;
  const lexicalPart = bm25Score * 0.6;
  const contextPart = contextScore * CTX_BASE * 0.2;
  const projectPart = projectBoost * CTX_BASE * 0.1;
  const workflowPart = workflowScore * CTX_BASE * 0.1;
  return lexicalPart + contextPart + projectPart + workflowPart;
}
// === Top-k reranking (v5.8 P1-B) ===
/**
 * Re-rank the first `k` results after the initial BM25 pass.
 *
 * Each windowed result's score is multiplied by
 *   1 + jaccard * 0.3 + tierRatio * 0.2
 * where
 *   - jaccard   = |query tokens ∩ core keywords| /
 *                 (|core keywords| + number of query tokens not among the skill's keywords)
 *   - tierRatio = share of the result's matchedKeywords that are core keywords.
 * Both signals are >= 0, so the multiplier is never below 1.
 *
 * Special cases:
 *   - a disambiguation winner (`disambiguated` and no `_arbitratedBy`) is
 *     flagged `_rerankProtected`;
 *   - an arbitration loser (`_arbitratedBy` set) has its multiplier capped at
 *     1.0, i.e. it keeps its score and gets no rerank boost;
 *   - afterwards any windowed result scoring above the winner is set to 98%
 *     of the winner's score and flagged `_rerankCapped`.
 *
 * Results are mutated in place. Entries whose name is not in the index are
 * left untouched. No top-1/top-2 gap adjustment is performed here.
 *
 * @param {Array} results - results sorted by BM25 (with matchedKeywords)
 * @param {Set} queryTokens - query tokens
 * @param {Object} index - skills index
 * @param {number} k - rerank window size (default 10)
 * @returns {Array} re-ranked window followed by the untouched remainder
 */
function rerankTopK(results, queryTokens, index, k = 10) {
  if (!results || results.length < 2 || !queryTokens) return results;
  const windowed = results.slice(0, k);
  const rest = results.slice(k);

  // Keyword lookup tables, built only for skills that are inside the window
  // (the rest of the index is never consulted below).
  const windowNames = new Set(windowed.map((r) => r.name));
  const skillCoreKws = new Map();
  for (const skill of (index.skills || [])) {
    if (!windowNames.has(skill.name)) continue;
    const coreSet = new Set();
    const allSet = new Set();
    for (const kw of (skill.keywords || [])) {
      const kwLower = kw.keyword.toLowerCase();
      allSet.add(kwLower);
      if (kw.tier === 'core') coreSet.add(kwLower);
    }
    skillCoreKws.set(skill.name, { core: coreSet, all: allSet });
  }

  for (const r of windowed) {
    const kwData = skillCoreKws.get(r.name);
    if (!kwData) continue;

    // Signal 1: Jaccard-style overlap (query vs. the skill's core keywords)
    let intersect = 0;
    let unionSize = kwData.core.size;
    for (const token of queryTokens) {
      if (kwData.core.has(token)) intersect++;
      if (!kwData.all.has(token)) unionSize++;
    }
    const jaccard = unionSize > 0 ? intersect / unionSize : 0;

    // Signal 2: share of core keywords among the matched keywords
    const matched = r.matchedKeywords || [];
    let coreMatches = 0;
    for (const mk of matched) {
      if (kwData.core.has(mk.keyword.toLowerCase())) coreMatches++;
    }
    const tierRatio = matched.length > 0 ? coreMatches / matched.length : 0;

    /* L1c-RERANK-ARBITRATION-AWARE-2026-04-25 */
    const rerankMultiplier = 1 + jaccard * 0.3 + tierRatio * 0.2;
    // L1c: an L1b arbitration loser is not protected and must not be boosted
    // past the position it was capped to (winner * 0.95).
    const isArbLoser = !!r._arbitratedBy;
    if (r.disambiguated && !isArbLoser) {
      // Disambiguation winner: rerank may only strengthen it
      r.score = r.score * Math.max(rerankMultiplier, 1.0);
      r._rerankProtected = true;
    } else if (isArbLoser) {
      // Arbitration loser: multiplier capped at 1.0 (never amplified)
      const capped = Math.min(rerankMultiplier, 1.0);
      r.score = r.score * capped;
      r._rerankBoost = capped;
      continue;
    } else {
      r.score = r.score * rerankMultiplier;
    }
    r._rerankBoost = rerankMultiplier;
  }

  // Disambiguation cap: nothing in the window may outrank the true winner.
  // L1c: the baseline must be the real winner (disambiguated && !_arbitratedBy);
  // an arbitration loser also has disambiguated=true but was already capped
  // below the winner, so using it as baseline would cap the winner itself.
  const disambTop = windowed.find((r) => r.disambiguated && !r._arbitratedBy);
  if (disambTop) {
    for (const r of windowed) {
      if (r === disambTop) continue;
      // Arbitration losers are capped too: their disambiguated flag is historical
      if (r.score > disambTop.score) {
        r.score = disambTop.score * 0.98;
        r._rerankCapped = true;
      }
    }
  }

  windowed.sort((a, b) => b.score - a.score);
  return windowed.concat(rest);
}
/**
 * Attach a `confidence` in [0, 1] to every result, relative to the score of
 * the first entry (the list is expected to be sorted by descending score).
 *
 * confidence = round(score / topScore, 2 decimals), clamped to [0, 1].
 * When the top score is not a positive number (nothing matched), every entry
 * gets confidence 0 so that downstream formatting and threshold checks always
 * see a number.
 *
 * @param {Array} results - scored results, best first
 * @returns {Array} new array of shallow copies carrying `confidence`; [] for
 *   null/undefined/empty input
 */
function normalizeScores(results) {
  if (!results || results.length === 0) return results || [];

  const maxScore = results[0]?.score;
  if (!(maxScore > 0)) {
    return results.map((r) => ({ ...r, confidence: 0 }));
  }

  return results.map((r) => {
    const ratio = Math.round(r.score / maxScore * 100) / 100;
    // Negative scores (e.g. negation penalties) and missing scores must not
    // leak out as negative or NaN confidence.
    const confidence = Number.isFinite(ratio) ? Math.min(Math.max(ratio, 0), 1.0) : 0;
    return { ...r, confidence };
  });
}
// === Route audit log ===
/**
 * Append one JSON line describing a routing decision to
 * DEBUG_DIR/route-YYYY-MM-DD.jsonl (date taken from the ISO timestamp),
 * creating the directory when needed. Best effort: any failure is ignored.
 *
 * @param {string} query - original query (only the first 200 characters are stored)
 * @param {Array} results - normalised results, best first
 */
function logRoute(query, results) {
  try {
    const dirMissing = !fs.existsSync(DEBUG_DIR);
    if (dirMissing) fs.mkdirSync(DEBUG_DIR, { recursive: true });

    const dateStr = new Date().toISOString().slice(0, 10);
    const logFile = path.join(DEBUG_DIR, `route-${dateStr}.jsonl`);

    const best = results[0];
    const candidates = results
      .slice(0, 5)
      .map(({ name, confidence }) => ({ name, confidence }));
    const entry = {
      ts: new Date().toISOString(),
      query: query.slice(0, 200),
      topResult: best?.name || 'none',
      topConfidence: best?.confidence || 0,
      candidates,
    };

    fs.appendFileSync(logFile, JSON.stringify(entry) + '\n');
  } catch {}
}
// === Composable collaboration hints ===
/**
 * Look up the `composable` descriptor of a skill.
 *
 * @param {Object} index - skills index
 * @param {string} skillName - name to look up
 * @returns {Object} the skill's `composable` object, or {} when the skill is
 *   unknown or has none
 */
function getComposable(index, skillName) {
  const found = index.skills.find((candidate) => candidate.name === skillName);
  if (found && found.composable) return found.composable;
  return {};
}

/**
 * Build collaboration hints for the top-ranked skill from its `composable`
 * descriptor. Relations are emitted in the order enhances → requires →
 * conflicts, and only for skills that exist in the index.
 *
 * @param {Object} index - skills index
 * @param {Array} topResults - ranked results; only the first entry is used
 * @returns {Array<{skill: string, relation: string, from: string}>}
 */
function buildComposableHints(index, topResults) {
  if (topResults.length === 0) return [];

  const top = topResults[0];
  const comp = getComposable(index, top.name);
  const isIndexed = (name) => index.skills.some((s) => s.name === name);
  const hints = [];

  for (const relation of ['enhances', 'requires', 'conflicts']) {
    const related = comp[relation];
    if (!(related?.length > 0)) continue;
    for (const name of related) {
      if (isIndexed(name)) {
        hints.push({ skill: name, relation, from: top.name });
      }
    }
  }

  return hints;
}
// === Conflict disambiguation rule engine (v5.3 three-layer defence, v5.5 P4 externalised) ===
// Rule data is loaded from disambiguation-rules.json; trigger strings are compiled to RegExp.
/**
 * Load and compile the disambiguation rules stored next to this script.
 *
 * Each rule's `trigger` string is compiled to a case-insensitive RegExp. A
 * rule whose trigger does not compile is skipped with a warning instead of
 * discarding the whole rule set. A missing `penalty` list is normalised to
 * [] because consumers iterate it.
 *
 * If the file itself cannot be read or parsed, a message is written to stderr
 * and an empty list is returned (graceful degradation).
 *
 * @returns {Array<{id: *, trigger: RegExp, boost: *, penalty: Array, weight: *, mutual_exclusion: *}>}
 */
function loadDisambiguationRules() {
  const warn = (message) => {
    if (typeof process !== 'undefined' && process.stderr) {
      process.stderr.write(message);
    }
  };

  let rawRules;
  try {
    const rulesPath = path.join(__dirname, 'disambiguation-rules.json');
    const raw = JSON.parse(fs.readFileSync(rulesPath, 'utf8'));
    rawRules = raw.rules;
    if (!Array.isArray(rawRules)) {
      throw new TypeError('"rules" is not an array');
    }
  } catch (e) {
    // Loading failed: return an empty list and degrade gracefully
    warn(`[route-analyzer] 消歧规则加载失败: ${e.message}\n`);
    return [];
  }

  const compiled = [];
  for (const r of rawRules) {
    let trigger;
    try {
      trigger = new RegExp(r.trigger, 'i');
    } catch (e) {
      warn(`[route-analyzer] skipping disambiguation rule ${r && r.id}: ${e.message}\n`);
      continue;
    }
    compiled.push({
      id: r.id,
      trigger,
      boost: r.boost,
      penalty: r.penalty || [],
      weight: r.weight,
      mutual_exclusion: r.mutual_exclusion,
    });
  }
  return compiled;
}
const DISAMBIGUATION_RULES = loadDisambiguationRules();
/**
 * Estimate how specific a rule trigger is: the share of its source
 * characters that are ASCII letters, digits, CJK ideographs (U+4E00–U+9FFF),
 * '_' or '-'. A higher share means a more literal, more specific pattern.
 *
 * @param {string} triggerSource - regex source string
 * @returns {number} specificity in 0~1; 0.5 when the source is empty or missing
 */
function computeRuleSpecificity(triggerSource) {
  if (!triggerSource) return 0.5;

  const total = triggerSource.length;
  const literalChars = triggerSource.match(/[a-zA-Z0-9\u4e00-\u9fff_-]/g);
  const fixed = literalChars ? literalChars.length : 0;

  if (total > 0) {
    return Math.min(1, fixed / total);
  }
  return 0.5;
}
/**
* 对评分结果应用消歧规则 (v5.8 重构: 全量匹配 + 加权投票 + specificity)
*
* 改进点 (vs v5.7):
* 1. 所有规则全量匹配收集投票后统一应用
* 2. 每条规则的有效权重 = weight × specificity (越具体的规则影响越大)
* 3. boost/penalty 分别累积最终一次性合并到分值
* 4. 记录触发的规则 ID 供遥测消费
*
* @param {Array} results - 排序后的评分结果
* @param {string} queryText - 原始查询文本
* @param {Object} index - skills-index
* @returns {{ results: Array, firedRules: string[] }} 消歧后的结果 + 触发的规则
*/
// L1-AGENT-VIRTUAL-INJECTION-HELPER: build the agent allow-list from the *.md
// files in the agents directory under CLAUDE_ROOT (lazy + cached)
let _agentNamesCache = null;

/**
 * Return the cached set of agent names: the base names of all `.md` files in
 * CLAUDE_ROOT/agents that do not start with '_'. A missing directory or any
 * read error yields an empty set, which disables virtual agent injection.
 *
 * @returns {Set<string>} agent names
 */
function _loadAgentNamesCached() {
  if (_agentNamesCache !== null) return _agentNamesCache;

  try {
    const agentDir = path.join(CLAUDE_ROOT, 'agents');
    if (fs.existsSync(agentDir)) {
      const names = fs.readdirSync(agentDir)
        .filter((file) => file.endsWith('.md') && !file.startsWith('_'))
        .map((file) => file.slice(0, -3));
      _agentNamesCache = new Set(names);
    } else {
      _agentNamesCache = new Set();
    }
  } catch (_e) {
    _agentNamesCache = new Set(); // fail-close: an empty set is equivalent to disabling virtual injection
  }

  return _agentNamesCache;
}
/**
 * Apply the disambiguation rules to scored results (full match + weighted
 * voting + specificity).
 *
 * Steps:
 *   0. Virtual agent injection: for every fired rule whose `boost` names an
 *      agent (see _loadAgentNamesCached) absent from `results`, a virtual
 *      entry scoring 60% of the current maximum is appended.
 *   1. Voting: each fired rule votes with weight * (0.5 + specificity * 0.5);
 *      per skill the maximum boost vote and the maximum penalty vote (half
 *      the effective weight) are kept.
 *   1.5 Mutual exclusion: when a rule and its `mutual_exclusion.with` partner
 *      both fired and `on_keyword` matches the query, the rule's boost vote
 *      is dropped.
 *   2. Application: boosted skills get baseScore * (1 + boost) and are marked
 *      `disambiguated`; penalised, non-boosted skills get
 *      baseScore * (1 - penalty * 0.3) and are marked `penalized`.
 *   2.5 Cross-boost arbitration: when several skills were boosted without
 *      penalising one another, the one backed by the heaviest rule wins; the
 *      others are scaled down and capped at 95% of the winner.
 *   3. Rank enforcement: a skill on a fired rule's penalty list may not
 *      outscore that rule's boosted skill (capped at 95%).
 *
 * `results` is mutated in place and re-sorted by descending score.
 *
 * @param {Array} results - scored results
 * @param {string} queryText - original query text
 * @param {Object} index - skills index (currently not read by this function)
 * @returns {{ results: Array, firedRules: string[] }} adjusted results and the
 *   ids of the rules that fired
 */
function applyDisambiguation(results, queryText, index) {
  if (results.length < 2) return { results, firedRules: [] };
  const queryLower = queryText.toLowerCase();
  const firedRules = [];

  // L1-AGENT-VIRTUAL-INJECTION (2026-04-25): before voting, inject virtual
  // entries for agent-only boost rules so that boost/penalty/rank enforcement
  // can act on agents (the skills index does not contain agents).
  // Fail-close: a failure only prints a warning and does not block routing.
  try {
    const agentNames = _loadAgentNamesCached();
    if (agentNames && agentNames.size > 0 && results.length > 0) {
      const maxScore = Math.max.apply(null, results.map((r) => r.score || 0).concat([0.001]));
      const existingNames = new Set(results.map((r) => r.name));
      const candidateAgents = new Set();
      for (const rule of DISAMBIGUATION_RULES) {
        if (!rule.trigger.test(queryLower)) continue;
        if (rule.boost && agentNames.has(rule.boost) && !existingNames.has(rule.boost)) {
          candidateAgents.add(rule.boost);
        }
      }
      for (const agentName of candidateAgents) {
        results.push({
          name: agentName,
          score: maxScore * 0.6,
          _virtual: true,
          _isAgent: true,
          matched: [],
          weights: {},
          // Same shape as scored skills: consumers read matchedKeywords.length
          matchedKeywords: []
        });
      }
    }
  } catch (_e) {
    try { process.stderr.write('[route-analyzer] L1 virtual-agent injection skipped: ' + (_e && _e.message ? _e.message : String(_e)) + '\n'); } catch (_) {}
  }

  // Phase 1: collect the votes of every matching rule
  const boostVotes = new Map(); // skillName → boost vote
  const penaltyVotes = new Map(); // skillName → penalty vote
  for (const rule of DISAMBIGUATION_RULES) {
    if (!rule.trigger.test(queryLower)) continue;
    firedRules.push(rule.id);
    // Specificity weighting: 50% base + 50% scaled by how literal the regex is
    const specificity = computeRuleSpecificity(rule.trigger.source);
    const effectiveWeight = rule.weight * (0.5 + specificity * 0.5);
    // Boost vote (only for skills that have a positive score)
    const boosted = results.find((r) => r.name === rule.boost && r.score > 0);
    if (boosted) {
      const current = boostVotes.get(rule.boost) || 0;
      boostVotes.set(rule.boost, Math.max(current, effectiveWeight)); // max instead of sum, to avoid inflation
    }
    // Penalty votes (half weight); rules without a penalty list cast none
    for (const penName of (rule.penalty || [])) {
      const current = penaltyVotes.get(penName) || 0;
      penaltyVotes.set(penName, Math.max(current, effectiveWeight * 0.5));
    }
  }

  // Phase 1.5: mutual_exclusion resolution (RL-V14)
  for (const rule of DISAMBIGUATION_RULES) {
    if (!rule.mutual_exclusion || !firedRules.includes(rule.id)) continue;
    const conflictWith = rule.mutual_exclusion.with;
    if (firedRules.includes(conflictWith)) {
      if (rule.mutual_exclusion.on_keyword) {
        const keywordRe = new RegExp(rule.mutual_exclusion.on_keyword, 'i');
        if (keywordRe.test(queryLower) && rule.boost) {
          boostVotes.delete(rule.boost);
        }
      }
    }
  }

  // Phase 2: apply the votes
  for (const r of results) {
    if (r.score <= 0) continue;
    // Remember the original score (for auditing)
    if (!r._baseScore) r._baseScore = r.score;
    const boost = boostVotes.get(r.name) || 0;
    const penalty = penaltyVotes.get(r.name) || 0;
    if (boost > 0) {
      r.score = r._baseScore * (1 + boost);
      r.disambiguated = true;
    }
    if (penalty > 0 && !r.disambiguated) {
      // Penalise only skills that were not boosted
      r.score = r._baseScore * (1 - penalty * 0.3);
      r.penalized = true;
    }
  }

  // L1b-CROSS-BOOST-ARBITRATION (2026-04-25)
  // Phase 2.5: two fired rules may boost different skills without listing each
  // other as penalties (so Phase 3 does not intervene); without arbitration the
  // skill with the higher base score would simply win. Order-independent: it
  // relies only on boostVotes and static rule properties.
  try {
    if (boostVotes.size >= 2) {
      const boostMeta = new Map();
      for (const rule of DISAMBIGUATION_RULES) {
        if (!firedRules.includes(rule.id)) continue;
        if (!rule.boost || !boostVotes.has(rule.boost)) continue;
        const spec = computeRuleSpecificity(rule.trigger.source);
        const w = (rule.weight || 0) * (0.5 + spec * 0.5);
        const prev = boostMeta.get(rule.boost);
        if (!prev || w > prev.weight) {
          boostMeta.set(rule.boost, {
            weight: w,
            ruleId: rule.id,
            penaltySet: new Set(rule.penalty || [])
          });
        }
      }
      if (boostMeta.size >= 2) {
        const ranked = Array.from(boostMeta.entries())
          .sort((a, b) => b[1].weight - a[1].weight);
        const [winnerName, winnerMeta] = ranked[0];
        const winner = results.find((r) => r.name === winnerName && r.score > 0);
        if (winner) {
          for (let i = 1; i < ranked.length; i++) {
            const [loserName, loserMeta] = ranked[i];
            // Pairs that penalise each other are left to Phase 3
            const crossPenalty = winnerMeta.penaltySet.has(loserName)
              || loserMeta.penaltySet.has(winnerName);
            if (crossPenalty) continue;
            const loser = results.find((r) => r.name === loserName && r.score > 0);
            if (!loser) continue;
            const ratio = Math.max(0.6, loserMeta.weight / Math.max(winnerMeta.weight, 1e-6));
            const newScore = loser.score * ratio;
            loser.score = Math.min(newScore, winner.score * 0.95);
            loser._arbitratedBy = winnerMeta.ruleId;
            loser._arbitrationRatio = Math.round(ratio * 1000) / 1000;
          }
        }
      }
    }
  } catch (_e) {
    try { process.stderr.write('[route-analyzer] L1b cross-boost arbitration skipped: ' + (_e && _e.message ? _e.message : String(_e)) + '\n'); } catch (_) {}
  }

  // Phase 3: rank enforcement — a boosted skill must rank ahead of the skills
  // its rule penalises
  for (const rule of DISAMBIGUATION_RULES) {
    if (!firedRules.includes(rule.id)) continue;
    const boosted = results.find((r) => r.name === rule.boost && r.disambiguated);
    if (!boosted) continue;
    const penalised = rule.penalty || [];
    for (const r of results) {
      if (penalised.includes(r.name) && r.score > boosted.score) {
        r.score = boosted.score * 0.95;
        r.penalizedBy = rule.boost;
      }
    }
  }

  results.sort((a, b) => b.score - a.score);
  return { results, firedRules };
}
// === Main flow ===
/**
 * CLI entry point: parse arguments, score every skill in the index against
 * the query, apply disambiguation and top-k reranking, normalise the scores
 * into confidences, and print either JSON (--json) or the CLI rendering.
 * With --log, the decision is also appended to the route audit log.
 * Exits with status 1 when no query text was supplied.
 */
function main() {
parseArgs();
if (!query) {
console.error('Usage: node route-analyzer.js [--json] [--log] [--top N] "<query>"');
process.exit(1);
}
const index = loadIndex();
const queryTokens = tokenize(query);
// v4.9: build the BM25 parameters
const bm25Params = buildBM25Params(index);
// v5.0: load the context signals (graceful degradation: each optional module
// is wrapped in its own try/catch, and a failure leaves the defaults in place)
let composableIdx = {}, contextScores = {}, projectBoosts = {}, workflowPrediction = null;
try {
const ct = require('./context-tracker.js');
composableIdx = ct.buildComposableIndex(index);
const ctxState = ct.loadState(); // load once up front instead of once per skill
for (const skill of index.skills) {
contextScores[skill.name] = ct.computeContextScore(skill.name, composableIdx, ctxState);
}
} catch {}
try {
const pd = require('./project-detector.js');
projectBoosts = pd.getProjectBoost(process.cwd());
} catch {}
try {
const wp = require('./workflow-patterns.js');
const events = wp.loadActivityLogs(30);
const sessions = wp.extractSkillSequences(events, 30);
const patterns = wp.minePatterns(sessions, 2);
workflowPrediction = patterns;
} catch {}
// Score every skill
// v6.3: build the inverted index and hand it to scoreSkill
// NOTE(review): scoreSkill does not appear to use it for the actual scoring — confirm
const invertedIndex = buildInvertedIndex(index);
const results = index.skills.map(skill => {
const { totalScore, matchedKeywords } = scoreSkill(skill, queryTokens, bm25Params, invertedIndex);
// v5.0: context fusion
const ctxScore = contextScores[skill.name] || 0;
const projBoost = projectBoosts[skill.name] || 0;
let wfScore = 0;
if (workflowPrediction) {
// Look for the first bigram key ("from→to") whose target is this skill;
// its count, scaled by 0.1 and capped at 1, becomes the workflow score
for (const [key, count] of Object.entries(workflowPrediction.bigrams || {})) {
const [, to] = key.split('→');
if (to === skill.name) { wfScore = Math.min(1, count * 0.1); break; }
}
}
// Fix: context signals may contribute on their own; BM25 > 0 is no longer required
const finalScore = (totalScore > 0 || ctxScore > 0 || projBoost > 0 || wfScore > 0)
? contextAwareScore(totalScore, ctxScore, projBoost, wfScore)
: 0;
return {
name: skill.name,
maturity: skill.maturity,
// rounded to two decimals
score: Math.round(finalScore * 100) / 100,
// keep only the eight heaviest matches
matchedKeywords: matchedKeywords
.sort((a, b) => b.weight - a.weight)
.slice(0, 8),
};
}).sort((a, b) => b.score - a.score);
// v5.3: conflict disambiguation (third layer of the three-layer defence)
const { results: disambiguated, firedRules } = applyDisambiguation(results, query, index);
// v5.8 P1-B: top-k reranking over the first 10 results
const reranked = rerankTopK(disambiguated, queryTokens, index, 10);
// Normalise into confidences, then keep the requested top-K
const normalized = normalizeScores(reranked).slice(0, topK);
// Composable collaboration hints
const composableHints = buildComposableHints(index, normalized);
// Route audit log
if (logMode) {
logRoute(query, normalized);
}
// Output
if (jsonMode) {
const output = {
query,
tokens: Array.from(queryTokens),
results: normalized,
recommendation: getRecommendation(normalized),
};
if (composableHints.length > 0) {
output.composable = composableHints;
}
console.log(JSON.stringify(output, null, 2));
} else {
renderCli(normalized, composableHints);
}
}
/**
 * Turn normalised results into a routing recommendation.
 *
 *   - 'route'     top confidence >= 0.8 and a runner-up exists with confidence < 0.6
 *   - 'recommend' otherwise, when top confidence >= 0.5; candidates are all
 *                 results with confidence >= 0.3
 *   - 'fallback'  no results or top confidence below 0.5 → developer-expert
 *
 * @param {Array} results - results carrying `name` and `confidence`, best first
 * @returns {Object} recommendation object with an `action` field
 */
function getRecommendation(results) {
  const fallback = { action: 'fallback', skill: 'developer-expert' };
  if (results.length === 0) return fallback;

  const top = results[0];
  const hasClearLead = top.confidence >= 0.8
    && results.length > 1
    && results[1].confidence < 0.6;
  if (hasClearLead) {
    return { action: 'route', skill: top.name, confidence: top.confidence };
  }

  if (top.confidence >= 0.5) {
    const candidates = [];
    for (const r of results) {
      if (r.confidence >= 0.3) candidates.push(r.name);
    }
    return { action: 'recommend', primary: top.name, candidates };
  }

  return fallback;
}
/**
 * Print the ranked results, collaboration hints and the final recommendation
 * in human-readable form.
 *
 * Each result line shows rank, name, a 20-cell confidence bar, the percentage
 * and a HIGH (>= 0.8) / MED (>= 0.5) / LOW level, followed by up to five
 * matched keywords. Confidence values that are missing, non-finite or outside
 * [0, 1] are clamped for display, and results without a `matchedKeywords`
 * list (e.g. injected virtual agent entries) are rendered without the
 * "matched" line.
 *
 * @param {Array} results - normalised results, best first
 * @param {Array} [composableHints=[]] - hints from buildComposableHints()
 */
function renderCli(results, composableHints = []) {
  console.log(`\nQuery: "${query}"\n`);
  if (results.length === 0) {
    console.log(' No matches found. Fallback: developer-expert');
    return;
  }

  for (const [i, r] of results.entries()) {
    // String.prototype.repeat throws a RangeError on negative counts and
    // NaN would print as "NaN%", so sanitise before formatting.
    const confidence = Number.isFinite(r.confidence)
      ? Math.min(Math.max(r.confidence, 0), 1)
      : 0;
    const barLen = Math.round(confidence * 20);
    const bar = '\u2588'.repeat(barLen) + '\u2591'.repeat(20 - barLen);
    const level = confidence >= 0.8 ? 'HIGH' : confidence >= 0.5 ? 'MED ' : 'LOW ';
    const marker = i === 0 ? ' <--' : '';
    console.log(` ${String(i + 1).padStart(2)}. ${r.name.padEnd(30)} ${bar} ${(confidence * 100).toFixed(0).padStart(3)}% [${level}]${marker}`);

    // Show the matched keywords
    const matched = r.matchedKeywords || [];
    if (matched.length > 0) {
      const kwStr = matched.slice(0, 5).map((k) => k.keyword).join(', ');
      console.log(` matched: ${kwStr}`);
    }
  }

  // Composable collaboration hints
  if (composableHints.length > 0) {
    console.log();
    const enhances = composableHints.filter((h) => h.relation === 'enhances');
    const requires = composableHints.filter((h) => h.relation === 'requires');
    const conflicts = composableHints.filter((h) => h.relation === 'conflicts');
    if (enhances.length > 0) {
      console.log(` Enhances: ${enhances.map((h) => h.skill).join(', ')}`);
    }
    if (requires.length > 0) {
      console.log(` Requires: ${requires.map((h) => h.skill).join(', ')}`);
    }
    if (conflicts.length > 0) {
      console.log(` Conflicts: ${conflicts.map((h) => h.skill).join(', ')}`);
    }
  }

  // Recommendation
  const rec = getRecommendation(results);
  console.log();
  if (rec.action === 'route') {
    console.log(` Recommendation: ROUTE to ${rec.skill} (${(rec.confidence * 100).toFixed(0)}% confidence)`);
  } else if (rec.action === 'recommend') {
    console.log(` Recommendation: ${rec.primary} (candidates: ${rec.candidates.join(', ')})`);
  } else {
    console.log(` Recommendation: FALLBACK to developer-expert`);
  }
  console.log();
}
// 导出核心函数供测试使用
if (typeof module !== 'undefined') {
// === P3-2: 倒排索引 (keyword → skill indices) ===
function buildInvertedIndex(index) {
const skills = index.skills || [];
const invertedIdx = new Map(); // keyword → Set<skillIndex>
for (let i = 0; i < skills.length; i++) {
for (const { keyword } of (skills[i].keywords || [])) {
const kw = keyword.toLowerCase();
if (!invertedIdx.has(kw)) invertedIdx.set(kw, new Set());
invertedIdx.get(kw).add(i);
}
}
return invertedIdx;
}
module.exports = {
tokenize, scoreSkill, legacyScoreSkill, normalizeScores,
getRecommendation, buildComposableHints, loadLearnedWeights,
buildBM25Params, computeBM25Score, contextAwareScore,
applyDisambiguation, DISAMBIGUATION_RULES,
applyColdStartBoost, epsilonGreedySelect, computeRuleSpecificity, buildInvertedIndex,
loadDomainMap, IDF_FLOOR, IDF_BLEND_GLOBAL, IDF_BLEND_DOMAIN, rerankTopK,
};
}
// Run main() only when this file is executed directly, not when it is required as a module
if (require.main === module) {
main();
}