#!/usr/bin/env node /** * 路由置信度引擎 * * 基于 skills-index.json 对输入文本做语义匹配, * 返回 top-K 技能及置信度分数。 * * 用法: * node scripts/route-analyzer.js "React 组件性能优化" * node scripts/route-analyzer.js --top 10 "部署到 k8s" * node scripts/route-analyzer.js --json "写一个 REST API" * node scripts/route-analyzer.js --log "调试内存泄漏" * * 置信度阈值: * >= 0.8 高置信度 - 直接路由 * 0.5-0.8 中置信度 - 推荐候选 * < 0.5 低置信度 - fallback developer-expert */ const fs = require('fs'); const path = require('path'); const detectClaudeRoot = () => require('./paths.config.js').PATHS.root; const CLAUDE_ROOT = detectClaudeRoot(); const INDEX_FILE = path.join(CLAUDE_ROOT, 'skills-index.json'); const DEBUG_DIR = path.join(CLAUDE_ROOT, 'debug'); const WEIGHTS_FILE = path.join(DEBUG_DIR, 'route-weights.json'); // === 参数解析 (延迟到 main 内使用,仅在直接执行时校验) === let args, jsonMode, logMode, topK, query; function parseArgs() { args = process.argv.slice(2); jsonMode = args.includes('--json'); logMode = args.includes('--log'); const topIdx = args.indexOf('--top'); topK = (topIdx >= 0 && parseInt(args[topIdx + 1])) || 5; query = args.filter(a => !a.startsWith('--') && !(topIdx >= 0 && args[topIdx + 1] === a)).join(' '); } // === 加载索引 === function loadIndex() { if (!fs.existsSync(INDEX_FILE)) { console.error('skills-index.json not found. Run: node scripts/generate-skill-index.js'); process.exit(1); } return JSON.parse(fs.readFileSync(INDEX_FILE, 'utf8')); } // === 文本标准化 === // P0-FIX: 从 skills-index 构建已知中文关键词集合 let _knownCnKwCache = null; function _getKnownCnKeywords() { if (_knownCnKwCache) return _knownCnKwCache; _knownCnKwCache = new Set(); try { const idxFile = path.join(CLAUDE_ROOT, 'skills-index-lite.json'); const idx = JSON.parse(fs.readFileSync(idxFile, 'utf8')); for (const skill of (idx.skills || [])) { for (const kw of (skill.keywords || [])) { const k = (kw.keyword || kw).toLowerCase(); if (/[\u4e00-\u9fff]/.test(k)) _knownCnKwCache.add(k); } } } catch {} return _knownCnKwCache; } // P1-FIX: 否定检测 — 否定词后 1-3 token 降权 const NEGATION_WORDS = new Set([ '不用', '不要', '不是', '别用', '除了', '排除', '去掉', '不需要', '不使用', 'without', 'except', 'exclude', 'no', 'not', 'dont', "don\'t", 'remove' ]); function tokenize(text) { // P2: 截断超长输入,防止性能退化 text = (text || '').slice(0, 2000); const tokens = new Set(); // 高频复合词优先匹配 (减少滑动窗口噪声) const COMPOUND_WORDS = new Set([ "数据库", "服务器", "微服务", "框架设计", "项目管理", "接口设计", "单元测试", "集成测试", "性能优化", "代码审查", "架构设计", "版本控制", "持续集成", "持续部署", "负载均衡", "消息队列", "缓存策略", "安全审计", "权限管理", "日志分析", "容器化", "虚拟化", "自动化", "可视化", "模块化", "数据分析", "机器学习", "深度学习", "自然语言", "搜索引擎", "前端开发", "后端开发", "全栈开发", "移动开发", "跨平台", "状态管理", "路由设计", "组件开发", "响应式", "渐进式", "大语言模型", "向量数据库", "知识图谱", "推荐系统", ]); const textLower = text.toLowerCase(); for (const w of COMPOUND_WORDS) { if (textLower.includes(w)) tokens.add(w); } // 中文: 2-4 字符片段 (滑动窗口) const cnChars = text.match(/[\u4e00-\u9fff]+/g) || []; for (const chunk of cnChars) { for (let len = 2; len <= Math.min(4, chunk.length); len++) { for (let i = 0; i <= chunk.length - len; i++) { tokens.add(chunk.slice(i, i + len).toLowerCase()); } } } // 英文: 完整单词 + 连字符词组 const enWords = text.match(/[A-Za-z][\w.-]*(?:\s+[A-Za-z][\w.-]*){0,2}/g) || []; for (const w of enWords) { tokens.add(w.toLowerCase().trim()); // 也添加单个单词 for (const single of w.split(/[\s.-]+/)) { if (single.length >= 2) tokens.add(single.toLowerCase()); } } // v4.9→v6.4: 加权同义词展开 + 否定检测 let expanded; let synonymWeights = null; // 同义词权重映射 (原词 1.0, 主同义词 0.7, 次 0.4) try { const { expandSynonymsWeighted } = require('./synonym-expander.js'); const result = expandSynonymsWeighted(tokens); expanded = result.expanded; synonymWeights = result.weights; } catch { expanded = tokens; } const negatedTokens = new Set(); const _w = text.toLowerCase().split(/[\s,]+/); for (let _i = 0; _i < _w.length; _i++) { if (NEGATION_WORDS.has(_w[_i])) { for (let _j = 1; _j <= 3 && _i + _j < _w.length; _j++) { if (_w[_i + _j].length >= 2) negatedTokens.add(_w[_i + _j]); } } } Object.defineProperty(expanded, '_negatedTokens', { value: negatedTokens, enumerable: false, configurable: true }); // v6.4: 挂载同义词权重映射,供 scoreSkill 使用 Object.defineProperty(expanded, '_synonymWeights', { value: synonymWeights, enumerable: false, configurable: true }); return expanded; } // === BM25 参数构建 (v5.8 P1-A: 分段 IDF 归一化) === const IDF_FLOOR = Math.log(2); // ≈0.693,防止 IDF 趋零 // 加载 domain 映射 (技能→domain 的反向索引) let _domainMap = null; function loadDomainMap() { if (_domainMap) return _domainMap; try { const mapFile = path.join(CLAUDE_ROOT, 'scripts', 'skill-domain-map.json'); if (fs.existsSync(mapFile)) { const raw = JSON.parse(fs.readFileSync(mapFile, 'utf8')); // 构建 skillName → domainName 反向索引 _domainMap = new Map(); for (const [domain, skills] of Object.entries(raw.domains || {})) { for (const skill of skills) { _domainMap.set(skill, domain); } } return _domainMap; } } catch {} _domainMap = new Map(); return _domainMap; } function buildBM25Params(index) { const skills = index.skills || []; const N = skills.length; // 计算平均文档长度 (关键词数) let totalDl = 0; for (const skill of skills) { totalDl += (skill.keywords || []).length; } const avgdl = N > 0 ? totalDl / N : 1; // 加载 domain 映射 const domainMap = loadDomainMap(); // 按 domain 分组技能 const domainGroups = new Map(); // domain → [skill] for (const skill of skills) { const domain = domainMap.get(skill.name) || '_global'; if (!domainGroups.has(domain)) domainGroups.set(domain, []); domainGroups.get(domain).push(skill); } // 全局 DF const df = new Map(); // keyword → 全局出现计数 for (const skill of skills) { const seen = new Set(); for (const { keyword } of (skill.keywords || [])) { const kw = keyword.toLowerCase(); if (!seen.has(kw)) { seen.add(kw); df.set(kw, (df.get(kw) || 0) + 1); } } } // domain-local DF const domainDf = new Map(); // domain → Map for (const [domain, groupSkills] of domainGroups) { const localDf = new Map(); for (const skill of groupSkills) { const seen = new Set(); for (const { keyword } of (skill.keywords || [])) { const kw = keyword.toLowerCase(); if (!seen.has(kw)) { seen.add(kw); localDf.set(kw, (localDf.get(kw) || 0) + 1); } } } domainDf.set(domain, localDf); } // 全局 IDF (带 floor) const idf = new Map(); for (const [kw, docFreq] of df) { const rawIdf = Math.log((N - docFreq + 0.5) / (docFreq + 0.5) + 1); idf.set(kw, Math.max(rawIdf, IDF_FLOOR)); } // domain-local IDF (per-domain 分段计算) // domainIdf: Map> const domainIdf = new Map(); for (const [domain, localDfMap] of domainDf) { const localN = domainGroups.get(domain).length; const localIdfMap = new Map(); for (const [kw, localDocFreq] of localDfMap) { const rawIdf = Math.log((localN - localDocFreq + 0.5) / (localDocFreq + 0.5) + 1); localIdfMap.set(kw, Math.max(rawIdf, IDF_FLOOR)); } domainIdf.set(domain, localIdfMap); } return { N, avgdl, idf, df, domainIdf, domainMap, domainGroups }; } /** * BM25 单项评分 * @param {number} tf - 词频 (匹配权重) * @param {number} idf - 逆文档频率 * @param {number} dl - 文档长度 (技能关键词数) * @param {number} avgdl - 平均文档长度 * @param {number} k1 - 词频饱和参数 (默认 1.2) * @param {number} b - 长度归一化参数 (默认 0.75) * @returns {number} BM25 分值 */ function computeBM25Score(tf, idf, dl, avgdl, k1 = 1.5, b = 0.75) { const numerator = tf * (k1 + 1); const denominator = tf + k1 * (1 - b + b * dl / avgdl); return idf * numerator / denominator; } // === 冷启动防护 (v5.8 P0-A) === const COLD_START_THRESHOLD = 30; // 路由次数低于此值视为冷启动 const COLD_START_MAX_BOOST = 0.08; // 冷启动最大 boost 值 (67 skills 场景,降低避免翻转) const EPSILON_EXPLORE = 0.10; // epsilon-greedy 探索概率 /** * 对冷启动技能施加线性衰减 boost * boost = MAX_BOOST * (1 - routeCount / THRESHOLD) * 当 routeCount >= THRESHOLD 时 boost = 0 * * @param {Array} results - 排序后的评分结果 [{name, score, ...}] * @param {Map} routeStats - 技能路由次数统计 * @returns {{ results: Array, boostedSkills: string[] }} */ function applyColdStartBoost(results, routeStats) { const boostedSkills = []; // 保护基准: 消歧 top 优先,否则用 BM25 原始 top-1 const disambTop = results.find(r => r.disambiguated); const originalTop = results[0]; // BM25 排序后的 top-1 for (const r of results) { if (r.score <= 0) continue; // 只 boost 有基础分的技能 const count = routeStats.get(r.name) || 0; if (count < COLD_START_THRESHOLD) { const boost = COLD_START_MAX_BOOST * (1 - count / COLD_START_THRESHOLD); const topScore = results[0]?.score || 1; const newScore = r.score + boost * topScore; // 保护 1: 冷启动 boost 不得超过消歧确认的 top 技能 if (disambTop && !r.disambiguated && newScore > disambTop.score) { r.score = disambTop.score * 0.98; r.coldStartCapped = true; // 保护 2: 冷启动 boost 不得让非 top-1 技能跃居 top-1 之上 } else if (originalTop && r !== originalTop && newScore > originalTop.score) { r.score = originalTop.score * 0.99; r.coldStartCapped = true; } else { r.score = newScore; } r.coldStartBoost = boost; boostedSkills.push(r.name); } } // 重新排序 results.sort((a, b) => b.score - a.score); return { results, boostedSkills }; } /** * Epsilon-greedy 探索: 以 EPSILON 概率从 top-K 中随机选择 * 与已有 route-ab-test.js (Thompson Sampling) 互补: * - AB test: 仅在 top-2 置信差 <15% 时触发 * - Epsilon-greedy: 无条件以 10% 概率探索 top-5 * * @param {Array} candidates - top-K 候选 * @param {number} epsilon - 探索概率 * @returns {{ selected: Object, explored: boolean }} */ function epsilonGreedySelect(candidates, epsilon = EPSILON_EXPLORE) { if (!candidates || candidates.length < 2) { return { selected: candidates?.[0] || null, explored: false }; } if (Math.random() < epsilon) { // 从 top-5 中随机选一个 (非 top-1) const pool = candidates.slice(1, Math.min(5, candidates.length)); if (pool.length > 0) { const idx = Math.floor(Math.random() * pool.length); return { selected: pool[idx], explored: true }; } } return { selected: candidates[0], explored: false }; } // === 学习权重加载 === let _learnedWeights = null; function loadLearnedWeights() { if (_learnedWeights !== null) return _learnedWeights; try { if (fs.existsSync(WEIGHTS_FILE)) { _learnedWeights = JSON.parse(fs.readFileSync(WEIGHTS_FILE, 'utf8')); return _learnedWeights; } } catch {} _learnedWeights = {}; return _learnedWeights; } // === 旧版评分 (向后兼容) === function legacyScoreSkill(skill, queryTokens) { let totalScore = 0; let matchedKeywords = []; const weights = loadLearnedWeights(); const skillDeltas = (weights.deltas || {})[skill.name] || {}; for (const { keyword, weight } of skill.keywords) { const kwLower = keyword.toLowerCase(); const delta = skillDeltas[kwLower] || 0; const adjustedWeight = Math.max(0.1, weight + delta); if (queryTokens.has(kwLower)) { totalScore += adjustedWeight; matchedKeywords.push({ keyword, weight: adjustedWeight, matchType: 'exact' }); continue; } for (const token of queryTokens) { if (token.length >= 3 && kwLower.includes(token)) { totalScore += adjustedWeight * 0.6; matchedKeywords.push({ keyword, weight: adjustedWeight * 0.6, matchType: 'partial' }); break; } if (kwLower.length >= 3 && token.includes(kwLower)) { totalScore += adjustedWeight * 0.6; matchedKeywords.push({ keyword, weight: adjustedWeight * 0.6, matchType: 'partial' }); break; } } } return { totalScore, matchedKeywords }; } // === BM25 匹配评分 (v5.8 P1-A: blended IDF) === const IDF_BLEND_GLOBAL = 0.4; // 全局 IDF 权重 const IDF_BLEND_DOMAIN = 0.6; // domain-local IDF 权重 function scoreSkill(skill, queryTokens, bm25Params, invertedIndex) { // 向后兼容: 无 BM25 参数时回退旧逻辑 if (!bm25Params) return legacyScoreSkill(skill, queryTokens); let totalScore = 0; let matchedKeywords = []; // v6.4 MEDIUM-3: 读取否定 token 集合,命中否定的关键词施加反向惩罚 const negatedTokens = queryTokens._negatedTokens || new Set(); // v6.4 MEDIUM-4: 读取同义词权重映射,对同义词匹配施加衰减 const synonymWeights = queryTokens._synonymWeights || null; const weights = loadLearnedWeights(); const skillDeltas = (weights.deltas || {})[skill.name] || {}; const dl = (skill.keywords || []).length; const { avgdl, idf: idfMap, domainIdf, domainMap } = bm25Params; // 获取当前技能所属 domain 的 local IDF const skillDomain = domainMap ? (domainMap.get(skill.name) || '_global') : '_global'; const localIdfMap = (domainIdf && domainIdf.get(skillDomain)) || null; // v6.3: 倒排索引优化 — 预计算精确匹配集合 let exactMatchSet = null; if (invertedIndex) { exactMatchSet = new Set(); for (const token of queryTokens) { const postings = invertedIndex.get(token); if (postings) { // postings 是 Set,需要反查技能名 // 但 invertedIndex 存的是 keyword→Set // 我们直接检查当前 token 是否在此技能的关键词中 exactMatchSet.add(token); } } } for (const kwEntry of skill.keywords) { const kwLower = kwEntry.keyword.toLowerCase(); const delta = skillDeltas[kwLower] || 0; // 使用 tfidfWeight 如果可用,否则 fallback 到 weight const baseWeight = kwEntry.tfidfWeight || kwEntry.weight; const adjustedWeight = Math.max(0.1, baseWeight + delta); // v5.8: blended IDF — 全局×0.4 + domain-local×0.6 // tfidfWeight 已含编译期 IDF,施加校正因子而非直接替代 let kwIDF; if (kwEntry.tfidfWeight) { // 编译期 tfidfWeight 有效时: 计算校正因子 // 全局 IDF 退化时,domain-local IDF 补偿 const globalIdf = idfMap.get(kwLower) || IDF_FLOOR; const localIdf = (localIdfMap && localIdfMap.get(kwLower)) || globalIdf; const blendedIdf = globalIdf * IDF_BLEND_GLOBAL + localIdf * IDF_BLEND_DOMAIN; // 校正因子 = blended / globalIdf (当 globalIdf 退化时 > 1,补偿) const correction = globalIdf > 0 ? blendedIdf / globalIdf : 1; kwIDF = Math.max(correction, 0.5); // V10 修复: 允许校正因子 <1 (下限 0.5),消除小域系统性加分偏差 } else { // 无预计算: 直接用 blended IDF const globalIdf = idfMap.get(kwLower) || 0; const localIdf = (localIdfMap && localIdfMap.get(kwLower)) || globalIdf; kwIDF = globalIdf * IDF_BLEND_GLOBAL + localIdf * IDF_BLEND_DOMAIN; kwIDF = Math.max(kwIDF, IDF_FLOOR); } // 精确匹配 if (queryTokens.has(kwLower)) { let bm25 = computeBM25Score(adjustedWeight, kwIDF, dl, avgdl); // v6.4 MEDIUM-4: 同义词权重衰减 — 非原词的同义词匹配按权重缩放 if (synonymWeights && synonymWeights.has(kwLower)) { bm25 *= synonymWeights.get(kwLower); } // v6.4 MEDIUM-3: 否定惩罚 — 被否定的 token 反向计分 if (negatedTokens.has(kwLower)) { bm25 *= -0.3; } totalScore += bm25; matchedKeywords.push({ keyword: kwEntry.keyword, weight: bm25, matchType: negatedTokens.has(kwLower) ? 'negated' : 'exact' }); continue; } // 包含匹配 (折扣 0.6) for (const token of queryTokens) { if (token.length >= 3 && kwLower.includes(token)) { let bm25 = computeBM25Score(adjustedWeight * 0.6, kwIDF, dl, avgdl); // v6.4: 同义词权重衰减 if (synonymWeights && synonymWeights.has(token)) { bm25 *= synonymWeights.get(token); } // v6.4: 否定惩罚 if (negatedTokens.has(token)) { bm25 *= -0.3; } totalScore += bm25; matchedKeywords.push({ keyword: kwEntry.keyword, weight: bm25, matchType: negatedTokens.has(token) ? 'negated' : 'partial' }); break; } if (kwLower.length >= 3 && token.includes(kwLower)) { let bm25 = computeBM25Score(adjustedWeight * 0.6, kwIDF, dl, avgdl); if (synonymWeights && synonymWeights.has(token)) { bm25 *= synonymWeights.get(token); } if (negatedTokens.has(token)) { bm25 *= -0.3; } totalScore += bm25; matchedKeywords.push({ keyword: kwEntry.keyword, weight: bm25, matchType: negatedTokens.has(token) ? 'negated' : 'partial' }); break; } } } // v5.9.1: 长期休眠技能降权 (coldPenalty 由 generate-skill-index 标记) if (skill.coldPenalty && totalScore > 0) { totalScore *= skill.coldPenalty; } // v6.3: 超大关键词列表技能降权 — 防止 gstack 导入技能的大量泛化关键词淹没真正专家技能 // 技能关键词数 > 80 且 maturity=unknown 视为低信噪比技能,施加对数衰减降权 if (skill.maturity === 'unknown' && dl > 80 && totalScore > 0) { // 对数衰减: 80 kw → 1.0, 120 kw → 0.37, 200 kw → 0.17 const penalty = Math.max(0.1, 80 / dl); totalScore *= penalty; } return { totalScore, matchedKeywords }; } // === 上下文感知融合评分 (v5.0) === /** * 融合 BM25 + 上下文 + 项目类型 + 工作流模式 * 权重: BM25 0.6 + context 0.2 + project 0.1 + workflow 0.1 * @param {number} bm25Score - BM25 原始分数 * @param {number} contextScore - 上下文分数 (0~1) * @param {number} projectBoost - 项目类型加成 (0~...) * @param {number} workflowScore - 工作流预测分数 (0~1) * @returns {number} 融合后的分数 */ /** @deprecated CLI fallback only. Use route-interceptor-bundle fusion weights. */ function contextAwareScore(bm25Score, contextScore, projectBoost, workflowScore) { // 修复: 线性加权融合,上下文信号独立于 BM25 分数 // 上下文信号使用固定基准值缩放,确保对排名有实质影响 const CTX_BASE = 5.0; return bm25Score * 0.6 + contextScore * CTX_BASE * 0.2 + projectBoost * CTX_BASE * 0.1 + workflowScore * CTX_BASE * 0.1; } // === Top-k Reranking (v5.8 P1-B) === /** * BM25 初筛后对 top-k 结果精排 * * 三个精排信号: * 1. Jaccard overlap — 查询 tokens 与技能 core 关键词的重叠率 * 2. Tier bonus — core 关键词匹配 ×1.5, extended ×1.0 * 3. Gap penalty — top-1 与 top-2 差距过小时收紧排名 * * @param {Array} results - BM25 排序后的结果 (已含 matchedKeywords) * @param {Set} queryTokens - 用户查询 tokens * @param {Object} index - 技能索引 * @param {number} k - rerank 窗口大小 (默认 10) * @returns {Array} 精排后的结果 */ function rerankTopK(results, queryTokens, index, k = 10) { if (!results || results.length < 2 || !queryTokens) return results; const topK = results.slice(0, k); const rest = results.slice(k); // 构建技能 core keywords 快速查找 const skillCoreKws = new Map(); for (const skill of (index.skills || [])) { const coreSet = new Set(); const allSet = new Set(); for (const kw of (skill.keywords || [])) { const kwLower = kw.keyword.toLowerCase(); allSet.add(kwLower); if (kw.tier === 'core') coreSet.add(kwLower); } skillCoreKws.set(skill.name, { core: coreSet, all: allSet }); } for (const r of topK) { const kwData = skillCoreKws.get(r.name); if (!kwData) continue; // 信号 1: Jaccard overlap (查询 vs 技能 core 关键词) let intersect = 0, unionSize = kwData.core.size; for (const token of queryTokens) { if (kwData.core.has(token)) intersect++; if (!kwData.all.has(token)) unionSize++; } const jaccard = unionSize > 0 ? intersect / unionSize : 0; // 信号 2: Tier bonus — 统计 matchedKeywords 中 core 占比 let coreMatches = 0, totalMatches = (r.matchedKeywords || []).length; for (const mk of (r.matchedKeywords || [])) { if (kwData.core.has(mk.keyword.toLowerCase())) coreMatches++; } const tierRatio = totalMatches > 0 ? coreMatches / totalMatches : 0; // rerank score = 原始 BM25 × (1 + jaccard×0.3 + tierRatio×0.2) // 消歧 boosted 技能受保护: rerank 不降低其排名 const rerankMultiplier = 1 + jaccard * 0.3 + tierRatio * 0.2; if (r.disambiguated) { // 消歧已确认此技能优先: 只允许 rerank 增强,不允许被其他技能超越 r.score = r.score * Math.max(rerankMultiplier, 1.0); r._rerankProtected = true; } else { r.score = r.score * rerankMultiplier; } r._rerankBoost = rerankMultiplier; } // 消歧保护 cap: 非消歧技能不得超越消歧 top const disambTop = topK.find(r => r.disambiguated); if (disambTop) { for (const r of topK) { if (!r.disambiguated && r.score > disambTop.score) { r.score = disambTop.score * 0.98; r._rerankCapped = true; } } } // 信号 3: Gap penalty — top-1 与 top-2 差距 < 5% 时不改变排名 topK.sort((a, b) => b.score - a.score); return topK.concat(rest); } function normalizeScores(results) { if (!results || results.length === 0) return results || []; const maxScore = results[0]?.score; if (maxScore === 0) return results; return results.map(r => ({ ...r, confidence: Math.min(Math.round(r.score / maxScore * 100) / 100, 1.0), })); } // === 路由审计日志 === function logRoute(query, results) { try { if (!fs.existsSync(DEBUG_DIR)) fs.mkdirSync(DEBUG_DIR, { recursive: true }); const dateStr = new Date().toISOString().slice(0, 10); const logFile = path.join(DEBUG_DIR, `route-${dateStr}.jsonl`); const entry = { ts: new Date().toISOString(), query: query.slice(0, 200), topResult: results[0]?.name || 'none', topConfidence: results[0]?.confidence || 0, candidates: results.slice(0, 5).map(r => ({ name: r.name, confidence: r.confidence })), }; fs.appendFileSync(logFile, JSON.stringify(entry) + '\n'); } catch {} } // === composable 协作推荐 === function getComposable(index, skillName) { const skill = index.skills.find(s => s.name === skillName); return skill?.composable || {}; } function buildComposableHints(index, topResults) { if (topResults.length === 0) return []; const top = topResults[0]; const comp = getComposable(index, top.name); const hints = []; // enhances: 本技能可增强的其他技能 if (comp.enhances?.length > 0) { for (const name of comp.enhances) { // 只推荐存在于索引中的技能 if (index.skills.some(s => s.name === name)) { hints.push({ skill: name, relation: 'enhances', from: top.name }); } } } // requires: 前置依赖技能 if (comp.requires?.length > 0) { for (const name of comp.requires) { if (index.skills.some(s => s.name === name)) { hints.push({ skill: name, relation: 'requires', from: top.name }); } } } // conflicts: 不宜同时使用 if (comp.conflicts?.length > 0) { for (const name of comp.conflicts) { if (index.skills.some(s => s.name === name)) { hints.push({ skill: name, relation: 'conflicts', from: top.name }); } } } return hints; } // === 冲突消歧规则引擎 (v5.3 三层防线, v5.5 P4 外部化) === // 规则数据从 disambiguation-rules.json 加载,trigger 字符串编译为 RegExp function loadDisambiguationRules() { try { const rulesPath = path.join(__dirname, 'disambiguation-rules.json'); const raw = JSON.parse(fs.readFileSync(rulesPath, 'utf8')); return raw.rules.map(r => ({ id: r.id, trigger: new RegExp(r.trigger, 'i'), boost: r.boost, penalty: r.penalty, weight: r.weight, })); } catch (e) { // 加载失败时返回空数组,优雅降级 if (typeof process !== 'undefined' && process.stderr) { process.stderr.write(`[route-analyzer] 消歧规则加载失败: ${e.message}\n`); } return []; } } const DISAMBIGUATION_RULES = loadDisambiguationRules(); /** * 计算规则 specificity: regex 中固定字符占比越高越具体 * @param {string} triggerSource - regex 源字符串 * @returns {number} 0~1 之间的 specificity 值 */ function computeRuleSpecificity(triggerSource) { if (!triggerSource) return 0.5; // 固定字符 = 非元字符 (非 . * + ? | [ ] ( ) { } ^ $ \) const fixed = (triggerSource.match(/[a-zA-Z0-9\u4e00-\u9fff_-]/g) || []).length; const total = triggerSource.length; return total > 0 ? Math.min(1, fixed / total) : 0.5; } /** * 对评分结果应用消歧规则 (v5.8 重构: 全量匹配 + 加权投票 + specificity) * * 改进点 (vs v5.7): * 1. 所有规则全量匹配,收集投票后统一应用 * 2. 每条规则的有效权重 = weight × specificity (越具体的规则影响越大) * 3. boost/penalty 分别累积,最终一次性合并到分值 * 4. 记录触发的规则 ID 供遥测消费 * * @param {Array} results - 排序后的评分结果 * @param {string} queryText - 原始查询文本 * @param {Object} index - skills-index * @returns {{ results: Array, firedRules: string[] }} 消歧后的结果 + 触发的规则 */ function applyDisambiguation(results, queryText, index) { if (results.length < 2) return { results, firedRules: [] }; const queryLower = queryText.toLowerCase(); const firedRules = []; // Phase 1: 收集所有匹配规则的投票 const boostVotes = new Map(); // skillName → 累积 boost 增量 const penaltyVotes = new Map(); // skillName → 累积 penalty 增量 for (const rule of DISAMBIGUATION_RULES) { if (!rule.trigger.test(queryLower)) continue; firedRules.push(rule.id); // specificity 加权: regex 越具体,权重越高 const specificity = computeRuleSpecificity(rule.trigger.source); const effectiveWeight = rule.weight * (0.5 + specificity * 0.5); // 基础 50% + specificity 50% // 累积 boost 投票 const boosted = results.find(r => r.name === rule.boost && r.score > 0); if (boosted) { const current = boostVotes.get(rule.boost) || 0; boostVotes.set(rule.boost, Math.max(current, effectiveWeight)); // 取最大值防止叠加虚高 } // 累积 penalty 投票 for (const penName of rule.penalty) { const current = penaltyVotes.get(penName) || 0; penaltyVotes.set(penName, Math.max(current, effectiveWeight * 0.5)); // penalty 折半 } } // Phase 1.5: mutual_exclusion 互斥消解 (RL-V14) for (const rule of DISAMBIGUATION_RULES) { if (!rule.mutual_exclusion || !firedRules.includes(rule.id)) continue; const conflictWith = rule.mutual_exclusion.with; if (firedRules.includes(conflictWith)) { if (rule.mutual_exclusion.on_keyword) { const keywordRe = new RegExp(rule.mutual_exclusion.on_keyword, 'i'); if (keywordRe.test(queryLower) && rule.boost) { boostVotes.delete(rule.boost); } } } } // Phase 2: 统一应用投票结果 for (const r of results) { if (r.score <= 0) continue; // 记录原始分数 (用于审计) if (!r._baseScore) r._baseScore = r.score; const boost = boostVotes.get(r.name) || 0; const penalty = penaltyVotes.get(r.name) || 0; if (boost > 0) { r.score = r._baseScore * (1 + boost); r.disambiguated = true; } if (penalty > 0 && !r.disambiguated) { // 仅在技能未被 boost 时施加 penalty r.score = r._baseScore * (1 - penalty * 0.3); r.penalized = true; } } // Phase 3: 排名强制 — boosted 技能必须排在其 penalized 对手前面 for (const rule of DISAMBIGUATION_RULES) { if (!firedRules.includes(rule.id)) continue; const boosted = results.find(r => r.name === rule.boost && r.disambiguated); if (!boosted) continue; for (const r of results) { if (rule.penalty.includes(r.name) && r.score > boosted.score) { r.score = boosted.score * 0.95; r.penalizedBy = rule.boost; } } } // 重新排序 results.sort((a, b) => b.score - a.score); return { results, firedRules }; } // === 主流程 === function main() { parseArgs(); if (!query) { console.error('Usage: node route-analyzer.js [--json] [--log] [--top N] ""'); process.exit(1); } const index = loadIndex(); const queryTokens = tokenize(query); // v4.9: 构建 BM25 参数 const bm25Params = buildBM25Params(index); // v5.0: 加载上下文信号 (优雅降级) let composableIdx = {}, contextScores = {}, projectBoosts = {}, workflowPrediction = null; try { const ct = require('./context-tracker.js'); composableIdx = ct.buildComposableIndex(index); const ctxState = ct.loadState(); // 一次性加载,避免 68 次 I/O for (const skill of index.skills) { contextScores[skill.name] = ct.computeContextScore(skill.name, composableIdx, ctxState); } } catch {} try { const pd = require('./project-detector.js'); projectBoosts = pd.getProjectBoost(process.cwd()); } catch {} try { const wp = require('./workflow-patterns.js'); const events = wp.loadActivityLogs(30); const sessions = wp.extractSkillSequences(events, 30); const patterns = wp.minePatterns(sessions, 2); workflowPrediction = patterns; } catch {} // 评分所有技能 // v6.3: 构建倒排索引加速精确匹配 const invertedIndex = buildInvertedIndex(index); const results = index.skills.map(skill => { const { totalScore, matchedKeywords } = scoreSkill(skill, queryTokens, bm25Params, invertedIndex); // v5.0: 上下文融合 const ctxScore = contextScores[skill.name] || 0; const projBoost = projectBoosts[skill.name] || 0; let wfScore = 0; if (workflowPrediction) { // 从 bigrams 检查当前技能是否为预测后继 for (const [key, count] of Object.entries(workflowPrediction.bigrams || {})) { const [, to] = key.split('→'); if (to === skill.name) { wfScore = Math.min(1, count * 0.1); break; } } } // 修复: 上下文信号可独立贡献,不再要求 BM25 > 0 const finalScore = (totalScore > 0 || ctxScore > 0 || projBoost > 0 || wfScore > 0) ? contextAwareScore(totalScore, ctxScore, projBoost, wfScore) : 0; return { name: skill.name, maturity: skill.maturity, score: Math.round(finalScore * 100) / 100, matchedKeywords: matchedKeywords .sort((a, b) => b.weight - a.weight) .slice(0, 8), }; }).sort((a, b) => b.score - a.score); // v5.3: 冲突消歧 (三层防线第 3 层) const { results: disambiguated, firedRules } = applyDisambiguation(results, query, index); // v5.8 P1-B: top-k reranking 精排 const reranked = rerankTopK(disambiguated, queryTokens, index, 10); // 归一化置信度 const normalized = normalizeScores(reranked).slice(0, topK); // composable 协作推荐 const composableHints = buildComposableHints(index, normalized); // 路由审计日志 if (logMode) { logRoute(query, normalized); } // 输出 if (jsonMode) { const output = { query, tokens: Array.from(queryTokens), results: normalized, recommendation: getRecommendation(normalized), }; if (composableHints.length > 0) { output.composable = composableHints; } console.log(JSON.stringify(output, null, 2)); } else { renderCli(normalized, composableHints); } } function getRecommendation(results) { if (results.length === 0) return { action: 'fallback', skill: 'developer-expert' }; const top = results[0]; if (top.confidence >= 0.8 && results.length > 1 && results[1].confidence < 0.6) { return { action: 'route', skill: top.name, confidence: top.confidence }; } if (top.confidence >= 0.5) { return { action: 'recommend', primary: top.name, candidates: results.filter(r => r.confidence >= 0.3).map(r => r.name), }; } return { action: 'fallback', skill: 'developer-expert' }; } function renderCli(results, composableHints = []) { console.log(`\nQuery: "${query}"\n`); if (results.length === 0) { console.log(' No matches found. Fallback: developer-expert'); return; } const maxScore = results[0].score || 1; for (const [i, r] of results.entries()) { const barLen = Math.round(r.confidence * 20); const bar = '\u2588'.repeat(barLen) + '\u2591'.repeat(20 - barLen); const level = r.confidence >= 0.8 ? 'HIGH' : r.confidence >= 0.5 ? 'MED ' : 'LOW '; const marker = i === 0 ? ' <--' : ''; console.log(` ${String(i + 1).padStart(2)}. ${r.name.padEnd(30)} ${bar} ${(r.confidence * 100).toFixed(0).padStart(3)}% [${level}]${marker}`); // 显示匹配关键词 if (r.matchedKeywords.length > 0) { const kwStr = r.matchedKeywords.slice(0, 5).map(k => k.keyword).join(', '); console.log(` matched: ${kwStr}`); } } // composable 协作提示 if (composableHints.length > 0) { console.log(); const enhances = composableHints.filter(h => h.relation === 'enhances'); const requires = composableHints.filter(h => h.relation === 'requires'); const conflicts = composableHints.filter(h => h.relation === 'conflicts'); if (enhances.length > 0) { console.log(` Enhances: ${enhances.map(h => h.skill).join(', ')}`); } if (requires.length > 0) { console.log(` Requires: ${requires.map(h => h.skill).join(', ')}`); } if (conflicts.length > 0) { console.log(` Conflicts: ${conflicts.map(h => h.skill).join(', ')}`); } } // 建议 const rec = getRecommendation(results); console.log(); if (rec.action === 'route') { console.log(` Recommendation: ROUTE to ${rec.skill} (${(rec.confidence * 100).toFixed(0)}% confidence)`); } else if (rec.action === 'recommend') { console.log(` Recommendation: ${rec.primary} (candidates: ${rec.candidates.join(', ')})`); } else { console.log(` Recommendation: FALLBACK to developer-expert`); } console.log(); } // 导出核心函数供测试使用 if (typeof module !== 'undefined') { // === P3-2: 倒排索引 (keyword → skill indices) === function buildInvertedIndex(index) { const skills = index.skills || []; const invertedIdx = new Map(); // keyword → Set for (let i = 0; i < skills.length; i++) { for (const { keyword } of (skills[i].keywords || [])) { const kw = keyword.toLowerCase(); if (!invertedIdx.has(kw)) invertedIdx.set(kw, new Set()); invertedIdx.get(kw).add(i); } } return invertedIdx; } module.exports = { tokenize, scoreSkill, legacyScoreSkill, normalizeScores, getRecommendation, buildComposableHints, loadLearnedWeights, buildBM25Params, computeBM25Score, contextAwareScore, applyDisambiguation, DISAMBIGUATION_RULES, applyColdStartBoost, epsilonGreedySelect, computeRuleSpecificity, buildInvertedIndex, loadDomainMap, IDF_FLOOR, IDF_BLEND_GLOBAL, IDF_BLEND_DOMAIN, rerankTopK, }; } // 仅在直接执行时运行 if (require.main === module) { main(); }