#!/usr/bin/env node /** * 关键词缺口检测器 (Keyword Gap Detector) * * 分析 route-feedback.jsonl 中的 miss 案例,检测 skills-index.json 中 * 缺失的关键词覆盖,输出建议报告。 * * 原则: 只读分析,不自动修改任何 SKILL.md 或 skills-index.json。 * * 用法: * node scripts/keyword-gap-detector.js # 文本报告 * node scripts/keyword-gap-detector.js --json # JSON 输出 */ const fs = require('fs'); const path = require('path'); function detectRoot() { try { return require('./paths.config.js').PATHS.root; } catch { const selfDir = __dirname; if (selfDir.includes('.claude')) return selfDir.replace(/[/\\]scripts$/, ''); return '/mnt/c/Users/janson9527us/.claude'; } } const ROOT = detectRoot(); const DEBUG_DIR = path.join(ROOT, 'debug'); const INDEX_FILE = path.join(ROOT, 'skills-index.json'); const FEEDBACK_FILE = path.join(DEBUG_DIR, 'route-feedback.jsonl'); const REPORT_FILE = path.join(DEBUG_DIR, 'keyword-gap-report.json'); const JSON_MODE = process.argv.includes('--json'); // === 数据加载 === function loadIndex() { if (!fs.existsSync(INDEX_FILE)) return null; return JSON.parse(fs.readFileSync(INDEX_FILE, 'utf8')); } function loadFeedback() { if (!fs.existsSync(FEEDBACK_FILE)) return []; return fs.readFileSync(FEEDBACK_FILE, 'utf8') .split('\n').filter(Boolean) .map(line => { try { return JSON.parse(line); } catch { return null; } }) .filter(Boolean); } // === 分析引擎 === /** * 构建技能关键词集合映射 * @param {Object} index - skills-index.json * @returns {Object} { skillName: Set } */ function buildSkillKeywordMap(index) { const map = {}; for (const skill of index.skills) { map[skill.name] = new Set(skill.keywords.map(k => k.keyword.toLowerCase())); } return map; } /** * 分析单条 miss 案例的关键词缺口 * @param {Object} fb - 反馈条目 * @param {Object} skillKeywords - 技能关键词映射 * @returns {Object|null} 缺口分析结果 */ function analyzeGap(fb, skillKeywords) { if (fb.routedTo === fb.correctedTo) return null; // 非纠正 if (fb.routedTo === 'unknown') return null; const correctSkillKw = skillKeywords[fb.correctedTo]; if (!correctSkillKw) return null; // 目标技能不存在 const queryTokens = new Set(fb.queryTokens || []); // 查询 token 中有多少被正确技能覆盖 const covered = []; const uncovered = []; for (const token of queryTokens) { if (correctSkillKw.has(token)) { covered.push(token); } else { uncovered.push(token); } } // 过滤噪声: 排除单字中文、同义词展开的通用词 const NOISE_PATTERNS = /^(test|testing|e2e|security|防护|加固|hardening|漏洞|vulnerability|性能|performance|optimization|调优|tuning|加速|接口|interface|rest|restful|graphql|endpoint|部署|deploy|deployment|发布|上线|release|publish|前端|frontend|front-end|客户端|client-side|界面|容器|container|docker|镜像|image|容器化|containerize|机器学习|machine learning|ml|model|训练|training|深度学习|deep learning|dl|神经网络|neural network|tensorflow)$/; const meaningfulUncovered = uncovered.filter(t => t.length >= 2 && !NOISE_PATTERNS.test(t) ); // 仅在有有意义的未覆盖 token 时报告 if (meaningfulUncovered.length === 0) return null; const coverageRate = queryTokens.size > 0 ? Math.round(covered.length / queryTokens.size * 100) : 0; return { query: fb.query, routedTo: fb.routedTo, correctedTo: fb.correctedTo, coverageRate, coveredCount: covered.length, totalTokens: queryTokens.size, suggestedKeywords: meaningfulUncovered, covered, }; } /** * 聚合所有缺口,按技能分组 * @param {Array} gaps - 缺口分析结果 * @returns {Object} { skillName: { gaps, suggestedKeywords: { keyword: frequency } } } */ function aggregateBySkill(gaps) { const bySkill = {}; for (const gap of gaps) { if (!bySkill[gap.correctedTo]) { bySkill[gap.correctedTo] = { gaps: [], keywordFreq: {} }; } bySkill[gap.correctedTo].gaps.push(gap); for (const kw of gap.suggestedKeywords) { bySkill[gap.correctedTo].keywordFreq[kw] = (bySkill[gap.correctedTo].keywordFreq[kw] || 0) + 1; } } // 排序关键词建议按频率降序 const result = {}; for (const [skill, data] of Object.entries(bySkill)) { result[skill] = { missCount: data.gaps.length, suggestedKeywords: Object.entries(data.keywordFreq) .sort((a, b) => b[1] - a[1]) .map(([keyword, frequency]) => ({ keyword, frequency })), queries: data.gaps.map(g => g.query), }; } return result; } // === 主流程 === function detect() { const index = loadIndex(); if (!index) { console.error('skills-index.json 不可用'); process.exit(1); } const feedback = loadFeedback(); if (feedback.length === 0) { console.log('无反馈数据'); return null; } const skillKeywords = buildSkillKeywordMap(index); const gaps = feedback .map(fb => analyzeGap(fb, skillKeywords)) .filter(Boolean); const bySkill = aggregateBySkill(gaps); // 全局统计 const corrections = feedback.filter(f => f.routedTo !== f.correctedTo && f.routedTo !== 'unknown'); const avgCoverage = gaps.length > 0 ? Math.round(gaps.reduce((s, g) => s + g.coverageRate, 0) / gaps.length) : 100; const report = { generated: new Date().toISOString(), summary: { totalFeedback: feedback.length, totalCorrections: corrections.length, gapsDetected: gaps.length, avgCoverageRate: avgCoverage + '%', skillsAffected: Object.keys(bySkill).length, }, bySkill, details: gaps, }; // 写入报告文件 if (!fs.existsSync(DEBUG_DIR)) fs.mkdirSync(DEBUG_DIR, { recursive: true }); fs.writeFileSync(REPORT_FILE, JSON.stringify(report, null, 2) + '\n'); return report; } function main() { const report = detect(); if (!report) return; if (JSON_MODE) { console.log(JSON.stringify(report, null, 2)); return; } console.log('=== 关键词缺口检测报告 ===\n'); console.log(`反馈总数: ${report.summary.totalFeedback}`); console.log(`纠正总数: ${report.summary.totalCorrections}`); console.log(`缺口数: ${report.summary.gapsDetected}`); console.log(`平均覆盖: ${report.summary.avgCoverageRate}`); console.log(`涉及技能: ${report.summary.skillsAffected}`); console.log(''); for (const [skill, data] of Object.entries(report.bySkill)) { console.log(`## ${skill} (${data.missCount} miss)`); console.log(` 建议添加关键词:`); for (const kw of data.suggestedKeywords.slice(0, 10)) { console.log(` + "${kw.keyword}" (出现 ${kw.frequency} 次)`); } console.log(` 相关查询:`); for (const q of data.queries) { console.log(` - "${q}"`); } console.log(''); } console.log(`报告已保存: ${REPORT_FILE}`); } // 模块导出 if (typeof module !== 'undefined') { module.exports = { loadIndex, loadFeedback, buildSkillKeywordMap, analyzeGap, aggregateBySkill, detect, main, }; } if (require.main === module) { main(); }