273 lines
10 KiB
JavaScript
273 lines
10 KiB
JavaScript
#!/usr/bin/env node
|
||
/**
|
||
* 分层路由 L1: 域分类器 (v5.9)
|
||
*
|
||
* 将用户查询分类到 10 个技能域之一,缩小 L2 精排的候选集。
|
||
*
|
||
* 域划分 (与 SKILL-REGISTRY.md 一致):
|
||
* ai-data — AI/机器学习/数据分析/数据工程
|
||
* development — 前后端/移动/小程序/通用开发/浏览器/工作流
|
||
* architecture — 系统架构/数据库/云原生/性能/图表
|
||
* devops — CI/CD/Docker/K8s/Git/SRE
|
||
* security — 安全/渗透/加密/DevSecOps
|
||
* quality — 测试/审查/审计
|
||
* product — 产品/设计/UX/项目管理
|
||
* business — 商业/财务/营销/法务/研究
|
||
* content — 技术写作/文案/邮件/SEO/社媒
|
||
* meta — 编排/提示词/元技能
|
||
*
|
||
* 模块导出:
|
||
* classifyDomain(queryText, intents, entities) → { domain, confidence, candidates }
|
||
* DOMAIN_SKILLS — 域→技能名映射
|
||
* DOMAIN_KEYWORDS — 域→关键词映射
|
||
*/
|
||
|
||
// === Domain → skill-name mapping ===
// Each key is a domain id; each value is the ordered list of skill names that
// belong to that domain. classifyDomain() returns these lists as candidates.
const DOMAIN_SKILLS = {
  // AI / machine learning / data analysis / data engineering
  'ai-data': [
    'ai-ml-expert',
    'data-analyst-expert',
    'data-engineer-expert',
  ],

  // Frontend, backend, mobile, mini-programs, general development, browser, workflow
  'development': [
    'frontend-expert',
    'backend-builder',
    'mobile-expert',
    'miniprogram-expert',
    'developer-expert',
    'debugger-expert',
    'api-integration-specialist',
    'regex-shell-wizard',
    'ultimate-code-expert',
    'browser-automation-expert',
    'workflow-automation-expert',
    'notification-system-expert',
    'typescript-pro',
    'python-pro',
    'golang-pro',
    'rust-engineer',
    'angular-architect',
    'vue-expert',
    'nextjs-developer',
    'flutter-expert',
    'swift-expert',
    'websocket-engineer',
  ],

  // System architecture, databases, cloud native, performance, diagrams
  'architecture': [
    'architect-expert',
    'database-tuning-expert',
    'cloud-native-expert',
    'edge-computing-expert',
    'performance-expert',
    'impact-analyst',
    'diagram-as-code-expert',
    'zero-defect-guardian',
    'api-designer',
    'graphql-architect',
    'cloud-architect',
  ],

  // CI/CD, Docker, Kubernetes, Git, SRE
  'devops': [
    'devops-expert',
    'devsecops-expert',
    'git-operation-master',
    'sre-expert',
    'kubernetes-specialist',
    'terraform-engineer',
  ],

  // Security
  'security': [
    'security-expert',
  ],

  // Testing, review, audit
  'quality': [
    'tester-expert',
    'reviewer-expert',
    'project-audit-expert',
  ],

  // Product, design, UX, project management
  'product': [
    'product-manager-expert',
    'designer-expert',
    'ux-researcher',
    'project-coordinator',
  ],

  // Business, finance, marketing, legal, research
  'business': [
    'business-plan-skill',
    'finance-advisor',
    'sales-consultant',
    'pricing-strategist',
    'customer-success-expert',
    'growth-hacker',
    'investor-review-guide',
    'industry-research-cn',
    'legal-review-skill',
  ],

  // Technical writing, copywriting, email, SEO, social media
  'content': [
    'tech-writer-expert',
    'copywriter-expert',
    'email-communicator',
    'social-media-manager',
    'technical-seo-expert',
  ],

  // Orchestration, prompts, meta skills
  'meta': [
    'genesis-engine',
    'prompt-optimizer',
    'tech-lead-mentor',
    'planning-with-files',
  ],
};
|
||
|
||
// === 域关键词 (用于 L1 快速匹配) ===
|
||
const DOMAIN_KEYWORDS = {
|
||
'ai-data': [
|
||
'ai', 'ml', '机器学习', '深度学习', 'pytorch', 'tensorflow', 'nlp', 'cv',
|
||
'llm', 'rag', '模型', '训练', '微调', 'fine-tune', 'embedding', 'transformer',
|
||
'pandas', 'numpy', '数据分析', '数据工程', 'etl', 'spark', 'kafka', 'dbt',
|
||
'大语言模型', 'langchain', 'huggingface', 'agent', 'prompt',
|
||
],
|
||
'development': [
|
||
'react', 'vue', 'angular', 'next', 'nuxt', 'svelte', 'tailwind', 'css',
|
||
'node', 'express', 'fastapi', 'django', 'flask', 'go', 'gin', 'fiber',
|
||
'flutter', 'swift', 'kotlin', 'react native', 'expo',
|
||
'小程序', 'taro', 'uni-app', '微信',
|
||
'api', 'rest', 'graphql', 'websocket', 'socket',
|
||
'正则', 'shell', 'bash', 'awk', 'sed',
|
||
'浏览器自动化', '爬虫', 'playwright', 'selenium', 'puppeteer',
|
||
'zapier', 'n8n', '工作流', '自动化',
|
||
'通知', '推送', 'fcm', 'sms', '站内信',
|
||
'typescript', 'python', 'golang', 'rust', 'wasm',
|
||
'写代码', '实现', '开发', '编程', '函数', '接口',
|
||
],
|
||
'architecture': [
|
||
'架构', '设计模式', 'ddd', 'adr', '技术选型', '微服务',
|
||
'数据库', 'sql', '索引', '慢查询', 'mysql', 'postgresql', 'mongodb',
|
||
'istio', 'gitops', '云原生', // V17 修复: k8s/kubernetes/helm 移至 devops,消除跨域冲突
|
||
'edge', 'workers', 'vercel edge', 'deno deploy', 'cdn',
|
||
'性能', '优化', 'cwv', '首屏', '内存', '调优', '瓶颈',
|
||
'影响范围', '依赖分析', '爆炸半径',
|
||
'mermaid', 'plantuml', 'graphviz', '画图', '图表', '可视化',
|
||
'零缺陷', 'pinning test',
|
||
],
|
||
'devops': [
|
||
'docker', 'ci', 'cd', 'ci/cd', 'pipeline', 'jenkins', 'github actions',
|
||
'nginx', '部署', 'deploy', '运维',
|
||
'git', 'rebase', 'cherry-pick', '分支', 'merge', 'conflict',
|
||
'sli', 'slo', '监控', 'prometheus', 'grafana', '告警', 'postmortem',
|
||
'terraform', 'iac', 'helm', 'rbac', 'k8s', 'kubernetes', // V17: 统一归 devops
|
||
'ssh', '服务器', 'linux',
|
||
],
|
||
'security': [
|
||
'安全', 'owasp', 'xss', 'csrf', 'sql注入', 'jwt', '加密', '渗透',
|
||
'sast', 'dast', 'sbom', '漏洞', '权限', '认证', '鉴权',
|
||
],
|
||
'quality': [
|
||
'测试', 'test', 'jest', 'vitest', 'pytest', 'tdd', 'bdd',
|
||
'code review', '代码审查', '审查', '重构', '技术债',
|
||
'审计', '上线前', '质量',
|
||
],
|
||
'product': [
|
||
'产品', 'prd', '需求', '用户故事', '路线图', 'rice', 'kano',
|
||
'ui', 'ux', '设计', '交互', 'figma', 'wcag', '无障碍',
|
||
'用户研究', '访谈', 'persona', '可用性',
|
||
'项目管理', '甘特图', 'sprint', '里程碑', '排期',
|
||
],
|
||
'business': [
|
||
'商业', 'bp', '融资', '商业计划', '商业模式',
|
||
'记账', '财务', '税务', '现金流', '报税', '报价',
|
||
'销售', 'crm', '谈判', '客户开发',
|
||
'定价', '收费', 'saas', '免费增值',
|
||
'客户成功', 'sla', 'onboarding', '续费', '流失',
|
||
'增长', 'aarrr', '转化率', '裂变', '私域',
|
||
'投资', '估值', '尽调', 'dd',
|
||
'行业研究', '市场调研', '竞品', '市场规模',
|
||
'合同', '法务', '合规', '知识产权', '劳动法',
|
||
],
|
||
'content': [
|
||
'文档', 'readme', 'api文档', '用户手册',
|
||
'文案', '广告', '营销', '落地页', 'cta',
|
||
'邮件', '商务邮件', '冷邮件', '催款',
|
||
'社交媒体', '新媒体', '公众号', '小红书', '抖音', 'kol',
|
||
'seo', 'sitemap', 'robots', 'json-ld', 'meta',
|
||
],
|
||
'meta': [
|
||
'从零', '全流程', '端到端', '全生命周期',
|
||
'提示词', '优化提示',
|
||
'团队管理', '晋升', '招聘', '1on1',
|
||
'规划文档',
|
||
],
|
||
};
|
||
|
||
// Precomputed lookup: domain id → Set of that domain's keywords, lower-cased.
// Built once at module load so classification can iterate and test membership
// without re-normalising the keyword lists on every call.
const DOMAIN_KEYWORD_SETS = {};
Object.keys(DOMAIN_KEYWORDS).forEach((domainName) => {
  const lowered = DOMAIN_KEYWORDS[domainName].map((keyword) => keyword.toLowerCase());
  DOMAIN_KEYWORD_SETS[domainName] = new Set(lowered);
});
|
||
|
||
// Intent → domain hints. A Map is used instead of a plain object so that intent
// strings such as "constructor" or "toString" cannot resolve to members
// inherited from Object.prototype.
const INTENT_DOMAIN_MAP = new Map([
  ['debug', 'development'],
  ['create', 'development'],
  ['explain', 'development'],
  ['performance', 'architecture'],
  ['architecture', 'architecture'],
  ['deploy', 'devops'],
  ['security', 'security'],
  ['test', 'quality'],
  ['review', 'quality'],
  ['data', 'ai-data'],
]);

// Cache of compiled word-boundary patterns for short ASCII keywords, filled
// lazily so each pattern is compiled once instead of on every call.
const SHORT_KEYWORD_PATTERNS = new Map();

/**
 * Tests whether a lower-cased query mentions a keyword.
 *
 * P1-4: purely alphabetic keywords of at most 3 characters are matched with
 * word boundaries so they do not fire on substrings of longer words; every
 * other keyword is matched as a plain substring.
 *
 * @param {string} queryLower - Lower-cased query text.
 * @param {string} kw - Lower-cased keyword.
 * @returns {boolean} True when the keyword is present in the query.
 */
function queryContainsKeyword(queryLower, kw) {
  if (kw.length <= 3 && /^[a-z]+$/i.test(kw)) {
    let pattern = SHORT_KEYWORD_PATTERNS.get(kw);
    if (pattern === undefined) {
      // kw contains only ASCII letters here, so no regex escaping is required.
      pattern = new RegExp(`\\b${kw}\\b`, 'i');
      SHORT_KEYWORD_PATTERNS.set(kw, pattern);
    }
    return pattern.test(queryLower);
  }
  return queryLower.includes(kw);
}

/**
 * L1 domain classification based on keyword hits, entity matches and intent hints.
 *
 * Scoring per domain:
 *   - keywords: (hits^2 / keywordCount) * 2.0 when at least one keyword is found
 *   - entities: +0.5 for each entity that equals one of the domain's keywords
 *   - intents:  +0.3 for each intent that has a mapped domain
 *
 * When nothing scores, the function falls back to the 'development' domain with
 * confidence 0.1 (this fallback result carries no `_scores` field).
 *
 * @param {string} queryText - User query text; non-string values are treated as empty.
 * @param {string[]} intents - Detected intents (from intent-classifier); unknown intents are ignored.
 * @param {string[]} entities - Detected entities (from intent-classifier); non-string items are skipped.
 * @returns {{ domain: string, confidence: number, candidates: string[], _scores?: Array<{domain: string, score: number}> }}
 *   `candidates` is always a fresh array that the caller may mutate freely.
 */
function classifyDomain(queryText, intents, entities) {
  const queryLower = typeof queryText === 'string' ? queryText.toLowerCase() : '';

  // Map keeps insertion order, which (with a stable sort) decides ties below.
  const scores = new Map();
  const addScore = (domain, delta) => {
    scores.set(domain, (scores.get(domain) || 0) + delta);
  };

  // Phase 1: keyword match scoring
  for (const [domain, kwSet] of Object.entries(DOMAIN_KEYWORD_SETS)) {
    let hits = 0;
    for (const kw of kwSet) {
      if (queryContainsKeyword(queryLower, kw)) hits++;
    }
    if (hits > 0) {
      // P2-15 fix: quadratic formula hits^2/size reduces the noise bonus of
      // large keyword sets, so a few precise hits in a small domain outweigh
      // many fuzzy hits in a large one.
      addScore(domain, ((hits * hits) / kwSet.size) * 2.0);
    }
  }

  // Phase 2: entity matching (framework / tool names)
  for (const entity of entities || []) {
    if (typeof entity !== 'string') continue; // tolerate malformed upstream output
    const entityLower = entity.toLowerCase();
    for (const [domain, kwSet] of Object.entries(DOMAIN_KEYWORD_SETS)) {
      if (kwSet.has(entityLower)) {
        addScore(domain, 0.5);
      }
    }
  }

  // Phase 3: intent mapping bonus
  for (const intent of intents || []) {
    const mapped = INTENT_DOMAIN_MAP.get(intent);
    if (mapped !== undefined) {
      addScore(mapped, 0.3);
    }
  }

  // Sort by score, highest first
  const sorted = [...scores.entries()].sort((a, b) => b[1] - a[1]);

  if (sorted.length === 0) {
    return {
      domain: 'development',
      confidence: 0.1,
      // Copy so callers cannot mutate the shared DOMAIN_SKILLS table.
      candidates: [...DOMAIN_SKILLS['development']],
    };
  }

  const topDomain = sorted[0][0];
  const topScore = sorted[0][1];

  // V09 fix: normalised confidence = topScore / sum(allScores), which removes
  // the bias caused by differing domain sizes.
  const totalScore = sorted.reduce((sum, [, value]) => sum + value, 0);
  const normalizedConfidence = totalScore > 0
    ? Math.min(1.0, Math.round((topScore / totalScore) * 100) / 100)
    : 0.1;

  // If the runner-up is close to the winner, merge both candidate sets
  let candidates = [...DOMAIN_SKILLS[topDomain]];
  if (sorted.length >= 2 && sorted[1][1] / topScore > 0.6) {
    const secondDomain = sorted[1][0];
    candidates = candidates.concat(DOMAIN_SKILLS[secondDomain]);
  }

  // Always include developer-expert as the general-purpose fallback
  if (!candidates.includes('developer-expert')) {
    candidates.push('developer-expert');
  }

  return {
    domain: topDomain,
    confidence: normalizedConfidence,
    candidates,
    // Top three domains with scores rounded to two decimals (diagnostics)
    _scores: sorted.slice(0, 3).map(([d, s]) => ({ domain: d, score: Math.round(s * 100) / 100 })),
  };
}
|
||
|
||
// Module exports: only assigned when a `module` object exists, so loading the
// file where `module` is not defined does not throw at this point.
if (typeof module !== 'undefined') {
  const publicApi = {
    classifyDomain,
    DOMAIN_SKILLS,
    DOMAIN_KEYWORDS,
    DOMAIN_KEYWORD_SETS,
  };
  module.exports = publicApi;
}
|
||
|
||
// CLI entry point: runs only when this file is executed directly with Node
// (e.g. `node <file> <query words...>`). The `typeof` guards mirror the export
// guard, so environments without `require` / `module` skip this block instead
// of throwing a ReferenceError.
if (
  typeof require !== 'undefined' &&
  typeof module !== 'undefined' &&
  require.main === module
) {
  // Join all CLI arguments into one query; fall back to a sample query.
  const query = process.argv.slice(2).join(' ') || '帮我写一个 React 组件';
  const result = classifyDomain(query, [], []);

  console.log(`查询: "${query}"`);
  console.log(`域: ${result.domain} (${result.confidence})`);
  console.log(`候选: ${result.candidates.length} 个技能`);
  // The no-match fallback result carries no `_scores`, hence the check.
  if (result._scores) {
    console.log(`评分: ${result._scores.map(s => `${s.domain}=${s.score}`).join(', ')}`);
  }
}
|