135 lines
3.8 KiB
JavaScript
135 lines
3.8 KiB
JavaScript
#!/usr/bin/env node
|
||
/**
|
||
* 同义词展开器 (v4.9)
|
||
*
|
||
* 加载 synonyms.json,将查询 token 展开为同义词组。
|
||
* 供 route-analyzer.js 和 route-feedback.js 的 tokenize() 使用。
|
||
*
|
||
* 核心函数:
|
||
* loadSynonymMap() → Map<word, string[]> 单例缓存
|
||
* expandSynonyms(tokens) → 展开后的 Set<string>
|
||
*/
|
||
|
||
const fs = require('fs');
|
||
const path = require('path');
|
||
|
||
// 单例缓存
|
||
let _synonymMap = null;
|
||
|
||
/**
|
||
* 加载同义词映射表 (单例)
|
||
* @returns {Map<string, string[]>} 每个词 → 所属组的全部同义词
|
||
*/
|
||
function loadSynonymMap() {
|
||
if (_synonymMap) return _synonymMap;
|
||
|
||
_synonymMap = new Map();
|
||
|
||
try {
|
||
const selfDir = path.dirname(__filename);
|
||
const synFile = path.join(selfDir, 'synonyms.json');
|
||
if (!fs.existsSync(synFile)) return _synonymMap;
|
||
|
||
const data = JSON.parse(fs.readFileSync(synFile, 'utf8'));
|
||
for (const group of (data.groups || [])) {
|
||
const words = (group.words || []).map(w => w.toLowerCase());
|
||
for (const word of words) {
|
||
// 每个词映射到组内其他所有词
|
||
const others = words.filter(w => w !== word);
|
||
if (_synonymMap.has(word)) {
|
||
// 合并多组同义词
|
||
const existing = _synonymMap.get(word);
|
||
for (const o of others) {
|
||
if (!existing.includes(o)) existing.push(o);
|
||
}
|
||
} else {
|
||
_synonymMap.set(word, [...others]);
|
||
}
|
||
}
|
||
}
|
||
} catch {}
|
||
|
||
return _synonymMap;
|
||
}
|
||
|
||
/**
|
||
* 展开 token 集合为包含同义词的更大集合
|
||
* @param {Set<string>|Array<string>} tokens - 原始 token 集合
|
||
* @returns {Set<string>} 展开后的 token 集合 (包含原始 + 同义词)
|
||
*/
|
||
function expandSynonyms(tokens) {
|
||
const synMap = loadSynonymMap();
|
||
const expanded = new Set(tokens);
|
||
|
||
for (const token of tokens) {
|
||
const synonyms = synMap.get(token.toLowerCase());
|
||
if (synonyms) {
|
||
for (const syn of synonyms) {
|
||
expanded.add(syn);
|
||
}
|
||
}
|
||
}
|
||
|
||
return expanded;
|
||
}
|
||
|
||
/**
|
||
* 重置单例缓存 (测试用)
|
||
*/
|
||
/**
|
||
* P1-FIX: 加权同义词展开
|
||
* 原词权重 1.0, 主同义词 0.7, 次同义词 0.4
|
||
* @param {Set<string>} tokens
|
||
* @returns {{ expanded: Set<string>, weights: Map<string, number> }}
|
||
*/
|
||
function expandSynonymsWeighted(tokens) {
|
||
const synMap = loadSynonymMap();
|
||
const expanded = new Set(tokens);
|
||
const weights = new Map();
|
||
|
||
// 原词权重 1.0
|
||
for (const t of tokens) weights.set(t, 1.0);
|
||
|
||
for (const token of tokens) {
|
||
const synonyms = synMap.get(token.toLowerCase());
|
||
if (synonyms) {
|
||
for (let i = 0; i < synonyms.length; i++) {
|
||
const syn = synonyms[i];
|
||
expanded.add(syn);
|
||
// 前 3 个同义词为主同义词(0.7),其余为次(0.4)
|
||
const w = i < 3 ? 0.7 : 0.4;
|
||
if (!weights.has(syn) || weights.get(syn) < w) {
|
||
weights.set(syn, w);
|
||
}
|
||
}
|
||
}
|
||
}
|
||
return { expanded, weights };
|
||
}
|
||
|
||
function resetCache() {
|
||
_synonymMap = null;
|
||
}
|
||
|
||
// 模块导出
|
||
if (typeof module !== 'undefined') {
|
||
module.exports = { loadSynonymMap, expandSynonyms, expandSynonymsWeighted, resetCache };
|
||
}
|
||
|
||
// CLI 入口
|
||
if (require.main === module) {
|
||
const query = process.argv.slice(2).join(' ');
|
||
if (!query) {
|
||
console.log('Usage: node synonym-expander.js <tokens...>');
|
||
console.log('Example: node synonym-expander.js 前端 部署');
|
||
process.exit(0);
|
||
}
|
||
|
||
const tokens = new Set(query.toLowerCase().split(/\s+/));
|
||
const expanded = expandSynonyms(tokens);
|
||
|
||
console.log('原始 tokens:', Array.from(tokens).join(', '));
|
||
console.log('展开后:', Array.from(expanded).join(', '));
|
||
console.log(`展开: ${tokens.size} → ${expanded.size} (+${expanded.size - tokens.size})`);
|
||
}
|