304 lines
9.6 KiB
JavaScript
304 lines
9.6 KiB
JavaScript
|
|
#!/usr/bin/env node
|
|||
|
|
/**
 * Memory search engine v1.0 — full-text search + #tag filtering
 * @file memory-search.js
 *
 * Features:
 *   - Searches every .md file in the memory/ directory
 *   - Keyword search (case-insensitive)
 *   - #tag filtering
 *   - Returns matching sections (with context) and file:line locations
 *
 * Usage:
 *   node memory-search.js <keywords> [--tag <tag>] [--context <lines>] [--json]
 *
 * Examples:
 *   node memory-search.js "部署 HTTPS"
 *   node memory-search.js "docker" --tag deploy
 *   node memory-search.js "qwen" --context 5 --json
 *
 * Exit codes: 0 = results found, 1 = no results, 2 = argument error
 */
|
|||
|
|
const fs = require('fs');
|
|||
|
|
const path = require('path');
|
|||
|
|
|
|||
|
|
// ─── 路径解析 ────────────────────────────────────────
|
|||
|
|
/**
 * Locate the directory that holds the memory `.md` files.
 *
 * Resolution order:
 *   1. `PATHS.memoryDir` exported by `./paths.config.js`, when that module
 *      loads and the value is truthy.
 *   2. The first `<root>/projects/<name>/memory` directory found, where
 *      `<root>` is the parent of this script's directory.
 *
 * @returns {string|null} Path of the memory directory, or `null` when none is found.
 */
function detectMemoryDir() {
  try {
    const { PATHS } = require('./paths.config.js');
    if (PATHS.memoryDir) return PATHS.memoryDir;
  } catch {
    // The config module is optional: any failure to load or read it falls
    // through to the directory scan below.
  }

  // statSync throws for dangling symlinks and entries that vanish mid-scan;
  // such entries are simply "not a directory" for the purpose of this scan.
  const isDirectory = (target) => {
    try {
      return fs.statSync(target).isDirectory();
    } catch {
      return false;
    }
  };

  const projectsDir = path.join(path.resolve(__dirname, '..'), 'projects');
  if (!isDirectory(projectsDir)) return null;

  for (const entry of fs.readdirSync(projectsDir)) {
    const projectPath = path.join(projectsDir, entry);
    if (!isDirectory(projectPath)) continue;

    // Require an actual directory: a plain file named "memory" cannot be listed later.
    const memDir = path.join(projectPath, 'memory');
    if (isDirectory(memDir)) return memDir;
  }
  return null;
}
|
|||
|
|
|
|||
|
|
// Memory directory, resolved once when this module is loaded; null when detectMemoryDir() finds none.
const MEMORY_DIR = detectMemoryDir();
|
|||
|
|
|
|||
|
|
// ─── 参数解析 ────────────────────────────────────────
|
|||
|
|
/**
 * Parse the command-line arguments into search options.
 *
 * Recognized arguments:
 *   --tag <tag>     adds a tag filter (one leading "#" removed, lower-cased); repeatable
 *   --context <n>   context line count; must be a non-negative integer, otherwise 3 is used
 *   --json          sets `jsonOutput`
 *   --lines         clears `sectionMode`
 *   <text>          any argument not starting with "--" is lower-cased and split on
 *                   whitespace; each piece becomes one keyword
 *
 * Unknown "--" flags, and `--tag` / `--context` without a following value, are
 * skipped without raising an error.
 *
 * @param {string[]} argv - Full argument vector (e.g. `process.argv`); the first two entries are ignored.
 * @returns {{keywords: string[], tags: string[], contextLines: number, jsonOutput: boolean, sectionMode: boolean}}
 */
function parseArgs(argv) {
  const DEFAULT_CONTEXT_LINES = 3;
  const args = argv.slice(2);
  const result = {
    keywords: [],
    tags: [],
    contextLines: DEFAULT_CONTEXT_LINES,
    jsonOutput: false,
    sectionMode: true, // return whole heading-delimited sections
  };

  for (let i = 0; i < args.length; i++) {
    const arg = args[i];
    const value = args[i + 1];

    if (arg === '--tag' && value) {
      const tag = value.replace(/^#/, '').toLowerCase();
      // A bare "#" normalizes to an empty tag, which would match almost any section; drop it.
      if (tag) result.tags.push(tag);
      i++; // consume the value
    } else if (arg === '--context' && value) {
      const parsed = Number.parseInt(value, 10);
      // Explicit validity check instead of `|| 3`, so that 0 is honored.
      result.contextLines =
        Number.isInteger(parsed) && parsed >= 0 ? parsed : DEFAULT_CONTEXT_LINES;
      i++; // consume the value
    } else if (arg === '--json') {
      result.jsonOutput = true;
    } else if (arg === '--lines') {
      result.sectionMode = false;
    } else if (!arg.startsWith('--')) {
      // A quoted argument may hold several space-separated search terms.
      result.keywords.push(...arg.toLowerCase().split(/\s+/).filter(Boolean));
    }
    // Anything else is an unknown flag and is ignored.
  }

  return result;
}
|
|||
|
|
|
|||
|
|
// ─── 文件扫描 ────────────────────────────────────────
|
|||
|
|
/**
 * Read every `.md` file located directly inside MEMORY_DIR.
 *
 * Files are returned sorted by name. Entries that end in `.md` but cannot be
 * read as a file (a directory, a dangling symlink, a file removed after the
 * listing) are skipped; any other read error is rethrown.
 *
 * @returns {Array<{file: string, name: string, lines: string[], content: string}>}
 *   `file` is the full path, `name` the bare file name, `content` the raw
 *   text, and `lines` the text split on LF or CRLF line endings. Returns an
 *   empty array when MEMORY_DIR is unset or does not exist.
 */
function loadMemoryFiles() {
  if (!MEMORY_DIR || !fs.existsSync(MEMORY_DIR)) return [];

  const names = fs
    .readdirSync(MEMORY_DIR)
    .filter((name) => name.endsWith('.md'))
    .sort();

  const loaded = [];
  for (const name of names) {
    const filePath = path.join(MEMORY_DIR, name);
    let content;
    try {
      content = fs.readFileSync(filePath, 'utf8');
    } catch (err) {
      // Not a readable file: skip it rather than abort the whole search.
      if (err.code === 'EISDIR' || err.code === 'ENOENT') continue;
      throw err;
    }
    loaded.push({
      file: filePath,
      name,
      // Accept CRLF as well as LF so no line carries a trailing "\r".
      lines: content.split(/\r?\n/),
      content,
    });
  }
  return loaded;
}
|
|||
|
|
|
|||
|
|
// ─── 段落解析 ────────────────────────────────────────
|
|||
|
|
/**
 * Split a Markdown document into sections, one per level 1–3 heading.
 *
 * A section runs from its heading line up to the line before the next
 * level 1–3 heading (or the end of the document). Lines before the first
 * heading do not belong to any section. Lines inside fenced code blocks
 * (``` or ~~~) are never treated as headings, so a `# comment` in a shell
 * snippet does not split the surrounding section.
 *
 * @param {string[]} lines - Document lines, without line terminators.
 * @returns {Array<{title: string, startLine: number, endLine: number, text: string, tags: string[]}>}
 *   `startLine` / `endLine` are 0-based inclusive indexes into `lines`,
 *   `text` is the section's lines joined with "\n", and `tags` holds the
 *   distinct lower-cased `#tag` tokens found in `text`, in first-seen order.
 */
function parseSections(lines) {
  const HEADING = /^#{1,3}\s/;
  const FENCE = /^ {0,3}(`{3,}|~{3,})/;
  const TAG = /#([a-zA-Z\u4e00-\u9fff][\w\u4e00-\u9fff-]*)/g;

  const sections = [];
  let openFence = null; // marker that opened the current fenced block, or null

  lines.forEach((line, index) => {
    const fence = FENCE.exec(line);
    if (fence) {
      const marker = fence[1];
      if (openFence === null) {
        openFence = marker;
      } else if (
        marker[0] === openFence[0] &&
        marker.length >= openFence.length &&
        line.trim() === marker
      ) {
        // A closing fence uses the same character, is at least as long as the
        // opener, and carries no other text.
        openFence = null;
      }
      return;
    }
    if (openFence !== null || !HEADING.test(line)) return;

    sections.push({
      // trimEnd drops trailing whitespace, including a stray "\r" from CRLF input
      title: line.replace(/^#+\s*/, '').trimEnd(),
      startLine: index,
      endLine: index,
      text: '',
      tags: [],
    });
  });

  sections.forEach((section, position) => {
    const next = sections[position + 1];
    const stop = next ? next.startLine : lines.length;
    section.endLine = stop - 1;
    section.text = lines.slice(section.startLine, stop).join('\n');

    const found = section.text.match(TAG);
    if (found) {
      // De-duplicate so a tag repeated in the section is reported once.
      section.tags = [...new Set(found.map((tag) => tag.slice(1).toLowerCase()))];
    }
  });

  return sections;
}
|
|||
|
|
|
|||
|
|
// ─── 搜索引擎 ────────────────────────────────────────
|
|||
|
|
/**
 * Search every section of every memory file.
 *
 * Matching rules:
 *   - When tags are given, a section must carry at least one of them (either
 *     in its parsed tag list or as the literal text `#<tag>`).
 *   - With tags but no keywords, every section passing the tag filter is
 *     returned with score 1.
 *   - With keywords, each keyword adds 3 when it occurs in the section title
 *     plus 1 per case-insensitive occurrence in the section text; sections
 *     scoring 0 are dropped.
 *
 * Keywords and tags are normalized here (lower-cased, empty entries dropped,
 * one leading "#" removed from tags), so callers need not pre-process them.
 *
 * @param {object} [params] - Search parameters.
 * @param {string[]} [params.keywords] - Terms to look for.
 * @param {string[]} [params.tags] - Tags to filter by.
 * @returns {Array<{file: string, filePath: string, section: string, startLine: number, snippet: string, score: number, tags: string[]}>}
 *   Matches sorted by descending score; `startLine` is 1-based.
 */
function search({ keywords = [], tags = [] } = {}) {
  const SNIPPET_MAX = 500;
  const TITLE_WEIGHT = 3;

  const wantedKeywords = keywords
    .map((kw) => String(kw).toLowerCase())
    .filter(Boolean); // an empty keyword would match at every position
  const wantedTags = tags
    .map((tag) => String(tag).replace(/^#/, '').toLowerCase())
    .filter(Boolean);

  if (wantedKeywords.length === 0 && wantedTags.length === 0) return [];

  // One RegExp per keyword, built once. String.prototype.match resets
  // lastIndex for global patterns, so reuse across sections is safe.
  const matchers = wantedKeywords.map((kw) => ({
    kw,
    pattern: new RegExp(escapeRegExp(kw), 'gi'),
  }));
  const tagOnly = wantedKeywords.length === 0;
  const results = [];

  for (const f of loadMemoryFiles()) {
    for (const sec of parseSections(f.lines)) {
      if (wantedTags.length > 0) {
        const textLower = sec.text.toLowerCase();
        const hasTag = wantedTags.some(
          (t) => sec.tags.includes(t) || textLower.includes(`#${t}`),
        );
        if (!hasTag) continue;
      }

      let score;
      let snippet;
      if (tagOnly) {
        score = 1;
        snippet = truncateSnippet(sec.text, SNIPPET_MAX);
      } else {
        const titleLower = sec.title.toLowerCase();
        score = 0;
        for (const { kw, pattern } of matchers) {
          if (titleLower.includes(kw)) score += TITLE_WEIGHT;
          const hits = sec.text.match(pattern);
          if (hits) score += hits.length;
        }
        if (score === 0) continue;
        snippet = highlightSnippet(sec.text, wantedKeywords, SNIPPET_MAX);
      }

      results.push({
        file: f.name,
        filePath: f.file,
        section: sec.title,
        startLine: sec.startLine + 1, // 1-based for display
        snippet,
        score,
        tags: sec.tags,
      });
    }
  }

  results.sort((a, b) => b.score - a.score);
  return results;
}
|
|||
|
|
|
|||
|
|
// ─── 工具函数 ────────────────────────────────────────
|
|||
|
|
/**
 * Escape regular-expression metacharacters so the string matches literally
 * when used as a RegExp source.
 *
 * @param {string} s - Raw text.
 * @returns {string} `s` with each metacharacter prefixed by a backslash.
 */
function escapeRegExp(s) {
  const metaChars = /[.*+?^${}()|[\]\\]/g;
  return s.replace(metaChars, (ch) => `\\${ch}`);
}
|
|||
|
|
|
|||
|
|
/**
 * Trim `text` and cap it at `maxLen` UTF-16 code units, appending "..." when
 * anything was cut off.
 *
 * The cut never lands between the two halves of a surrogate pair: if it
 * would, the cut moves one unit earlier so the result never ends in a lone
 * high surrogate.
 *
 * @param {string} text - Text to shorten.
 * @param {number} maxLen - Maximum number of code units kept before trimming.
 * @returns {string} The trimmed text, with a "..." suffix when truncated.
 */
function truncateSnippet(text, maxLen) {
  if (text.length <= maxLen) return text.trim();

  let cut = maxLen;
  const lastUnit = text.charCodeAt(cut - 1);
  const endsOnHighSurrogate = lastUnit >= 0xd800 && lastUnit <= 0xdbff;
  if (endsOnHighSurrogate) cut -= 1;

  return `${text.slice(0, cut).trim()}...`;
}
|
|||
|
|
|
|||
|
|
/**
 * Extract a window of `text` around the earliest keyword occurrence.
 *
 * The window starts up to 100 characters before the first case-insensitive
 * match of any keyword and spans at most `maxLen` characters. When no keyword
 * occurs in `text`, the window starts at the beginning of the text. "..." is
 * added on each side where text was left out.
 *
 * @param {string} text - Section text.
 * @param {string[]} keywords - Terms to locate; compared case-insensitively, empty terms ignored.
 * @param {number} maxLen - Maximum window length, excluding the "..." markers.
 * @returns {string} The trimmed window, with ellipsis markers as needed.
 */
function highlightSnippet(text, keywords, maxLen) {
  const LEAD_CHARS = 100;
  const haystack = text.toLowerCase();

  let firstIdx = -1;
  for (const kw of keywords) {
    // Lower-case here so callers need not pre-normalize their keywords.
    const needle = String(kw).toLowerCase();
    if (!needle) continue;
    const idx = haystack.indexOf(needle);
    if (idx !== -1 && (firstIdx === -1 || idx < firstIdx)) firstIdx = idx;
  }

  // No match: show the start of the section rather than its last few characters.
  const start = firstIdx === -1 ? 0 : Math.max(0, firstIdx - LEAD_CHARS);
  const end = Math.min(text.length, start + maxLen);

  let snippet = text.slice(start, end).trim();
  if (start > 0) snippet = `...${snippet}`;
  if (end < text.length) snippet += '...';
  return snippet;
}
|
|||
|
|
|
|||
|
|
// ─── 输出格式化 ──────────────────────────────────────
|
|||
|
|
/**
 * Render search results for output.
 *
 * JSON mode produces `{ count, results }`, where each result carries
 * `file`, `section`, `line`, `score`, `tags` and `snippet`. Text mode prints
 * a summary line followed by at most 10 results and, when more exist, a
 * trailing line stating how many were left out.
 *
 * @param {Array<{file: string, section: string, startLine: number, snippet: string, score: number, tags: string[]}>} results
 * @param {boolean} jsonOutput - Emit JSON instead of human-readable text.
 * @returns {string} The formatted output.
 */
function formatResults(results, jsonOutput) {
  if (jsonOutput) {
    // An empty list naturally serializes as { count: 0, results: [] }.
    const payload = {
      count: results.length,
      results: results.map((r) => ({
        file: r.file,
        section: r.section,
        line: r.startLine,
        score: r.score,
        tags: r.tags,
        snippet: r.snippet,
      })),
    };
    return JSON.stringify(payload, null, 2);
  }

  if (results.length === 0) return '未找到匹配结果。';

  // Human-readable listing
  const out = [`找到 ${results.length} 个匹配段落:\n`];
  results.slice(0, 10).forEach((r) => {
    const tagSuffix = r.tags.length
      ? ` [${r.tags.map((t) => `#${t}`).join(' ')}]`
      : '';
    out.push(
      `── ${r.file}:${r.startLine} | ${r.section} (score: ${r.score})${tagSuffix}`,
      r.snippet,
      '',
    );
  });

  const hidden = results.length - 10;
  if (hidden > 0) {
    out.push(`... 还有 ${hidden} 条结果未显示`);
  }
  return out.join('\n');
}
|
|||
|
|
|
|||
|
|
// ─── 主入口 ──────────────────────────────────────────
|
|||
|
|
/**
 * CLI entry point: parse arguments, run the search, print the results and
 * set the process exit code (0 = results found, 1 = no results,
 * 2 = no keywords or tags given).
 *
 * The exit code is set through `process.exitCode` rather than
 * `process.exit()`, so output already written to stdout/stderr is flushed
 * before the process ends — `process.exit()` can cut off piped output.
 *
 * @returns {void}
 */
function main() {
  const params = parseArgs(process.argv);

  if (params.keywords.length === 0 && params.tags.length === 0) {
    console.error('用法: node memory-search.js <keywords> [--tag <tag>] [--context <lines>] [--json]');
    console.error('示例: node memory-search.js "部署 docker" --tag deploy');
    process.exitCode = 2;
    return;
  }

  // A missing memory directory otherwise looks exactly like "no matches".
  // The warning goes to stderr so JSON on stdout stays parseable.
  if (!MEMORY_DIR || !fs.existsSync(MEMORY_DIR)) {
    console.error('警告: 未找到 memory 目录,无法搜索。');
  }

  const results = search(params);
  console.log(formatResults(results, params.jsonOutput));
  process.exitCode = results.length > 0 ? 0 : 1;
}
|
|||
|
|
|
|||
|
|
// Public API for tests and other scripts; assigned only when a CommonJS
// `module` object is present.
if (typeof module !== 'undefined') {
  const publicApi = { search, loadMemoryFiles, parseSections, MEMORY_DIR };
  module.exports = publicApi;
}

// Run the CLI only when this file is the program entry point, not when it is required.
if (require.main === module) {
  main();
}
|