bookworm-smart-assistant/scripts/memory-search.js

304 lines
9.6 KiB
JavaScript
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env node
/**
* 记忆搜索引擎 v1.0 — 全文搜索 + #tag 标签过滤
* @file memory-search.js
*
* 功能:
* - 搜索 memory/ 目录下所有 .md 文件
* - 支持关键词搜索(大小写不敏感)
* - 支持 #tag 标签过滤
* - 返回匹配段落(含上下文)和文件:行号定位
*
* 用法:
* node memory-search.js <keywords> [--tag <tag>] [--context <lines>] [--json]
*
* 示例:
* node memory-search.js "部署 HTTPS"
* node memory-search.js "docker" --tag deploy
* node memory-search.js "qwen" --context 5 --json
*
* 退出码: 0=有结果, 1=无结果, 2=参数错误
*/
const fs = require('fs');
const path = require('path');
// ─── 路径解析 ────────────────────────────────────────
/**
 * Locate the directory that holds the memory `.md` files.
 *
 * Resolution order:
 *   1. `PATHS.memoryDir` exported by `./paths.config.js`, when that module
 *      can be loaded and the value is truthy.
 *   2. The first `projects/<name>/memory` directory found under the parent
 *      of this script's directory.
 *
 * @returns {string|null} Path of the memory directory, or null if none is found.
 */
function detectMemoryDir() {
  // Deliberate best-effort: the config module is optional, so any failure to
  // load or read it simply falls through to the directory scan below.
  try {
    const { PATHS } = require('./paths.config.js');
    if (PATHS.memoryDir) return PATHS.memoryDir;
  } catch {}

  // statSync throws for dangling symlinks and unreadable entries; treat those
  // as "not a directory" instead of crashing the whole script at load time.
  const isDirectory = (target) => {
    try {
      return fs.statSync(target).isDirectory();
    } catch {
      return false;
    }
  };

  // Fallback: infer the project root from the script location.
  const root = path.resolve(__dirname, '..');
  const projectsDir = path.join(root, 'projects');
  if (!isDirectory(projectsDir)) return null;

  for (const entry of fs.readdirSync(projectsDir)) {
    // A regular file that happens to be called "memory" is not usable,
    // so require an actual directory.
    const memDir = path.join(projectsDir, entry, 'memory');
    if (isDirectory(memDir)) return memDir;
  }
  return null;
}
// Resolved once when the module is loaded; null when detectMemoryDir() finds no directory.
const MEMORY_DIR = detectMemoryDir();
// ─── 参数解析 ────────────────────────────────────────
/**
 * Parse the command-line arguments of the search CLI.
 *
 * Recognised options:
 *   --tag <tag>        add a tag filter (a leading '#' is stripped, lowercased)
 *   --context <lines>  non-negative integer; anything else falls back to 3
 *   --json             request JSON output
 *   --lines            turn section mode off
 * Every argument that does not start with '--' is lowercased and split on
 * whitespace into individual keywords. Unknown '--' options, and options
 * whose value is missing, are skipped.
 *
 * NOTE: within this file `contextLines` and `sectionMode` are only parsed;
 * neither search() nor formatResults() reads them.
 *
 * @param {string[]} argv - Full argv array (the first two entries are skipped).
 * @returns {{keywords: string[], tags: string[], contextLines: number,
 *            jsonOutput: boolean, sectionMode: boolean}}
 */
function parseArgs(argv) {
  const DEFAULT_CONTEXT_LINES = 3;
  const args = argv.slice(2);
  const result = {
    keywords: [],
    tags: [],
    contextLines: DEFAULT_CONTEXT_LINES,
    jsonOutput: false,
    sectionMode: true, // return results per heading-delimited section
  };

  let i = 0;
  while (i < args.length) {
    const arg = args[i];
    const value = args[i + 1];

    if (arg === '--tag' && value) {
      const tag = value.replace(/^#/, '').toLowerCase();
      // A bare "#" normalises to an empty tag, which would match every
      // section containing '#'; drop it instead.
      if (tag) result.tags.push(tag);
      i += 2;
    } else if (arg === '--context' && value) {
      const parsed = Number.parseInt(value, 10);
      // Explicit validity check: `parsed || 3` would turn a legitimate 0 into 3.
      const isValid = !Number.isNaN(parsed) && parsed >= 0;
      result.contextLines = isValid ? parsed : DEFAULT_CONTEXT_LINES;
      i += 2;
    } else if (arg === '--json') {
      result.jsonOutput = true;
      i++;
    } else if (arg === '--lines') {
      result.sectionMode = false;
      i++;
    } else if (!arg.startsWith('--')) {
      // A quoted argument may contain several space-separated keywords.
      result.keywords.push(...arg.toLowerCase().split(/\s+/).filter(Boolean));
      i++;
    } else {
      // Unknown option, or a known option without its value: ignore.
      i++;
    }
  }
  return result;
}
// ─── 文件扫描 ────────────────────────────────────────
/**
 * Read every `.md` file located directly inside MEMORY_DIR, sorted by name.
 *
 * `content` is the raw file text. `lines` is the same text split into lines
 * with a leading UTF-8 byte-order mark removed and both LF and CRLF line
 * endings accepted, so no line carries a trailing '\r'.
 *
 * @returns {Array<{file: string, name: string, lines: string[], content: string}>}
 *          One entry per file; an empty array when MEMORY_DIR is unset or missing.
 */
function loadMemoryFiles() {
  if (!MEMORY_DIR || !fs.existsSync(MEMORY_DIR)) return [];

  return fs.readdirSync(MEMORY_DIR)
    .filter((name) => name.endsWith('.md'))
    .sort()
    .map((name) => {
      const file = path.join(MEMORY_DIR, name);
      const content = fs.readFileSync(file, 'utf8');
      // Node keeps the BOM when decoding utf8; left in place it would hide a
      // heading on the first line from the `^#` heading test.
      const text = content.replace(/^\uFEFF/, '');
      return {
        file,
        name,
        lines: text.split(/\r?\n/),
        content,
      };
    });
}
// ─── 段落解析 ────────────────────────────────────────
/**
 * Split the lines of a Markdown file into sections delimited by headings.
 *
 * A section starts at every line that begins with one to three '#'
 * characters followed by whitespace, and runs up to the line before the next
 * such heading (or the end of the file). Lines before the first heading do
 * not belong to any section. Deeper headings (four or more '#') do not start
 * a new section.
 *
 * Each section also lists the `#tag` tokens found in its text, lowercased and
 * de-duplicated in order of first appearance.
 *
 * @param {string[]} lines - File content, one entry per line.
 * @returns {Array<{title: string, startLine: number, endLine: number, text: string, tags: string[]}>}
 *          `startLine` / `endLine` are 0-based, inclusive indexes into `lines`;
 *          `text` includes the heading line itself.
 */
function parseSections(lines) {
  const headingPattern = /^#{1,3}\s/;
  const tagPattern = /#([a-zA-Z\u4e00-\u9fff][\w\u4e00-\u9fff-]*)/g;

  // Pass 1: one section per heading line.
  const sections = [];
  lines.forEach((line, index) => {
    if (!headingPattern.test(line)) return;
    sections.push({
      title: line.replace(/^#+\s*/, ''),
      startLine: index,
      endLine: index,
      text: '',
      tags: [],
    });
  });

  // Pass 2: each section ends right before the next heading.
  sections.forEach((sec, position) => {
    const next = sections[position + 1];
    const stop = next ? next.startLine : lines.length;
    sec.endLine = stop - 1;
    sec.text = lines.slice(sec.startLine, stop).join('\n');

    const found = sec.text.match(tagPattern) || [];
    // A Set keeps first-seen order while removing repeated tags, so the same
    // tag is not reported several times for one section.
    sec.tags = [...new Set(found.map((token) => token.slice(1).toLowerCase()))];
  });

  return sections;
}
// ─── 搜索引擎 ────────────────────────────────────────
/**
 * Search all memory files, section by section.
 *
 * Filtering and scoring:
 *   - When tags are given, a section is kept if it carries ANY of them
 *     (either in its parsed tag list or as a literal `#tag` substring).
 *   - With tags but no keywords, every section that passes the tag filter is
 *     returned with score 1.
 *   - Otherwise each keyword adds 3 when it occurs in the section title plus
 *     1 per case-insensitive occurrence in the section text; sections with
 *     score 0 are dropped.
 * Results are sorted by score, highest first.
 *
 * Keywords and tags are normalised here (lowercased, leading '#' stripped
 * from tags, empty entries removed) so callers using the exported API get the
 * same matching as the CLI.
 *
 * @param {object} [params] - Search parameters.
 * @param {string[]} [params.keywords=[]] - Keywords to score sections by.
 * @param {string[]} [params.tags=[]] - Tags to filter sections by.
 * @returns {Array<{file: string, filePath: string, section: string, startLine: number,
 *                  snippet: string, score: number, tags: string[]}>}
 *          `startLine` is 1-based.
 */
function search({ keywords = [], tags = [] } = {}) {
  const wantedKeywords = keywords
    .map((kw) => String(kw).toLowerCase())
    .filter(Boolean);
  const wantedTags = tags
    .map((tag) => String(tag).replace(/^#/, '').toLowerCase())
    .filter(Boolean);

  // Nothing to look for: skip reading the files altogether.
  if (wantedKeywords.length === 0 && wantedTags.length === 0) return [];

  // Compile each keyword once instead of once per section.
  // String.prototype.match resets lastIndex for global regexes, so reuse is safe.
  const matchers = wantedKeywords.map((kw) => ({
    kw,
    regex: new RegExp(escapeRegExp(kw), 'gi'),
  }));
  const isTagOnly = wantedKeywords.length === 0;

  const results = [];
  for (const f of loadMemoryFiles()) {
    for (const sec of parseSections(f.lines)) {
      if (wantedTags.length > 0) {
        const textLower = sec.text.toLowerCase();
        const hasTag = wantedTags.some(
          (tag) => sec.tags.includes(tag) || textLower.includes(`#${tag}`),
        );
        if (!hasTag) continue;
      }

      if (isTagOnly) {
        // Tag filter only: return every section that passed it.
        results.push({
          file: f.name,
          filePath: f.file,
          section: sec.title,
          startLine: sec.startLine + 1, // 1-based
          snippet: truncateSnippet(sec.text, 500),
          score: 1,
          tags: sec.tags,
        });
        continue;
      }

      const titleLower = sec.title.toLowerCase();
      let score = 0;
      for (const { kw, regex } of matchers) {
        if (titleLower.includes(kw)) score += 3; // title hits weigh more
        const matches = sec.text.match(regex);
        if (matches) score += matches.length;
      }
      if (score === 0) continue;

      results.push({
        file: f.name,
        filePath: f.file,
        section: sec.title,
        startLine: sec.startLine + 1, // 1-based
        snippet: highlightSnippet(sec.text, wantedKeywords, 500),
        score,
        tags: sec.tags,
      });
    }
  }

  results.sort((a, b) => b.score - a.score);
  return results;
}
// ─── 工具函数 ────────────────────────────────────────
/**
 * Escape every regular-expression metacharacter in `s` so the result can be
 * embedded in a RegExp and match the original string literally.
 *
 * @param {string} s - Text to escape.
 * @returns {string} `s` with each metacharacter prefixed by a backslash.
 */
function escapeRegExp(s) {
  const metaChars = /[.*+?^${}()|[\]\\]/g;
  return s.replace(metaChars, (ch) => `\\${ch}`);
}
/**
 * Trim `text` and cut it down to at most `maxLen` characters.
 *
 * @param {string} text - Text to shorten.
 * @param {number} maxLen - Maximum number of characters kept before trimming.
 * @returns {string} The trimmed text; when it had to be cut, the kept prefix
 *          (trimmed) followed by '...'.
 */
function truncateSnippet(text, maxLen) {
  const fits = text.length <= maxLen;
  return fits ? text.trim() : `${text.slice(0, maxLen).trim()}...`;
}
/**
 * Cut a snippet of at most `maxLen` characters around the earliest keyword hit.
 *
 * The snippet starts up to 100 characters before the first (case-insensitive)
 * occurrence of any keyword. When no keyword occurs in the text, the snippet
 * starts at the beginning of the text. '...' is added on each side that was
 * cut off.
 *
 * @param {string} text - Section text to excerpt.
 * @param {string[]} keywords - Keywords to look for (any letter case).
 * @param {number} maxLen - Maximum number of characters taken from `text`.
 * @returns {string} The excerpt, trimmed, with '...' markers where truncated.
 */
function highlightSnippet(text, keywords, maxLen) {
  const textLower = text.toLowerCase();

  // Lowercase each keyword as well: the haystack is lowercased, so a
  // mixed-case keyword would otherwise never be found.
  const hits = keywords
    .map((kw) => textLower.indexOf(String(kw).toLowerCase()))
    .filter((idx) => idx >= 0);

  // No hit at all: excerpt the head of the text rather than its tail.
  const firstIdx = hits.length > 0 ? Math.min(...hits) : 0;

  const start = Math.max(0, firstIdx - 100);
  const end = Math.min(text.length, start + maxLen);

  let snippet = text.slice(start, end).trim();
  if (start > 0) snippet = `...${snippet}`;
  if (end < text.length) snippet += '...';
  return snippet;
}
// ─── 输出格式化 ──────────────────────────────────────
/**
 * Render search results either as pretty-printed JSON or as human-readable text.
 *
 * JSON mode emits `{ count, results }` with every result reduced to
 * file / section / line / score / tags / snippet. Text mode prints a header,
 * then at most the first 10 results (location line, snippet, blank line) and,
 * when more exist, a trailing line stating how many were left out.
 *
 * @param {Array<object>} results - Results as produced by search().
 * @param {boolean} jsonOutput - True for JSON output, false for text.
 * @returns {string} The formatted output.
 */
function formatResults(results, jsonOutput) {
  const total = results.length;

  if (jsonOutput) {
    // An empty result list maps to `{ count: 0, results: [] }` by the same path.
    const payload = {
      count: total,
      results: results.map(({ file, section, startLine, score, tags, snippet }) => ({
        file,
        section,
        line: startLine,
        score,
        tags,
        snippet,
      })),
    };
    return JSON.stringify(payload, null, 2);
  }

  if (total === 0) return '未找到匹配结果。';

  // Human-readable output, capped at the first 10 entries.
  const maxShown = 10;
  const out = [`找到 ${total} 个匹配段落:\n`];
  results.slice(0, maxShown).forEach((r) => {
    const tagLabel = r.tags.length
      ? ` [${r.tags.map((t) => `#${t}`).join(' ')}]`
      : '';
    out.push(
      `── ${r.file}:${r.startLine} | ${r.section} (score: ${r.score})${tagLabel}`,
      r.snippet,
      '',
    );
  });
  if (total > maxShown) {
    out.push(`... 还有 ${total - maxShown} 条结果未显示`);
  }
  return out.join('\n');
}
// ─── 主入口 ──────────────────────────────────────────
/**
 * CLI entry point: parse argv, run the search, print the formatted results.
 *
 * Exit status: 0 when there is at least one result, 1 when there is none,
 * 2 when neither a keyword nor a tag was supplied (usage goes to stderr).
 *
 * The status is reported through `process.exitCode` instead of calling
 * `process.exit()`: exiting immediately after `console.log` can cut off
 * output that has not been flushed yet when stdout is a pipe.
 */
function main() {
  const params = parseArgs(process.argv);

  if (params.keywords.length === 0 && params.tags.length === 0) {
    console.error('用法: node memory-search.js <keywords> [--tag <tag>] [--context <lines>] [--json]');
    console.error('示例: node memory-search.js "部署 docker" --tag deploy');
    process.exitCode = 2;
    return;
  }

  const results = search(params);
  console.log(formatResults(results, params.jsonOutput));
  process.exitCode = results.length > 0 ? 0 : 1;
}
// Module exports (for tests and for other scripts that call into this file).
// The typeof guard skips the assignment where no `module` object exists.
if (typeof module !== 'undefined') {
module.exports = { search, loadMemoryFiles, parseSections, MEMORY_DIR };
}
// Run the CLI only when this file is the entry script, not when it is required.
if (require.main === module) {
main();
}