bookworm-smart-assistant/scripts/memory-search.js

#!/usr/bin/env node
/**
 * 记忆搜索引擎 v1.0 — 全文搜索 + #tag 标签过滤
 * @file memory-search.js
 *
 * 功能:
 *   - 搜索 memory/ 目录下所有 .md 文件
 *   - 支持关键词搜索（大小写不敏感）
 *   - 支持 #tag 标签过滤
 *   - 返回匹配段落（含上下文）和文件:行号定位
 *
 * 用法:
 *   node memory-search.js <keywords> [--tag <tag>] [--context <lines>] [--json]
 *
 * 示例:
 *   node memory-search.js "部署 HTTPS"
 *   node memory-search.js "docker" --tag deploy
 *   node memory-search.js "qwen" --context 5 --json
 *
 * 退出码: 0=有结果, 1=无结果, 2=参数错误
 */
const fs = require('fs');
const path = require('path');

// ─── 路径解析 ────────────────────────────────────────
/**
 * Locate the memory directory that the search runs against.
 *
 * Resolution order:
 *   1. `PATHS.memoryDir` exported by a sibling `paths.config.js`, when that
 *      module loads and the value is truthy.
 *   2. The first `projects/<name>/memory` directory found under the parent
 *      of this script's directory.
 *
 * @returns {string|null} Path of the memory directory, or null when neither
 *   lookup finds one.
 */
function detectMemoryDir() {
  // Best-effort: the config module is optional, so any failure to load or
  // read it falls through to the directory scan below.
  try {
    const { PATHS } = require('./paths.config.js');
    if (PATHS.memoryDir) return PATHS.memoryDir;
  } catch {}

  // statSync throws for dangling symlinks, missing paths and entries whose
  // parent is not a directory; treat all of those as "not a directory"
  // instead of aborting the whole lookup.
  const isDirectory = (candidate) => {
    try {
      return fs.statSync(candidate).isDirectory();
    } catch {
      return false;
    }
  };

  // Fallback: infer the location from where this script lives.
  const projectsDir = path.join(path.resolve(__dirname, '..'), 'projects');
  if (!isDirectory(projectsDir)) return null;

  for (const entry of fs.readdirSync(projectsDir)) {
    const memDir = path.join(projectsDir, entry, 'memory');
    if (isDirectory(memDir)) return memDir;
  }
  return null;
}

// Resolved once at module load; null when detectMemoryDir() finds no directory.
const MEMORY_DIR = detectMemoryDir();

// ─── 参数解析 ────────────────────────────────────────
/**
 * Parse command-line arguments into search parameters.
 *
 * Recognised flags:
 *   --tag <tag>       add a tag filter (a leading '#' is stripped, lowercased)
 *   --context <n>     number of context lines; non-negative integer, default 3
 *   --json            request JSON output
 *   --lines           turn section mode off
 * Any other argument starting with `--` is ignored. Every remaining argument
 * is lowercased and split on whitespace into individual keywords.
 *
 * @param {string[]} argv - Argument vector in `process.argv` layout; the
 *   first two entries are skipped.
 * @returns {{keywords: string[], tags: string[], contextLines: number,
 *   jsonOutput: boolean, sectionMode: boolean}} Parsed parameters.
 */
function parseArgs(argv) {
  const DEFAULT_CONTEXT_LINES = 3;
  const args = argv.slice(2);
  const result = {
    keywords: [],
    tags: [],
    contextLines: DEFAULT_CONTEXT_LINES,
    jsonOutput: false,
    sectionMode: true,  // return whole heading-delimited sections
  };

  let i = 0;
  while (i < args.length) {
    const arg = args[i];
    const value = args[i + 1];
    if (arg === '--tag' && value) {
      result.tags.push(value.replace(/^#/, '').toLowerCase());
      i += 2;
    } else if (arg === '--context' && value) {
      const parsed = Number.parseInt(value, 10);
      // Check for NaN explicitly: `parsed || 3` would silently turn an
      // explicit `--context 0` into 3. Negative counts are meaningless and
      // fall back to the default as well.
      result.contextLines =
        Number.isNaN(parsed) || parsed < 0 ? DEFAULT_CONTEXT_LINES : parsed;
      i += 2;
    } else if (arg === '--json') {
      result.jsonOutput = true;
      i++;
    } else if (arg === '--lines') {
      result.sectionMode = false;
      i++;
    } else if (!arg.startsWith('--')) {
      // A quoted argument containing spaces is split into separate keywords.
      result.keywords.push(...arg.toLowerCase().split(/\s+/).filter(Boolean));
      i++;
    } else {
      // Unknown flag, or a value-taking flag with no value: skip it.
      i++;
    }
  }
  return result;
}

// ─── 文件扫描 ────────────────────────────────────────
/**
 * Read every `.md` file directly inside MEMORY_DIR (non-recursive), sorted
 * by file name.
 *
 * `lines` is normalised for parsing: a leading UTF-8 byte-order mark is
 * dropped and both LF and CRLF line endings are accepted, so a heading on
 * the first line is still recognised and no line carries a trailing `\r`.
 * `content` is the file text exactly as read.
 *
 * @returns {Array<{file: string, name: string, lines: string[], content: string}>}
 *   One entry per file; an empty array when MEMORY_DIR is unset or missing.
 */
function loadMemoryFiles() {
  if (!MEMORY_DIR || !fs.existsSync(MEMORY_DIR)) return [];

  return fs.readdirSync(MEMORY_DIR)
    .filter((entry) => entry.endsWith('.md'))
    .sort()
    .map((name) => {
      const file = path.join(MEMORY_DIR, name);
      const content = fs.readFileSync(file, 'utf8');
      const lines = content.replace(/^\uFEFF/, '').split(/\r?\n/);
      return { file, name, lines, content };
    });
}

// ─── 段落解析 ────────────────────────────────────────
/**
 * Split the lines of a Markdown file into sections, one per level 1-3
 * heading (`#`, `##`, `###` followed by whitespace).
 *
 * Each section runs from its heading line up to the line before the next
 * heading, or to the end of the file. Lines before the first heading belong
 * to no section. Heading-like lines inside fenced code blocks (``` or ~~~)
 * are not treated as headings, so a shell comment such as `# install` in a
 * code sample does not split the section that contains it.
 *
 * @param {string[]} lines - File content, one entry per line.
 * @returns {Array<{title: string, startLine: number, endLine: number, text: string, tags: string[]}>}
 *   Sections in file order. `startLine` and `endLine` are 0-based, inclusive
 *   indexes into `lines`; `text` includes the heading line; `tags` holds the
 *   lowercased, de-duplicated `#tag` tokens found in `text`.
 */
function parseSections(lines) {
  const HEADING = /^#{1,3}\s/;
  const FENCE = /^ {0,3}(`{3,}|~{3,})/;
  const TAG = /#([a-zA-Z\u4e00-\u9fff][\w\u4e00-\u9fff-]*)/g;

  const sections = [];
  let current = null;
  let openFence = null;  // '`' or '~' while inside a fenced block, else null

  // Finalise the section being built so that it ends just before `endExclusive`.
  const closeCurrent = (endExclusive) => {
    if (!current) return;
    current.endLine = endExclusive - 1;
    current.text = lines.slice(current.startLine, endExclusive).join('\n');
  };

  for (let i = 0; i < lines.length; i++) {
    const line = lines[i];

    const fence = FENCE.exec(line);
    if (fence) {
      const marker = fence[1][0];
      if (openFence === null) {
        openFence = marker;
      } else if (openFence === marker) {
        // Only the same fence character closes the block.
        openFence = null;
      }
      continue;
    }
    if (openFence !== null || !HEADING.test(line)) continue;

    closeCurrent(i);
    current = {
      title: line.replace(/^#+\s*/, ''),
      startLine: i,
      endLine: i,
      text: '',
      tags: [],
    };
    sections.push(current);
  }
  closeCurrent(lines.length);

  // Collect the #tags of each section; a Set keeps each tag once, in order
  // of first appearance.
  for (const sec of sections) {
    const found = sec.text.match(TAG) || [];
    sec.tags = [...new Set(found.map((t) => t.slice(1).toLowerCase()))];
  }

  return sections;
}

// ─── 搜索引擎 ────────────────────────────────────────
/**
 * Search every section of every memory file.
 *
 * Matching is case-insensitive. When tags are given, a section must carry at
 * least one of them. When keywords are given, a section scores 3 for each
 * keyword found in its title plus 1 for every occurrence of a keyword in its
 * text, and only sections scoring above 0 are returned. With tags but no
 * keywords, every section that passes the tag filter is returned with score 1.
 *
 * @param {object} [params] - Search parameters.
 * @param {string[]} [params.keywords=[]] - Terms to look for; empty strings
 *   are ignored.
 * @param {string[]} [params.tags=[]] - Tag filters, with or without a
 *   leading '#'.
 * @returns {Array<{file: string, filePath: string, section: string, startLine: number, snippet: string, score: number, tags: string[]}>}
 *   Matches sorted by descending score; `startLine` is 1-based. Empty when
 *   neither keywords nor tags are given.
 */
function search({ keywords = [], tags = [] } = {}) {
  // Normalise here as well as in the CLI parser so that programmatic callers
  // get the same case-insensitive behaviour for title and snippet matching.
  const wantedKeywords = keywords
    .map((kw) => String(kw).toLowerCase())
    .filter(Boolean);
  const wantedTags = tags
    .map((t) => String(t).replace(/^#/, '').toLowerCase())
    .filter(Boolean);
  if (wantedKeywords.length === 0 && wantedTags.length === 0) return [];

  // Compile each keyword once instead of once per section. String#match
  // resets lastIndex on a global regex, so reusing the pattern is safe.
  const matchers = wantedKeywords.map((kw) => ({
    kw,
    pattern: new RegExp(escapeRegExp(kw), 'gi'),
  }));

  const results = [];
  for (const f of loadMemoryFiles()) {
    for (const sec of parseSections(f.lines)) {
      // Tag filter: the section must carry at least one requested tag.
      if (wantedTags.length > 0) {
        const textLower = sec.text.toLowerCase();
        const hasTag = wantedTags.some(
          (t) => sec.tags.includes(t) || textLower.includes(`#${t}`),
        );
        if (!hasTag) continue;
      }

      // Tag-only query: every section that passed the filter is a hit.
      if (matchers.length === 0) {
        results.push({
          file: f.name,
          filePath: f.file,
          section: sec.title,
          startLine: sec.startLine + 1,  // 1-based
          snippet: truncateSnippet(sec.text, 500),
          score: 1,
          tags: sec.tags,
        });
        continue;
      }

      const titleLower = sec.title.toLowerCase();
      let score = 0;
      for (const { kw, pattern } of matchers) {
        // A title hit weighs three times a body occurrence.
        if (titleLower.includes(kw)) score += 3;
        const occurrences = sec.text.match(pattern);
        if (occurrences) score += occurrences.length;
      }
      if (score === 0) continue;

      results.push({
        file: f.name,
        filePath: f.file,
        section: sec.title,
        startLine: sec.startLine + 1,  // 1-based
        snippet: highlightSnippet(sec.text, wantedKeywords, 500),
        score,
        tags: sec.tags,
      });
    }
  }

  // Highest score first.
  results.sort((a, b) => b.score - a.score);
  return results;
}

// ─── 工具函数 ────────────────────────────────────────
/**
 * Backslash-escape every regular-expression metacharacter in a string so it
 * can be embedded in a RegExp and matched literally.
 *
 * @param {string} s - Raw text.
 * @returns {string} Text with `\ ^ $ . * + ? ( ) [ ] { } |` escaped.
 */
function escapeRegExp(s) {
  const metaChars = /[\\^$.*+?()[\]{}|]/g;
  return s.replace(metaChars, (ch) => '\\' + ch);
}

/**
 * Shorten text to at most `maxLen` characters for display.
 *
 * @param {string} text - Text to shorten.
 * @param {number} maxLen - Maximum number of characters kept before trimming.
 * @returns {string} The trimmed text when it already fits; otherwise the
 *   first `maxLen` characters, trimmed, followed by '...'.
 */
function truncateSnippet(text, maxLen) {
  const fits = text.length <= maxLen;
  const kept = fits ? text : text.slice(0, maxLen);
  const suffix = fits ? '' : '...';
  return kept.trim() + suffix;
}

/**
 * Cut a window of `text` around the earliest keyword occurrence.
 *
 * The window starts up to 100 characters before the earliest match of any
 * keyword (compared case-insensitively) and spans at most `maxLen`
 * characters. When no keyword occurs in the text, the window starts at the
 * beginning. No markup is added around the matches.
 *
 * @param {string} text - Section text to excerpt.
 * @param {string[]} keywords - Terms to locate, in any letter case.
 * @param {number} maxLen - Maximum window length in characters.
 * @returns {string} The trimmed window, prefixed and/or suffixed with '...'
 *   where text was cut off.
 */
function highlightSnippet(text, keywords, maxLen) {
  const LEAD_CHARS = 100;
  const haystack = text.toLowerCase();

  let firstIdx = -1;
  for (const kw of keywords) {
    // Lowercase the needle too: the haystack is lowercased, so a keyword
    // such as 'Docker' would otherwise never be found.
    const idx = haystack.indexOf(String(kw).toLowerCase());
    if (idx >= 0 && (firstIdx < 0 || idx < firstIdx)) firstIdx = idx;
  }

  // With no match, show the beginning of the text rather than its tail.
  const start = firstIdx < 0 ? 0 : Math.max(0, firstIdx - LEAD_CHARS);
  const end = Math.min(text.length, start + maxLen);

  let snippet = text.slice(start, end).trim();
  if (start > 0) snippet = '...' + snippet;
  if (end < text.length) snippet += '...';
  return snippet;
}

// ─── 输出格式化 ──────────────────────────────────────
/**
 * Render search results for output.
 *
 * JSON mode serialises every result as `{file, section, line, score, tags,
 * snippet}` under a `count`/`results` envelope, pretty-printed with two
 * spaces. Text mode prints a header, then at most 10 results (a location
 * line, the snippet and a blank line each), then a trailer counting the
 * results that were left out.
 *
 * @param {Array<{file: string, section: string, startLine: number, score: number, tags: string[], snippet: string}>} results
 *   Results in display order.
 * @param {boolean} jsonOutput - True for JSON, false for human-readable text.
 * @returns {string} The formatted output.
 */
function formatResults(results, jsonOutput) {
  const MAX_SHOWN = 10;
  const total = results.length;

  if (jsonOutput) {
    const payload = {
      count: total,
      results: results.map(({ file, section, startLine, score, tags, snippet }) => ({
        file,
        section,
        line: startLine,
        score,
        tags,
        snippet,
      })),
    };
    return JSON.stringify(payload, null, 2);
  }

  if (total === 0) return '未找到匹配结果。';

  // Human-readable layout.
  const renderTags = (tags) =>
    (tags.length ? ` [${tags.map((t) => `#${t}`).join(' ')}]` : '');
  const body = results.slice(0, MAX_SHOWN).flatMap((r) => [
    `── ${r.file}:${r.startLine} | ${r.section} (score: ${r.score})${renderTags(r.tags)}`,
    r.snippet,
    '',
  ]);

  const output = [`找到 ${total} 个匹配段落:\n`, ...body];
  if (total > MAX_SHOWN) {
    output.push(`... 还有 ${total - MAX_SHOWN} 条结果未显示`);
  }
  return output.join('\n');
}

// ─── 主入口 ──────────────────────────────────────────
/**
 * CLI entry point: parse `process.argv`, run the search and print the result.
 *
 * Sets the process exit code to 0 when there are results, 1 when there are
 * none, and 2 when neither keywords nor tags were supplied (the usage text
 * is then written to stderr).
 *
 * The exit code is set through `process.exitCode` rather than
 * `process.exit()`: exiting immediately after `console.log` can drop output
 * that has not been flushed yet when stdout is a pipe.
 */
function main() {
  const params = parseArgs(process.argv);

  if (params.keywords.length === 0 && params.tags.length === 0) {
    console.error('用法: node memory-search.js <keywords> [--tag <tag>] [--context <lines>] [--json]');
    console.error('示例: node memory-search.js "部署 docker" --tag deploy');
    process.exitCode = 2;
    return;
  }

  const results = search(params);
  console.log(formatResults(results, params.jsonOutput));
  process.exitCode = results.length > 0 ? 0 : 1;
}

// Module exports, for tests and for other scripts that call the search
// directly. Skipped when no `module` binding exists.
if (typeof module !== 'undefined') {
  module.exports = { search, loadMemoryFiles, parseSections, MEMORY_DIR };
}

// Run the CLI only when this file is the entry script, not when it is
// loaded through require() by another module.
if (require.main === module) {
  main();
}