bookworm-smart-assistant/hooks/block-dangerous-commands.js

#!/usr/bin/env node
/**
 * PreToolUse Hook: 阻止危险的 Bash 命令
 * 匹配器: Bash
 * 退出码: 0=放行, 2=阻断(stderr输出JSON)
 *
 * v3.8: 规则外部化 + 安全事件日志
 * v3.9: 多层编码解码器 (base64/URL/hex/unicode/octal 预处理)
 * v4.0: Shell 词法分析器 (引号/转义感知分词, 替代 split)
 *
 * 防护层:
 * - Shell 词法分析器: 正确处理引号/转义的复合命令拆分
 * - 提取 bash -c / sh -c / cmd /c 内部命令
 * - 多层编码解码: 原始+解码版本双重检测
 * - catch 块默认 ask (fail-closed)
 */

const fs = require('fs');
const path = require('path');

const { logSecurityEvent } = require('./lib/security-log.js');
const { loadRules, compilePatterns, RULES_DIR } = require('./lib/rule-loader.js');

const readStdin = require('./lib/read-stdin.js');

// CRIT-4: when a rule file is missing, use a hard-coded minimal safety rule set
// instead of an empty array, so the most destructive commands are still covered.
// NOTE(review): loadRules is defined in ./lib/rule-loader.js (not visible here).
// Presumably it selects the FALLBACK entry that corresponds to the requested file
// and compiles each `regex`/`flags` pair into the `pattern` RegExp that
// checkPatterns reads — confirm against the loader.
const FALLBACK = {
  'deny-patterns': [
    // `rm` with an option cluster containing `r`, followed by a target containing `/`, `\` or `~`.
    { regex: 'rm\\s+(?:-[a-z]+\\s+)*-[a-z]*r[a-z]*\\s+.*[\\/\\\\~]', reason: '递归删除', flags: 'i' },
    // `mkfs` anywhere, or `dd if=... of=/dev...`.
    { regex: 'mkfs|dd\\s+if=.*of=\\/dev', reason: '磁盘格式化', flags: 'i' },
    // Classic shell fork bomb prefix `:(){ :|:`.
    { regex: ':\\(\\)\\{\\s*:\\|:', reason: 'Fork 炸弹', flags: '' },
  ],
  // No built-in fallback rules exist for the remaining three categories.
  'ask-patterns': [],
  'sensitive-redirect': [],
  'credential-patterns': [],
};

// Rule sets consumed by checkCommand; each is loaded once at module load time.
const DENY_PATTERNS = loadRules('deny-patterns.json', FALLBACK);
const ASK_PATTERNS = loadRules('ask-patterns.json', FALLBACK);
const SENSITIVE_REDIRECT_PATTERNS = loadRules('sensitive-redirect.json', FALLBACK);
const CREDENTIAL_PATTERNS = loadRules('credential-patterns.json', FALLBACK);

// ─── Shell 词法分析器 (v4.0) ──────────────────────────
/**
 * Lightweight, quote-aware shell lexer.
 *
 * Walks the command one character at a time while tracking single quotes,
 * double quotes and backslash escapes. Quote and backslash characters are
 * kept verbatim in the output; nothing is unquoted or unescaped.
 *
 * Outside quotes:
 *   - `&&`, `||` and `;` end the current word AND the current statement;
 *   - a single `|` and any whitespace end the current word only (a pipeline
 *     therefore stays inside one statement);
 *   - every other character, including a lone `&`, is ordinary word text.
 *
 * @param {string} cmd - raw command line
 * @returns {{tokens: string[], statements: string[]}}
 *   tokens     - words in order of appearance
 *   statements - trimmed, non-empty sub-commands split at `&&` / `||` / `;`
 */
function shellTokenize(cmd) {
  const tokens = [];
  const statements = [];
  let word = '';             // word currently being accumulated
  let clause = '';           // statement currently being accumulated
  let openQuote = '';        // '' when unquoted, otherwise the active quote character
  let pendingEscape = false; // previous character was an active backslash

  // Append a character to both the current word and the current statement.
  const keep = (c) => {
    word += c;
    clause += c;
  };
  const endWord = () => {
    if (word) tokens.push(word);
    word = '';
  };
  const endClause = () => {
    const text = clause.trim();
    clause = '';
    if (text) statements.push(text);
  };

  let pos = 0;
  while (pos < cmd.length) {
    const ch = cmd[pos];
    const lookahead = cmd[pos + 1] || '';
    pos += 1;

    // The character right after an active backslash is always taken literally.
    if (pendingEscape) {
      pendingEscape = false;
      keep(ch);
      continue;
    }

    // A backslash escapes everywhere except inside single quotes.
    if (ch === '\\' && openQuote !== "'") {
      pendingEscape = true;
      keep(ch);
      continue;
    }

    // A quote character opens a quoted region, closes its own region,
    // or is plain text inside the other kind of quote.
    if (ch === "'" || ch === '"') {
      if (openQuote === '') {
        openQuote = ch;
      } else if (openQuote === ch) {
        openQuote = '';
      }
      keep(ch);
      continue;
    }

    // Inside quotes everything is literal.
    if (openQuote !== '') {
      keep(ch);
      continue;
    }

    // --- unquoted context only from here on ---

    const isDoubledOperator = ch === lookahead && (ch === '&' || ch === '|');
    if (isDoubledOperator || ch === ';') {
      endWord();
      endClause();
      if (isDoubledOperator) pos += 1; // consume the second operator character
      continue;
    }

    // A single pipe or whitespace separates words but stays in the statement text.
    if (ch === '|' || /\s/.test(ch)) {
      endWord();
      clause += ch;
      continue;
    }

    keep(ch);
  }

  endWord();
  endClause();

  return { tokens, statements };
}

/**
 * Expands a command line into every text fragment that should be screened
 * by the rule sets.
 *
 * The returned array contains, in this order:
 *   1. the whole trimmed command (kept so pipeline-style rules see full context);
 *   2. each statement produced by shellTokenize that differs from the whole command;
 *   3. the payload of `sh -c` / `bash -c` wrappers (backslash escapes removed) and its statements;
 *   4. the payload of quoted `cmd /c "..."` wrappers;
 *   5. the inside of `$(...)` and backtick substitutions and their statements;
 *   6. the body of the first heredoc when it feeds something other than a
 *      `cat`/`tee` file write; for a file-writing heredoc the body is instead
 *      removed from element 0 so file contents do not trip command rules;
 *   7. the inside of `<(...)` / `>(...)` process substitutions.
 *
 * Duplicates are not removed. Nested substitutions are matched with simple
 * regular expressions, not parsed recursively.
 *
 * @param {string} cmd - raw command line
 * @returns {string[]} fragments to test against the rule sets
 */
function extractCommandSegments(cmd) {
  const segments = [];

  // Element 0: the full command, for rules that need the complete pipeline context.
  segments.push(cmd.trim());

  // Statement-level split using the quote-aware lexer.
  const { statements } = shellTokenize(cmd);
  for (const stmt of statements) {
    const trimmed = stmt.trim();
    if (trimmed && trimmed !== cmd.trim()) {
      segments.push(trimmed);
    }
  }

  // Payload of bash -c "..." / sh -c '...'.
  const shellWrappers = cmd.matchAll(/(?:ba)?sh\s+-c\s+(?:"((?:[^"\\]|\\.)*)"|'((?:[^'\\]|\\.)*)')/gi);
  for (const match of shellWrappers) {
    const inner = (match[1] || match[2] || '').replace(/\\(.)/g, '$1');
    if (inner) {
      segments.push(inner.trim());
      // Split the wrapped command into statements as well.
      const { statements: innerStmts } = shellTokenize(inner);
      for (const s of innerStmts) {
        if (s.trim()) segments.push(s.trim());
      }
    }
  }

  // Payload of cmd /c "..." (quoted form only).
  const cmdWrappers = cmd.matchAll(/cmd\s+\/c\s+["']([^"']+)["']/gi);
  for (const match of cmdWrappers) {
    segments.push(match[1].trim());
  }

  // $(...) command substitution.
  const dollarSubs = cmd.matchAll(/\$\(([^)]+)\)/g);
  for (const match of dollarSubs) {
    const inner = match[1].trim();
    if (inner) {
      segments.push(inner);
      const { statements: innerStmts } = shellTokenize(inner);
      for (const s of innerStmts) {
        if (s.trim() && s.trim() !== inner) segments.push(s.trim());
      }
    }
  }

  // Backtick command substitution.
  const backtickSubs = cmd.matchAll(/`([^`]+)`/g);
  for (const match of backtickSubs) {
    const inner = match[1].trim();
    if (inner) {
      segments.push(inner);
      const { statements: innerStmts } = shellTokenize(inner);
      for (const s of innerStmts) {
        if (s.trim() && s.trim() !== inner) segments.push(s.trim());
      }
    }
  }

  // V02: heredoc body extraction (only the first heredoc operator is examined).
  // v4.5: distinguish data-writing heredocs (cat/tee > file) from executing ones
  // (bash/sh/python ...). The body of a data-writing heredoc is file content and
  // should not trigger command-level deny rules.
  const heredocMatch = cmd.match(/<<-?\s*['"]?(\w+)['"]?/);
  if (heredocMatch) {
    const delim = heredocMatch[1];
    const bodyStart = cmd.indexOf(heredocMatch[0]) + heredocMatch[0].length;
    const rest = cmd.slice(bodyStart);
    const delimIdx = rest.search(new RegExp('^' + delim + '\\s*$', 'm'));
    if (delimIdx > 0) {
      const heredocBody = rest.slice(0, delimIdx).trim();
      // The heredoc feeds the LAST simple command before `<<`. Commands are
      // separated not only by `&&`, `||` and `;` but also by `|`, a newline and
      // a lone background `&`. Without those separators an earlier `cat > file`
      // stage (e.g. `cat > /dev/null | bash <<EOF`) made an executed body look
      // like file content and removed it from screening. The lookarounds keep
      // redirections such as `2>&1`, `&>` and `<&3` inside their clause.
      const beforeHeredoc = cmd.slice(0, cmd.indexOf(heredocMatch[0]));
      const lastClause = beforeHeredoc
        .split(/&&|\|\||[;|\n]|(?<![<>&])&(?![>&])/)
        .pop()
        .trim();
      const isDataWrite = /\b(?:cat|tee)\b.*(?:>|>>)/.test(lastClause);

      if (heredocBody) {
        if (isDataWrite) {
          // Data write: cut the heredoc (operator through closing delimiter) out of
          // element 0 so file content does not produce false positives.
          const heredocStart = cmd.indexOf(heredocMatch[0]);
          const heredocEnd = bodyStart + delimIdx + delim.length;
          segments[0] = (cmd.slice(0, heredocStart) + cmd.slice(heredocEnd)).trim();
        } else {
          // Possibly executed by an interpreter: screen the body and its statements.
          segments.push(heredocBody);
          const hStmts = shellTokenize(heredocBody).statements;
          for (const hs of hStmts) { if (hs.trim()) segments.push(hs.trim()); }
        }
      }
    }
  }


  // P1-6: process substitution <(...) and >(...).
  const procSubMatches = [...cmd.matchAll(/[<>]\(([^)]+)\)/g)];
  for (const ps of procSubMatches) {
    const inner = ps[1];
    if (inner && inner.trim()) segments.push(inner.trim());
  }

  return segments;
}

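/**
 * NOTE: this comment documents `checkCommand`, which is defined further below
 * (after `checkIndirectExecution`).
 *
 * Exportable command safety check (called by the dispatcher).
 * @param {string} command - the bash command
 * @param {object} input - the complete hook stdin input
 * @returns {object|null} the check result; null means the command is allowed
 *   { decision: 'deny'|'ask', message: string }
 */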

/**
 * L2: detects commands that are executed indirectly through a variable.
 *
 * The command is split on `;`, `&&`, `||` and `|` (plain text split, not
 * quote-aware). Each non-empty trimmed piece is tested, in this order, for:
 *   1. a variable in command position (`$VAR ...` / `${VAR} ...`), unless it is
 *      one of a fixed list of environment variables followed by a path
 *      separator or by whitespace and an option dash;
 *   2. Bash indirect expansion `${!name}`;
 *   3. `eval` applied to something starting with `$`.
 * The first hit wins.
 *
 * @param {string} command - raw command line
 * @returns {{matched: boolean, reason?: string, segment?: string}}
 */
function checkIndirectExecution(command) {
  const allowedVariableUse = /^\$\{?(?:HOME|PATH|PWD|SHELL|USER|TERM|EDITOR|PAGER|BROWSER|VISUAL)\}?(?:[\/\\]|\s+-)/;

  // Detectors are evaluated top to bottom for every piece.
  const detectors = [
    {
      reason: "变量在命令位置执行",
      hits: (text) => /^\$\{?\w+\}?\s/.test(text) && !allowedVariableUse.test(text),
    },
    {
      reason: "Bash 间接变量引用",
      hits: (text) => /\$\{![^}]+\}/.test(text),
    },
    {
      reason: "eval 执行变量/命令替换",
      hits: (text) => /\beval\s+["']?\$/.test(text),
    },
  ];

  for (const rawPiece of command.split(/(?:;|&&|\|\||\|)/)) {
    const segment = rawPiece.trim();
    if (!segment) continue;

    for (const { reason, hits } of detectors) {
      if (hits(segment)) {
        return { matched: true, reason, segment };
      }
    }
  }

  return { matched: false };
}

/**
 * Core safety check for one bash command (also exported for the dispatcher).
 *
 * The command is expanded with extractCommandSegments, decoded variants from
 * decodeSegments are appended, and the combined list is screened.
 *
 * Every deny-class check runs before any ask-class check, so a command that
 * matches both (for example an ask rule plus a sensitive-file redirect) is
 * denied rather than merely sent for confirmation:
 *   deny : DENY_PATTERNS -> SENSITIVE_REDIRECT_PATTERNS -> checkIndirectExecution
 *   ask  : ASK_PATTERNS  -> CREDENTIAL_PATTERNS
 * Each hit is recorded through logSecurityEvent before returning.
 *
 * @param {string} command - the bash command; a falsy value is allowed through
 * @param {object} input - the complete hook stdin input (not read by this function)
 * @returns {{decision: 'deny'|'ask', message: string}|null} null means allow
 */
function checkCommand(command, input) {
  if (!command) return null;

  const rawSegments = extractCommandSegments(command);
  const decodedExtra = decodeSegments(rawSegments);
  const segments = [...rawSegments, ...decodedExtra];

  // ---- deny class -------------------------------------------------------

  // Absolutely forbidden commands.
  const denyResult = checkPatterns(segments, DENY_PATTERNS);
  if (denyResult.matched) {
    logSecurityEvent('deny', 'block-dangerous-commands', denyResult.reason, denyResult.segment);
    return {
      decision: 'deny',
      message: `[安全防护] 阻止危险命令: ${denyResult.reason}\n命令片段: ${denyResult.segment.substring(0, 120)}\n此操作已被安全策略禁止。请改用更安全的替代方案。`,
    };
  }

  // Redirects into sensitive files [RT-7: ask -> deny, 2026-03-30].
  const redirectResult = checkPatterns(segments, SENSITIVE_REDIRECT_PATTERNS);
  if (redirectResult.matched) {
    logSecurityEvent('deny', 'block-dangerous-commands', redirectResult.reason, redirectResult.segment);
    return {
      decision: 'deny',
      message: `[敏感文件保护] 阻止危险重定向: ${redirectResult.reason}\n命令片段: ${redirectResult.segment.substring(0, 120)}\n此操作已被安全策略禁止。请使用 Write/Edit 工具代替 Bash 重定向写入敏感文件。`,
    };
  }

  // R2: indirect execution through variables (runs on the raw command).
  const indirectResult = checkIndirectExecution(command);
  if (indirectResult.matched) {
    logSecurityEvent('deny', 'block-dangerous-commands', indirectResult.reason, indirectResult.segment);
    return {
      decision: 'deny',
      message: `[安全防护] 变量间接执行: ${indirectResult.reason}\n命令片段: ${(indirectResult.segment || '').substring(0, 120)}\n此操作已被安全策略禁止。`,
    };
  }

  // ---- ask class --------------------------------------------------------

  // High-risk operations that need user confirmation.
  const askResult = checkPatterns(segments, ASK_PATTERNS);
  if (askResult.matched) {
    logSecurityEvent('ask', 'block-dangerous-commands', askResult.reason, askResult.segment);
    return {
      decision: 'ask',
      message: `[安全警告] 高风险操作: ${askResult.reason}\n命令片段: ${askResult.segment.substring(0, 120)}\n请用户确认是否执行。`,
    };
  }

  // Plain-text credentials. The user-facing message deliberately omits the
  // matched fragment; the fragment is still passed to logSecurityEvent.
  const credResult = checkPatterns(segments, CREDENTIAL_PATTERNS);
  if (credResult.matched) {
    logSecurityEvent('ask', 'block-dangerous-commands', credResult.reason, credResult.segment);
    return {
      decision: 'ask',
      message: `[凭证警告] ${credResult.reason}\n建议使用环境变量代替明文凭证。`,
    };
  }

  return null; // allow
}

// Export the core functions for tests and for the dispatcher.
// The functions listed here that are declared further down the file are
// function declarations, so they are hoisted and already available at this point.
// checkIndirectExecution is not part of the exported surface.
if (typeof module !== 'undefined') {
  module.exports = {
    shellTokenize, extractCommandSegments,
    normalizeFullwidth, decodeHtmlEntities, decodeSinglePass, decodeSegments, checkPatterns,
    checkCommand,
  };
}

// --- 多层编码解码器 (v3.9, 增强 v4.4) -----------------------------------
/**
 * Fullwidth -> halfwidth normalization (v4.4).
 *
 * Characters U+FF01..U+FF5E (fullwidth forms of printable ASCII) are shifted
 * down by 0xFEE0 to their ASCII counterparts, and the ideographic space
 * U+3000 becomes a regular space. All other characters are left untouched.
 *
 * @param {string} str - text to normalize
 * @returns {string} normalized text
 */
function normalizeFullwidth(str) {
  const FULLWIDTH_OFFSET = 0xfee0;
  const toHalfwidth = (glyph) => String.fromCharCode(glyph.charCodeAt(0) - FULLWIDTH_OFFSET);

  const asciiFolded = str.replace(/[\uff01-\uff5e]/g, toHalfwidth);
  return asciiFolded.replace(/\u3000/g, ' ');
}

/**
 * HTML entity decoding (v4.4).
 *
 * Supports the named entities &lt; &gt; &amp; &quot; &apos; (case-insensitive)
 * and numeric character references in hexadecimal (&#x3c;) and decimal (&#60;)
 * form. Numeric references may carry any number of leading zeros and may
 * address any Unicode code point up to U+10FFFF; references outside that range
 * are left as written. Replacement runs in three sequential passes (named,
 * hex, decimal), so text produced by an earlier pass can be decoded again by a
 * later one.
 *
 * @param {string} str - text that may contain HTML entities
 * @returns {string} decoded text
 */
function decodeHtmlEntities(str) {
  const named = { lt: '<', gt: '>', amp: '&', quot: '"', apos: "'" };

  // String.fromCodePoint throws on values above 0x10FFFF, so keep the raw
  // reference in that case instead of failing the whole decode.
  const fromReference = (raw, code) =>
    (Number.isInteger(code) && code <= 0x10ffff ? String.fromCodePoint(code) : raw);

  return str
    .replace(/&(lt|gt|amp|quot|apos);/gi, (raw, name) => named[name.toLowerCase()] || raw)
    // `0*` absorbs zero padding such as &#x00072; which would otherwise stay encoded.
    .replace(/&#x0*([0-9A-Fa-f]{1,6});/g, (raw, hex) => fromReference(raw, parseInt(hex, 16)))
    .replace(/&#0*(\d{1,7});/g, (raw, dec) => fromReference(raw, parseInt(dec, 10)));
}

/**
 * Single decoding round: runs every decoder over every input segment and
 * returns the newly produced variants (the inputs themselves are not included).
 *
 * Decoders, applied independently to each segment:
 *   0.  fullwidth -> halfwidth normalization
 *   0b. HTML entity decoding
 *   1.  `echo <payload> | base64 -d|--decode` (payload kept only if printable ASCII/whitespace)
 *   2.  URL percent-decoding
 *   3.  `\xNN` hex escapes
 *   4.  `\uNNNN` unicode escapes
 *   5.  octal escapes inside ANSI-C quoted strings `$'...'`
 *   6.  Unicode NFC normalization
 *
 * @param {string[]} segments - fragments to decode
 * @returns {string[]} decoded variants, possibly empty, duplicates not removed
 */
function decodeSinglePass(segments) {
  const decoded = [];

  // One complete ANSI-C quoted string. The two alternatives cannot match the
  // same text (one never consumes a backslash, the other always starts with
  // one), which keeps matching linear even on an unterminated `$'\1\1\1...`.
  // A pattern that additionally lists `\\[0-7]{1,3}` accepts exactly the same
  // strings but backtracks exponentially on such input.
  const ANSI_C_STRING = /\$'((?:[^'\\]|\\.)*)'/g;
  const decodeOctalEscapes = (text) =>
    text.replace(/\\([0-7]{1,3})/g, (_, oct) => String.fromCharCode(parseInt(oct, 8)));

  for (const seg of segments) {
    // 0. Fullwidth normalization (v4.4)
    const normalized = normalizeFullwidth(seg);
    if (normalized !== seg) {
      decoded.push(normalized);
    }

    // 0b. HTML entity decoding (v4.4)
    if (/&(?:#x?[0-9A-Fa-f]+|[a-z]+);/i.test(seg)) {
      const htmlDecoded = decodeHtmlEntities(seg);
      if (htmlDecoded !== seg) {
        decoded.push(htmlDecoded);
      }
    }

    // 1. Base64 pipe: echo XXXX | base64 -d
    const b64Match = seg.match(/echo\s+["']?([A-Za-z0-9+/=]{8,})["']?\s*\|\s*base64\s+(?:-d|--decode)/);
    if (b64Match) {
      try {
        const plain = Buffer.from(b64Match[1], 'base64').toString('utf8');
        if (plain && /^[\x20-\x7e\s]+$/.test(plain)) {
          decoded.push(plain.trim());
        }
      } catch {
        // Best effort: an undecodable payload simply contributes no variant.
      }
    }

    // 2. URL decoding: %2F, %20, %3B ...
    if (/%[0-9A-Fa-f]{2}/.test(seg)) {
      let urlDecoded;
      try {
        urlDecoded = decodeURIComponent(seg);
      } catch {
        // decodeURIComponent rejects the whole string on a single malformed
        // sequence (e.g. a stray `%`). Fall back to decoding the well-formed
        // ASCII escapes one by one so such a character cannot switch this
        // decoder off for the rest of the segment.
        urlDecoded = seg.replace(/%([0-9A-Fa-f]{2})/g, (raw, hex) => {
          const code = parseInt(hex, 16);
          return code < 0x80 ? String.fromCharCode(code) : raw;
        });
      }
      if (urlDecoded !== seg) {
        decoded.push(urlDecoded);
      }
    }

    // 3. Hex escapes: \x72\x6d -> rm
    if (/\\x[0-9A-Fa-f]{2}/.test(seg)) {
      const hexDecoded = seg.replace(/\\x([0-9A-Fa-f]{2})/g, (_, hex) => {
        return String.fromCharCode(parseInt(hex, 16));
      });
      if (hexDecoded !== seg) {
        decoded.push(hexDecoded);
      }
    }

    // 4. Unicode escapes: \u0072\u006d -> rm
    if (/\\u[0-9A-Fa-f]{4}/.test(seg)) {
      const uniDecoded = seg.replace(/\\u([0-9A-Fa-f]{4})/g, (_, hex) => {
        return String.fromCharCode(parseInt(hex, 16));
      });
      if (uniDecoded !== seg) {
        decoded.push(uniDecoded);
      }
    }

    // 5. Octal escapes in ANSI-C quoting: $'\162\155' -> rm
    if (seg.includes("$'")) {
      // 5a. Each decoded string on its own.
      const ansiStrings = seg.match(ANSI_C_STRING) || [];
      for (const quoted of ansiStrings) {
        const inner = quoted.slice(2, -1);
        const octDecoded = decodeOctalEscapes(inner);
        if (octDecoded !== inner) {
          decoded.push(octDecoded);
        }
      }

      // 5b. The segment with every octal-bearing $'...' replaced by its decoded
      // text, so rules that need the surrounding arguments (e.g. `rm` followed
      // by `-rf /`) can match `$'\162\155' -rf /`.
      const substituted = seg.replace(ANSI_C_STRING, (quoted, inner) => {
        const plain = decodeOctalEscapes(inner);
        return plain !== inner ? plain : quoted;
      });
      if (substituted !== seg) {
        decoded.push(substituted);
      }
    }

    // 6. Unicode NFC normalization (v4.4)
    try {
      const nfc = seg.normalize('NFC');
      if (nfc !== seg) decoded.push(nfc);
    } catch {
      // Best effort: skip normalization if it fails for this segment.
    }
  }

  return decoded;
}

/**
 * Iterative multi-layer decoding (V02: limit raised from 3 to 5 rounds).
 *
 * Round 1 decodes the given segments; every later round decodes only what the
 * previous round produced. Decoding stops after 5 rounds or as soon as a round
 * yields nothing new.
 *
 * @param {string[]} segments - original fragments
 * @returns {string[]} every decoded variant from all rounds, in discovery order
 *   (the original fragments are not included)
 */
function decodeSegments(segments) {
  const MAX_ROUNDS = 5;
  const collected = [];
  let frontier = segments;
  let round = 0;

  while (round < MAX_ROUNDS) {
    const fresh = decodeSinglePass(frontier);
    if (fresh.length === 0) {
      break;
    }
    for (const variant of fresh) {
      collected.push(variant);
    }
    frontier = fresh; // the next round only looks at newly found variants
    round += 1;
  }

  return collected;
}

/**
 * Tests every segment against every rule and reports the first hit.
 *
 * Segments are scanned in order, and for each segment the rules are tried in
 * order, so earlier segments take precedence over earlier rules.
 *
 * @param {string[]} segments - fragments to screen
 * @param {Array<{pattern: RegExp, reason: string}>} patterns - compiled rules
 * @returns {{matched: boolean, reason?: string, segment?: string}}
 */
function checkPatterns(segments, patterns) {
  for (const segment of segments) {
    for (const { pattern, reason } of patterns) {
      // RegExp#test is stateful for global/sticky patterns: it resumes at
      // lastIndex and leaves lastIndex behind after a hit. Rules are compiled
      // once and reused across calls, so always start from the beginning;
      // otherwise a rule carrying a `g`/`y` flag can miss a match right after
      // it has matched something else.
      const isStateful = pattern.global || pattern.sticky;
      if (isStateful) pattern.lastIndex = 0;

      if (pattern.test(segment)) {
        if (isStateful) pattern.lastIndex = 0;
        return { matched: true, reason, segment };
      }
    }
  }
  return { matched: false };
}

// ─── 主流程 ────────────────────────────────────────────
/**
 * Writes a permission decision as JSON to stderr and terminates with exit code 2.
 *
 * The exit is issued from the write callback: process.exit() does not wait for
 * pending stderr output, and stderr writes can be asynchronous depending on the
 * platform and on what the stream is attached to, so exiting immediately after
 * write() could drop the decision.
 *
 * @param {'deny'|'ask'} decision - value for hookSpecificOutput.permissionDecision
 * @param {string} message - text for systemMessage
 */
function emitDecisionAndExit(decision, message) {
  const payload = JSON.stringify({
    hookSpecificOutput: { permissionDecision: decision },
    systemMessage: message,
  });
  // The callback also fires when the write fails, so the process always exits with 2.
  process.stderr.write(payload, () => process.exit(2));
}

/**
 * Hook entry point: reads the hook input from stdin (at most 1 MiB), screens
 * `tool_input.command` and exits with
 *   0 - no command present, or the command is allowed;
 *   2 - the command is denied or needs confirmation (decision JSON on stderr).
 * Any failure while reading or checking is reported as an `ask` decision
 * (fail-closed) instead of silently allowing the command.
 */
function main() {
  readStdin({ maxSize: 1024 * 1024 }).then(input => {
    const cmd = (input.tool_input && input.tool_input.command) || '';

    if (!cmd) {
      process.exit(0);
      return;
    }

    const result = checkCommand(cmd, input);
    if (result) {
      emitDecisionAndExit(result.decision, result.message);
      return;
    }

    process.exit(0);
  }).catch((e) => {
    // Fail-closed: on a parse failure or oversized input, ask the user for
    // confirmation rather than letting the command through unnoticed.
    // A rejection value is not guaranteed to be an Error, so do not assume
    // it has a `message` property.
    const detail = e && e.message ? e.message : String(e);
    emitDecisionAndExit('ask', `[安全防护] 命令检查遇到异常(${detail})，请用户确认是否执行。`);
  });
}

// Run the hook only when this file is executed directly; when it is loaded
// with require() (tests, dispatcher) only the exports are used.
if (require.main === module) {
  main();
}