bookworm-smart-assistant/scripts/compliance-analyzer.js

#!/usr/bin/env node
/**
 * 合规分析引擎 (v5.2 Neural Gateway)
 *
 * 分析 compliance-*.jsonl 日志，生成合规率报告 + 违规模式 + 阈值建议。
 *
 * 用法:
 *   node scripts/compliance-analyzer.js --report   # 文本报告
 *   node scripts/compliance-analyzer.js --json     # JSON 输出
 *
 * 模块导出:
 *   loadComplianceLogs(maxDays) → [entries]
 *   computeComplianceRate(entries) → { total, compliant, skipped, violated, incomplete, rate }
 *   analyzeViolationPatterns(entries) → [{ intent, frequency, commonSkill }]
 *   suggestThresholdAdjustment(entries) → { currentThreshold, suggested, reason }
 *   generateReport(options) → { summary, metrics, patterns, suggestions }
 */

const fs = require('fs');
const path = require('path');

/**
 * Resolves the Claude root directory from the shared path configuration.
 * The configuration module is required at call time, not at file load.
 *
 * @returns {string} The value of `PATHS.root` exported by `./paths.config.js`.
 */
function detectClaudeRoot() {
  const { PATHS } = require('./paths.config.js');
  return PATHS.root;
}

// Root directory, and its `debug` subdirectory where compliance logs are read from.
const CLAUDE_ROOT = detectClaudeRoot();
const DEBUG_DIR = path.join(CLAUDE_ROOT, 'debug');

/**
 * Loads compliance audit records from the most recent daily log files.
 *
 * Scans DEBUG_DIR for files named exactly `compliance-YYYY-MM-DD.jsonl`,
 * walks them newest first and parses every line as JSON. Only records that
 * carry a `compliant` field are returned (other lines are not audit records).
 *
 * The function is deliberately best-effort and never throws: a missing or
 * unreadable directory yields an empty result, and an unreadable file, a file
 * name whose date cannot be parsed, or a malformed line is skipped on its own
 * without affecting the remaining files.
 *
 * @param {number} [maxDays=7] - Maximum age, in days, between the date in the
 *   file name (as parsed by `Date`) and now. Older files are not read.
 * @returns {Object[]} Audit records, newest file first, in line order within
 *   each file.
 */
function loadComplianceLogs(maxDays = 7) {
  const MS_PER_DAY = 24 * 60 * 60 * 1000;
  // Anchored on both ends so only well-formed daily log names take part in
  // the date-ordered scan below.
  const LOG_NAME = /^compliance-(\d{4}-\d{2}-\d{2})\.jsonl$/;

  const entries = [];
  const now = Date.now();

  let names;
  try {
    names = fs.readdirSync(DEBUG_DIR);
  } catch {
    // Missing or unreadable debug directory: there is nothing to analyse.
    return entries;
  }

  const datedFiles = [];
  for (const name of names) {
    const match = LOG_NAME.exec(name);
    if (!match) continue;

    const time = new Date(match[1]).getTime();
    // An unparsable date would make the age NaN, and NaN compares false
    // against any window, so such files are dropped explicitly.
    if (Number.isNaN(time)) continue;

    datedFiles.push({ name, time });
  }

  // Newest first, so the scan can stop at the first file outside the window.
  datedFiles.sort((a, b) => b.time - a.time);

  for (const { name, time } of datedFiles) {
    const ageDays = (now - time) / MS_PER_DAY;
    if (ageDays > maxDays) break;

    let text;
    try {
      text = fs.readFileSync(path.join(DEBUG_DIR, name), 'utf8');
    } catch {
      // One unreadable entry (e.g. a directory, or a permission error) must
      // not discard the other log files.
      continue;
    }

    for (const line of text.trim().split('\n')) {
      let record;
      try {
        record = JSON.parse(line);
      } catch {
        // Blank or malformed line: skip it.
        continue;
      }

      // Only audit records (those with a `compliant` field) are counted.
      if (record !== null && typeof record === 'object' && 'compliant' in record) {
        entries.push(record);
      }
    }
  }

  return entries;
}

/**
 * Tallies audit records by outcome and derives the compliance rate.
 *
 * Classification by the record's `compliant` field:
 *   - `'skipped'`                          → skipped
 *   - `true`                               → compliant
 *   - `false` with a truthy `routedSkill`  → violated
 *   - `false` without a `routedSkill`      → incomplete (compliance cannot be judged)
 *   - anything else                        → ignored
 *
 * Skipped and incomplete records are left out of `total`. When there are no
 * counted decisions the rate is reported as 1.
 *
 * @param {Object[]} entries - Audit records.
 * @returns {{ total: number, compliant: number, skipped: number, violated: number, incomplete: number, rate: number }}
 */
function computeComplianceRate(entries) {
  const tally = { compliant: 0, skipped: 0, violated: 0, incomplete: 0 };

  for (const record of entries) {
    switch (record.compliant) {
      case 'skipped':
        tally.skipped += 1;
        break;
      case true:
        tally.compliant += 1;
        break;
      case false:
        if (record.routedSkill) {
          tally.violated += 1;
        } else {
          tally.incomplete += 1;
        }
        break;
      default:
        break;
    }
  }

  // Only records with a definite verdict count as decisions.
  const total = tally.compliant + tally.violated;

  return {
    total,
    compliant: tally.compliant,
    skipped: tally.skipped,
    violated: tally.violated,
    incomplete: tally.incomplete,
    rate: total === 0 ? 1 : tally.compliant / total,
  };
}

/**
 * Groups violations (records with `compliant === false`) by intent and
 * reports, for each intent, how often it was violated and which skill was
 * most often used in those violations.
 *
 * The intent key is the record's `intent.intents` array joined with commas.
 * Records whose `intent.intents` is missing or not an array are grouped under
 * `'unknown'`; a missing `actualSkill` is likewise counted as `'unknown'`.
 *
 * @param {Object[]} entries - Audit records.
 * @returns {Array<{ intent: string, frequency: number, commonSkill: string }>}
 *   One item per intent key, sorted by descending frequency. When several
 *   skills tie, the one encountered first wins.
 */
function analyzeViolationPatterns(entries) {
  // intent key -> { frequency, skills: Map<skill, count> }.
  // Maps (not plain objects) so keys such as "constructor" are counted
  // correctly, and everything is collected in a single pass.
  const groups = new Map();

  for (const entry of entries) {
    if (entry.compliant !== false) continue;

    const intents = entry.intent?.intents;
    // The same key is used for counting and for skill attribution, so
    // records without intents contribute their skills to the "unknown" group.
    const intentKey = Array.isArray(intents) ? intents.join(',') : 'unknown';
    const skill = entry.actualSkill || 'unknown';

    let group = groups.get(intentKey);
    if (!group) {
      group = { frequency: 0, skills: new Map() };
      groups.set(intentKey, group);
    }
    group.frequency += 1;
    group.skills.set(skill, (group.skills.get(skill) || 0) + 1);
  }

  return [...groups]
    .map(([intent, { frequency, skills }]) => {
      let commonSkill = 'unknown';
      let bestCount = 0;
      for (const [skill, count] of skills) {
        // Strict comparison keeps the first-seen skill on ties.
        if (count > bestCount) {
          bestCount = count;
          commonSkill = skill;
        }
      }
      return { intent, frequency, commonSkill };
    })
    .sort((a, b) => b.frequency - a.frequency);
}

/**
 * Suggests an adjustment of the candidate-confidence threshold based on the
 * confidence values recorded on violations (`compliant === false`).
 *
 * Rules, applied in order:
 *   - fewer than 3 violations            → keep the threshold (not enough data)
 *   - no finite numeric `confidence`     → keep the threshold
 *   - average confidence below 0.4       → lower the threshold by 0.05, but
 *                                          not below 0.2 and never above the
 *                                          current value
 *   - average confidence above 0.7       → keep the threshold; the reason
 *                                          points at overlapping skill
 *                                          definitions instead
 *   - otherwise                          → keep the threshold
 *
 * @param {Object[]} entries - Audit records.
 * @param {number} [currentThreshold=0.3] - Threshold currently in use.
 * @returns {{ currentThreshold: number, suggested: number, reason: string }}
 */
function suggestThresholdAdjustment(entries, currentThreshold = 0.3) {
  const MIN_VIOLATIONS = 3;
  const LOW_AVG_CONFIDENCE = 0.4;
  const HIGH_AVG_CONFIDENCE = 0.7;
  const STEP = 0.05;
  const FLOOR = 0.2;

  const keep = (reason) => ({ currentThreshold, suggested: currentThreshold, reason });

  const violations = entries.filter((e) => e.compliant === false);
  if (violations.length < MIN_VIOLATIONS) {
    return keep('数据不足，保持当前阈值');
  }

  // Number.isFinite rejects non-numbers as well as NaN/Infinity, which would
  // otherwise turn the average into NaN and hide a real signal.
  const confidences = violations
    .map((v) => v.confidence)
    .filter((c) => Number.isFinite(c));

  if (confidences.length === 0) {
    return keep('无置信度数据');
  }

  const avgConf = confidences.reduce((sum, c) => sum + c, 0) / confidences.length;

  // Low average confidence on violations: lowering the threshold widens the
  // candidate set.
  if (avgConf < LOW_AVG_CONFIDENCE) {
    return {
      currentThreshold,
      // Clamp to the floor, but never suggest a value above the current one.
      suggested: Math.min(currentThreshold, Math.max(FLOOR, currentThreshold - STEP)),
      reason: `违规条目平均置信度低 (${avgConf.toFixed(2)}), 建议降低阈值扩大候选集`,
    };
  }

  // High average confidence on violations: routing was confident, so the
  // threshold is left alone and the skill definitions are flagged instead.
  if (avgConf > HIGH_AVG_CONFIDENCE) {
    return keep(`高置信度违规 (${avgConf.toFixed(2)}), 可能是技能定义重叠，需优化 skills-index`);
  }

  return keep('当前阈值适中');
}

/**
 * Builds the combined compliance report: loads the logs for the requested
 * window, then computes metrics, violation patterns and a threshold
 * suggestion from the same set of records.
 *
 * Status is derived from the compliance rate: below 0.8 is `CRITICAL`,
 * below 0.95 is `WARNING`, anything else is `HEALTHY`.
 *
 * @param {{ maxDays?: number }} [options] - `maxDays` is the log window in
 *   days; a falsy value falls back to 7.
 * @returns {{ summary: Object, metrics: Object, patterns: Object[], suggestions: Object }}
 */
function generateReport(options = {}) {
  const windowDays = options.maxDays || 7;
  const records = loadComplianceLogs(windowDays);

  const metrics = computeComplianceRate(records);
  const patterns = analyzeViolationPatterns(records);
  const suggestions = suggestThresholdAdjustment(records);

  const { rate } = metrics;
  const status = rate < 0.8 ? 'CRITICAL' : rate < 0.95 ? 'WARNING' : 'HEALTHY';

  const summary = {
    period: `${windowDays} 天`,
    status,
    complianceRate: `${(rate * 100).toFixed(1)}%`,
    totalDecisions: metrics.total,
    skipped: metrics.skipped,
  };

  return { summary, metrics, patterns, suggestions };
}

// Module exports: exposes the analysis functions to callers that load this
// file with require(). The `typeof module` guard keeps the assignment from
// throwing where no CommonJS `module` binding exists.
if (typeof module !== 'undefined') {
  module.exports = {
    loadComplianceLogs,
    computeComplianceRate,
    analyzeViolationPatterns,
    suggestThresholdAdjustment,
    generateReport,
  };
}

// CLI entry point: runs only when this file is executed directly, not when
// it is loaded with require(). Prints the 7-day report as JSON when `--json`
// is passed, otherwise as a plain-text summary.
if (require.main === module) {
  const wantsJson = process.argv.slice(2).includes('--json');
  const report = generateReport({ maxDays: 7 });

  if (wantsJson) {
    console.log(JSON.stringify(report, null, 2));
  } else {
    const { summary, metrics, patterns, suggestions } = report;

    // Collect the output lines first, then print them one by one.
    const output = [
      '=== Neural Gateway 合规报告 ===',
      `期间: ${summary.period}`,
      `状态: ${summary.status}`,
      `合规率: ${summary.complianceRate} (${metrics.compliant}/${metrics.total})`,
      `跳过: ${metrics.skipped} 次 (simple 查询)`,
      `违规: ${metrics.violated} 次`,
      '',
    ];

    // The violation-pattern section is omitted entirely when there are none.
    if (patterns.length > 0) {
      output.push('--- 违规模式 ---');
      for (const { intent, frequency, commonSkill } of patterns) {
        output.push(`  意图: ${intent} | 频次: ${frequency} | 常见技能: ${commonSkill}`);
      }
      output.push('');
    }

    output.push(
      `--- 阈值建议 ---`,
      `  当前: ${suggestions.currentThreshold}`,
      `  建议: ${suggestions.suggested}`,
      `  原因: ${suggestions.reason}`
    );

    for (const line of output) {
      console.log(line);
    }
  }
}