#!/usr/bin/env node /** * 工作流模式识别 (v5.0) * * 从 activity 日志中识别技能使用的序列模式, * 用于预测下一个可能使用的技能。 * * 核心函数: * extractSkillSequences(activityLogs, windowMinutes) → 按窗口分割的技能序列 * minePatterns(sessions, minSupport) → n-gram 频率统计 * predictNextSkill(currentSkill, patterns) → 最高频后继技能 */ const fs = require('fs'); const path = require('path'); // ─── 模块级缓存 (防止同进程内多次调用重复读取) ───────── let _activityCache = { data: null, ts: 0 }; const ACTIVITY_CACHE_TTL = 5 * 60 * 1000; // 5 分钟 TTL const detectClaudeRoot = () => require('./paths.config.js').PATHS.root; const ROOT = detectClaudeRoot(); const DEBUG_DIR = path.join(ROOT, 'debug'); // ─── 文件级缓存路径 (解决 hook 独立进程场景跨进程复用) ───── const PATTERNS_CACHE_FILE = path.join(DEBUG_DIR, '.workflow-patterns-cache.json'); /** * 从 activity 日志提取技能序列, 按时间窗口分割会话 * @param {Array} activityLogs - activity 事件列表 * @param {number} windowMinutes - 会话窗口大小 (分钟) * @returns {Array} 会话列表, 每个会话是技能名数组 */ function extractSkillSequences(activityLogs, windowMinutes = 30) { // 过滤并排序 skill 事件 const skillEvents = activityLogs .filter(e => e.event === 'skill' && e.detail && e.ts) .sort((a, b) => new Date(a.ts) - new Date(b.ts)); if (skillEvents.length === 0) return []; const windowMs = windowMinutes * 60 * 1000; const sessions = []; let currentSession = [skillEvents[0].detail]; let lastTime = new Date(skillEvents[0].ts).getTime(); for (let i = 1; i < skillEvents.length; i++) { const eventTime = new Date(skillEvents[i].ts).getTime(); if (eventTime - lastTime > windowMs) { // 超过窗口, 开始新会话 if (currentSession.length >= 2) { sessions.push(currentSession); } currentSession = []; } currentSession.push(skillEvents[i].detail); lastTime = eventTime; } // 最后一个会话 if (currentSession.length >= 2) { sessions.push(currentSession); } return sessions; } /** * 挖掘 n-gram 模式 (2-gram 和 3-gram) * @param {Array} sessions - 会话列表 * @param {number} minSupport - 最小支持度 (出现次数) * @returns {Object} { bigrams: {}, trigrams: {} } 频率映射 */ function minePatterns(sessions, minSupport = 3) { const bigrams = {}; // "A→B" → count const trigrams = {}; // "A→B→C" → count for (const session of sessions) { for (let i = 0; i < session.length - 1; i++) { const key2 = `${session[i]}→${session[i + 1]}`; bigrams[key2] = (bigrams[key2] || 0) + 1; if (i < session.length - 2) { const key3 = `${session[i]}→${session[i + 1]}→${session[i + 2]}`; trigrams[key3] = (trigrams[key3] || 0) + 1; } } } // 过滤低频模式 const filteredBigrams = {}; for (const [key, count] of Object.entries(bigrams)) { if (count >= minSupport) filteredBigrams[key] = count; } const filteredTrigrams = {}; for (const [key, count] of Object.entries(trigrams)) { if (count >= minSupport) filteredTrigrams[key] = count; } return { bigrams: filteredBigrams, trigrams: filteredTrigrams }; } /** * 预测下一个最可能使用的技能 * @param {string} currentSkill - 当前技能 * @param {Object} patterns - minePatterns() 返回的模式 * @returns {{ skill: string, confidence: number } | null} 预测结果 */ function predictNextSkill(currentSkill, patterns) { const candidates = {}; // 从 bigrams 中找 currentSkill 的后继 for (const [key, count] of Object.entries(patterns.bigrams || {})) { const [from, to] = key.split('→'); if (from === currentSkill) { candidates[to] = (candidates[to] || 0) + count; } } if (Object.keys(candidates).length === 0) return null; // 找最高频 const sorted = Object.entries(candidates).sort((a, b) => b[1] - a[1]); const total = sorted.reduce((s, [, c]) => s + c, 0); return { skill: sorted[0][0], confidence: Math.round(sorted[0][1] / total * 100) / 100, alternatives: sorted.slice(1, 3).map(([skill, count]) => ({ skill, confidence: Math.round(count / total * 100) / 100, })), }; } /** * 收集 activity 文件列表(含 mtime),用于生成缓存键 * @param {number} maxDays * @returns {{ files: string[], cacheKey: string }} */ function getActivityFileMeta(maxDays) { const cutoff = new Date(); cutoff.setDate(cutoff.getDate() - maxDays); const cutoffStr = cutoff.toISOString().slice(0, 10); let files = []; try { files = fs.readdirSync(DEBUG_DIR) .filter(f => f.startsWith('activity-') && f.endsWith('.jsonl')) .sort() .filter(f => { const m = f.match(/activity-(\d{4}-\d{2}-\d{2})/); return !m || m[1] >= cutoffStr; }) .map(f => path.join(DEBUG_DIR, f)); } catch {} // 缓存键:每个文件路径 + mtime(毫秒)拼接 const cacheKey = files.map(f => { try { return `${f}:${fs.statSync(f).mtimeMs}`; } catch { return f; } }).join('|'); return { files, cacheKey }; } /** * 加载 activity 日志 (双层缓存: 进程内 5 分钟 TTL + 文件级磁盘缓存) * * 文件级缓存解决 hook 独立进程场景下每次都要读取 2.4MB 日志的问题: * - 缓存键 = 所有 activity 文件路径 + mtime 拼接 * - mtime 不变则直接命中磁盘缓存,无需重新读取日志 * * @param {number} maxDays * @returns {Array} */ function loadActivityLogs(maxDays = 30) { const now = Date.now(); // 第一层: 进程内内存缓存(5 分钟 TTL,防止同进程多次调用) if (_activityCache.data && (now - _activityCache.ts) < ACTIVITY_CACHE_TTL) { return _activityCache.data; } // 收集文件列表与缓存键 const { files, cacheKey } = getActivityFileMeta(maxDays); // 第二层: 文件级磁盘缓存(跨进程复用,hook 独立进程场景受益) try { const cache = JSON.parse(fs.readFileSync(PATTERNS_CACHE_FILE, 'utf8')); if (cache.key === cacheKey && Array.isArray(cache.data)) { // 磁盘缓存命中,回填进程内缓存后返回 _activityCache = { data: cache.data, ts: now }; return cache.data; } } catch { /* 缓存不存在或格式错误,继续读取 */ } // 缓存 miss: 读取所有 activity 文件 const events = []; for (const filePath of files) { try { const lines = fs.readFileSync(filePath, 'utf8').trim().split('\n'); for (const line of lines) { try { events.push(JSON.parse(line)); } catch {} } } catch {} } // 写入磁盘缓存(写失败不影响主流程) try { if (!fs.existsSync(DEBUG_DIR)) fs.mkdirSync(DEBUG_DIR, { recursive: true }); fs.writeFileSync(PATTERNS_CACHE_FILE, JSON.stringify({ key: cacheKey, data: events, ts: new Date().toISOString() })); } catch {} // 更新进程内缓存 _activityCache = { data: events, ts: now }; return events; } // 模块导出 if (typeof module !== 'undefined') { module.exports = { extractSkillSequences, minePatterns, predictNextSkill, loadActivityLogs, }; } // CLI 入口 if (require.main === module) { const jsonMode = process.argv.includes('--json'); const events = loadActivityLogs(30); const sessions = extractSkillSequences(events, 30); const patterns = minePatterns(sessions, 2); if (jsonMode) { console.log(JSON.stringify({ sessions: sessions.length, patterns }, null, 2)); } else { console.log('=== 工作流模式分析 ==='); console.log(`会话数: ${sessions.length}`); console.log(`2-gram 模式: ${Object.keys(patterns.bigrams).length}`); console.log(`3-gram 模式: ${Object.keys(patterns.trigrams).length}`); if (Object.keys(patterns.bigrams).length > 0) { console.log('\nTop 10 2-gram:'); Object.entries(patterns.bigrams) .sort((a, b) => b[1] - a[1]) .slice(0, 10) .forEach(([key, count]) => console.log(` ${key.padEnd(50)} ${count}`)); } if (Object.keys(patterns.trigrams).length > 0) { console.log('\nTop 5 3-gram:'); Object.entries(patterns.trigrams) .sort((a, b) => b[1] - a[1]) .slice(0, 5) .forEach(([key, count]) => console.log(` ${key.padEnd(60)} ${count}`)); } } }