bookworm-smart-assistant/scripts/workflow-patterns.js

265 lines
8.6 KiB
JavaScript
Raw Normal View History

#!/usr/bin/env node
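/**
 * Workflow pattern recognition (v5.0)
 *
 * Identifies sequential patterns of skill usage from the activity logs,
 * which are used to predict the next skill that is likely to be used.
 *
 * Core functions:
 *   extractSkillSequences(activityLogs, windowMinutes) → skill sequences split by time window
 *   minePatterns(sessions, minSupport)                 → n-gram frequency counts
 *   predictNextSkill(currentSkill, patterns)           → most frequent successor skill
 */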
const fs = require('fs');
const path = require('path');
// ─── Module-level cache (avoids re-reading the logs on repeated calls within one process) ───
// `data` holds the last loaded event array, `ts` the Date.now() value when it was stored.
let _activityCache = { data: null, ts: 0 };
const ACTIVITY_CACHE_TTL = 5 * 60 * 1000; // 5-minute TTL, in milliseconds
// The root directory comes from the project-local paths.config.js (PATHS.root).
const detectClaudeRoot = () => require('./paths.config.js').PATHS.root;
const ROOT = detectClaudeRoot();
// Directory that is scanned for activity-*.jsonl files and that holds the disk cache.
const DEBUG_DIR = path.join(ROOT, 'debug');
// ─── File-level cache path (lets independent hook processes reuse parsed logs across processes) ───
const PATTERNS_CACHE_FILE = path.join(DEBUG_DIR, '.workflow-patterns-cache.json');
/**
 * Extract skill-usage sequences from activity logs and split them into
 * sessions by time window.
 *
 * Only entries with `event === 'skill'`, a truthy `detail` and a `ts` that
 * `Date` can parse are considered. Events are ordered by timestamp; a new
 * session starts whenever the gap between two consecutive skill events
 * exceeds `windowMinutes`. Sessions with fewer than two skills are dropped
 * because they cannot contribute any n-gram.
 *
 * @param {Array} activityLogs - list of activity events (`{ event, detail, ts }`)
 * @param {number} windowMinutes - session window size in minutes
 * @returns {Array<string[]>} list of sessions, each an array of skill names
 */
function extractSkillSequences(activityLogs, windowMinutes = 30) {
  if (!Array.isArray(activityLogs)) return [];

  // Parse every timestamp exactly once. Entries that are null or whose
  // timestamp is unparseable are discarded: a NaN time would make the sort
  // comparator inconsistent and would defeat the gap check below.
  const skillEvents = activityLogs
    .filter(e => e && e.event === 'skill' && e.detail && e.ts)
    .map(e => ({ detail: e.detail, time: new Date(e.ts).getTime() }))
    .filter(e => !Number.isNaN(e.time))
    .sort((a, b) => a.time - b.time);
  if (skillEvents.length === 0) return [];

  const windowMs = windowMinutes * 60 * 1000;
  const sessions = [];
  let currentSession = [];
  let lastTime = skillEvents[0].time;

  for (const { detail, time } of skillEvents) {
    if (time - lastTime > windowMs) {
      // Gap exceeds the window: close the current session and start a new one.
      if (currentSession.length >= 2) {
        sessions.push(currentSession);
      }
      currentSession = [];
    }
    currentSession.push(detail);
    lastTime = time;
  }

  // Flush the trailing session.
  if (currentSession.length >= 2) {
    sessions.push(currentSession);
  }
  return sessions;
}
/**
 * Mine n-gram patterns (2-grams and 3-grams) from skill sessions.
 *
 * Keys are the skill names joined with '→' (e.g. "A→B", "A→B→C"). The
 * delimiter keeps keys unambiguous ("ab"+"c" vs "a"+"bc") and is the same
 * one predictNextSkill() splits on.
 *
 * @param {Array<string[]>} sessions - list of sessions (arrays of skill names)
 * @param {number} minSupport - minimum support (occurrence count) for a pattern to be kept
 * @returns {{ bigrams: Object<string, number>, trigrams: Object<string, number> }} frequency maps
 */
function minePatterns(sessions, minSupport = 3) {
  const SEPARATOR = '→';
  const bigrams = {}; // "A→B" → count
  const trigrams = {}; // "A→B→C" → count

  for (const session of sessions) {
    for (let i = 0; i < session.length - 1; i++) {
      const key2 = [session[i], session[i + 1]].join(SEPARATOR);
      bigrams[key2] = (bigrams[key2] || 0) + 1;
      if (i < session.length - 2) {
        const key3 = [session[i], session[i + 1], session[i + 2]].join(SEPARATOR);
        trigrams[key3] = (trigrams[key3] || 0) + 1;
      }
    }
  }

  // Drop patterns seen fewer than `minSupport` times.
  const keepFrequent = counts =>
    Object.fromEntries(Object.entries(counts).filter(([, count]) => count >= minSupport));

  return { bigrams: keepFrequent(bigrams), trigrams: keepFrequent(trigrams) };
}
/**
 * Predict the skill most likely to be used next.
 *
 * Looks up every bigram "from→to" whose `from` equals `currentSkill` and
 * ranks the successors by frequency. Confidence is the successor's share of
 * all matching transitions, rounded to two decimals.
 *
 * @param {string} currentSkill - the skill that was just used
 * @param {Object} patterns - pattern object as returned by minePatterns()
 * @returns {{ skill: string, confidence: number,
 *             alternatives: Array<{ skill: string, confidence: number }> } | null}
 *   the best prediction plus up to two runner-ups, or null when no bigram
 *   starts with `currentSkill`
 */
function predictNextSkill(currentSkill, patterns) {
  const bigrams = (patterns && patterns.bigrams) || {};

  // Prototype-less accumulator so that skills named like Object.prototype
  // members ("constructor", "toString", ...) are counted correctly.
  const candidates = Object.create(null);

  // Collect the successors of currentSkill from the bigrams.
  for (const [key, count] of Object.entries(bigrams)) {
    const [from, to] = key.split('→');
    // Keys without a delimiter have no successor part; skip them.
    if (to === undefined) continue;
    if (from === currentSkill) {
      candidates[to] = (candidates[to] || 0) + count;
    }
  }

  const sorted = Object.entries(candidates).sort((a, b) => b[1] - a[1]);
  if (sorted.length === 0) return null;

  const total = sorted.reduce((sum, [, count]) => sum + count, 0);
  const toConfidence = count => Math.round(count / total * 100) / 100;

  return {
    skill: sorted[0][0],
    confidence: toConfidence(sorted[0][1]),
    alternatives: sorted.slice(1, 3).map(([skill, count]) => ({
      skill,
      confidence: toConfidence(count),
    })),
  };
}
/**
 * List the activity log files inside the look-back window and derive a cache
 * key from their paths and modification times.
 *
 * A file qualifies when its name starts with "activity-" and ends with
 * ".jsonl"; if the name carries a YYYY-MM-DD date, that date must not be
 * older than `maxDays` days ago. Names without a date are always kept.
 *
 * @param {number} maxDays - size of the look-back window in days
 * @returns {{ files: string[], cacheKey: string }} sorted absolute paths and
 *   a "path:mtimeMs" list joined with '|'
 */
function getActivityFileMeta(maxDays) {
  const oldest = new Date();
  oldest.setDate(oldest.getDate() - maxDays);
  const oldestDay = oldest.toISOString().slice(0, 10);

  const isWantedLog = (name) => {
    if (!name.startsWith('activity-') || !name.endsWith('.jsonl')) return false;
    const dated = name.match(/activity-(\d{4}-\d{2}-\d{2})/);
    // ISO dates compare correctly as plain strings.
    return dated === null || dated[1] >= oldestDay;
  };

  let files = [];
  try {
    const names = fs.readdirSync(DEBUG_DIR);
    names.sort();
    files = names.filter(isWantedLog).map(name => path.join(DEBUG_DIR, name));
  } catch {}

  // Cache key: each file path plus its mtime in milliseconds. A file that
  // cannot be stat'ed contributes its bare path.
  const keyParts = [];
  for (const file of files) {
    let part = file;
    try {
      part = `${file}:${fs.statSync(file).mtimeMs}`;
    } catch {}
    keyParts.push(part);
  }

  return { files, cacheKey: keyParts.join('|') };
}
/**
 * Load activity log events (two cache layers: in-process with a 5-minute
 * TTL, plus a file-level disk cache).
 *
 * The disk cache addresses the case where hooks run as independent
 * processes and would otherwise re-read the full logs on every invocation:
 *   - cache key = every activity file path + its mtime, concatenated
 *   - if no mtime changed, the disk cache is used and the logs are not re-read
 *
 * The in-process cache is only reused for the same `maxDays`, so a call with
 * a different look-back window never receives data loaded for another one.
 *
 * @param {number} maxDays - look-back window in days
 * @returns {Array<Object>} parsed events; lines that are not valid JSON or do
 *   not parse to an object are skipped
 */
function loadActivityLogs(maxDays = 30) {
  const now = Date.now();

  // Layer 1: in-process memory cache (TTL-bound, keyed by maxDays).
  const memoryHit =
    _activityCache.data &&
    _activityCache.maxDays === maxDays &&
    (now - _activityCache.ts) < ACTIVITY_CACHE_TTL;
  if (memoryHit) {
    return _activityCache.data;
  }

  // Collect the file list and the cache key.
  const { files, cacheKey } = getActivityFileMeta(maxDays);

  // Layer 2: file-level disk cache, reusable across processes.
  try {
    const cache = JSON.parse(fs.readFileSync(PATTERNS_CACHE_FILE, 'utf8'));
    if (cache.key === cacheKey && Array.isArray(cache.data)) {
      // Disk cache hit: back-fill the in-process cache and return.
      _activityCache = { data: cache.data, ts: now, maxDays };
      return cache.data;
    }
  } catch { /* cache missing or malformed: fall through and read the logs */ }

  // Cache miss: read every activity file.
  const events = [];
  for (const filePath of files) {
    let content;
    try {
      content = fs.readFileSync(filePath, 'utf8');
    } catch {
      continue; // unreadable file: best effort, skip it
    }
    for (const line of content.split('\n')) {
      if (line.trim() === '') continue;
      let parsed;
      try {
        parsed = JSON.parse(line);
      } catch {
        continue; // corrupt line: skip it
      }
      // A line such as `null` or `42` is valid JSON but not an event; keeping
      // it would make consumers that read `e.event` throw.
      if (parsed !== null && typeof parsed === 'object') {
        events.push(parsed);
      }
    }
  }

  // Write the disk cache (a write failure must not affect the main flow).
  try {
    if (!fs.existsSync(DEBUG_DIR)) fs.mkdirSync(DEBUG_DIR, { recursive: true });
    fs.writeFileSync(PATTERNS_CACHE_FILE, JSON.stringify({ key: cacheKey, data: events, ts: new Date().toISOString() }));
  } catch {}

  // Refresh the in-process cache.
  _activityCache = { data: events, ts: now, maxDays };
  return events;
}
// Module exports: the public API of this script. The typeof guard skips the
// assignment when no `module` object is defined.
if (typeof module !== 'undefined') {
module.exports = {
extractSkillSequences,
minePatterns,
predictNextSkill,
loadActivityLogs,
};
}
// CLI entry point: when the script is run directly, analyse the last 30 days
// of activity (30-minute sessions, minimum support 2) and print either a JSON
// document (--json) or a human-readable summary.
if (require.main === module) {
  const wantsJson = process.argv.includes('--json');
  const activity = loadActivityLogs(30);
  const sessions = extractSkillSequences(activity, 30);
  const patterns = minePatterns(sessions, 2);

  // Print the `limit` most frequent entries of one n-gram table under a
  // heading; prints nothing when the table is empty.
  const printTop = (heading, table, limit, keyWidth) => {
    const rows = Object.entries(table);
    if (rows.length === 0) return;
    console.log(heading);
    rows.sort((a, b) => b[1] - a[1]);
    for (const [key, count] of rows.slice(0, limit)) {
      console.log(` ${key.padEnd(keyWidth)} ${count}`);
    }
  };

  if (wantsJson) {
    console.log(JSON.stringify({ sessions: sessions.length, patterns }, null, 2));
  } else {
    const bigramTotal = Object.keys(patterns.bigrams).length;
    const trigramTotal = Object.keys(patterns.trigrams).length;
    console.log('=== 工作流模式分析 ===');
    console.log(`会话数: ${sessions.length}`);
    console.log(`2-gram 模式: ${bigramTotal}`);
    console.log(`3-gram 模式: ${trigramTotal}`);
    printTop('\nTop 10 2-gram:', patterns.bigrams, 10, 50);
    printTop('\nTop 5 3-gram:', patterns.trigrams, 5, 60);
  }
}