bookworm-smart-assistant/scripts/workflow-patterns.js

265 lines
8.6 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env node
/**
* 工作流模式识别 (v5.0)
*
* 从 activity 日志中识别技能使用的序列模式,
* 用于预测下一个可能使用的技能。
*
* 核心函数:
* extractSkillSequences(activityLogs, windowMinutes) → 按窗口分割的技能序列
* minePatterns(sessions, minSupport) → n-gram 频率统计
* predictNextSkill(currentSkill, patterns) → 最高频后继技能
*/
const fs = require('fs');
const path = require('path');
// ─── Module-level cache (avoids re-reading the logs on repeated calls within one process) ───
// `data` holds the loaded activity events; `ts` is the Date.now() value recorded when it was filled.
let _activityCache = { data: null, ts: 0 };
const ACTIVITY_CACHE_TTL = 5 * 60 * 1000; // 5-minute TTL, in milliseconds
// Resolves the root directory from the project's paths.config.js (PATHS.root).
const detectClaudeRoot = () => require('./paths.config.js').PATHS.root;
const ROOT = detectClaudeRoot();
// Directory that holds the activity-*.jsonl logs and the on-disk cache file.
const DEBUG_DIR = path.join(ROOT, 'debug');
// ─── File-level cache path (lets separate hook processes reuse already-parsed logs) ───
const PATTERNS_CACHE_FILE = path.join(DEBUG_DIR, '.workflow-patterns-cache.json');
/**
 * Splits the skill events of an activity log into sessions.
 *
 * Only entries with `event === 'skill'` and truthy `detail` and `ts` are used.
 * They are ordered by timestamp, and a new session starts whenever the gap to
 * the previous skill event exceeds the window. Sessions with fewer than two
 * skills are discarded.
 *
 * @param {Array} activityLogs - Activity events ({ event, detail, ts, ... }).
 * @param {number} windowMinutes - Maximum gap between consecutive skills of one session, in minutes.
 * @returns {Array<string[]>} Sessions, each an array of skill names in chronological order.
 */
function extractSkillSequences(activityLogs, windowMinutes = 30) {
  // Parse each timestamp once, then order the skill events chronologically.
  const timeline = activityLogs
    .filter((entry) => entry.event === 'skill' && entry.detail && entry.ts)
    .map((entry) => ({ skill: entry.detail, at: new Date(entry.ts).getTime() }))
    .sort((left, right) => left.at - right.at);

  if (timeline.length === 0) {
    return [];
  }

  const maxGapMs = windowMinutes * 60 * 1000;
  const result = [];
  let run = [];
  let previousAt = timeline[0].at;

  // Closes the current run, keeping it only if it contains at least two skills.
  const flush = () => {
    if (run.length >= 2) {
      result.push(run);
    }
    run = [];
  };

  for (const { skill, at } of timeline) {
    if (at - previousAt > maxGapMs) {
      // Gap exceeds the window: the previous session is over.
      flush();
    }
    run.push(skill);
    previousAt = at;
  }

  // The trailing session is still open when the loop ends.
  flush();
  return result;
}
/**
 * Mines n-gram patterns (2-grams and 3-grams) from skill sessions.
 *
 * Keys join consecutive skill names with '→' ("A→B", "A→B→C"). This is the
 * separator predictNextSkill() splits on; without it the keys of different
 * skill pairs could collide (e.g. "ab"+"c" vs "a"+"bc") and no prediction
 * could ever match.
 *
 * @param {Array<string[]>} sessions - Sessions, each an ordered array of skill names.
 * @param {number} minSupport - Minimum number of occurrences for a pattern to be kept.
 * @returns {{ bigrams: Object<string, number>, trigrams: Object<string, number> }}
 *   Frequency maps containing only the patterns seen at least `minSupport` times.
 */
function minePatterns(sessions, minSupport = 3) {
  const SEPARATOR = '→';
  const bigrams = {}; // "A→B" → count
  const trigrams = {}; // "A→B→C" → count

  for (const session of sessions) {
    for (let i = 0; i < session.length - 1; i++) {
      const key2 = `${session[i]}${SEPARATOR}${session[i + 1]}`;
      bigrams[key2] = (bigrams[key2] || 0) + 1;

      if (i < session.length - 2) {
        const key3 = `${key2}${SEPARATOR}${session[i + 2]}`;
        trigrams[key3] = (trigrams[key3] || 0) + 1;
      }
    }
  }

  // Drop patterns that occur fewer than `minSupport` times.
  const keepFrequent = (counts) =>
    Object.fromEntries(
      Object.entries(counts).filter(([, count]) => count >= minSupport)
    );

  return { bigrams: keepFrequent(bigrams), trigrams: keepFrequent(trigrams) };
}
/**
 * Predicts the skill most likely to follow `currentSkill`.
 *
 * Looks at every bigram "A→B" whose left side equals `currentSkill` and ranks
 * the right-hand sides by count. Confidence is the candidate's share of all
 * matching bigram counts, rounded to two decimals.
 *
 * @param {string} currentSkill - The skill that was just used.
 * @param {Object} patterns - Result of minePatterns(); only `patterns.bigrams` is read.
 * @returns {{ skill: string, confidence: number, alternatives: Array<{ skill: string, confidence: number }> } | null}
 *   The best successor plus up to two runner-ups, or null when no bigram starts with `currentSkill`.
 */
function predictNextSkill(currentSkill, patterns) {
  const bigrams = (patterns && patterns.bigrams) || {};

  // Null-prototype map: a successor named "constructor" or "toString" must not
  // pick up an inherited Object.prototype member as its starting count.
  const candidates = Object.create(null);

  for (const [key, count] of Object.entries(bigrams)) {
    const [from, to] = key.split('→');
    // Skip other predecessors and malformed keys that have no successor part.
    if (from !== currentSkill || to === undefined) continue;
    candidates[to] = (candidates[to] || 0) + count;
  }

  // Highest count first.
  const sorted = Object.entries(candidates).sort((a, b) => b[1] - a[1]);
  if (sorted.length === 0) return null;

  const total = sorted.reduce((sum, [, count]) => sum + count, 0);
  const share = (count) => Math.round(count / total * 100) / 100;

  return {
    skill: sorted[0][0],
    confidence: share(sorted[0][1]),
    alternatives: sorted.slice(1, 3).map(([skill, count]) => ({
      skill,
      confidence: share(count),
    })),
  };
}
/**
 * Lists the activity log files to load and derives a cache key from them.
 *
 * A file qualifies when its name starts with "activity-" and ends with
 * ".jsonl". If the name carries a YYYY-MM-DD date, that date must not be older
 * than `maxDays` days; names without a date are always kept. The cache key is
 * each file path joined with its mtime in milliseconds (just the path when
 * stat fails), so it changes whenever a file is added, removed or modified.
 *
 * NOTE(review): the cutoff is computed with local-date arithmetic but
 * formatted through toISOString() (UTC) — confirm this matches how the log
 * file names are dated.
 *
 * @param {number} maxDays - How many days back to include.
 * @returns {{ files: string[], cacheKey: string }} Sorted absolute file paths and the derived key.
 */
function getActivityFileMeta(maxDays) {
  const threshold = new Date();
  threshold.setDate(threshold.getDate() - maxDays);
  const oldestDay = threshold.toISOString().slice(0, 10);

  // True for activity log names that are undated or dated on/after the cutoff day.
  const isWantedLog = (name) => {
    if (!name.startsWith('activity-') || !name.endsWith('.jsonl')) {
      return false;
    }
    const dated = name.match(/activity-(\d{4}-\d{2}-\d{2})/);
    return dated === null || dated[1] >= oldestDay;
  };

  let files = [];
  try {
    const names = fs.readdirSync(DEBUG_DIR).filter(isWantedLog);
    names.sort();
    files = names.map((name) => path.join(DEBUG_DIR, name));
  } catch {}

  // Cache key: every file path plus its mtime (ms), joined with '|'.
  const stamps = [];
  for (const file of files) {
    let stamp = file;
    try {
      stamp = `${file}:${fs.statSync(file).mtimeMs}`;
    } catch {}
    stamps.push(stamp);
  }

  return { files, cacheKey: stamps.join('|') };
}
/**
 * Loads activity events through a two-layer cache.
 *
 * Layer 1 is an in-process cache with a 5-minute TTL. It is only reused for
 * the same `maxDays`, so a call with a different look-back window never
 * receives data loaded for another window.
 *
 * Layer 2 is a JSON file on disk shared between processes (each hook runs in
 * its own process). Its key is the list of activity file paths plus their
 * mtimes, so it is hit only while no log file was added, removed or modified.
 *
 * On a miss every activity file is read line by line as JSON. Unreadable
 * files, unparseable lines and lines that do not parse to an object are
 * skipped. Failing to write the disk cache is ignored.
 *
 * @param {number} maxDays - How many days of logs to load.
 * @returns {Array} Parsed activity events.
 */
function loadActivityLogs(maxDays = 30) {
  const now = Date.now();

  // Layer 1: in-process cache, valid for this maxDays only.
  if (
    _activityCache.data &&
    _activityCache.maxDays === maxDays &&
    now - _activityCache.ts < ACTIVITY_CACHE_TTL
  ) {
    return _activityCache.data;
  }

  // Collect the file list and the cache key derived from it.
  const { files, cacheKey } = getActivityFileMeta(maxDays);

  // Layer 2: on-disk cache shared across processes.
  try {
    const cache = JSON.parse(fs.readFileSync(PATTERNS_CACHE_FILE, 'utf8'));
    if (cache.key === cacheKey && Array.isArray(cache.data)) {
      // Disk hit: refill the in-process cache before returning.
      _activityCache = { data: cache.data, ts: now, maxDays };
      return cache.data;
    }
  } catch { /* cache missing or malformed: fall through and read the logs */ }

  // Cache miss: read every activity file.
  const events = [];
  for (const filePath of files) {
    let text;
    try {
      text = fs.readFileSync(filePath, 'utf8');
    } catch {
      continue; // unreadable file: skip it, keep the rest
    }
    for (const line of text.split('\n')) {
      if (line.trim() === '') continue;
      try {
        const parsed = JSON.parse(line);
        // Keep objects only: a bare null/number/string line is not an event
        // and would crash consumers that read `event.event`.
        if (parsed !== null && typeof parsed === 'object') {
          events.push(parsed);
        }
      } catch { /* malformed line: skip */ }
    }
  }

  // Write the disk cache; a failure here must not affect the result.
  try {
    if (!fs.existsSync(DEBUG_DIR)) fs.mkdirSync(DEBUG_DIR, { recursive: true });
    fs.writeFileSync(
      PATTERNS_CACHE_FILE,
      JSON.stringify({ key: cacheKey, data: events, ts: new Date().toISOString() })
    );
  } catch {}

  // Refresh the in-process cache.
  _activityCache = { data: events, ts: now, maxDays };
  return events;
}
// Module exports (guarded so the file can also be evaluated where `module` is not defined).
if (typeof module !== 'undefined') {
  const publicApi = {
    extractSkillSequences,
    minePatterns,
    predictNextSkill,
    loadActivityLogs,
  };
  module.exports = publicApi;
}
// CLI entry point: analyses the last 30 days of activity and prints the mined
// patterns, as JSON when --json is passed and as a text report otherwise.
if (require.main === module) {
  const wantsJson = process.argv.includes('--json');
  const events = loadActivityLogs(30);
  const sessions = extractSkillSequences(events, 30);
  const patterns = minePatterns(sessions, 2);

  // Prints the `limit` most frequent entries of an n-gram table under `heading`,
  // padding each key to `width` columns. Prints nothing for an empty table.
  const printTop = (heading, table, limit, width) => {
    const rows = Object.entries(table);
    if (rows.length === 0) {
      return;
    }
    console.log(heading);
    rows.sort((a, b) => b[1] - a[1]);
    for (const [key, count] of rows.slice(0, limit)) {
      console.log(` ${key.padEnd(width)} ${count}`);
    }
  };

  if (wantsJson) {
    console.log(JSON.stringify({ sessions: sessions.length, patterns }, null, 2));
  } else {
    console.log('=== 工作流模式分析 ===');
    console.log(`会话数: ${sessions.length}`);
    console.log(`2-gram 模式: ${Object.keys(patterns.bigrams).length}`);
    console.log(`3-gram 模式: ${Object.keys(patterns.trigrams).length}`);
    printTop('\nTop 10 2-gram:', patterns.bigrams, 10, 50);
    printTop('\nTop 5 3-gram:', patterns.trigrams, 5, 60);
  }
}