#!/usr/bin/env node
/**
 * 工作流模式识别 (v5.0)
 *
 * 从 activity 日志中识别技能使用的序列模式,
 * 用于预测下一个可能使用的技能。
 *
 * 核心函数:
 *   extractSkillSequences(activityLogs, windowMinutes) → 按窗口分割的技能序列
 *   minePatterns(sessions, minSupport) → n-gram 频率统计
 *   predictNextSkill(currentSkill, patterns) → 最高频后继技能
 */
|
|||
|
|
|
|||
|
|
const fs = require('fs');
|
|||
|
|
const path = require('path');
|
|||
|
|
|
|||
|
|
// ─── Path resolution ─────────────────────────────────────────

/**
 * Read the root directory from the project's path configuration.
 * @returns {string} value of `PATHS.root` exported by ./paths.config.js
 */
function detectClaudeRoot() {
  const { PATHS } = require('./paths.config.js');
  return PATHS.root;
}

const ROOT = detectClaudeRoot();
const DEBUG_DIR = path.join(ROOT, 'debug');

// On-disk cache file, so separate processes (e.g. hooks) can reuse parsed logs.
const PATTERNS_CACHE_FILE = path.join(DEBUG_DIR, '.workflow-patterns-cache.json');

// ─── In-process cache (avoids repeated reads within one process) ──

const ACTIVITY_CACHE_TTL = 5 * 60 * 1000; // 5-minute TTL, in milliseconds
let _activityCache = { data: null, ts: 0 };
|
|||
|
|
|
|||
|
|
/**
 * Split the skill events of an activity log into sessions using a time-gap window.
 *
 * Only entries with `event === 'skill'`, a truthy `detail` and a parseable `ts`
 * are used. Events are ordered by timestamp; whenever the gap between two
 * consecutive events exceeds `windowMinutes`, a new session starts.
 * Sessions containing fewer than two skills are discarded.
 *
 * The input array is not mutated.
 *
 * @param {Array} activityLogs - activity events; anything that is not an array yields `[]`
 * @param {number} windowMinutes - maximum gap (in minutes) between consecutive events of one session
 * @returns {Array<string[]>} sessions, each an array of `detail` values in chronological order
 */
function extractSkillSequences(activityLogs, windowMinutes = 30) {
  if (!Array.isArray(activityLogs)) return [];

  // Parse every timestamp once and drop entries that cannot be placed on the
  // timeline: a NaN time would make the sort comparator inconsistent and
  // would silently disable the gap check for the following event.
  const skillEvents = [];
  for (const entry of activityLogs) {
    if (!entry || entry.event !== 'skill' || !entry.detail || !entry.ts) continue;
    const time = new Date(entry.ts).getTime();
    if (Number.isNaN(time)) continue;
    skillEvents.push({ skill: entry.detail, time });
  }

  if (skillEvents.length === 0) return [];

  skillEvents.sort((a, b) => a.time - b.time);

  const windowMs = windowMinutes * 60 * 1000;
  const sessions = [];
  let currentSession = [skillEvents[0].skill];
  let lastTime = skillEvents[0].time;

  for (let i = 1; i < skillEvents.length; i++) {
    const { skill, time } = skillEvents[i];

    if (time - lastTime > windowMs) {
      // Gap exceeds the window: close the running session and start a new one.
      if (currentSession.length >= 2) sessions.push(currentSession);
      currentSession = [];
    }

    currentSession.push(skill);
    lastTime = time;
  }

  // Flush the trailing session.
  if (currentSession.length >= 2) sessions.push(currentSession);

  return sessions;
}
|
|||
|
|
|
|||
|
|
/**
 * Count 2-gram and 3-gram skill patterns across sessions and keep the frequent ones.
 *
 * Keys are the consecutive skill names joined with '→' (e.g. "A→B", "A→B→C").
 * Entries of `sessions` that are not arrays are skipped.
 *
 * @param {Array<string[]>} sessions - list of sessions; a non-array value is treated as empty
 * @param {number} minSupport - minimum number of occurrences for a pattern to be kept
 * @returns {{ bigrams: Object<string, number>, trigrams: Object<string, number> }} pattern → count
 */
function minePatterns(sessions, minSupport = 3) {
  const bigramCounts = {}; // "A→B" → count
  const trigramCounts = {}; // "A→B→C" → count

  const bump = (table, key) => {
    table[key] = (table[key] || 0) + 1;
  };

  for (const session of Array.isArray(sessions) ? sessions : []) {
    if (!Array.isArray(session)) continue;

    for (let i = 0; i + 1 < session.length; i++) {
      bump(bigramCounts, `${session[i]}→${session[i + 1]}`);

      if (i + 2 < session.length) {
        bump(trigramCounts, `${session[i]}→${session[i + 1]}→${session[i + 2]}`);
      }
    }
  }

  // Drop patterns seen fewer than `minSupport` times.
  const keepFrequent = (table) =>
    Object.fromEntries(Object.entries(table).filter(([, count]) => count >= minSupport));

  return { bigrams: keepFrequent(bigramCounts), trigrams: keepFrequent(trigramCounts) };
}
|
|||
|
|
|
|||
|
|
/**
 * Predict the skill most likely to follow `currentSkill`, based on bigram counts.
 *
 * Each bigram key is split on '→'; the first segment is the predecessor and
 * the second the successor. Confidence is the successor's share of all counts
 * that start with `currentSkill`, rounded to two decimals. Ties keep the
 * order in which the successors first appear in `patterns.bigrams`.
 *
 * NOTE(review): a skill name that itself contains '→' cannot be told apart
 * from the separator and will be mis-split — confirm skill names never contain it.
 *
 * @param {string} currentSkill - the skill that was just used
 * @param {Object} patterns - result of minePatterns(); only `bigrams` is read
 * @returns {{ skill: string, confidence: number, alternatives: Array<{ skill: string, confidence: number }> } | null}
 *   the best successor plus up to two runners-up, or null when no bigram starts with `currentSkill`
 */
function predictNextSkill(currentSkill, patterns) {
  const bigrams = (patterns && patterns.bigrams) || {};

  // A Map keeps successor names such as "constructor" or "__proto__" from
  // colliding with Object.prototype members while counting.
  const candidates = new Map();

  for (const [key, count] of Object.entries(bigrams)) {
    const [from, to] = key.split('→');
    if (from !== currentSkill || to === undefined) continue;
    candidates.set(to, (candidates.get(to) || 0) + count);
  }

  if (candidates.size === 0) return null;

  // Highest count first.
  const ranked = [...candidates].sort((a, b) => b[1] - a[1]);
  const total = ranked.reduce((sum, [, count]) => sum + count, 0);
  const toConfidence = (count) => Math.round((count / total) * 100) / 100;

  const [[bestSkill, bestCount], ...rest] = ranked;

  return {
    skill: bestSkill,
    confidence: toConfidence(bestCount),
    alternatives: rest.slice(0, 2).map(([skill, count]) => ({
      skill,
      confidence: toConfidence(count),
    })),
  };
}
|
|||
|
|
|
|||
|
|
/**
 * List the activity log files in DEBUG_DIR that fall inside the look-back
 * window and derive a cache key from their paths and modification times.
 *
 * A file qualifies when its name starts with "activity-" and ends with
 * ".jsonl". If the name carries a YYYY-MM-DD date, that date must not be
 * older than `maxDays` days ago; names without a date are always kept.
 * A directory that cannot be read produces an empty list.
 *
 * @param {number} maxDays - size of the look-back window in days
 * @returns {{ files: string[], cacheKey: string }} full paths sorted by file name, and
 *   "<path>:<mtimeMs>" parts joined with '|' (just "<path>" when stat fails)
 */
function getActivityFileMeta(maxDays) {
  const threshold = new Date();
  threshold.setDate(threshold.getDate() - maxDays);
  const oldestDay = threshold.toISOString().slice(0, 10);

  const files = [];
  try {
    const names = fs.readdirSync(DEBUG_DIR).sort();
    for (const name of names) {
      if (!name.startsWith('activity-') || !name.endsWith('.jsonl')) continue;

      const dated = name.match(/activity-(\d{4}-\d{2}-\d{2})/);
      if (dated && dated[1] < oldestDay) continue;

      files.push(path.join(DEBUG_DIR, name));
    }
  } catch {
    // Directory missing or unreadable: behave as if there were no logs.
  }

  // Cache key: one "<path>:<mtime in ms>" part per file.
  const keyParts = [];
  for (const file of files) {
    let part = file;
    try {
      part = `${file}:${fs.statSync(file).mtimeMs}`;
    } catch {
      // stat failed: fall back to the bare path.
    }
    keyParts.push(part);
  }

  return { files, cacheKey: keyParts.join('|') };
}
|
|||
|
|
|
|||
|
|
/**
 * Load activity events from the activity-*.jsonl files, using two cache layers.
 *
 * 1. In-process cache: reused for ACTIVITY_CACHE_TTL milliseconds, but only
 *    for the same `maxDays` it was filled with.
 * 2. On-disk cache (PATTERNS_CACHE_FILE): reused across processes while the
 *    cache key from getActivityFileMeta() (file paths + mtimes) is unchanged,
 *    so separate hook processes do not have to re-parse the logs.
 *
 * On a miss every file is read line by line; lines that are blank, are not
 * valid JSON, or do not parse to an object are skipped, and unreadable files
 * are ignored. Failure to write the disk cache is ignored as well.
 *
 * The returned array is the cached instance itself; callers should treat it
 * as read-only.
 *
 * @param {number} maxDays - look-back window in days
 * @returns {Array<Object>} parsed activity events, in file order
 */
function loadActivityLogs(maxDays = 30) {
  const now = Date.now();

  // Layer 1: in-process cache. It must match `maxDays`, otherwise a call with
  // a different window would receive data loaded for another window.
  if (
    _activityCache.data &&
    _activityCache.maxDays === maxDays &&
    now - _activityCache.ts < ACTIVITY_CACHE_TTL
  ) {
    return _activityCache.data;
  }

  const { files, cacheKey } = getActivityFileMeta(maxDays);

  // Layer 2: on-disk cache shared between processes.
  try {
    const cache = JSON.parse(fs.readFileSync(PATTERNS_CACHE_FILE, 'utf8'));
    if (cache.key === cacheKey && Array.isArray(cache.data)) {
      _activityCache = { data: cache.data, ts: now, maxDays };
      return cache.data;
    }
  } catch {
    // Cache file missing or malformed: fall through and read the logs.
  }

  // Cache miss: parse every activity file.
  const events = [];
  for (const filePath of files) {
    let lines;
    try {
      lines = fs.readFileSync(filePath, 'utf8').split('\n');
    } catch {
      continue; // Unreadable file: skip it.
    }

    for (const line of lines) {
      if (line.trim() === '') continue;
      try {
        const parsed = JSON.parse(line);
        // Keep only object records; a bare `null` or scalar is not an event.
        if (parsed !== null && typeof parsed === 'object') events.push(parsed);
      } catch {
        // Malformed line: skip it.
      }
    }
  }

  // Persist the disk cache; a failure here must not affect the result.
  try {
    fs.mkdirSync(DEBUG_DIR, { recursive: true }); // no-op when the directory exists
    fs.writeFileSync(
      PATTERNS_CACHE_FILE,
      JSON.stringify({ key: cacheKey, data: events, ts: new Date().toISOString() }),
    );
  } catch {
    // Best effort only.
  }

  _activityCache = { data: events, ts: now, maxDays };
  return events;
}
|
|||
|
|
|
|||
|
|
// Public API (only assigned when a CommonJS `module` object is available).
if (typeof module !== 'undefined') {
  const publicApi = {
    extractSkillSequences,
    minePatterns,
    predictNextSkill,
    loadActivityLogs,
  };
  module.exports = publicApi;
}
|
|||
|
|
|
|||
|
|
// CLI entry point: analyse the last 30 days of activity with a 30-minute
// session window and a minimum support of 2, then print the result as JSON
// (--json) or as a plain-text report.
if (require.main === module) {
  const wantsJson = process.argv.includes('--json');

  const sessions = extractSkillSequences(loadActivityLogs(30), 30);
  const patterns = minePatterns(sessions, 2);

  // Print `title` followed by the `limit` most frequent entries of `table`,
  // padding each key to `width` columns. Prints nothing for an empty table.
  const printTop = (title, table, limit, width) => {
    const ranked = Object.entries(table).sort((a, b) => b[1] - a[1]);
    if (ranked.length === 0) return;

    console.log(title);
    for (const [key, count] of ranked.slice(0, limit)) {
      console.log(` ${key.padEnd(width)} ${count}`);
    }
  };

  if (wantsJson) {
    console.log(JSON.stringify({ sessions: sessions.length, patterns }, null, 2));
  } else {
    console.log('=== 工作流模式分析 ===');
    console.log(`会话数: ${sessions.length}`);
    console.log(`2-gram 模式: ${Object.keys(patterns.bigrams).length}`);
    console.log(`3-gram 模式: ${Object.keys(patterns.trigrams).length}`);

    printTop('\nTop 10 2-gram:', patterns.bigrams, 10, 50);
    printTop('\nTop 5 3-gram:', patterns.trigrams, 5, 60);
  }
}
|