bookworm-smart-assistant/scripts/route-ab-test.js

283 lines
8.8 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env node
/**
* 路由 A/B 实验框架 (v5.3)
*
* 当路由引擎 top-2 候选分数接近时 (差距 < 15%)
* 使用 Thompson Sampling 随机选择,收集隐式反馈,
* 自动收敛到最优路由。
*
* 模块导出:
* shouldExperiment(top2) → boolean
* selectVariant(skillA, skillB) → { selected, experiment }
* recordOutcome(experimentId, skill, outcome) → void
* getExperimentStats() → { experiments, convergence }
* resolveConverged(minTrials=20) → string[] (自动定论的技能对)
*/
const fs = require('fs');
const path = require('path');
const detectClaudeRoot = () => require('./paths.config.js').PATHS.root;
const ROOT = detectClaudeRoot();
const DEBUG_DIR = path.join(ROOT, 'debug');
const AB_FILE = path.join(DEBUG_DIR, 'ab-experiments.json');
const AB_LOG = path.join(DEBUG_DIR, 'ab-experiments.jsonl');
// 实验触发阈值: top-2 置信度差距 < 此值时启动实验
const EXPERIMENT_THRESHOLD = 0.15;
// 最小试验次数 (每个变体) 才能判定收敛
const MIN_TRIALS = 20;
// 收敛阈值: 胜率差 > 此值视为收敛
const CONVERGENCE_THRESHOLD = 0.2;
/**
* 加载 A/B 实验数据
* @returns {{ pairs: Object }}
*/
function loadExperiments() {
try {
if (fs.existsSync(AB_FILE)) {
return JSON.parse(fs.readFileSync(AB_FILE, 'utf8'));
}
} catch {}
return { pairs: {}, createdAt: new Date().toISOString() };
}
/**
* 保存 A/B 实验数据
*/
function saveExperiments(data) {
if (!fs.existsSync(DEBUG_DIR)) fs.mkdirSync(DEBUG_DIR, { recursive: true });
fs.writeFileSync(AB_FILE, JSON.stringify(data, null, 2) + '\n');
}
/**
* 生成实验对键 (排序确保 A-B 和 B-A 是同一个实验)
*/
function pairKey(skillA, skillB) {
return [skillA, skillB].sort().join(':');
}
/**
* 判断是否应启动 A/B 实验
* @param {{ name: string, confidence: number }[]} top2 - top-2 路由结果
* @returns {boolean}
*/
function shouldExperiment(top2) {
if (!top2 || top2.length < 2) return false;
const [a, b] = top2;
if (a.confidence === 0) return false;
const gap = (a.confidence - b.confidence) / a.confidence;
return gap < EXPERIMENT_THRESHOLD && b.confidence >= 0.3;
}
/**
* Thompson Sampling 选择变体
* 使用 Beta 分布近似: Beta(successes+1, failures+1)
*
* @param {string} skillA
* @param {string} skillB
* @returns {{ selected: string, experiment: { id: string, pair: string } }}
*/
function selectVariant(skillA, skillB) {
const data = loadExperiments();
const key = pairKey(skillA, skillB);
if (!data.pairs[key]) {
data.pairs[key] = {
skills: [skillA, skillB],
createdAt: new Date().toISOString(),
stats: {
[skillA]: { trials: 0, successes: 0 },
[skillB]: { trials: 0, successes: 0 },
},
resolved: null, // 收敛后填入获胜技能名
};
saveExperiments(data);
}
const pair = data.pairs[key];
// 已收敛直接返回获胜者
if (pair.resolved) {
return {
selected: pair.resolved,
experiment: { id: key, pair: key, resolved: true },
};
}
// v5.9: 真 Beta 分布采样 (Gamma 分布法)
// Beta(alpha, beta) = Gamma(alpha,1) / (Gamma(alpha,1) + Gamma(beta,1))
const sampleGamma = (shape) => {
// Marsaglia-Tsang 方法 (shape >= 1)
// 对 shape < 1 使用 Gamma(shape+1) * U^(1/shape) 变换
if (shape < 1) {
return sampleGamma(shape + 1) * Math.pow(Math.random(), 1.0 / shape);
}
const d = shape - 1.0 / 3.0;
const c = 1.0 / Math.sqrt(9.0 * d);
while (true) {
let x, v;
do {
// Box-Muller 正态分布采样
const u1 = Math.random();
const u2 = Math.random();
x = Math.sqrt(-2.0 * Math.log(u1)) * Math.cos(2.0 * Math.PI * u2);
v = 1.0 + c * x;
} while (v <= 0);
v = v * v * v;
const u = Math.random();
if (u < 1.0 - 0.0331 * (x * x) * (x * x)) return d * v;
if (Math.log(u) < 0.5 * x * x + d * (1.0 - v + Math.log(v))) return d * v;
}
};
const sampleBeta = (s, f) => {
const alpha = s + 1;
const beta = f + 1;
const gA = sampleGamma(alpha);
const gB = sampleGamma(beta);
// v5.9: NaN 防护 — 极端情况下 gA+gB=0 返回无信息先验 0.5
if (gA + gB === 0) return 0.5;
return gA / (gA + gB);
};
const statsA = pair.stats[skillA] || { trials: 0, successes: 0 };
const statsB = pair.stats[skillB] || { trials: 0, successes: 0 };
const sampleA = sampleBeta(statsA.successes, statsA.trials - statsA.successes);
const sampleB = sampleBeta(statsB.successes, statsB.trials - statsB.successes);
const selected = sampleA >= sampleB ? skillA : skillB;
return {
selected,
experiment: { id: key, pair: key },
};
}
/**
* 记录实验结果
* @param {string} experimentId - 实验对键
* @param {string} skill - 实际使用的技能
* @param {'success'|'failure'} outcome - 结果 (success=用户继续使用, failure=用户切换)
*/
function recordOutcome(experimentId, skill, outcome) {
const data = loadExperiments();
const pair = data.pairs[experimentId];
if (!pair) return;
if (!pair.stats[skill]) {
pair.stats[skill] = { trials: 0, successes: 0 };
}
pair.stats[skill].trials++;
if (outcome === 'success') pair.stats[skill].successes++;
saveExperiments(data);
// 追加日志
try {
const entry = {
ts: new Date().toISOString(),
experimentId,
skill,
outcome,
stats: pair.stats,
};
fs.appendFileSync(AB_LOG, JSON.stringify(entry) + '\n');
} catch {}
}
/**
* 获取实验统计
* @returns {{ totalPairs: number, active: number, resolved: number, pairs: Object }}
*/
function getExperimentStats() {
const data = loadExperiments();
const entries = Object.entries(data.pairs);
return {
totalPairs: entries.length,
active: entries.filter(([, v]) => !v.resolved).length,
resolved: entries.filter(([, v]) => v.resolved).length,
pairs: data.pairs,
};
}
/**
* 自动收敛判定: 试验次数足够且胜率差明显
* @param {number} minTrials - 每个变体最小试验次数
* @returns {string[]} 新收敛的实验对键列表
*/
function resolveConverged(minTrials = MIN_TRIALS) {
const data = loadExperiments();
const newlyResolved = [];
for (const [key, pair] of Object.entries(data.pairs)) {
if (pair.resolved) continue;
const skills = pair.skills;
const statsA = pair.stats[skills[0]] || { trials: 0, successes: 0 };
const statsB = pair.stats[skills[1]] || { trials: 0, successes: 0 };
// 两个变体都需要足够的试验次数
if (statsA.trials < minTrials || statsB.trials < minTrials) continue;
const rateA = statsA.successes / statsA.trials;
const rateB = statsB.successes / statsB.trials;
const gap = Math.abs(rateA - rateB);
if (gap >= CONVERGENCE_THRESHOLD) {
pair.resolved = rateA > rateB ? skills[0] : skills[1];
pair.resolvedAt = new Date().toISOString();
pair.resolvedReason = `胜率差 ${(gap * 100).toFixed(1)}% (${skills[0]}: ${(rateA * 100).toFixed(0)}%, ${skills[1]}: ${(rateB * 100).toFixed(0)}%)`;
newlyResolved.push(key);
}
}
if (newlyResolved.length > 0) saveExperiments(data);
return newlyResolved;
}
// 模块导出
if (typeof module !== 'undefined') {
module.exports = {
shouldExperiment,
selectVariant,
recordOutcome,
getExperimentStats,
resolveConverged,
loadExperiments,
saveExperiments,
pairKey,
EXPERIMENT_THRESHOLD,
MIN_TRIALS,
CONVERGENCE_THRESHOLD,
};
}
// CLI 入口
if (require.main === module) {
const stats = getExperimentStats();
console.log('=== A/B 路由实验 ===');
console.log(`总实验对: ${stats.totalPairs}`);
console.log(`活跃: ${stats.active}, 已收敛: ${stats.resolved}`);
for (const [key, pair] of Object.entries(stats.pairs)) {
const skills = pair.skills;
const statsA = pair.stats[skills[0]] || { trials: 0, successes: 0 };
const statsB = pair.stats[skills[1]] || { trials: 0, successes: 0 };
const rateA = statsA.trials > 0 ? (statsA.successes / statsA.trials * 100).toFixed(0) : '?';
const rateB = statsB.trials > 0 ? (statsB.successes / statsB.trials * 100).toFixed(0) : '?';
const status = pair.resolved ? `RESOLVED → ${pair.resolved}` : 'ACTIVE';
console.log(`\n ${key} [${status}]`);
console.log(` ${skills[0]}: ${statsA.trials} trials, ${rateA}% success`);
console.log(` ${skills[1]}: ${statsB.trials} trials, ${rateB}% success`);
}
// 尝试收敛
const resolved = resolveConverged();
if (resolved.length > 0) {
console.log(`\n新收敛: ${resolved.join(', ')}`);
}
}