- VERSION file as authoritative version source - export.mjs reads VERSION with package.json fallback - bw-ota.ps1 DryRun mode for safe testing - auto-setup.ps1 bumped to v3.2.0 (Phase 8 OTA)
97 lines
3.6 KiB
JavaScript
97 lines
3.6 KiB
JavaScript
#!/usr/bin/env node
|
|
/**
 * Shadow Haiku Route Evaluation Tool
 * Analyzes shadow-route-log.jsonl for haiku routing feasibility.
 *
 * Usage:
 *   node scripts/tools/shadow-haiku-eval.js            # Summary
 *   node scripts/tools/shadow-haiku-eval.js --export   # TSV for manual review
 */
|
|
'use strict';
|
|
const fs = require('fs');
|
|
const path = require('path');
|
|
|
|
// Root directory: two levels above the folder that contains this script.
const CLAUDE_ROOT = path.join(__dirname, '..', '..');

// JSONL shadow routing log that this tool reads, under <root>/debug.
const LOG_FILE = path.resolve(CLAUDE_ROOT, 'debug', 'shadow-route-log.jsonl');
|
|
|
|
/**
 * Read the shadow routing log (one JSON document per line) from LOG_FILE.
 *
 * If the file does not exist, an error is printed to stderr and the process
 * exits with status 1.
 *
 * Lines that are empty or fail to parse are skipped on purpose (best-effort
 * reading of an append-only log). Lines that parse to something other than a
 * plain object (numbers, strings, booleans, arrays, null) are skipped as well,
 * because the reporting functions read named fields off every record.
 *
 * @returns {Array<Object>} Parsed records in file order.
 */
function loadLog() {
  if (!fs.existsSync(LOG_FILE)) {
    console.error('No shadow log at ' + LOG_FILE);
    process.exit(1);
  }

  const records = [];
  for (const line of fs.readFileSync(LOG_FILE, 'utf8').split('\n')) {
    if (!line) continue;

    let parsed;
    try {
      parsed = JSON.parse(line);
    } catch {
      // Malformed line: ignore it and keep reading.
      continue;
    }

    // Only plain objects are usable records.
    if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) {
      records.push(parsed);
    }
  }
  return records;
}
|
|
|
|
/**
 * Print a human-readable summary of the shadow routing log to stdout.
 *
 * Sections, in order: entry count, covered period, top 10 primary skills,
 * confidence buckets, complexity histogram, inherited count, close-call rate
 * and a routing recommendation. With an empty list only the title and the
 * entry count are printed.
 *
 * @param {Array<Object>} entries - Parsed log records (non-null). Fields read:
 *   `ts` (timestamp string), `p` (primary skill), `cf` (confidence),
 *   `it.c` (complexity label), `ih` (inherited flag) and `t5` (ranked
 *   candidates, each with a numeric `c`).
 * @returns {void}
 */
function summary(entries) {
  console.log('=== Shadow Route Log Summary ===');
  console.log('Entries: ' + entries.length);
  if (!entries.length) return;

  const total = entries.length;
  const percent = (count) => (count / total * 100).toFixed(1);

  // Period: use the first and last records that actually carry a timestamp,
  // so one record without `ts` cannot abort the whole report.
  const stamped = entries.filter((entry) => typeof entry.ts === 'string');
  if (stamped.length) {
    const first = stamped[0].ts.slice(0, 10);
    const last = stamped[stamped.length - 1].ts.slice(0, 10);
    console.log('Period: ' + first + ' ~ ' + last);
  } else {
    console.log('Period: unknown');
  }

  // A Map (not a plain object) keeps skill names such as "constructor" or
  // "__proto__" from colliding with inherited Object.prototype members.
  const skillCounts = new Map();
  for (const entry of entries) {
    const skill = String(entry.p);
    skillCounts.set(skill, (skillCounts.get(skill) || 0) + 1);
  }
  const ranked = [...skillCounts.entries()].sort((a, b) => b[1] - a[1]);
  console.log('\nTop 10 skills:');
  for (const [skill, count] of ranked.slice(0, 10)) {
    console.log(' ' + skill + ': ' + count + ' (' + percent(count) + '%)');
  }

  // Confidence buckets; a record without a numeric `cf` lands in "low".
  let hi = 0;
  let mid = 0;
  let lo = 0;
  for (const entry of entries) {
    if (entry.cf >= 0.8) hi++;
    else if (entry.cf >= 0.5) mid++;
    else lo++;
  }
  console.log('\nConfidence: high(>=0.8)=' + hi + ' mid(0.5-0.8)=' + mid + ' low(<0.5)=' + lo);

  // Prototype-less object: safe for arbitrary labels, still JSON-serializable.
  const compCounts = Object.create(null);
  for (const entry of entries) {
    const label = (entry.it && entry.it.c) || 'unknown';
    compCounts[label] = (compCounts[label] || 0) + 1;
  }
  console.log('\nComplexity: ' + JSON.stringify(compCounts));

  const inherited = entries.filter((entry) => entry.ih).length;
  console.log('Inherited: ' + inherited + '/' + total);

  // Close call: the runner-up score is within 10% (relative) of the top score.
  let closeCalls = 0;
  for (const entry of entries) {
    if (!Array.isArray(entry.t5) || entry.t5.length < 2) continue;
    const top = entry.t5[0];
    const runnerUp = entry.t5[1];
    if (!top || !runnerUp || !(top.c > 0)) continue;
    if ((top.c - runnerUp.c) / top.c < 0.10) closeCalls++;
  }
  console.log('Close calls (gap<10%): ' + closeCalls + ' (' + percent(closeCalls) + '%)');

  console.log('\n=== Haiku Routing Recommendation ===');
  if (total < 50) {
    console.log('Insufficient data (' + total + '/50 min). Continue collecting.');
  } else if (closeCalls / total > 0.20) {
    console.log('High ambiguity rate. Haiku semantic routing may improve accuracy.');
    console.log('Cost: +300-800ms per prompt. Consider: activate only for close-call queries.');
  } else {
    console.log('TF-IDF routing performing well. Haiku latency (+300-800ms) not justified.');
  }
}
|
|
|
|
/**
 * Print the log as tab-separated values on stdout for manual review.
 *
 * A header row is printed first, then one row per record with the columns
 * ts, prompt_head, complexity, primary, confidence, runner_up, domain and
 * inherited. Missing fields produce empty cells (confidence falls back to
 * 0.00) instead of throwing, and tabs or line breaks inside any cell are
 * replaced by spaces so every record stays on one row with eight columns.
 *
 * @param {Array<Object>} entries - Parsed log records (non-null). Fields read:
 *   `ts`, `ph`, `it.c`, `p`, `cf`, `t5[1]` (`n`, `c`), `d` and `ih`.
 * @returns {void}
 */
function exportTsv(entries) {
  // Make a value safe to embed in a single TSV cell.
  const cell = (value) => String(value).replace(/[\t\n\r]/g, ' ');

  // Two-decimal score; anything that is not a finite number counts as 0.
  const score = (value) => {
    const num = Number(value);
    return (Number.isFinite(num) ? num : 0).toFixed(2);
  };

  console.log(['ts', 'prompt_head', 'complexity', 'primary', 'confidence', 'runner_up', 'domain', 'inherited'].join('\t'));

  for (const e of entries) {
    // Second-ranked candidate, shown as "name:score" (name only when the
    // candidate has no numeric score).
    const runnerUp = Array.isArray(e.t5) ? e.t5[1] : undefined;
    let runnerUpCell = '';
    if (runnerUp) {
      runnerUpCell = typeof runnerUp.c === 'number'
        ? runnerUp.n + ':' + runnerUp.c.toFixed(2)
        : String(runnerUp.n);
    }

    console.log([
      typeof e.ts === 'string' ? cell(e.ts.slice(0, 19)) : '',
      cell(e.ph || '').slice(0, 100),
      cell((e.it && e.it.c) || ''),
      cell(e.p || ''),
      score(e.cf),
      cell(runnerUpCell),
      cell(e.d || ''),
      e.ih ? 'Y' : 'N',
    ].join('\t'));
  }
}
|
|
|
|
// Entry point: load the log once, then emit either the TSV export (when the
// --export flag is present on the command line) or the summary report.
const entries = loadLog();
const report = process.argv.includes('--export') ? exportTsv : summary;
report(entries);
|