bookworm-smart-assistant/scripts/tools/shadow-haiku-eval.js

97 lines
3.6 KiB
JavaScript
Raw Normal View History

#!/usr/bin/env node
/**
* Shadow Haiku Route Evaluation Tool
* Analyzes shadow-route-log.jsonl for haiku routing feasibility.
*
* Usage:
* node scripts/tools/shadow-haiku-eval.js # Summary
* node scripts/tools/shadow-haiku-eval.js --export # TSV for manual review
*/
'use strict';
const fs = require('fs');
const path = require('path');
// Root directory used by this tool: two levels above the directory containing this script.
const CLAUDE_ROOT = path.resolve(__dirname, '..', '..');
// Path of the JSONL shadow-route log read by loadLog(): <root>/debug/shadow-route-log.jsonl.
const LOG_FILE = path.join(CLAUDE_ROOT, 'debug', 'shadow-route-log.jsonl');
/**
 * Reads the shadow-route log and returns its records.
 *
 * The log is treated as JSON Lines: one JSON document per line. Loading is
 * best-effort: blank lines, lines that are not valid JSON, and lines whose
 * JSON value is not a plain object (numbers, strings, arrays, ...) are skipped
 * silently so a single corrupt line cannot break the whole report. Skipping
 * non-object values matters because the reporting functions read properties
 * such as `ts` off every record.
 *
 * If the log file does not exist, an error is printed to stderr and the
 * process exits with status 1.
 *
 * @returns {Object[]} Parsed log records, in file order.
 */
function loadLog() {
  if (!fs.existsSync(LOG_FILE)) {
    console.error('No shadow log at ' + LOG_FILE);
    process.exit(1);
  }

  const records = [];
  for (const line of fs.readFileSync(LOG_FILE, 'utf8').split('\n')) {
    if (!line) continue;

    let parsed;
    try {
      parsed = JSON.parse(line);
    } catch {
      // Deliberately tolerant: a truncated or corrupt line is ignored.
      continue;
    }

    // JSON.parse also accepts bare scalars and arrays; only objects are records.
    if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) {
      records.push(parsed);
    }
  }
  return records;
}
/**
 * Prints an aggregate report of the shadow-route log to stdout.
 *
 * Sections, in order: entry count, covered period, the ten most frequent
 * primary skills, confidence buckets, complexity histogram, inherited count,
 * close-call rate, and a routing recommendation derived from the close-call
 * rate (needs at least 50 entries; flags ambiguity above 20%).
 *
 * Record fields read here (all optional; a missing field never throws):
 *   - `ts`   string; its first 10 characters are shown as the period bounds
 *            (presumably an ISO-8601 timestamp — confirm against the log writer)
 *   - `p`    primary skill name
 *   - `cf`   confidence, bucketed at 0.8 and 0.5
 *   - `it.c` complexity label ("unknown" when absent)
 *   - `ih`   truthy when the route was inherited
 *   - `t5`   ranked candidates, each with a score `c`; a "close call" is when
 *            the runner-up is within 10% (relative) of the top score
 *
 * The period is taken from the first and last records, i.e. it assumes the log
 * is in chronological order.
 *
 * @param {Object[]} entries Records as returned by loadLog().
 * @returns {void}
 */
function summary(entries) {
  console.log('=== Shadow Route Log Summary ===');
  console.log('Entries: ' + entries.length);
  if (!entries.length) return;

  const total = entries.length;
  const percentOfTotal = (count) => (count / total * 100).toFixed(1);

  // A record without a usable timestamp must not abort the whole report.
  const dayOf = (entry) => (typeof entry.ts === 'string' ? entry.ts.slice(0, 10) : 'unknown');
  console.log(`Period: ${dayOf(entries[0])} ~ ${dayOf(entries[total - 1])}`);

  // A Map (not a plain object) so that skill names such as "constructor" or
  // "__proto__" are counted like any other key.
  const skillCounts = new Map();
  for (const entry of entries) {
    const skill = String(entry.p);
    skillCounts.set(skill, (skillCounts.get(skill) || 0) + 1);
  }
  const ranked = Array.from(skillCounts).sort((a, b) => b[1] - a[1]);
  console.log('\nTop 10 skills:');
  for (const [skill, count] of ranked.slice(0, 10)) {
    console.log(` ${skill}: ${count} (${percentOfTotal(count)}%)`);
  }

  let hi = 0;
  let mid = 0;
  let lo = 0;
  for (const entry of entries) {
    if (entry.cf >= 0.8) hi++;
    else if (entry.cf >= 0.5) mid++;
    else lo++; // also catches records with no confidence at all
  }
  console.log(`\nConfidence: high(>=0.8)=${hi} mid(0.5-0.8)=${mid} low(<0.5)=${lo}`);

  // Null-prototype object: safe against inherited keys, still JSON-serializable.
  const compCounts = Object.create(null);
  for (const entry of entries) {
    const complexity = (entry.it && entry.it.c) || 'unknown';
    compCounts[complexity] = (compCounts[complexity] || 0) + 1;
  }
  console.log('\nComplexity: ' + JSON.stringify(compCounts));

  const inherited = entries.filter((entry) => entry.ih).length;
  console.log(`Inherited: ${inherited}/${total}`);

  let closeCalls = 0;
  for (const entry of entries) {
    const candidates = entry.t5;
    if (!candidates || !(candidates.length >= 2)) continue;
    const top = candidates[0];
    const runnerUp = candidates[1];
    // Skip malformed candidate slots instead of dereferencing null.
    if (!top || !runnerUp || !(top.c > 0)) continue;
    if ((top.c - runnerUp.c) / top.c < 0.10) closeCalls++;
  }
  console.log(`Close calls (gap<10%): ${closeCalls} (${percentOfTotal(closeCalls)}%)`);

  console.log('\n=== Haiku Routing Recommendation ===');
  if (total < 50) {
    console.log(`Insufficient data (${total}/50 min). Continue collecting.`);
  } else if (closeCalls / total > 0.20) {
    console.log('High ambiguity rate. Haiku semantic routing may improve accuracy.');
    console.log('Cost: +300-800ms per prompt. Consider: activate only for close-call queries.');
  } else {
    console.log('TF-IDF routing performing well. Haiku latency (+300-800ms) not justified.');
  }
}
/**
 * Writes the log to stdout as tab-separated values for manual review.
 *
 * Output is a header row followed by one row per record with the columns
 * ts, prompt_head, complexity, primary, confidence, runner_up, domain,
 * inherited. Column sources:
 *   - ts          first 19 characters of `ts`
 *   - prompt_head first 100 characters of `ph`
 *   - complexity  `it.c`
 *   - primary     `p`
 *   - confidence  `cf` with two decimals
 *   - runner_up   "<n>:<c>" of the second element of `t5`, empty if there is none
 *   - domain      `d`
 *   - inherited   "Y" when `ih` is truthy, otherwise "N"
 *
 * Every text cell has tabs and line breaks replaced by spaces so a value can
 * never shift the column layout. Missing text fields become empty cells and
 * missing or non-numeric scores are written as 0.00, so an incomplete record
 * produces a row instead of aborting the export.
 *
 * @param {Object[]} entries Records as returned by loadLog().
 * @returns {void}
 */
function exportTsv(entries) {
  // Coerce to string and neutralize the characters that delimit TSV cells/rows.
  const cell = (value) => String(value).replace(/[\t\n\r]/g, ' ');
  // Two-decimal formatting that tolerates absent or non-numeric scores.
  const fixed2 = (value) => {
    const num = Number(value);
    return (Number.isFinite(num) ? num : 0).toFixed(2);
  };

  const header = ['ts', 'prompt_head', 'complexity', 'primary', 'confidence', 'runner_up', 'domain', 'inherited'];
  console.log(header.join('\t'));

  for (const entry of entries) {
    const runnerUp = entry.t5 && entry.t5[1];
    const row = [
      cell(entry.ts || '').slice(0, 19),
      cell(entry.ph || '').slice(0, 100),
      cell((entry.it && entry.it.c) || ''),
      cell(entry.p || ''),
      fixed2(entry.cf),
      runnerUp ? cell(runnerUp.n + ':' + fixed2(runnerUp.c)) : '',
      cell(entry.d || ''),
      entry.ih ? 'Y' : 'N',
    ];
    console.log(row.join('\t'));
  }
}
// Entry point: load the log first, then pick the output mode from the CLI
// arguments (--export selects the TSV dump, anything else the summary).
const entries = loadLog();
const exportRequested = process.argv.indexOf('--export') !== -1;
if (exportRequested) {
  exportTsv(entries);
} else {
  summary(entries);
}