91 lines
3.8 KiB
JavaScript
91 lines
3.8 KiB
JavaScript
|
|
#!/usr/bin/env node
|
||
|
|
/**
 * P2-2 Patch: CJK Token Estimation Fix
 * Replaces hardcoded /3.5 with CJK-aware estimation in token-saver-dispatcher.js
 * - estimateFileTokens: byte-level CJK sampling (same algo as context-pressure-monitor)
 * - estimateStringTokens: char-level CJK ratio, chars x 1.5 for CJK
 * Idempotent: sentinel [P2-2] CJK_TOKEN_FIX_v1
 */
|
||
|
|
'use strict';
|
||
|
|
const fs = require('fs');
|
||
|
|
const path = require('path');
|
||
|
|
|
||
|
|
// Absolute path of the hook script this patch rewrites (two directories up,
// then into hooks/).
const TARGET = path.resolve(__dirname, '..', '..', 'hooks', 'token-saver-dispatcher.js');

// Marker comment written into the target; its presence means the patch has
// already been applied, which is what makes re-running this script a no-op.
const SENTINEL = '[P2-2] CJK_TOKEN_FIX_v1';

// Pre-flight 1: nothing to patch when the target is absent. This is treated
// as a skip (exit code 0), not as a failure.
const targetExists = fs.existsSync(TARGET);
if (!targetExists) {
  process.stderr.write('[SKIP] token-saver-dispatcher.js not found\n');
  process.exit(0);
}

// Pre-flight 2: load the current source and bail out if the sentinel is
// already present.
const src = fs.readFileSync(TARGET, 'utf8');
const alreadyPatched = src.includes(SENTINEL);
if (alreadyPatched) {
  process.stderr.write('[SKIP] already patched (sentinel found)\n');
  process.exit(0);
}
|
||
|
|
|
||
|
|
// Find insertion point: the offset just past purgeOld's closing brace.
const purgeAnchor = 'function purgeOld(obj, ttlMs) {';
const purgeIdx = src.indexOf(purgeAnchor);
if (purgeIdx === -1) {
  process.stderr.write('[FAIL] purgeOld anchor not found\n');
  process.exit(1);
}

// Scan forward from the anchor, tracking brace depth; the function ends where
// the depth returns to zero. The anchor itself contains the opening brace, so
// the depth is 1 after the first line.
// NOTE: this is a plain character scan - braces inside string literals,
// regexes, or comments within purgeOld would be counted too.
let braceCount = 0;
let purgeEnd = -1;
for (let i = purgeIdx; i < src.length; i++) {
  const ch = src[i];
  if (ch === '{') {
    braceCount++;
  } else if (ch === '}') {
    braceCount--;
    if (braceCount === 0) {
      purgeEnd = i + 1;
      break;
    }
  }
}
if (purgeEnd === -1) {
  process.stderr.write('[FAIL] purgeOld end not found\n');
  process.exit(1);
}

// Take the timestamped backup only once the insertion point is known to
// exist. Writing it before the anchor checks left a new .bak-p22.* file behind
// on every aborted run, even though the target was never modified.
fs.writeFileSync(TARGET + '.bak-p22.' + Date.now(), src, 'utf8');
|
||
|
|
|
||
|
|
// Source text injected into the target file, preceded by the sentinel comment.
//
// estimateFileTokens(filePath, fileSize):
//   Reads up to the first 4096 bytes of the file and counts bytes in the range
//   0xE4-0xED, each weighted as 3 bytes. The resulting ratio selects a
//   bytes-per-token divisor (2.2 / 2.8 / 3.5). Samples shorter than 50 bytes
//   and any I/O error fall back to fileSize / 3.5.
//   The descriptor is closed in `finally`: previously closeSync was only
//   reached when readSync succeeded, so a failing read leaked the descriptor.
//
// estimateStringTokens(str):
//   Samples up to the first 2000 UTF-16 code units, computes the share that
//   falls in U+3400-U+9FFF or U+AC00-U+D7AF, and blends 1.5 tokens/char for
//   that share with 0.25 tokens/char for the rest. Strings shorter than 50
//   code units use len / 4.
//
// The generated code uses `var` to match the statements it replaces in the
// target file, and relies on `fs` being in scope there.
const helpers = `

// ${SENTINEL}
function estimateFileTokens(filePath, fileSize) {
  var fd = null;
  try {
    fd = fs.openSync(filePath, 'r');
    var buf = Buffer.alloc(4096);
    var n = fs.readSync(fd, buf, 0, 4096, 0);
    if (n < 50) return Math.round(fileSize / 3.5);
    var cjkBytes = 0;
    for (var i = 0; i < n; i++) {
      if (buf[i] >= 0xE4 && buf[i] <= 0xED) cjkBytes += 3;
    }
    var ratio = cjkBytes / n;
    var bpt = ratio >= 0.40 ? 2.2 : ratio >= 0.15 ? 2.8 : 3.5;
    return Math.round(fileSize / bpt);
  } catch {
    return Math.round(fileSize / 3.5);
  } finally {
    if (fd !== null) {
      try { fs.closeSync(fd); } catch { /* best effort: estimate already decided */ }
    }
  }
}

function estimateStringTokens(str) {
  var len = str.length;
  if (len < 50) return Math.round(len / 4);
  var sampleLen = Math.min(len, 2000);
  var cjk = 0;
  for (var i = 0; i < sampleLen; i++) {
    var c = str.charCodeAt(i);
    if ((c >= 0x3400 && c <= 0x9FFF) || (c >= 0xAC00 && c <= 0xD7AF)) cjk++;
  }
  var ratio = cjk / sampleLen;
  var tokPerChar = ratio * 1.5 + (1 - ratio) * 0.25;
  return Math.round(len * tokPerChar);
}
`;
|
||
|
|
|
||
|
|
// Splice the helper definitions in directly after purgeOld.
let patched = src.slice(0, purgeEnd) + helpers + src.slice(purgeEnd);

// Hardcoded /3.5 estimates to swap for the CJK-aware helpers. `label` is the
// name used in the warning when the exact statement is not found.
const replacements = [
  {
    // handleReadGuard
    label: 'read-guard',
    from: 'var estTokens = Math.round(fileSize / 3.5);',
    to: 'var estTokens = estimateFileTokens(input.file_path, fileSize);',
  },
  {
    // handlePostOutputGuard
    label: 'post-output-guard',
    from: 'var tokens = Math.round(len / 3.5);',
    to: 'var tokens = estimateStringTokens(out);',
  },
];

// Apply each replacement (first occurrence only) and count how many actually
// landed, so the final report reflects what happened instead of assuming 2.
let applied = 0;
for (const { label, from, to } of replacements) {
  if (!patched.includes(from)) {
    process.stderr.write('[WARN] ' + label + ' replacement anchor not found\n');
    continue;
  }
  // A replacer function keeps the replacement text literal; a plain string
  // would have any `$` sequences interpreted as substitution patterns.
  patched = patched.replace(from, () => to);
  applied += 1;
}

// Verify no remaining hardcoded /3.5 in estimation contexts. Informational
// only: the fallback paths inside the injected helpers match this pattern too.
const remaining = patched.match(/Math\.round\([a-zA-Z]+ \/ 3\.5\)/g) || [];
if (remaining.length > 0) {
  process.stderr.write('[INFO] ' + remaining.length + ' other /3.5 patterns remain (may be intentional)\n');
}

// Write to a per-process temp file, then rename over the target so the target
// is never observed half-written. On failure, remove the temp file rather
// than leaving it beside the target, and re-throw the original error.
const tmp = TARGET + '.tmp.' + process.pid;
try {
  fs.writeFileSync(tmp, patched, 'utf8');
  fs.renameSync(tmp, TARGET);
} catch (err) {
  try { fs.unlinkSync(tmp); } catch { /* temp file may never have been created */ }
  throw err;
}

// Report the real number of replacements; with both anchors present this is
// byte-identical to the previous fixed "(2 replacements)" message.
const noun = applied === 1 ? 'replacement' : 'replacements';
process.stderr.write('[DONE] P2-2 CJK token estimation fix applied (' + applied + ' ' + noun + ')\n');
|