bookworm-smart-assistant/scripts/patches/patch-p2-2-cjk-token-fix.js
Bookworm Admin b7a8e29d21 release: v6.7.0 - OTA E2E test release
- VERSION file as authoritative version source
- export.mjs reads VERSION with package.json fallback
- bw-ota.ps1 DryRun mode for safe testing
- auto-setup.ps1 bumped to v3.2.0 (Phase 8 OTA)
2026-04-27 17:59:44 +08:00

91 lines
3.8 KiB
JavaScript

#!/usr/bin/env node
/**
* P2-2 Patch: CJK Token Estimation Fix
* Replaces hardcoded /3.5 with CJK-aware estimation in token-saver-dispatcher.js
* - estimateFileTokens: byte-level CJK sampling (same algo as context-pressure-monitor)
* - estimateStringTokens: char-level CJK ratio, chars x 1.5 for CJK
* Idempotent: sentinel [P2-2] CJK_TOKEN_FIX_v1
*/
'use strict';
const fs = require('fs');
const path = require('path');
// ---------------------------------------------------------------------------
// Patch driver.
//
// Steps:
//   1. Locate the dispatcher file; skip if it is missing or already carries
//      the sentinel.
//   2. Find the end of `purgeOld` and build the helper source inserted there.
//   3. Swap the two hardcoded `/ 3.5` estimates for calls to the new helpers.
//   4. Only after all anchors are validated: write a timestamped backup, then
//      write the patched text to a temp file and rename it over the target.
//
// Exit codes: 0 on success or skip, 1 when the `purgeOld` anchor is unusable.
// ---------------------------------------------------------------------------
const TARGET = path.resolve(__dirname, '..', '..', 'hooks', 'token-saver-dispatcher.js');
const SENTINEL = '[P2-2] CJK_TOKEN_FIX_v1';
// Matches hardcoded estimates such as `Math.round(len / 3.5)`.
const HARDCODED_ESTIMATE_RE = /Math\.round\([a-zA-Z]+ \/ 3\.5\)/g;

/**
 * Writes one status line to stderr and terminates the process.
 * @param {number} code - process exit code
 * @param {string} message - status line without the trailing newline
 */
function exitWith(code, message) {
  process.stderr.write(message + '\n');
  process.exit(code);
}

/**
 * Finds the end of the first brace-delimited block at or after `start`.
 * NOTE: this is plain brace counting; braces inside strings, comments or
 * regex literals of the scanned text are counted as well.
 * @param {string} text - source text to scan
 * @param {number} start - index to begin scanning from
 * @returns {number} index just past the matching `}`, or -1 if unbalanced
 */
function findBlockEnd(text, start) {
  let depth = 0;
  for (let i = start; i < text.length; i++) {
    const ch = text[i];
    if (ch === '{') {
      depth++;
    } else if (ch === '}') {
      depth--;
      if (depth === 0) return i + 1;
    }
  }
  return -1;
}

/**
 * Replaces the first occurrence of `anchor` in `text`, warning on stderr when
 * the anchor is absent.
 * @param {string} text - text to patch
 * @param {string} anchor - exact statement to look for
 * @param {string} replacement - statement to put in its place
 * @param {string} label - name used in the warning message
 * @returns {{text: string, applied: boolean}} resulting text and whether it changed
 */
function replaceOnce(text, anchor, replacement, label) {
  if (!text.includes(anchor)) {
    process.stderr.write('[WARN] ' + label + ' replacement anchor not found\n');
    return { text, applied: false };
  }
  // Function replacer: the replacement is inserted verbatim, so `$` sequences
  // are never interpreted as substitution patterns.
  return { text: text.replace(anchor, () => replacement), applied: true };
}

/**
 * Counts hardcoded `Math.round(<ident> / 3.5)` expressions in `text`.
 * @param {string} text
 * @returns {number}
 */
function countHardcodedEstimates(text) {
  return (text.match(HARDCODED_ESTIMATE_RE) || []).length;
}

if (!fs.existsSync(TARGET)) exitWith(0, '[SKIP] token-saver-dispatcher.js not found');

const src = fs.readFileSync(TARGET, 'utf8');
if (src.includes(SENTINEL)) exitWith(0, '[SKIP] already patched (sentinel found)');

// Find insertion point: after purgeOld function closing brace
const purgeAnchor = 'function purgeOld(obj, ttlMs) {';
const purgeIdx = src.indexOf(purgeAnchor);
if (purgeIdx === -1) exitWith(1, '[FAIL] purgeOld anchor not found');

const purgeEnd = findBlockEnd(src, purgeIdx);
if (purgeEnd === -1) exitWith(1, '[FAIL] purgeOld end not found');

// Source text of the helpers injected into the dispatcher. The sentinel
// comment on the first line is what makes a second run a no-op.
const helpers = [
  '',
  '',
  '// ' + SENTINEL,
  'function estimateFileTokens(filePath, fileSize) {',
  ' try {',
  " var fd = fs.openSync(filePath, 'r');",
  ' var buf = Buffer.alloc(4096);',
  ' var n;',
  // Close the descriptor even when the read throws, so it cannot leak.
  ' try { n = fs.readSync(fd, buf, 0, 4096, 0); }',
  ' finally { fs.closeSync(fd); }',
  ' if (n < 50) return Math.round(fileSize / 3.5);',
  ' var cjkBytes = 0;',
  ' for (var i = 0; i < n; i++) {',
  ' if (buf[i] >= 0xE4 && buf[i] <= 0xED) cjkBytes += 3;',
  ' }',
  ' var ratio = cjkBytes / n;',
  ' var bpt = ratio >= 0.40 ? 2.2 : ratio >= 0.15 ? 2.8 : 3.5;',
  ' return Math.round(fileSize / bpt);',
  ' } catch { return Math.round(fileSize / 3.5); }',
  '}',
  '',
  'function estimateStringTokens(str) {',
  ' var len = str.length;',
  ' if (len < 50) return Math.round(len / 4);',
  ' var sampleLen = Math.min(len, 2000);',
  ' var cjk = 0;',
  ' for (var i = 0; i < sampleLen; i++) {',
  ' var c = str.charCodeAt(i);',
  ' if ((c >= 0x3400 && c <= 0x9FFF) || (c >= 0xAC00 && c <= 0xD7AF)) cjk++;',
  ' }',
  ' var ratio = cjk / sampleLen;',
  ' var tokPerChar = ratio * 1.5 + (1 - ratio) * 0.25;',
  ' return Math.round(len * tokPerChar);',
  '}',
  '',
].join('\n');

let patched = src.slice(0, purgeEnd) + helpers + src.slice(purgeEnd);
let applied = 0;

// Replace hardcoded /3.5 in handleReadGuard
const readGuard = replaceOnce(
  patched,
  'var estTokens = Math.round(fileSize / 3.5);',
  'var estTokens = estimateFileTokens(input.file_path, fileSize);',
  'read-guard'
);
patched = readGuard.text;
if (readGuard.applied) applied++;

// Replace hardcoded /3.5 in handlePostOutputGuard
const postOutputGuard = replaceOnce(
  patched,
  'var tokens = Math.round(len / 3.5);',
  'var tokens = estimateStringTokens(out);',
  'post-output-guard'
);
patched = postOutputGuard.text;
if (postOutputGuard.applied) applied++;

// Verify no remaining hardcoded /3.5 in estimation contexts. The injected
// helpers contain their own /3.5 fallbacks, so those are subtracted to avoid
// reporting the patch's own code as leftovers.
const remaining = countHardcodedEstimates(patched) - countHardcodedEstimates(helpers);
if (remaining > 0) {
  process.stderr.write('[INFO] ' + remaining + ' other /3.5 patterns remain (may be intentional)\n');
}

// Back up only once every anchor has been validated, so a failing run does not
// leave a stray backup file behind.
fs.writeFileSync(TARGET + '.bak-p22.' + Date.now(), src);

// Write to a temp file, then rename it over the target so the dispatcher is
// swapped in one step instead of being rewritten in place.
const tmp = TARGET + '.tmp.' + process.pid;
fs.writeFileSync(tmp, patched, 'utf8');
fs.renameSync(tmp, TARGET);

// Report how many of the two replacements were really applied.
const summary = applied === 2 ? '2 replacements' : applied + ' of 2 replacements';
process.stderr.write('[DONE] P2-2 CJK token estimation fix applied (' + summary + ')\n');