#!/usr/bin/env node
/**
 * P2-2 Patch: CJK Token Estimation Fix
 *
 * Replaces the hardcoded `/ 3.5` token estimate in
 * hooks/token-saver-dispatcher.js with CJK-aware estimators:
 *   - estimateFileTokens:   byte-level CJK sampling of the first 4096 bytes
 *                           (same algo as context-pressure-monitor)
 *   - estimateStringTokens: char-level CJK ratio over the first 2000 chars,
 *                           1.5 tokens per CJK char, 0.25 per other char
 *
 * Idempotent: the run is skipped when the sentinel
 * "[P2-2] CJK_TOKEN_FIX_v1" is already present in the target.
 *
 * Exit codes: 0 = patched or skipped, 1 = the insertion anchor was not found.
 * All diagnostics go to stderr.
 */
'use strict';

const fs = require('fs');
const path = require('path');

const TARGET = path.resolve(__dirname, '..', '..', 'hooks', 'token-saver-dispatcher.js');
const SENTINEL = '[P2-2] CJK_TOKEN_FIX_v1';

// The helpers are inserted right after this function's closing brace.
const PURGE_ANCHOR = 'function purgeOld(obj, ttlMs) {';

// Matches leftover hardcoded estimates such as `Math.round(len / 3.5)`.
const HARDCODED_ESTIMATE_RE = /Math\.round\([a-zA-Z]+ \/ 3\.5\)/g;

// Source text injected into the target. It is written in the target's own
// style (`var`, references the target's `fs` binding). The descriptor is closed
// in a `finally` so a failing read cannot leak it.
const helpers = [
  '',
  '',
  '// ' + SENTINEL,
  'function estimateFileTokens(filePath, fileSize) {',
  '  try {',
  "    var fd = fs.openSync(filePath, 'r');",
  '    var buf = Buffer.alloc(4096);',
  '    var n;',
  '    try {',
  '      n = fs.readSync(fd, buf, 0, 4096, 0);',
  '    } finally {',
  '      fs.closeSync(fd);',
  '    }',
  '    if (n < 50) return Math.round(fileSize / 3.5);',
  '    var cjkBytes = 0;',
  '    for (var i = 0; i < n; i++) {',
  '      if (buf[i] >= 0xE4 && buf[i] <= 0xED) cjkBytes += 3;',
  '    }',
  '    var ratio = cjkBytes / n;',
  '    var bpt = ratio >= 0.40 ? 2.2 : ratio >= 0.15 ? 2.8 : 3.5;',
  '    return Math.round(fileSize / bpt);',
  '  } catch { return Math.round(fileSize / 3.5); }',
  '}',
  '',
  'function estimateStringTokens(str) {',
  '  var len = str.length;',
  '  if (len < 50) return Math.round(len / 4);',
  '  var sampleLen = Math.min(len, 2000);',
  '  var cjk = 0;',
  '  for (var i = 0; i < sampleLen; i++) {',
  '    var c = str.charCodeAt(i);',
  '    if ((c >= 0x3400 && c <= 0x9FFF) || (c >= 0xAC00 && c <= 0xD7AF)) cjk++;',
  '  }',
  '  var ratio = cjk / sampleLen;',
  '  var tokPerChar = ratio * 1.5 + (1 - ratio) * 0.25;',
  '  return Math.round(len * tokPerChar);',
  '}',
  '',
].join('\n');

/**
 * Writes a diagnostic to stderr and terminates the process.
 * @param {number} code - process exit code
 * @param {string} message - full line to emit (including trailing newline)
 */
function bail(code, message) {
  process.stderr.write(message);
  process.exit(code);
}

/**
 * Finds the end of the brace-delimited block that starts at or after `fromIdx`.
 * Braces are counted naively: braces inside strings, regexes or comments are
 * counted too, so the anchored function must not contain unbalanced ones.
 * @param {string} text - source text to scan
 * @param {number} fromIdx - index at which scanning starts
 * @returns {number} index just past the matching `}`, or -1 if never balanced
 */
function findBlockEnd(text, fromIdx) {
  let depth = 0;
  for (let i = fromIdx; i < text.length; i++) {
    const ch = text[i];
    if (ch === '{') {
      depth++;
    } else if (ch === '}') {
      depth--;
      if (depth === 0) return i + 1;
    }
  }
  return -1;
}

/**
 * Replaces the first occurrence of `needle`, warning on stderr when it is absent.
 * @param {string} text - text to patch
 * @param {string} needle - exact statement to look for
 * @param {string} replacement - literal replacement text
 * @param {string} label - name used in the warning message
 * @returns {{text: string, replaced: boolean}} patched text and whether it changed
 */
function replaceOnce(text, needle, replacement, label) {
  if (!text.includes(needle)) {
    process.stderr.write('[WARN] ' + label + ' replacement anchor not found\n');
    return { text, replaced: false };
  }
  // A function replacer keeps `$` sequences in the replacement literal.
  return { text: text.replace(needle, () => replacement), replaced: true };
}

/**
 * Counts hardcoded `Math.round(x / 3.5)` estimates in `text`.
 * @param {string} text - source text to inspect
 * @returns {number} number of matches
 */
function countHardcodedEstimates(text) {
  return (text.match(HARDCODED_ESTIMATE_RE) || []).length;
}

if (!fs.existsSync(TARGET)) {
  bail(0, '[SKIP] token-saver-dispatcher.js not found\n');
}

const src = fs.readFileSync(TARGET, 'utf8');
if (src.includes(SENTINEL)) {
  bail(0, '[SKIP] already patched (sentinel found)\n');
}

// Find insertion point: after purgeOld function closing brace.
const purgeIdx = src.indexOf(PURGE_ANCHOR);
if (purgeIdx === -1) {
  bail(1, '[FAIL] purgeOld anchor not found\n');
}
const purgeEnd = findBlockEnd(src, purgeIdx);
if (purgeEnd === -1) {
  bail(1, '[FAIL] purgeOld end not found\n');
}

let patched = src.slice(0, purgeEnd) + helpers + src.slice(purgeEnd);
let applied = 0;

// Replace hardcoded /3.5 in handleReadGuard.
const readGuard = replaceOnce(
  patched,
  'var estTokens = Math.round(fileSize / 3.5);',
  'var estTokens = estimateFileTokens(input.file_path, fileSize);',
  'read-guard'
);
patched = readGuard.text;
if (readGuard.replaced) applied++;

// Replace hardcoded /3.5 in handlePostOutputGuard.
const postOutputGuard = replaceOnce(
  patched,
  'var tokens = Math.round(len / 3.5);',
  'var tokens = estimateStringTokens(out);',
  'post-output-guard'
);
patched = postOutputGuard.text;
if (postOutputGuard.replaced) applied++;

// Verify no remaining hardcoded /3.5 in estimation contexts. The injected
// helpers deliberately keep `/ 3.5` as their fallback, so their own matches are
// subtracted to avoid reporting them as leftovers.
const remaining = countHardcodedEstimates(patched) - countHardcodedEstimates(helpers);
if (remaining > 0) {
  process.stderr.write('[INFO] ' + remaining + ' other /3.5 patterns remain (may be intentional)\n');
}

// Back up only once the patch is known to be applicable, so aborted runs do
// not leave stray backup files behind.
fs.writeFileSync(TARGET + '.bak-p22.' + Date.now(), src);

// Write to a temp file and rename it over the target, copying the target's
// permission bits first because the rename would otherwise drop them.
const targetMode = fs.statSync(TARGET).mode & 0o777;
const tmp = TARGET + '.tmp.' + process.pid;
fs.writeFileSync(tmp, patched, 'utf8');
fs.chmodSync(tmp, targetMode);
fs.renameSync(tmp, TARGET);

process.stderr.write(
  '[DONE] P2-2 CJK token estimation fix applied (' +
    applied + ' replacement' + (applied === 1 ? '' : 's') + ')\n'
);