- VERSION file as authoritative version source - export.mjs reads VERSION with package.json fallback - bw-ota.ps1 DryRun mode for safe testing - auto-setup.ps1 bumped to v3.2.0 (Phase 8 OTA)
44 lines
2.7 KiB
JavaScript
44 lines
2.7 KiB
JavaScript
#!/usr/bin/env node
|
|
/**
 * patch-r4-cjk-token-ratio.js · 2026-04-26
 * Fixes the R4 bias where BYTES_PER_TOKEN=3.5 underestimates tokens by ~43%
 * in CJK-dense transcripts: the hook now samples the first 8 KB of the
 * transcript, computes the share of CJK bytes, and picks the ratio dynamically.
 *   - CJK >= 40%        → 2.2 (Chinese-dense)
 *   - 15% <= CJK < 40%  → 2.8 (mixed)
 *   - CJK < 15%         → 3.5 (English / JSON)
 * Idempotent: sentinel 'R4-CJK-RATIO-V2'
 */
|
|
'use strict';
|
|
const fs = require('fs');
const os = require('os');
const path = require('path');
|
|
|
|
// Path of the hook script this patch rewrites. The home directory comes from
// HOME, then USERPROFILE, and finally os.homedir(), so the script never falls
// back to a path that only exists on one particular machine.
const HOOK = path.join(
  process.env.HOME || process.env.USERPROFILE || os.homedir(),
  '.claude',
  'hooks',
  'context-pressure-monitor.js'
);

// Marker embedded in NEW; finding it in the hook means the patch is already applied.
const SENTINEL = 'R4-CJK-RATIO-V2';

// Exact text searched for in the hook: the token estimate computed from the
// transcript size and the fixed BYTES_PER_TOKEN. Must stay byte-identical.
const OLD = " let bytes = 0;\n try { bytes = fs.statSync(tp).size; } catch { process.exit(0); }\n if (bytes < 50000) process.exit(0); // <50KB 显然没压力, 跳过\n\n const tokens = Math.round(bytes / BYTES_PER_TOKEN);";

// Replacement for OLD: same size check, but the divisor is obtained from
// sampleBytesPerToken(tp). Carries the SENTINEL in its comment line.
const NEW = " let bytes = 0;\n try { bytes = fs.statSync(tp).size; } catch { process.exit(0); }\n if (bytes < 50000) process.exit(0); // <50KB 显然没压力, 跳过\n\n // R4-CJK-RATIO-V2: 采样头 8KB 计算 CJK 字节占比, 动态选择 ratio\n const ratio_bpt = sampleBytesPerToken(tp);\n const tokens = Math.round(bytes / ratio_bpt);";

// Text in the hook in front of which HELPER_FN is inserted.
const HELPER_ANCHOR = "(async () => {";

// Source text of the helper injected into the hook. It reads up to 8192 bytes
// from the start of the file, counts bytes in 0xE3-0xE9 as lead bytes of
// 3-byte sequences, and maps the resulting byte share to 2.2 / 2.8 / 3.5.
// Samples under 200 bytes and any I/O error yield the hook's BYTES_PER_TOKEN.
// The descriptor is closed in a `finally` so a failing read cannot leak it.
const HELPER_FN = [
  "function sampleBytesPerToken(tp) {",
  " try {",
  " const buf = Buffer.alloc(8192);",
  " let n = 0;",
  " const fd = fs.openSync(tp, 'r');",
  " try {",
  " n = fs.readSync(fd, buf, 0, 8192, 0);",
  " } finally {",
  " fs.closeSync(fd);",
  " }",
  " if (n < 200) return BYTES_PER_TOKEN; // 样本太小, 用默认",
  " let cjkBytes = 0;",
  " // CJK Unified Ideographs (U+4E00-U+9FFF) UTF-8: E4-E9 起始的 3 字节序列",
  " // CJK ext A (U+3400-U+4DBF) UTF-8: E3 起始 + 第二字节 90-9F",
  " // 简化: 统计 0xE3-0xE9 起始的 3 字节序列首字节即可",
  " for (let i = 0; i < n; i++) {",
  " const b = buf[i];",
  " if (b >= 0xE3 && b <= 0xE9) cjkBytes += 3;",
  " }",
  " const cjkRatio = cjkBytes / n;",
  " if (cjkRatio >= 0.40) return 2.2;",
  " if (cjkRatio >= 0.15) return 2.8;",
  " return 3.5;",
  " } catch { return BYTES_PER_TOKEN; }",
  "}",
  "",
  "",
].join('\n');
|
|
|
|
// Patch driver: reads the hook, skips when the sentinel is already present,
// aborts when either anchor is missing, writes a timestamped backup of the
// untouched content, then writes the patched content back to HOOK.
try {
  const original = fs.readFileSync(HOOK, 'utf8');

  // Idempotency guard: the sentinel is part of NEW, so a patched hook contains it.
  if (original.includes(SENTINEL)) {
    console.log('[r4-cjk] already applied, skip');
    process.exit(0);
  }

  // Both the code to replace and the insertion point for the helper must exist.
  const anchorsPresent = original.includes(OLD) && original.includes(HELPER_ANCHOR);
  if (!anchorsPresent) {
    console.error('[r4-cjk] anchor not found, abort');
    process.exit(1);
  }

  // Keep a copy of the unmodified hook next to it before overwriting.
  const backupPath = `${HOOK}.bak.r4cjk.${Date.now()}`;
  fs.writeFileSync(backupPath, original, 'utf8');

  // Swap in the new estimate first, then insert the helper ahead of the anchor.
  const patched = original
    .replace(OLD, NEW)
    .replace(HELPER_ANCHOR, HELPER_FN + HELPER_ANCHOR);
  fs.writeFileSync(HOOK, patched, 'utf8');

  console.log('[r4-cjk] applied');
} catch (err) {
  // Any read/write failure is reported and signalled through the exit code.
  console.error('[r4-cjk] error:', err.message);
  process.exit(1);
}
|