bookworm-smart-assistant/scripts/patches/patch-sanitize-v6-17patterns.js

149 lines
5.7 KiB
JavaScript
Raw Normal View History

#!/usr/bin/env node
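/**
* patch-sanitize-v6-17patterns.js
*
* P0 upgrade: moves scripts/sanitize.js from v5.9 (7 patterns) to v6.0 (17 patterns).
*
* Adapted from OpenClaw `src/logging/redact.ts` (DEFAULT_REDACT_PATTERNS).
*
* Newly covered:
* - JSON fields ("apiKey":"...", "accessToken":"...")
* - CLI flags (--api-key xxx, --token xxx)
* - PEM private key blocks (multi-line; the first and last lines are kept)
* - github_pat_ / xoxp- / gsk_ / AIza / npm_ / pplx- / Telegram bot tokens
* - maskToken: keeps the first 6 and last 4 characters (debug-friendly); tokens shorter than 18 characters are replaced entirely with ***
*
* Fixes:
* - The Base64 threshold stays at 64 and is not lowered; named secrets are covered by dedicated patterns
* - Bearer is restricted to [A-Za-z0-9._\-+=]{18,} so URL paths are not redacted by mistake
*
* Protocol: .bak backup + sentinel + atomic write
*/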
'use strict';
const fs = require('fs');
const path = require('path');
// File overwritten by this patch: sanitize.js in the parent directory of this script.
const TARGET = path.join(__dirname, '..', 'sanitize.js');
const SENTINEL = 'SANITIZE-V6-17PATTERNS';
const NEW_SANITIZE = `#!/usr/bin/env node
/**
* 共享日志脱敏模块 (v6.0) ${SENTINEL}
*
* 17 pattern 对齐 OpenClaw redact.ts
* 提供 maskToken 部分可见输出前6后4位+ 全量 [REDACTED] fallback
*/
const REDACT_MIN_LEN = 18;
const KEEP_START = 6;
const KEEP_END = 4;
const PATTERNS = [
// 1. ENV 键值对 KEY=value KEY: value (含引号)
{ re: /\\b[A-Z0-9_]*(?:KEY|TOKEN|SECRET|PASSWORD|PASSWD|CREDENTIAL|APIKEY)\\b\\s*[=:]\\s*(["']?)([^\\s"'\\\\]{8,})\\1/gi, type: 'kv' },
// 2. JSON 字段
{ re: /"(?:apiKey|api_key|token|secret|password|passwd|accessToken|refreshToken|credential)"\\s*:\\s*"([^"]{8,})"/gi, type: 'json' },
// 3. CLI flags
{ re: /--(?:api[-_]?key|hook[-_]?token|token|secret|password|credential)\\s+(["']?)([^\\s"']{8,})\\1/gi, type: 'cli' },
// 4. Bearer header
{ re: /Authorization\\s*[:=]\\s*Bearer\\s+([A-Za-z0-9._\\-+=]{18,})/gi, type: 'bearer' },
{ re: /\\bBearer\\s+([A-Za-z0-9._\\-+=]{18,})\\b/g, type: 'bearer' },
// 5. PEM block (多行)
{ re: /-----BEGIN [A-Z ]*PRIVATE KEY-----[\\s\\S]+?-----END [A-Z ]*PRIVATE KEY-----/g, type: 'pem' },
// 6-15. 已知 token 前缀
{ re: /\\b(sk-[A-Za-z0-9_-]{8,})\\b/g, type: 'token' }, // OpenAI/Anthropic
{ re: /\\b(sk-ant-[A-Za-z0-9_-]{8,})\\b/g, type: 'token' }, // Anthropic 显式
{ re: /\\b(ghp_[A-Za-z0-9]{20,})\\b/g, type: 'token' }, // GitHub PAT
{ re: /\\b(gho_[A-Za-z0-9]{20,})\\b/g, type: 'token' }, // GitHub OAuth
{ re: /\\b(github_pat_[A-Za-z0-9_]{20,})\\b/g, type: 'token' },// GitHub Fine-grained PAT
{ re: /\\b(xox[baprs]-[A-Za-z0-9-]{10,})\\b/g, type: 'token' }, // Slack
{ re: /\\b(gsk_[A-Za-z0-9_-]{10,})\\b/g, type: 'token' }, // Groq
{ re: /\\b(AIza[0-9A-Za-z\\-_]{20,})\\b/g, type: 'token' }, // Google API
{ re: /\\b(npm_[A-Za-z0-9]{10,})\\b/g, type: 'token' }, // npm
{ re: /\\b(pplx-[A-Za-z0-9_-]{10,})\\b/g, type: 'token' }, // Perplexity
{ re: /\\bAKIA[A-Z0-9]{16}\\b/g, type: 'token' }, // AWS Access Key
// 16. JWT (eyJ 开头三段)
{ re: /\\beyJ[A-Za-z0-9_-]{10,}\\.[A-Za-z0-9_-]{10,}\\.[A-Za-z0-9_-]{10,}\\b/g, type: 'jwt' },
// 17. Telegram bot token
{ re: /\\b(\\d{6,}:[A-Za-z0-9_-]{20,})\\b/g, type: 'telegram' },
];
function maskToken(token) {
if (!token || token.length < REDACT_MIN_LEN) return '***';
return token.slice(0, KEEP_START) + '\\u2026' + token.slice(-KEEP_END);
}
function sanitize(text, opts) {
if (!text || typeof text !== 'string') return text || '';
if (opts && opts.mode === 'off') return text;
let result = text;
for (let i = 0; i < PATTERNS.length; i++) {
const { re, type } = PATTERNS[i];
re.lastIndex = 0;
if (type === 'pem') {
result = result.replace(re, (m) => {
const lines = m.split(/\\r?\\n/).filter(Boolean);
return lines.length < 2 ? '***' : lines[0] + '\\n[REDACTED_PEM]\\n' + lines[lines.length - 1];
});
} else if (type === 'kv' || type === 'json' || type === 'cli') {
// 抓最后一个非空捕获组作为 token
result = result.replace(re, function() {
const args = Array.from(arguments);
const m = args[0];
const groups = args.slice(1, -2).filter(Boolean);
const token = groups[groups.length - 1] || m;
return m.replace(token, maskToken(token));
});
} else if (type === 'jwt' || type === 'token' || type === 'bearer' || type === 'telegram') {
result = result.replace(re, function(m, g1) {
const token = g1 || m;
return m.replace(token, maskToken(token));
});
}
}
return result;
}
// 兼容旧调用: safeAppendLog 保持不变
const fs = require('fs');
function safeAppendLog(filePath, jsonData) {
try {
const dir = require('path').dirname(filePath);
if (!fs.existsSync(dir)) fs.mkdirSync(dir, { recursive: true });
fs.appendFileSync(filePath, JSON.stringify(jsonData) + '\\n');
} catch (e) {
try { process.stderr.write('[LOG-FALLBACK] ' + JSON.stringify(jsonData) + '\\n'); } catch {}
}
}
if (typeof module !== 'undefined') {
module.exports = { sanitize, safeAppendLog, maskToken };
}
`;
/**
 * Replaces TARGET with the NEW_SANITIZE source, once.
 *
 * Steps:
 *   1. Exit with status 1 if TARGET does not exist.
 *   2. Exit with status 0 if TARGET already contains SENTINEL (already patched).
 *   3. Copy TARGET to a timestamped `.bak.<ISO time>` file next to it.
 *   4. Write the new source to `<TARGET>.tmp.<pid>` and rename it over TARGET, so a
 *      reader never sees a half-written file.
 *
 * The permission bits of the existing TARGET are carried over to the new file, and
 * the temp file is removed if the write or rename fails.
 *
 * @throws {Error} any fs error from backup, write, chmod or rename (after temp cleanup).
 */
function main() {
  if (!fs.existsSync(TARGET)) {
    console.error(`[ERROR] target not found: ${TARGET}`);
    process.exit(1);
  }
  const cur = fs.readFileSync(TARGET, 'utf8');
  if (cur.includes(SENTINEL)) {
    console.log('[SKIP] already patched');
    process.exit(0);
  }
  // ':' and '.' are replaced so the timestamp is safe inside a file name.
  const ts = new Date().toISOString().replace(/[:.]/g, '-');
  const bakPath = `${TARGET}.bak.${ts}`;
  fs.copyFileSync(TARGET, bakPath);
  console.log(`[BACKUP] ${bakPath}`);
  const tmpPath = `${TARGET}.tmp.${process.pid}`;
  // Keep only the permission bits; stat().mode also carries file-type bits.
  const mode = fs.statSync(TARGET).mode & 0o777;
  try {
    fs.writeFileSync(tmpPath, NEW_SANITIZE, { mode });
    // The mode passed at creation is filtered by umask (and ignored for a stale temp file).
    fs.chmodSync(tmpPath, mode);
    fs.renameSync(tmpPath, TARGET);
  } catch (err) {
    try {
      fs.unlinkSync(tmpPath);
    } catch (_) {
      // Best-effort cleanup: the temp file may never have been created.
    }
    throw err;
  }
  console.log(`[OK] sanitize.js upgraded to v6.0 (17 patterns)`);
}
if (require.main === module) main();