#!/usr/bin/env node
/**
 * patch-validator-doc-exempt.js · 2026-04-25 first-day gray-release fix
 *
 * Problem: after warn mode was activated, the first Write of an md file made
 *          the validator report a false-positive high-entropy-token hit.
 * Cause:   MEMORY.md / memory files contain many "seemingly high-entropy"
 *          git hashes / UUIDs / project IDs; the entropy threshold of 3.5
 *          matches all of them.
 *
 * Fix: extend the document-type allowlist
 *      (.md/.mdx/.txt/.rst/.adoc/.yaml/.yml/.toml/.csv/.log)
 *      -> only the exact credential-pattern regexes run, the entropy/hex
 *         heuristics are skipped.
 *      Code files (.js/.py/.ts/.go/.rs/.java/.json) keep the full scan.
 *
 * Idempotent: a second run is detected through the DOC-EXEMPT-V1 sentinel.
 */
'use strict';

const fs = require('fs');
const path = require('path');
const { execSync, execFileSync } = require('child_process');

const VALIDATOR = path.join(__dirname, '..', '..', 'hooks', 'staging-validator.js');
const SENTINEL = '/* DOC-EXEMPT-V1 */';

/**
 * Print an error message and terminate the process with exit code 1.
 *
 * @param {string} message - Text written to stderr.
 */
function die(message) {
  console.error(message);
  process.exit(1);
}

/**
 * Best-effort removal of the temporary file; a failure here must not mask
 * the error that led to the cleanup, so it is only reported.
 *
 * @param {string} file - Path of the temporary file.
 */
function removeTemp(file) {
  try {
    fs.unlinkSync(file);
  } catch (err) {
    if (err.code !== 'ENOENT') {
      console.error('[doc-exempt] could not remove temp file:', file, err.message);
    }
  }
}

/**
 * Apply the three textual edits to the validator source.
 *
 * Exits the process (code 1) when any anchor is missing, so a partially
 * patched source is never returned.
 *
 * @param {string} src - Current content of the validator file.
 * @returns {string} The patched source.
 */
function buildPatchedSource(src) {
  // 1) Inject isDocFile() and widen the scanCredentials signature with filePath.
  //    Existing signature: function scanCredentials(content, credPatterns) {
  const oldFn = /function scanCredentials\(content, credPatterns\) \{/;
  const newFnHeader = [
    `function isDocFile(fp) { ${SENTINEL}`,
    "  const ext = (require('path').extname(fp) || '').toLowerCase();",
    "  return ['.md','.mdx','.txt','.rst','.adoc','.yaml','.yml','.toml','.csv','.log'].includes(ext);",
    '}',
    '',
    'function scanCredentials(content, credPatterns, filePath) {',
  ].join('\n');
  // Function replacers keep `$` sequences in the replacement text literal.
  let newSrc = src.replace(oldFn, () => newFnHeader);
  if (newSrc === src) {
    die('[doc-exempt] 未找到 scanCredentials 签名锚点');
  }

  // 2) Insert the document early-return right before the heuristic hex scan.
  //    Anchor: const hexHits = (text.match(/\b[a-f0-9]{32,}\b/gi) ...
  const hexAnchor = /const hexHits = \(text\.match\(\/\\b\[a-f0-9\]\{32,\}\\b\/gi\)/;
  if (!hexAnchor.test(newSrc)) {
    die('[doc-exempt] 未找到 hex 扫描锚点 (fix-v1 未应用?)');
  }
  const hexGuard = [
    '// 文档类文件: 仅命中精确 credential-pattern 即可, 跳过启发式 hex/entropy',
    ' if (filePath && isDocFile(filePath)) return hits;',
    ' const hexHits = (text.match(/\\b[a-f0-9]{32,}\\b/gi)',
  ].join('\n');
  newSrc = newSrc.replace(hexAnchor, () => hexGuard);

  // 3) Pass stagingPath at the scanCredentials call site.
  const callSite = /credHits = scanCredentials\(content, loadCredPatterns\(\)\);/;
  if (!callSite.test(newSrc)) {
    die('[doc-exempt] 未找到 scanCredentials 调用点');
  }
  return newSrc.replace(
    callSite,
    () => 'credHits = scanCredentials(content, loadCredPatterns(), stagingPath);'
  );
}

/**
 * Patch the staging validator in place.
 *
 * Flow: read -> skip if the sentinel is present -> build the patched source ->
 * write it to a temp file -> syntax-check the temp file -> back up the
 * original -> rename the temp file over the validator.
 *
 * The syntax check runs BEFORE the rename, so the live validator is never
 * replaced by a file that does not parse.
 */
function main() {
  const src = fs.readFileSync(VALIDATOR, 'utf8');
  if (src.includes(SENTINEL)) {
    console.log('[doc-exempt] already applied, skip');
    process.exit(0);
  }

  const newSrc = buildPatchedSource(src);

  // Same directory (rename stays on one filesystem) and a .js suffix so that
  // `node --check` treats the temp file exactly like the real validator.
  const tmp = `${VALIDATOR}.tmp.${process.pid}.js`;
  let installed = false;
  try {
    fs.writeFileSync(tmp, newSrc, 'utf8');

    // Syntax check; no shell involved, and the current Node binary is reused.
    try {
      execFileSync(process.execPath, ['--check', tmp], { stdio: 'pipe' });
      console.log('[doc-exempt] syntax ok');
    } catch (e) {
      const detail = e.stderr && e.stderr.length ? String(e.stderr) : String(e.message);
      // The validator itself was never touched; the patch is simply discarded.
      console.error('[doc-exempt] SYNTAX FAIL, 回滚:', detail.split('\n')[0]);
      process.exitCode = 1;
      return;
    }

    // Backup + atomic replace.
    const bak = VALIDATOR + '.bak.doc-exempt.' + new Date().toISOString().replace(/[:.]/g, '-');
    fs.copyFileSync(VALIDATOR, bak);
    fs.renameSync(tmp, VALIDATOR);
    installed = true;
    console.log('[doc-exempt] 应用成功, 备份:', path.basename(bak));
  } finally {
    if (!installed) {
      removeTemp(tmp);
    }
  }
}

main();