bookworm-smart-assistant/scripts/patches/patch-p1-2-evolution-log-relinify.js

#!/usr/bin/env node
/**
 * patch-p1-2-evolution-log-relinify.js
 *
 * 修复 evolution-log.jsonl 中 4 处历史粘连 JSON（缺 \n 分隔），
 * 让其重新成为合法 JSONL，使 baseline 能成功生成。
 *
 * 算法: JSON 流式 token 平衡计数 — 在每个完整 JSON 对象闭合后插入换行。
 *
 * 协议: .bak 备份 + 原子写
 */

'use strict';
const fs = require('fs');
const path = require('path');

// Log file to repair: evolution-log.jsonl, located two directory levels above this script's directory.
const TARGET = path.join(__dirname, '..', '..', 'evolution-log.jsonl');

/**
 * Re-separate JSON objects that were written back-to-back without a newline.
 *
 * The text is scanned one UTF-16 code unit at a time while tracking `{`/`}`
 * nesting. Braces that appear inside double-quoted strings are ignored, and a
 * backslash inside a string shields the character that follows it. Whenever a
 * `}` brings the nesting back to zero and the next character that is not a
 * space or tab is `{`, a line feed is inserted directly after that `}`.
 * Objects that are already followed by a newline are left untouched.
 *
 * @param {string} text - Raw log contents.
 * @returns {string} The same contents with a "\n" inserted between glued objects.
 */
function relinify(text) {
  const PLAIN = 0;
  const IN_STRING = 1;
  const ESCAPED = 2;

  // True when the first character at or after `from` that is neither a space
  // nor a tab is an opening brace.
  const opensObjectAfter = (from) => {
    for (let k = from; k < text.length; k += 1) {
      const c = text[k];
      if (c !== ' ' && c !== '\t') {
        return c === '{';
      }
    }
    return false;
  };

  const chunks = [];
  let chunkStart = 0;
  let nesting = 0;
  let mode = PLAIN;

  for (let pos = 0; pos < text.length; pos += 1) {
    const c = text[pos];

    if (mode === ESCAPED) {
      // The character right after a backslash is consumed verbatim.
      mode = IN_STRING;
      continue;
    }

    if (mode === IN_STRING) {
      if (c === '\\') {
        mode = ESCAPED;
      } else if (c === '"') {
        mode = PLAIN;
      }
      continue;
    }

    switch (c) {
      case '"':
        mode = IN_STRING;
        break;
      case '{':
        nesting += 1;
        break;
      case '}':
        nesting -= 1;
        // A top-level object just closed; split only if another one is glued on.
        if (nesting === 0 && opensObjectAfter(pos + 1)) {
          chunks.push(text.slice(chunkStart, pos + 1), '\n');
          chunkStart = pos + 1;
        }
        break;
      default:
        break;
    }
  }

  chunks.push(text.slice(chunkStart));
  return chunks.join('');
}

/**
 * Repair TARGET in place so that every non-empty line is one JSON value.
 *
 * Steps:
 *   1. Read the file and normalize CRLF / lone CR line endings to LF.
 *   2. Run relinify() to put glued objects on their own lines.
 *   3. Exit 0 with a [SKIP] message when nothing changed.
 *   4. Verify that every non-empty line parses as JSON; exit 1 otherwise,
 *      without touching the file.
 *   5. Copy the current file to a timestamped .bak.relinify.* backup.
 *   6. Write the result to a temp file next to TARGET and rename it over TARGET.
 *
 * Exits the process with code 1 when TARGET is missing or validation fails.
 * File-system errors during backup or write propagate as exceptions.
 */
function main() {
  if (!fs.existsSync(TARGET)) {
    process.stderr.write('[ERROR] evolution-log.jsonl not found\n');
    process.exit(1);
  }

  const original = fs.readFileSync(TARGET, 'utf8');
  // Step 1: normalize CRLF (and lone CR) to LF.
  const lfNormalized = original.replace(/\r\n/g, '\n').replace(/\r/g, '\n');
  // Step 2: split objects that were glued together as "}{".
  const fixed = relinify(lfNormalized);

  if (fixed === original) {
    process.stdout.write('[SKIP] no concatenated JSON found\n');
    process.exit(0);
  }

  // Verify that every non-empty line of the repaired text is valid JSON
  // before anything on disk is modified.
  const lines = fixed.split('\n');
  let okLines = 0;
  const badLines = [];
  for (let i = 0; i < lines.length; i++) {
    if (!lines[i]) continue;
    try {
      JSON.parse(lines[i]);
      okLines++;
    } catch (_) {
      // Record the 1-based line number; reported below.
      badLines.push(i + 1);
    }
  }
  if (badLines.length > 0) {
    process.stderr.write(`[ERROR] after relinify still has bad lines: ${JSON.stringify(badLines.slice(0, 5))}\n`);
    process.exit(1);
  }

  // Backup. The timestamp has ':' and '.' replaced so it is safe in a file name.
  const ts = new Date().toISOString().replace(/[:.]/g, '-');
  const bakPath = `${TARGET}.bak.relinify.${ts}`;
  fs.copyFileSync(TARGET, bakPath);
  process.stdout.write(`[BACKUP] ${bakPath}\n`);

  // Atomic write: write a sibling temp file, then rename it over the target.
  const tmpPath = `${TARGET}.tmp.${process.pid}`;
  try {
    fs.writeFileSync(tmpPath, fixed);
    fs.renameSync(tmpPath, TARGET);
  } catch (err) {
    // Do not leave a stray temp file behind; cleanup is best-effort and the
    // original failure is what gets reported.
    try {
      fs.unlinkSync(tmpPath);
    } catch (_) {
      // The temp file may never have been created.
    }
    throw err;
  }

  // Count old lines on the LF-normalized text so that blank CRLF lines ("\r")
  // and CR-only files do not distort the reported delta.
  const oldLines = lfNormalized.split('\n').filter(Boolean).length;
  const newLines = okLines;
  process.stdout.write('[OK] evolution-log.jsonl relinified\n');
  process.stdout.write(`     原行数: ${oldLines} → 新行数: ${newLines} (+${newLines - oldLines} 拆开)\n`);
}

// Run main() only when this file is the entry script (require.main is this module), not when it is loaded by another module.
if (require.main === module) main();