bookworm-boot/patches/circuit-breaker-v2.js

229 lines
6.0 KiB
JavaScript
Raw Permalink Normal View History

'use strict';
/**
* Circuit Breaker v2 增强版 per-Provider 熔断器
*
* 改进点:
* 1. 半衰期自动恢复 (失败计数随时间衰减, 避免永久 OPEN)
* 2. 可配置阈值 (通过环境变量覆盖)
* 3. 管理端点支持 (getStatus + reset + resetAll)
* 4. 统计指标 (总请求数成功率平均恢复时间)
* 5. 事件日志 (状态变迁记录, 供审计)
*
* @module src/circuit-breaker
*/
const STATES = { CLOSED: 'closed', OPEN: 'open', HALF_OPEN: 'half_open' };
// 可通过环境变量覆盖
const FAILURE_THRESHOLD = parseInt(process.env.CB_FAILURE_THRESHOLD) || 5;
const RECOVERY_TIMEOUT = parseInt(process.env.CB_RECOVERY_TIMEOUT) || 30000;
const SUCCESS_THRESHOLD = parseInt(process.env.CB_SUCCESS_THRESHOLD) || 2;
const HALF_LIFE_MS = parseInt(process.env.CB_HALF_LIFE_MS) || 120000; // 2 分钟半衰期
const breakers = {};
// 状态变迁日志 (保留最近 100 条)
const MAX_LOG_SIZE = 100;
const transitionLog = [];
function _log(provider, from, to, reason) {
const entry = {
ts: new Date().toISOString(),
provider,
from,
to,
reason,
};
transitionLog.push(entry);
if (transitionLog.length > MAX_LOG_SIZE) transitionLog.shift();
}
function _getBreaker(provider) {
if (!breakers[provider]) {
breakers[provider] = {
state: STATES.CLOSED,
failures: 0,
lastFailureAt: 0,
successCount: 0,
totalTrips: 0,
// 统计
totalRequests: 0,
totalSuccesses: 0,
totalFailures: 0,
lastTripAt: 0,
lastRecoveryAt: 0,
tripDurations: [], // 最近 10 次恢复耗时(ms)
};
}
return breakers[provider];
}
/**
* 半衰期衰减: failures 随时间指数衰减
* 避免偶尔的瞬态错误累积到阈值
*/
function _decayedFailures(b) {
if (b.failures === 0 || b.lastFailureAt === 0) return 0;
const elapsed = Date.now() - b.lastFailureAt;
if (elapsed <= 0) return b.failures;
// 每过一个半衰期, failures 减半
const decayFactor = Math.pow(0.5, elapsed / HALF_LIFE_MS);
return b.failures * decayFactor;
}
/**
* 检查是否允许向 provider 发送请求
*/
function canRequest(provider) {
const b = _getBreaker(provider);
if (b.state === STATES.CLOSED) {
b.totalRequests++;
return true;
}
if (b.state === STATES.OPEN) {
if (Date.now() - b.lastFailureAt > RECOVERY_TIMEOUT) {
const prev = b.state;
b.state = STATES.HALF_OPEN;
b.successCount = 0;
_log(provider, prev, STATES.HALF_OPEN, 'recovery_timeout_elapsed');
b.totalRequests++;
return true;
}
b.totalRejected = (b.totalRejected || 0) + 1;
return false;
}
// HALF_OPEN: 允许有限探测
b.totalRequests++;
return true;
}
/**
* 记录成功
*/
function recordSuccess(provider) {
const b = _getBreaker(provider);
b.totalSuccesses++;
if (b.state === STATES.HALF_OPEN) {
b.successCount++;
if (b.successCount >= SUCCESS_THRESHOLD) {
const prev = b.state;
b.state = STATES.CLOSED;
b.failures = 0;
b.successCount = 0;
b.lastRecoveryAt = Date.now();
// 记录恢复耗时
if (b.lastTripAt > 0) {
b.tripDurations.push(Date.now() - b.lastTripAt);
if (b.tripDurations.length > 10) b.tripDurations.shift();
}
_log(provider, prev, STATES.CLOSED, `${SUCCESS_THRESHOLD}_consecutive_successes`);
}
} else if (b.state === STATES.CLOSED) {
// 使用衰减后的失败数, 而非直接清零
// 这样偶尔的成功会让累积的失败自然消散
b.failures = Math.max(0, _decayedFailures(b) - 0.5);
}
}
/**
* 记录失败
*/
function recordFailure(provider) {
const b = _getBreaker(provider);
b.totalFailures++;
b.lastFailureAt = Date.now();
if (b.state === STATES.HALF_OPEN) {
const prev = b.state;
b.state = STATES.OPEN;
b.totalTrips++;
b.lastTripAt = Date.now();
_log(provider, prev, STATES.OPEN, 'half_open_failure');
} else if (b.state === STATES.CLOSED) {
// 使用衰减后的值 + 1
b.failures = _decayedFailures(b) + 1;
if (b.failures >= FAILURE_THRESHOLD) {
const prev = b.state;
b.state = STATES.OPEN;
b.totalTrips++;
b.lastTripAt = Date.now();
_log(provider, prev, STATES.OPEN, `${FAILURE_THRESHOLD}_failures_reached`);
}
}
}
/**
* 获取所有熔断器状态快照 (管理后台用)
*/
function getStatus() {
const snapshot = {};
for (const [name, b] of Object.entries(breakers)) {
const successRate = b.totalRequests > 0
? Math.round(b.totalSuccesses / b.totalRequests * 10000) / 100
: 100;
const avgRecoveryMs = b.tripDurations.length > 0
? Math.round(b.tripDurations.reduce((a, c) => a + c, 0) / b.tripDurations.length)
: 0;
snapshot[name] = {
state: b.state,
failures: Math.round(_decayedFailures(b) * 100) / 100,
rawFailures: b.failures,
totalTrips: b.totalTrips,
lastFailureAt: b.lastFailureAt ? new Date(b.lastFailureAt).toISOString() : null,
lastRecoveryAt: b.lastRecoveryAt ? new Date(b.lastRecoveryAt).toISOString() : null,
stats: {
totalRequests: b.totalRequests,
totalSuccesses: b.totalSuccesses,
totalFailures: b.totalFailures,
successRate: successRate + '%',
avgRecoveryMs,
},
};
}
return snapshot;
}
/**
* 获取状态变迁日志
*/
function getTransitionLog(limit) {
const n = limit || 20;
return transitionLog.slice(-n);
}
/**
* 手动重置单个 provider
*/
function reset(provider) {
if (breakers[provider]) {
const prev = breakers[provider].state;
breakers[provider].state = STATES.CLOSED;
breakers[provider].failures = 0;
breakers[provider].lastFailureAt = 0;
breakers[provider].successCount = 0;
_log(provider, prev, STATES.CLOSED, 'manual_reset');
}
}
/**
* 重置所有 provider
*/
function resetAll() {
for (const name of Object.keys(breakers)) {
reset(name);
}
}
module.exports = {
canRequest, recordSuccess, recordFailure,
getStatus, getTransitionLog,
reset, resetAll,
STATES,
};