229 lines
6.0 KiB
JavaScript
229 lines
6.0 KiB
JavaScript
|
|
'use strict';
|
||
|
|
|
||
|
|
/**
|
||
|
|
* Circuit Breaker v2 — 增强版 per-Provider 熔断器
|
||
|
|
*
|
||
|
|
* 改进点:
|
||
|
|
* 1. 半衰期自动恢复 (失败计数随时间衰减, 避免永久 OPEN)
|
||
|
|
* 2. 可配置阈值 (通过环境变量覆盖)
|
||
|
|
* 3. 管理端点支持 (getStatus + reset + resetAll)
|
||
|
|
* 4. 统计指标 (总请求数、成功率、平均恢复时间)
|
||
|
|
* 5. 事件日志 (状态变迁记录, 供审计)
|
||
|
|
*
|
||
|
|
* @module src/circuit-breaker
|
||
|
|
*/
|
||
|
|
|
||
|
|
const STATES = { CLOSED: 'closed', OPEN: 'open', HALF_OPEN: 'half_open' };
|
||
|
|
|
||
|
|
// 可通过环境变量覆盖
|
||
|
|
const FAILURE_THRESHOLD = parseInt(process.env.CB_FAILURE_THRESHOLD) || 5;
|
||
|
|
const RECOVERY_TIMEOUT = parseInt(process.env.CB_RECOVERY_TIMEOUT) || 30000;
|
||
|
|
const SUCCESS_THRESHOLD = parseInt(process.env.CB_SUCCESS_THRESHOLD) || 2;
|
||
|
|
const HALF_LIFE_MS = parseInt(process.env.CB_HALF_LIFE_MS) || 120000; // 2 分钟半衰期
|
||
|
|
|
||
|
|
const breakers = {};
|
||
|
|
|
||
|
|
// 状态变迁日志 (保留最近 100 条)
|
||
|
|
const MAX_LOG_SIZE = 100;
|
||
|
|
const transitionLog = [];
|
||
|
|
|
||
|
|
function _log(provider, from, to, reason) {
|
||
|
|
const entry = {
|
||
|
|
ts: new Date().toISOString(),
|
||
|
|
provider,
|
||
|
|
from,
|
||
|
|
to,
|
||
|
|
reason,
|
||
|
|
};
|
||
|
|
transitionLog.push(entry);
|
||
|
|
if (transitionLog.length > MAX_LOG_SIZE) transitionLog.shift();
|
||
|
|
}
|
||
|
|
|
||
|
|
function _getBreaker(provider) {
|
||
|
|
if (!breakers[provider]) {
|
||
|
|
breakers[provider] = {
|
||
|
|
state: STATES.CLOSED,
|
||
|
|
failures: 0,
|
||
|
|
lastFailureAt: 0,
|
||
|
|
successCount: 0,
|
||
|
|
totalTrips: 0,
|
||
|
|
// 统计
|
||
|
|
totalRequests: 0,
|
||
|
|
totalSuccesses: 0,
|
||
|
|
totalFailures: 0,
|
||
|
|
lastTripAt: 0,
|
||
|
|
lastRecoveryAt: 0,
|
||
|
|
tripDurations: [], // 最近 10 次恢复耗时(ms)
|
||
|
|
};
|
||
|
|
}
|
||
|
|
return breakers[provider];
|
||
|
|
}
|
||
|
|
|
||
|
|
/**
|
||
|
|
* 半衰期衰减: failures 随时间指数衰减
|
||
|
|
* 避免偶尔的瞬态错误累积到阈值
|
||
|
|
*/
|
||
|
|
function _decayedFailures(b) {
|
||
|
|
if (b.failures === 0 || b.lastFailureAt === 0) return 0;
|
||
|
|
const elapsed = Date.now() - b.lastFailureAt;
|
||
|
|
if (elapsed <= 0) return b.failures;
|
||
|
|
// 每过一个半衰期, failures 减半
|
||
|
|
const decayFactor = Math.pow(0.5, elapsed / HALF_LIFE_MS);
|
||
|
|
return b.failures * decayFactor;
|
||
|
|
}
|
||
|
|
|
||
|
|
/**
|
||
|
|
* 检查是否允许向 provider 发送请求
|
||
|
|
*/
|
||
|
|
function canRequest(provider) {
|
||
|
|
const b = _getBreaker(provider);
|
||
|
|
|
||
|
|
if (b.state === STATES.CLOSED) {
|
||
|
|
b.totalRequests++;
|
||
|
|
return true;
|
||
|
|
}
|
||
|
|
|
||
|
|
if (b.state === STATES.OPEN) {
|
||
|
|
if (Date.now() - b.lastFailureAt > RECOVERY_TIMEOUT) {
|
||
|
|
const prev = b.state;
|
||
|
|
b.state = STATES.HALF_OPEN;
|
||
|
|
b.successCount = 0;
|
||
|
|
_log(provider, prev, STATES.HALF_OPEN, 'recovery_timeout_elapsed');
|
||
|
|
b.totalRequests++;
|
||
|
|
return true;
|
||
|
|
}
|
||
|
|
b.totalRejected = (b.totalRejected || 0) + 1;
|
||
|
|
return false;
|
||
|
|
}
|
||
|
|
|
||
|
|
// HALF_OPEN: 允许有限探测
|
||
|
|
b.totalRequests++;
|
||
|
|
return true;
|
||
|
|
}
|
||
|
|
|
||
|
|
/**
|
||
|
|
* 记录成功
|
||
|
|
*/
|
||
|
|
function recordSuccess(provider) {
|
||
|
|
const b = _getBreaker(provider);
|
||
|
|
b.totalSuccesses++;
|
||
|
|
|
||
|
|
if (b.state === STATES.HALF_OPEN) {
|
||
|
|
b.successCount++;
|
||
|
|
if (b.successCount >= SUCCESS_THRESHOLD) {
|
||
|
|
const prev = b.state;
|
||
|
|
b.state = STATES.CLOSED;
|
||
|
|
b.failures = 0;
|
||
|
|
b.successCount = 0;
|
||
|
|
b.lastRecoveryAt = Date.now();
|
||
|
|
// 记录恢复耗时
|
||
|
|
if (b.lastTripAt > 0) {
|
||
|
|
b.tripDurations.push(Date.now() - b.lastTripAt);
|
||
|
|
if (b.tripDurations.length > 10) b.tripDurations.shift();
|
||
|
|
}
|
||
|
|
_log(provider, prev, STATES.CLOSED, `${SUCCESS_THRESHOLD}_consecutive_successes`);
|
||
|
|
}
|
||
|
|
} else if (b.state === STATES.CLOSED) {
|
||
|
|
// 使用衰减后的失败数, 而非直接清零
|
||
|
|
// 这样偶尔的成功会让累积的失败自然消散
|
||
|
|
b.failures = Math.max(0, _decayedFailures(b) - 0.5);
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
/**
|
||
|
|
* 记录失败
|
||
|
|
*/
|
||
|
|
function recordFailure(provider) {
|
||
|
|
const b = _getBreaker(provider);
|
||
|
|
b.totalFailures++;
|
||
|
|
b.lastFailureAt = Date.now();
|
||
|
|
|
||
|
|
if (b.state === STATES.HALF_OPEN) {
|
||
|
|
const prev = b.state;
|
||
|
|
b.state = STATES.OPEN;
|
||
|
|
b.totalTrips++;
|
||
|
|
b.lastTripAt = Date.now();
|
||
|
|
_log(provider, prev, STATES.OPEN, 'half_open_failure');
|
||
|
|
} else if (b.state === STATES.CLOSED) {
|
||
|
|
// 使用衰减后的值 + 1
|
||
|
|
b.failures = _decayedFailures(b) + 1;
|
||
|
|
if (b.failures >= FAILURE_THRESHOLD) {
|
||
|
|
const prev = b.state;
|
||
|
|
b.state = STATES.OPEN;
|
||
|
|
b.totalTrips++;
|
||
|
|
b.lastTripAt = Date.now();
|
||
|
|
_log(provider, prev, STATES.OPEN, `${FAILURE_THRESHOLD}_failures_reached`);
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
/**
|
||
|
|
* 获取所有熔断器状态快照 (管理后台用)
|
||
|
|
*/
|
||
|
|
function getStatus() {
|
||
|
|
const snapshot = {};
|
||
|
|
for (const [name, b] of Object.entries(breakers)) {
|
||
|
|
const successRate = b.totalRequests > 0
|
||
|
|
? Math.round(b.totalSuccesses / b.totalRequests * 10000) / 100
|
||
|
|
: 100;
|
||
|
|
const avgRecoveryMs = b.tripDurations.length > 0
|
||
|
|
? Math.round(b.tripDurations.reduce((a, c) => a + c, 0) / b.tripDurations.length)
|
||
|
|
: 0;
|
||
|
|
|
||
|
|
snapshot[name] = {
|
||
|
|
state: b.state,
|
||
|
|
failures: Math.round(_decayedFailures(b) * 100) / 100,
|
||
|
|
rawFailures: b.failures,
|
||
|
|
totalTrips: b.totalTrips,
|
||
|
|
lastFailureAt: b.lastFailureAt ? new Date(b.lastFailureAt).toISOString() : null,
|
||
|
|
lastRecoveryAt: b.lastRecoveryAt ? new Date(b.lastRecoveryAt).toISOString() : null,
|
||
|
|
stats: {
|
||
|
|
totalRequests: b.totalRequests,
|
||
|
|
totalSuccesses: b.totalSuccesses,
|
||
|
|
totalFailures: b.totalFailures,
|
||
|
|
successRate: successRate + '%',
|
||
|
|
avgRecoveryMs,
|
||
|
|
},
|
||
|
|
};
|
||
|
|
}
|
||
|
|
return snapshot;
|
||
|
|
}
|
||
|
|
|
||
|
|
/**
|
||
|
|
* 获取状态变迁日志
|
||
|
|
*/
|
||
|
|
function getTransitionLog(limit) {
|
||
|
|
const n = limit || 20;
|
||
|
|
return transitionLog.slice(-n);
|
||
|
|
}
|
||
|
|
|
||
|
|
/**
|
||
|
|
* 手动重置单个 provider
|
||
|
|
*/
|
||
|
|
function reset(provider) {
|
||
|
|
if (breakers[provider]) {
|
||
|
|
const prev = breakers[provider].state;
|
||
|
|
breakers[provider].state = STATES.CLOSED;
|
||
|
|
breakers[provider].failures = 0;
|
||
|
|
breakers[provider].lastFailureAt = 0;
|
||
|
|
breakers[provider].successCount = 0;
|
||
|
|
_log(provider, prev, STATES.CLOSED, 'manual_reset');
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
/**
|
||
|
|
* 重置所有 provider
|
||
|
|
*/
|
||
|
|
function resetAll() {
|
||
|
|
for (const name of Object.keys(breakers)) {
|
||
|
|
reset(name);
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
module.exports = {
|
||
|
|
canRequest, recordSuccess, recordFailure,
|
||
|
|
getStatus, getTransitionLog,
|
||
|
|
reset, resetAll,
|
||
|
|
STATES,
|
||
|
|
};
|