crash loop detection

This commit is contained in:
Erik 2023-02-06 14:08:41 +02:00
parent 1bac533313
commit 796c0e1226
Signed by: Navy.gif
GPG Key ID: 2532FBBB61C65A68
2 changed files with 24 additions and 22 deletions

View File

@ -84,7 +84,7 @@ class Controller extends EventEmitter {
shard.on('disconnect', () => this.logger.warn(`Shard ${shard.id} has disconnected`)); shard.on('disconnect', () => this.logger.warn(`Shard ${shard.id} has disconnected`));
shard.on('spawn', () => this.logger.info(`Shard ${shard.id} spawned`)); shard.on('spawn', () => this.logger.info(`Shard ${shard.id} spawned`));
shard.on('error', (err) => this.logger.error(`Shard ${shard.id} ran into an error:\n${err.stack}`)); shard.on('error', (err) => this.logger.error(`Shard ${shard.id} ran into an error:\n${err.stack}`));
shard.on('warn', (msg) => this.logger.warn(`Warning from shard ${shard.id}: ${msg}`)); shard.on('warn', (msg) => this.logger.warn(`Warning from shard ${shard.id}: ${msg}`, { broadcast: true }));
shard.on('message', (msg) => this._handleMessage(shard, msg)); shard.on('message', (msg) => this._handleMessage(shard, msg));
} }

View File

@ -38,10 +38,7 @@ class Shard extends EventEmitter {
this.process = ChildProcess.fork(this.filePath, this.args, { env: { ...this.env, SHARD_ID: this.id }, execArgv: this.execArgv }) this.process = ChildProcess.fork(this.filePath, this.args, { env: { ...this.env, SHARD_ID: this.id }, execArgv: this.execArgv })
.on('message', this._handleMessage.bind(this)) .on('message', this._handleMessage.bind(this))
.on('exit', () => { .on('exit', this._handleExit().bind(this))
this.crashes.push(Date.now() - this.spawnedAt);
this._handleExit();
})
.on('disconnect', this._handleDisconnect.bind(this)); // Don't know if this is going to help, but monitoring whether this gets called whenever a process on its own closes the IPC channel .on('disconnect', this._handleDisconnect.bind(this)); // Don't know if this is going to help, but monitoring whether this gets called whenever a process on its own closes the IPC channel
this.process.once('spawn', () => { this.process.once('spawn', () => {
@ -84,9 +81,9 @@ class Shard extends EventEmitter {
resolve(); resolve();
}, 5000); }, 5000);
// Gracefully handle exit // Gracefully handle exit
this.process.once('exit', () => { this.process.once('exit', (code, signal) => {
clearTimeout(to); clearTimeout(to);
this._handleExit(false); this._handleExit(code, signal, false);
resolve(); resolve();
}); });
// Clear the force kill timeout if the process responds with a shutdown echo, allowing it time to gracefully close all connections // Clear the force kill timeout if the process responds with a shutdown echo, allowing it time to gracefully close all connections
@ -97,7 +94,7 @@ class Shard extends EventEmitter {
this.process.send({ _shutdown: true }); this.process.send({ _shutdown: true });
}); });
} }
this._handleExit(false); this._handleExit(null, null, false);
} }
send (message) { send (message) {
@ -125,7 +122,7 @@ class Shard extends EventEmitter {
this.process.removeAllListeners(); this.process.removeAllListeners();
this.ready = false; this.ready = false;
this.fatal = true; this.fatal = true;
this._handleExit(false); this._handleExit(null, null, false);
this.emit('fatal', message); this.emit('fatal', message);
} }
} }
@ -138,27 +135,32 @@ class Shard extends EventEmitter {
this.emit('disconnect'); this.emit('disconnect');
} }
_handleExit (respawn = this._respawn) { _handleExit (code, signal, respawn = this._respawn) {
this.process.removeAllListeners(); this.process.removeAllListeners();
this.emit('death'); this.emit('death');
if (code !== 0) this.crashes.push(Date.now() - this.spawnedAt);
this.ready = false; this.ready = false;
this.process = null; this.process = null;
if (respawn) { const len = this.crashes.length;
const len = this.crashes.length; if (len > 2) {
if (len > 2) { const last3 = this.crashes.slice(len - 3);
const last3 = this.crashes.slice(len - 3); const sum = last3.reduce((s, val) => {
const sum = last3.reduce((s, val) => { s += val;
s += val; return s;
return s; }, 0);
}, 0); const avg = sum / 3;
const avg = sum / 3; // If average run duration is below 60 mins send a notification about detected crash loop and stop the respawn
// If average run duration is below 5 mins send a notification about potential crash loop if (avg < 60 * 60 * 1000) {
if (avg < 5 * 60 * 1000) this.emit('warn', `Potentially in a crash loop, average run time for the last 3 spawns: ${avg}`); this.emit('warn', `Crash loop detected, average run time for the last 3 spawns: ${avg}`);
} }
this.spawn().catch(err => this.emit('error', err)); respawn = false;
} }
if (respawn) this.spawn().catch(err => this.emit('error', err));
} }
} }