crash loop detection

This commit is contained in:
Erik 2023-02-06 14:08:41 +02:00
parent 1bac533313
commit 796c0e1226
Signed by: Navy.gif
GPG Key ID: 2532FBBB61C65A68
2 changed files with 24 additions and 22 deletions

View File

@ -84,7 +84,7 @@ class Controller extends EventEmitter {
shard.on('disconnect', () => this.logger.warn(`Shard ${shard.id} has disconnected`));
shard.on('spawn', () => this.logger.info(`Shard ${shard.id} spawned`));
shard.on('error', (err) => this.logger.error(`Shard ${shard.id} ran into an error:\n${err.stack}`));
shard.on('warn', (msg) => this.logger.warn(`Warning from shard ${shard.id}: ${msg}`));
shard.on('warn', (msg) => this.logger.warn(`Warning from shard ${shard.id}: ${msg}`, { broadcast: true }));
shard.on('message', (msg) => this._handleMessage(shard, msg));
}

View File

@ -38,10 +38,7 @@ class Shard extends EventEmitter {
this.process = ChildProcess.fork(this.filePath, this.args, { env: { ...this.env, SHARD_ID: this.id }, execArgv: this.execArgv })
.on('message', this._handleMessage.bind(this))
.on('exit', () => {
this.crashes.push(Date.now() - this.spawnedAt);
this._handleExit();
})
.on('exit', this._handleExit().bind(this))
.on('disconnect', this._handleDisconnect.bind(this)); // Don't know if this is going to help, but monitoring whether this gets called whenever a process on its own closes the IPC channel
this.process.once('spawn', () => {
@ -84,9 +81,9 @@ class Shard extends EventEmitter {
resolve();
}, 5000);
// Gracefully handle exit
this.process.once('exit', () => {
this.process.once('exit', (code, signal) => {
clearTimeout(to);
this._handleExit(false);
this._handleExit(code, signal, false);
resolve();
});
// Clear the force kill timeout if the process responds with a shutdown echo, allowing it time to gracefully close all connections
@ -97,7 +94,7 @@ class Shard extends EventEmitter {
this.process.send({ _shutdown: true });
});
}
this._handleExit(false);
this._handleExit(null, null, false);
}
send (message) {
@ -125,7 +122,7 @@ class Shard extends EventEmitter {
this.process.removeAllListeners();
this.ready = false;
this.fatal = true;
this._handleExit(false);
this._handleExit(null, null, false);
this.emit('fatal', message);
}
}
@ -138,27 +135,32 @@ class Shard extends EventEmitter {
this.emit('disconnect');
}
_handleExit (respawn = this._respawn) {
_handleExit (code, signal, respawn = this._respawn) {
this.process.removeAllListeners();
this.emit('death');
if (code !== 0) this.crashes.push(Date.now() - this.spawnedAt);
this.ready = false;
this.process = null;
if (respawn) {
const len = this.crashes.length;
if (len > 2) {
const last3 = this.crashes.slice(len - 3);
const sum = last3.reduce((s, val) => {
s += val;
return s;
}, 0);
const avg = sum / 3;
// If average run duration is below 5 mins send a notification about potential crash loop
if (avg < 5 * 60 * 1000) this.emit('warn', `Potentially in a crash loop, average run time for the last 3 spawns: ${avg}`);
const len = this.crashes.length;
if (len > 2) {
const last3 = this.crashes.slice(len - 3);
const sum = last3.reduce((s, val) => {
s += val;
return s;
}, 0);
const avg = sum / 3;
// If average run duration is below 60 mins send a notification about detected crash loop and stop the respawn
if (avg < 60 * 60 * 1000) {
this.emit('warn', `Crash loop detected, average run time for the last 3 spawns: ${avg}`);
}
this.spawn().catch(err => this.emit('error', err));
respawn = false;
}
if (respawn) this.spawn().catch(err => this.emit('error', err));
}
}