crash loop detection
This commit is contained in:
parent
1bac533313
commit
796c0e1226
@ -84,7 +84,7 @@ class Controller extends EventEmitter {
|
|||||||
shard.on('disconnect', () => this.logger.warn(`Shard ${shard.id} has disconnected`));
|
shard.on('disconnect', () => this.logger.warn(`Shard ${shard.id} has disconnected`));
|
||||||
shard.on('spawn', () => this.logger.info(`Shard ${shard.id} spawned`));
|
shard.on('spawn', () => this.logger.info(`Shard ${shard.id} spawned`));
|
||||||
shard.on('error', (err) => this.logger.error(`Shard ${shard.id} ran into an error:\n${err.stack}`));
|
shard.on('error', (err) => this.logger.error(`Shard ${shard.id} ran into an error:\n${err.stack}`));
|
||||||
shard.on('warn', (msg) => this.logger.warn(`Warning from shard ${shard.id}: ${msg}`));
|
shard.on('warn', (msg) => this.logger.warn(`Warning from shard ${shard.id}: ${msg}`, { broadcast: true }));
|
||||||
|
|
||||||
shard.on('message', (msg) => this._handleMessage(shard, msg));
|
shard.on('message', (msg) => this._handleMessage(shard, msg));
|
||||||
}
|
}
|
||||||
|
@ -38,10 +38,7 @@ class Shard extends EventEmitter {
|
|||||||
|
|
||||||
this.process = ChildProcess.fork(this.filePath, this.args, { env: { ...this.env, SHARD_ID: this.id }, execArgv: this.execArgv })
|
this.process = ChildProcess.fork(this.filePath, this.args, { env: { ...this.env, SHARD_ID: this.id }, execArgv: this.execArgv })
|
||||||
.on('message', this._handleMessage.bind(this))
|
.on('message', this._handleMessage.bind(this))
|
||||||
.on('exit', () => {
|
.on('exit', this._handleExit().bind(this))
|
||||||
this.crashes.push(Date.now() - this.spawnedAt);
|
|
||||||
this._handleExit();
|
|
||||||
})
|
|
||||||
.on('disconnect', this._handleDisconnect.bind(this)); // Don't know if this is going to help, but monitoring whether this gets called whenever a process on its own closes the IPC channel
|
.on('disconnect', this._handleDisconnect.bind(this)); // Don't know if this is going to help, but monitoring whether this gets called whenever a process on its own closes the IPC channel
|
||||||
|
|
||||||
this.process.once('spawn', () => {
|
this.process.once('spawn', () => {
|
||||||
@ -84,9 +81,9 @@ class Shard extends EventEmitter {
|
|||||||
resolve();
|
resolve();
|
||||||
}, 5000);
|
}, 5000);
|
||||||
// Gracefully handle exit
|
// Gracefully handle exit
|
||||||
this.process.once('exit', () => {
|
this.process.once('exit', (code, signal) => {
|
||||||
clearTimeout(to);
|
clearTimeout(to);
|
||||||
this._handleExit(false);
|
this._handleExit(code, signal, false);
|
||||||
resolve();
|
resolve();
|
||||||
});
|
});
|
||||||
// Clear the force kill timeout if the process responds with a shutdown echo, allowing it time to gracefully close all connections
|
// Clear the force kill timeout if the process responds with a shutdown echo, allowing it time to gracefully close all connections
|
||||||
@ -97,7 +94,7 @@ class Shard extends EventEmitter {
|
|||||||
this.process.send({ _shutdown: true });
|
this.process.send({ _shutdown: true });
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
this._handleExit(false);
|
this._handleExit(null, null, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
send (message) {
|
send (message) {
|
||||||
@ -125,7 +122,7 @@ class Shard extends EventEmitter {
|
|||||||
this.process.removeAllListeners();
|
this.process.removeAllListeners();
|
||||||
this.ready = false;
|
this.ready = false;
|
||||||
this.fatal = true;
|
this.fatal = true;
|
||||||
this._handleExit(false);
|
this._handleExit(null, null, false);
|
||||||
this.emit('fatal', message);
|
this.emit('fatal', message);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -138,27 +135,32 @@ class Shard extends EventEmitter {
|
|||||||
this.emit('disconnect');
|
this.emit('disconnect');
|
||||||
}
|
}
|
||||||
|
|
||||||
_handleExit (respawn = this._respawn) {
|
_handleExit (code, signal, respawn = this._respawn) {
|
||||||
this.process.removeAllListeners();
|
this.process.removeAllListeners();
|
||||||
this.emit('death');
|
this.emit('death');
|
||||||
|
|
||||||
|
if (code !== 0) this.crashes.push(Date.now() - this.spawnedAt);
|
||||||
|
|
||||||
this.ready = false;
|
this.ready = false;
|
||||||
this.process = null;
|
this.process = null;
|
||||||
|
|
||||||
if (respawn) {
|
const len = this.crashes.length;
|
||||||
const len = this.crashes.length;
|
if (len > 2) {
|
||||||
if (len > 2) {
|
const last3 = this.crashes.slice(len - 3);
|
||||||
const last3 = this.crashes.slice(len - 3);
|
const sum = last3.reduce((s, val) => {
|
||||||
const sum = last3.reduce((s, val) => {
|
s += val;
|
||||||
s += val;
|
return s;
|
||||||
return s;
|
}, 0);
|
||||||
}, 0);
|
const avg = sum / 3;
|
||||||
const avg = sum / 3;
|
// If average run duration is below 60 mins send a notification about detected crash loop and stop the respawn
|
||||||
// If average run duration is below 5 mins send a notification about potential crash loop
|
if (avg < 60 * 60 * 1000) {
|
||||||
if (avg < 5 * 60 * 1000) this.emit('warn', `Potentially in a crash loop, average run time for the last 3 spawns: ${avg}`);
|
this.emit('warn', `Crash loop detected, average run time for the last 3 spawns: ${avg}`);
|
||||||
}
|
}
|
||||||
this.spawn().catch(err => this.emit('error', err));
|
respawn = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (respawn) this.spawn().catch(err => this.emit('error', err));
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user