cleanup + linkfilter improvements

This commit is contained in:
Erik 2022-07-02 19:09:35 +03:00
parent c140073d7a
commit 1dfc4a7e6b
Signed by: Navy.gif
GPG Key ID: 811EC0CD80E7E5FB
5 changed files with 68 additions and 107 deletions

View File

@ -44,4 +44,9 @@ Missing target member for action **{actionType}**.
[LINKFILTER_WARN]
Failed to validate domain `{domain}`.
The domain has a valid top level domain (e.g. .com, .net) but failed a DNS check.
{emoji_warning} **Warning**
Exercise caution when clicking on unknown links. It is advised to run the link through a link checker such as urlscan.io or similar.
Full link: ||{link}||
If this is a valid domain, add it to the greylist to ensure it is filtered.

View File

@ -1,4 +1,5 @@
const { EventEmitter } = require('events');
const { inspect } = require('util');
const SlashCommandManager = require('./rest/SlashCommandManager.js');
const { ShardingManager } = require('./shard/');
@ -6,6 +7,10 @@ const Logger = require('./Logger.js');
const Metrics = require('./Metrics.js');
const ApiClientUtil = require('./ApiClientUtil.js');
// Available for evals
// eslint-disable-next-line no-unused-vars
const ClientUtils = require('./ClientUtils.js');
class BaseClient extends EventEmitter {
constructor(options, version) {
@ -85,7 +90,7 @@ class BaseClient extends EventEmitter {
* @memberof Manager
* @private
*/
async eval(shard, { script }) {
async eval(shard, { script, debug }) {
this.logger.info(`Incoming manager eval from shard ${shard.id}:\n${script}`);
let result = null,
@ -95,6 +100,7 @@ class BaseClient extends EventEmitter {
// eslint-disable-next-line no-eval
result = await eval(script);
//if(typeof result !== 'string') result = inspect(result);
if(debug) this.logger.debug(`Eval result: ${inspect(result)}`);
} catch (err) {
error = err.stack || err;
}

View File

@ -0,0 +1,23 @@
const { Util } = require("../utilities");
/**
 * Static container for helper functions the clients use on the main process.
 * Never instantiated; every member is static.
 */
class ClientUtils {

    /**
     * Results kept between calls. `cache.tlds` holds the last TLD list
     * returned by {@link ClientUtils.fetchTlds}.
     */
    static cache = {};

    constructor() {
        throw new Error('Static class, may not be instantiated.');
    }

    /**
     * Downloads the IANA list of top level domains and returns it lower-cased.
     *
     * Comment lines (starting with `#`) and blank lines are dropped. The blank
     * line check matters because the text ends with a newline, which would
     * otherwise leave an empty string in the list; `\r` is stripped as well
     * so CRLF input does not leave stray carriage returns on every entry.
     *
     * @param {boolean} [force=false] Download again even if a cached list exists.
     * @returns {Promise<string[]>} Lower-cased TLDs, in the order the file lists them.
     */
    static async fetchTlds(force = false) {
        if (this.cache.tlds && !force) return this.cache.tlds;

        const buffer = await Util.downloadAsBuffer('https://data.iana.org/TLD/tlds-alpha-by-domain.txt');
        const list = buffer.toString()
            .split(/\r?\n/u)
            .map((line) => line.trim().toLowerCase())
            .filter((line) => line.length > 0 && !line.startsWith('#'));

        this.cache.tlds = list;
        return list;
    }

}
module.exports = ClientUtils;

View File

@ -85,6 +85,7 @@ class Registry {
module.components.set(component.resolveable, component);
}
if (component.initialise && typeof component.initialise === 'function') await component.initialise();
this.components.set(component.resolveable, component);
if(!silent) this.client.emit('componentUpdate', { component, type: 'LOAD' });
return component;

View File

@ -29,9 +29,6 @@ const CONSTANTS = {
}
};
const linkRegG = /(https?:\/\/(www\.)?)?(?<domain>([a-z0-9-]{1,63}\.)?([a-z0-9-]{2,63})(\.[a-z0-9-]{2,63})(\.[a-z0-9-]{2,63})?)(\/\S*)?/iug;
const linkReg = /(https?:\/\/(www\.)?)?(?<domain>([a-z0-9-]{1,63}\.)?([a-z0-9-]{2,63})(\.[a-z0-9-]{2,63})(\.[a-z0-9-]{2,63})?)(\/\S*)?/iu;
// TODO:
// Clean up commented out code once testing of new code is done
// Implement missing automod features -- done
@ -60,17 +57,38 @@ module.exports = class AutoModeration extends Observer {
['interactionCreate', this.flagAction.bind(this)]
];
this.whitelist = new BinaryTree(this.client, FilterPresets.whitelist);
this.topLevelDomains = null;
// Moved to FilterUtil
// this.whitelist = new BinaryTree(this.client, FilterPresets.whitelist);
this.executing = {};
this.regex = {
invite: /((discord)?\s*\.?\s*gg\s*|discord(app)?\.com\/invite)\/\s?(?<code>[a-z0-9]+)/i,
linkRegG: /(https?:\/\/(www\.)?)?(?<domain>([a-z0-9-]{1,63}\.)?([a-z0-9-]{2,63})(\.[a-z0-9-]{2,63})(\.[a-z0-9-]{2,63})?)(\/\S*)?/iug,
linkReg: /(https?:\/\/(www\.)?)?(?<domain>([a-z0-9-]{1,63}\.)?([a-z0-9-]{2,63})(\.[a-z0-9-]{2,63})(\.[a-z0-9-]{2,63})?)(\/\S*)?/iu,
mention: /<@!?(?<id>[0-9]{18,22})>/,
mentionG: /<@!?(?<id>[0-9]{18,22})>/g,
};
}
async initialise() {
// Fetch a list of TLDs from iana
const tldList = await this.client.managerEval(`
(() => {
return ClientUtils.fetchTlds()
})()
`).catch(this.logger.error.bind(this.logger));
const middlePoint = Math.floor(tldList.length / 2);
const [midEntry] = tldList.splice(middlePoint, 1);
tldList.splice(0, 0, midEntry);
this.topLevelDomains = new BinaryTree(this.client, tldList);
this.topLevelDomains.add('onion');
}
async _moderate(action, wrapper, channel, member, reason, filterResult, moderator) {
// Prevent simultaneous execution of the same filter on the same user when spamming
@ -150,29 +168,11 @@ module.exports = class AutoModeration extends Observer {
// 2. Filter explicit - no bypass checking (unless you count normalising the text, i.e. emoji letters => normal letters)
if (explicit.length && !filterResult.matched) {
//filterResult = FilterUtil.filterExplicit(words, explicit);
// if(filterResult)
const result = FilterUtil.filterExplicit(words, explicit);
if (result) {
log += `\nMessage matched with "${result.match}" in the explicit list.\nFull content: ${content}`;
filterResult = result;
}
// for (const word of explicit) {
// //Do it like this instead of regex so it doesn't match stuff like Scunthorpe with cunt
// if (words.some((_word) => _word === word)) {
// log += `\nMessage matched with "${word}" in the explicit list.\nFull content: ${content}`;
// filterResult = {
// match: word,
// matched: true,
// matcher: 'explicit',
// _matcher: word,
// type: 'explicit'
// };
// break;
// }
// }
}
@ -184,40 +184,7 @@ module.exports = class AutoModeration extends Observer {
log += `\nMessage matched with "${result.matcher}" in the regex list.\nMatch: ${result._raw}, Full word: ${result.match}\nFull content: ${content}`;
filterResult = result;
}
// for (const reg of regex) {
// const match = content.toLowerCase().match(new RegExp(`(?:^|\\s)(${reg})`, 'iu')); // (?:^|\\s) |un
// if (match) {
// //log += `\next reg: ${tmp}`;
// const fullWord = words.find((word) => word.includes(match[1]));
// let inWL = false;
// try { // This is for debugging only
// inWL = this.whitelist.find(fullWord);
// } catch (err) {
// this.logger.error(`Whitelist.find error: ${fullWord} ${match[1]} ${words}`);
// }
// if (inWL || whitelist.some((word) => word === fullWord)) continue;
// const diff = Math.abs(fullWord.length - match[0].length);
// if (diff > 3) {
// this.logger.debug(`Match length diff: ${diff} MATCH: ${match[0]} FULL: ${fullWord}`);
// continue;
// }
// log += `\nMessage matched with "${reg}" in the regex list.\nMatch: ${match[0]}, Full word: ${fullWord}\nFull content: ${content}`;
// filterResult = {
// match: fullWord,
// matched: true,
// _matcher: match[1].toLowerCase(),
// matcher: `Regex: __${reg}__`,
// type: 'regex'
// };
// break;
// }
// }
}
// 4. Filter fuzzy
@ -228,52 +195,7 @@ module.exports = class AutoModeration extends Observer {
filterResult = result;
log += `\nMessage matched with "${result._matcher}" in fuzzy.\nMatched word: ${result.match}\nFull content: ${content}\nSimilarity: ${result._sim}\nThreshold: ${result._threshold}`;
}
// const text = words.join('').replace(/\s/u, '');
// let threshold = (0.93 - 0.133 * Math.log(text.length)).toFixed(3);
// if (threshold < 0.6) threshold = 0.6;
// else if (threshold > 0.9) threshold = 0.9;
// outer:
// for (const _word of fuzzy) {
// for (const word of words) {
// const sim = similarity(word, _word);
// let threshold = (0.93 - 0.133 * Math.log(word.length)).toFixed(3);
// if (threshold < 0.6) threshold = 0.6;
// else if (threshold > 0.9) threshold = 0.9;
// if (sim >= threshold && Math.abs(_word.length - word.length) <= 2) {
// if (this.whitelist.find(word) || whitelist.some((w) => w === word) && sim < 1) continue;
// log += `\nMessage matched with "${_word}" in fuzzy.\nMatched word: ${word}\nFull content: ${content}\nSimilarity: ${sim}\nThreshold: ${threshold}`;
// filterResult = {
// match: word,
// matched: true,
// _matcher: _word,
// matcher: `fuzzy [\`${_word}\`, \`${sim}\`, \`${threshold}\`]`,
// type: 'fuzzy'
// };
// break outer;
// }
// }
// const sim = similarity(text, _word);
// if (sim >= threshold && Math.abs(_word.length - text.length) <= 2) {
// if (this.whitelist.find(text) || whitelist.some((w) => w === text) && sim < 1) continue;
// log += `\nMessage matched with "${_word}" in fuzzy.\nMatched word: ${text}\nFull content: ${content}\nSimilarity: ${sim}\nThreshold: ${threshold}`;
// filterResult = {
// match: text,
// matched: true,
// _matcher: _word,
// matcher: `fuzzy [\`${_word}\`, \`${sim}\`, \`${threshold}\`]`,
// type: 'fuzzy'
// };
// break;
// }
// //this.client.logger.debug(`Message did not match with "${_word}" in fuzzy.\nFull content: ${content}\nSimilarity: ${sim}\nThreshold: ${threshold}`);
// }
}
// 5. Remove message, inline response and add a reason to msg object
@ -510,15 +432,15 @@ module.exports = class AutoModeration extends Observer {
const msg = edited || message;
if (!msg.content) return;
const content = msg.content.split('').join(''); //Copy the string...
let matches = content.match(linkRegG);
if (!matches) matches = content.replace(/\s/u, '').match(linkRegG);
let matches = content.match(this.regex.linkRegG);
if (!matches) matches = content.replace(/\s/u, '').match(this.regex.linkRegG);
if (!matches) return;
let remove = false;
const filterResult = {};
let log = `${guild.name} Link filter debug:`;
for (const match of matches) {
const { domain } = match.match(linkReg).groups;
const { domain } = match.match(this.regex.linkReg).groups;
log += `\nMatched link ${match}: `;
const predicate = (dom) => {
@ -543,9 +465,13 @@ module.exports = class AutoModeration extends Observer {
continue;
}
const parts = domain.split('.');
const validTld = this.topLevelDomains.find(parts[parts.length - 1]);
// console.log(parts, validTld);
if (!validTld) continue;
const valid = await resolver.validateDomain(domain);
if (!valid) {
this.client.emit('linkFilterWarn', { guild: wrapper, message: wrapper.format('LINKFILTER_WARN', { domain }) });
this.client.emit('linkFilterWarn', { guild: wrapper, message: wrapper.format('LINKFILTER_WARN', { domain, link: match }) });
continue;
}