From 4b80a5ad3011cdb5350606d75fc8c830fe386f8c Mon Sep 17 00:00:00 2001
From: Navy
Date: Wed, 30 Sep 2020 19:32:48 +0300
Subject: [PATCH] experimental word filter implementation

---
 .../components/observers/Automoderation.js    | 155 +++++++++++++++++-
 1 file changed, 149 insertions(+), 6 deletions(-)

diff --git a/structure/client/components/observers/Automoderation.js b/structure/client/components/observers/Automoderation.js
index 574283c..50bc489 100644
--- a/structure/client/components/observers/Automoderation.js
+++ b/structure/client/components/observers/Automoderation.js
@@ -1,13 +1,16 @@
-const { Observer } = require('../../../interfaces/');
+const similarity = require('similarity');
+
+const { Observer, BinaryTree } = require('../../../interfaces');
+const { FilterUtil, FilterPresets } = require('../../../../util');
 
 const CONSTANTS = {};
 
-class Automoderation extends Observer {
+module.exports = class AutoModeration extends Observer {
 
     constructor(client) {
 
         super(client, {
-            name: 'automoderation',
+            name: 'autoModeration',
             priority: 1
         });
 
@@ -21,10 +24,152 @@ class Automoderation extends Observer {
             ['message', this.filterMentions.bind(this)]
         ];
 
+        this.whitelist = new BinaryTree(this.client, FilterPresets.whitelist);
+
     }
 
+    /**
+     * Word filter. Checks a message against the guild's `wordFilter` settings in four stages
+     * (preset lists, explicit words, fuzzy similarity, tokenized substrings). On a match the
+     * message is deleted and, unless the filter is silent, a response is sent whose deletion
+     * is requested with a 10000 ms timeout.
+     *
+     * @param {object} message Message to check; guild, author, channel and member are read from it
+     * @param {object} [edited] Edited version of the message; when given it is the one checked and deleted
+     * @returns {Promise<void>}
+     */
     async filterWords(message, edited) {
 
+        const { guild, author, channel } = message;
+        if (!guild || author.bot) return;
+
+        // A bare .catch() does not swallow the rejection, so pass a handler and stop when no member could be resolved
+        const member = message.member || await guild.members.fetch(author.id).catch(() => null);
+        if (!member) return;
+
+        const settings = await guild.settings();
+        const setting = settings.wordFilter;
+        // `actions` is not read yet; step 6 below is still a placeholder
+        const { bypass, ignore, enabled, silent, explicit, fuzzy, tokenized, whitelist, actions, presets } = setting;
+        const roles = member.roles.cache.map((r) => r.id);
+
+        // `roles` already holds ids, so they are compared directly against the bypass list
+        if (!enabled || roles.some((r) => bypass.includes(r)) || ignore.includes(channel.id)) return;
+
+        // Which message obj to work with
+        const msg = edited || message;
+        this.client.logger.debug(`Pre norm:\n${msg.cleanContent}`);
+        const content = FilterUtil.normalize(msg.cleanContent);
+        this.client.logger.debug(`Normalized\n${content}`);
+
+        let result = { match: null, matched: false, matcher: null, preset: false };
+        const lowered = content.toLowerCase();
+        const words = lowered.split(' ').filter((elem) => elem.length);
+        // Remove any potential bypass characters
+        const _words = words.map((word) => word.replace(/[.'*]/gu, ''));
+
+        // 1. Filter for preset lists
+        if (presets.length) {
+
+            // Also check for spaced out words, ex "f u c k"
+            const text = _words.join('').replace(/\s/gu, '');
+            // The joined text is only consulted when it has as many characters as there are words
+            const spacedOut = text.length === words.length;
+
+            for (const preset of presets) {
+
+                const list = FilterPresets[preset];
+                // A missing preset would throw on join and an empty one would build a regex that matches everything
+                if (!list || !list.length) continue;
+
+                // Combine array of presets to one expression
+                const regex = new RegExp(`(${list.join(')|(')})`, 'ui');
+                // Parenthesised: without it `||` binds tighter than `?:` and a content match was discarded
+                const match = content.match(regex) || (spacedOut ? text.match(regex) : null);
+                if (!match) continue;
+                this.client.logger.debug(`Message matched with "${preset}" preset list.\nMatch: ${match[0]}\nFull content: ${content}`);
+                result = { match: match[0], matched: true, matcher: preset, preset: true };
+                break;
+
+            }
+
+        }
+
+        // 2. Filter explicit - no bypass checking (unless you count normalising the text, i.e. emoji letters => normal letters)
+        if (explicit.length && !result.matched) {
+
+            for (const word of explicit) {
+                // Do it like this instead of regex so it doesn't match stuff like Scunthorpe with cunt
+                if (words.includes(word)) {
+                    this.client.logger.debug(`Message matched with "${word}" in the explicit list.\nFull content: ${content}`);
+                    result = { match: word, matched: true, matcher: 'explicit', preset: false };
+                    break;
+                }
+            }
+
+        }
+
+        // 3. Filter fuzzy
+        if (fuzzy.length && !result.matched) {
+
+            const text = words.join('').replace(/\s/gu, '');
+            const threshold = 0.93 - 0.165 * Math.log(text.length);
+
+            outer:
+            for (const _word of fuzzy) {
+
+                for (const word of words) {
+                    const sim = similarity(word, _word);
+                    // Named apart from the whole-text threshold above so neither shadows the other
+                    const wordThreshold = 0.93 - 0.165 * Math.log(word.length);
+                    if (sim >= wordThreshold) {
+                        // NOTE(review): parentheses spell out the existing precedence, a || (b && c); confirm the preset whitelist is meant to win even on an exact match
+                        if (this.whitelist.find(word) || (whitelist.includes(word) && sim < 1)) continue;
+                        this.client.logger.debug(`Message matched with "${_word}" in fuzzy.\nMatched word: ${word}\nFull content: ${content}\nSimilarity: ${sim}\nThreshold: ${wordThreshold}`);
+                        result = { match: word, matched: true, _matcher: _word, matcher: `fuzzy [\`${_word}\`, \`${sim}\`, \`${wordThreshold}\`]`, preset: false };
+                        break outer;
+                    }
+                }
+
+                const sim = similarity(text, _word);
+                if (sim >= threshold) {
+                    if (this.whitelist.find(text) || (whitelist.includes(text) && sim < 1)) continue;
+                    this.client.logger.debug(`Message matched with "${_word}" in fuzzy.\nMatched word: ${text}\nFull content: ${content}\nSimilarity: ${sim}\nThreshold: ${threshold}`);
+                    result = { match: text, matched: true, _matcher: _word, matcher: `fuzzy [\`${_word}\`, \`${sim}\`, \`${threshold}\`]`, preset: false };
+                    break;
+                }
+
+                this.client.logger.debug(`Message did not match with "${_word}" in fuzzy.\nFull content: ${content}\nSimilarity: ${sim}\nThreshold: ${threshold}`);
+
+            }
+
+        }
+
+        // 4. Filter tokenized
+        if (tokenized.length && !result.matched) {
+
+            // Iterate the tokenized list itself; this is a substring check, so it also matches inside longer words
+            for (const word of tokenized) {
+                if (lowered.includes(word)) {
+                    this.client.logger.debug(`Message matched with "${word}" in the tokenized list.\nFull content: ${content}`);
+                    result = { match: word, matched: true, matcher: 'tokenized', preset: false };
+                    break;
+                }
+            }
+
+        }
+
+        // 5. Remove message, inline response and add a reason to msg object
+        if (!result.matched) return;
+        msg.filtered = result;
+        await msg.delete();
+        if (!silent) {
+            const res = await msg.formattedRespond('W_FILTER_DELETE', { params: { user: author.id } });
+            res.delete({ timeout: 10000 });
+        }
+
+        // 6. Automated actions
+
     }
 
     async filterLinks(message, edited) {
@@ -39,6 +184,4 @@ class Automoderation extends Observer {
 
     }
 
-}
-
-module.exports = Automoderation;
\ No newline at end of file
+};
\ No newline at end of file