galactic-bot/util/FilterUtility.js
2020-11-13 21:13:03 +02:00

412 lines
24 KiB
JavaScript
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/* eslint-disable no-labels */
// String-similarity scorer; the inexact word filters in this file compare its result
// against a length-dependent threshold.
const similarity = require('similarity');
// Debug switch. In this file it is only referenced by commented-out console.log calls.
const debug = true;
module.exports = class FilterUtility {
/**
 * Always throws: every member of this class is static, so instances are never meant to exist.
 *
 * @throws {Error} On every call
 */
constructor() {
throw new Error('This class cannot be instantiated');
}
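/*
 * NOTE: _links() and _words() read `this.filter`, but this class does not define it.
 * The disabled definition below shows the shape those methods expect
 * (`words`: RegExp list, `_words`: string list, `links`: domain list).
 *
 * static filter = {
 *     words: CONFIG.words.map((word) => new RegExp(`(${word})`, 'gi')),
 *     _words: CONFIG._words,
 *     links: CONFIG.links
 * }
 */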
/**
 * Look-alike character table used to fold decorated Unicode text back to plain characters.
 *
 * Each key is the plain replacement character ("0"-"9", a-z, " ", ".", "," and "?").
 * Each value is a regular-expression source string: an alternation ("x|y|z") of the
 * characters and Discord emoji names (":regional_indicator_a:") that should be rewritten
 * to that key. The letter entries are arrays joined with "|" purely to keep line lengths
 * manageable. The REGEX getter compiles every value with the 'gm' flags.
 *
 * Some look-alikes are ambiguous between letters (see the "conflicts with" comments on
 * the f and t entries). A new object is built on every access, so callers that need the
 * table repeatedly should read it once.
 *
 * NOTE(review): the hosting page flags invisible/confusable Unicode characters in this
 * file, presumably inside this table; edit it with a tool that reveals them.
 *
 * @static
 * @return {Object<String, String>}
 */
static get REPLACED_CHARS_PATTERNS() {
return {
"0": "0|⓪|₀|⁰|𝟢|𝟘||𝟎|𝟬|𝟶",
"1": "⑴|➀|❶|⓵|①|₁|¹|𝟣|𝟙||𝟏|𝟭|𝟷",
"2": "⑵|➋|➁|❷|⓶|②|₂|²|𝟤|𝟚||𝟐|𝟮|𝟸",
"3": "⑶|➌|➂|❸|⓷|③|₃|³|𝟥|𝟛||𝟑|𝟯|𝟹",
"4": "⑷|➍|➃|❹|⓸|④|₄|⁴|𝟦|𝟜||𝟒|𝟰|𝟺",
"5": "⑸|➎|➄|❺|⓹|⑤|₅|⁵|𝟧|𝟝||𝟓|𝟱|𝟻",
"6": "⑹|➏|➅|❻|⓺|⑥|₆|⁶|𝟨|𝟞||𝟔|𝟲|𝟼",
"7": "⑺|➐|➆|❼|⓻|⑦|₇|⁷|𝟩|𝟟||𝟕|𝟳|𝟽",
"8": "⑻|➑|➇|❽|⓼|⑧|₈|⁸|𝟪|𝟠||𝟖|𝟴|𝟾",
"9": "⑼|➒|➈|❾|⓽|⑨|₉|⁹|𝟫|𝟡||𝟗|𝟵|𝟿",
a: [
"ム||||🇦|🅰|🅐|🄰|𝞪|𝞐|𝝰|𝝖|𝜶|𝜜|𝛼|𝛢|𝛂|𝚨|𝚊|𝙰|𝙖|𝘼|𝘢|𝘈|𝗮|𝗔|𝖺|𝖠|𝖆|𝕬|𝕒|𝔸|𝔞|𝔄|𝓪|𝓐|𝒶|𝒜|𝒂|𝑨|𝑎",
"𝐴|𝐚|𝐀|𐊠|ꭺ|ꓯ||ꋬ|卂|Ɐ|ⓐ|Ⓐ|⒜||∆|∀|₳|ₐ|ᾼ|Ὰ|Ᾱ|Ᾰ|ᾷ|ᾶ|ᾴ|ᾳ|ᾲ|ᾱ|ᾰ|ᾏ|ᾎ|ᾍ|ᾌ|ᾋ|ᾊ|ᾉ|ᾈ|ᾇ|ᾆ|ᾅ|ᾄ|ᾃ|ᾂ|ᾁ",
"ᾀ|ὰ|ἇ|ἆ|ἅ|ἄ|ἃ|ἂ|ἁ|ἀ|ặ|Ặ|ẵ|Ẵ|ẳ|Ẳ|ằ|Ằ|ắ|Ắ|ậ|Ậ|ẫ|Ẫ|ẩ|Ẩ|ầ|Ầ|ấ|Ấ|ả|Ả|ạ|Ạ|ẚ|ḁ|Ḁ|ᵃ|ᴬ|ᴀ|ᗩ||ᗄ|Ꮧ|Ꭿ",
"|ለ|ค|බ|Թ|ӓ|Ӓ|Ѧ|а|Д|А|α|ά|Λ|Δ|Α|Ά|ɒ|ɑ|ɐ|Ⱥ|ȧ|Ȧ|ǻ|Ǻ|ǟ|ǎ|Ǎ|ą|Ą|ă|Ă|ā|Ā|å|ä|ã|â|á|à|Å|Ä|Ã|Â|Á|À",
"ª|a|@|:regional_indicator_a:"
].join("|"),
b: [
"||🇧|🅱|🅑|🄱|𝞫|𝞑|𝝱|𝝗|𝜷|𝜝|𝛽|𝛣|𝛃|𝚩|𝚋|𝙱|𝙗|𝘽|𝘣|𝘉|𝗯|𝗕|𝖻|𝖡|𝖇|𝕻|𝕭|𝕓|𝔹|𝔟|𝔓|𝔅|𝓫|𝓑|𝒷|𝒃|𝑩|𝑏|𝐵|𝐛",
"𝐁|𐑂|𐌁|𐊡|𐊂|ꮟ|ꞵ||ꓭ||乃|ⓑ|Ⓑ|⒝||ḇ|Ḇ|ḅ|Ḅ|ḃ|Ḃ|ᵇ|ᴮ|ᛒ|ᙠ|||ᏼ||Ᏸ||ც|Ⴆ|๖|๒|฿|ط|ҍ|ѣ|ь|ъ|в|Ь|В",
"Б|ϐ|β|Β|ʙ|ɮ|ɞ|ƅ|Ƅ|ƀ|ß|b|:regional_indicator_b:"
].join("|"),
c: [
"||🝌|🇨|🅲|🅒|🄲|𝚌|𝙲|𝙘|𝘾|𝘤|𝘊|𝗰|𝗖|𝖼|𝖢|𝖈|𝕮|𝕔|𝔠|𝓬|𝓒|𝒸|𝒞|𝒄|𝑪|𝑐|𝐶|𝐜|𝐂|𐑋|𐐽|𐐣|𐐕|𐌂|𐊢||ꓛ||匚|||ⓒ|Ⓒ|⒞",
"↻|ↄ|Ↄ||||℃||₵|ḉ|Ḉ|ᶜ|ᴐ||ᑢ|ᑕ|Ꮳ||ፈ|ር|ᄃ|ၥ|၁|ང|උ|ҫ|Ҁ|с|С|Ͻ|Ϲ|ϲ|Ϛ|ς|ͻ|ʗ|ɕ|ɔ|Ȼ|ƈ|Ɔ|č|Č|ċ|Ċ|ĉ",
"Ĉ|ć|Ć|ç|Ç|©|¢|c|:regional_indicator_c:"
].join("|"),
d: [
"||🇩|🅳|🅓|🄳|𝚍|𝙳|𝙙|𝘿|𝘥|𝘋|𝗱|𝗗|𝖽|𝖣|𝖉|𝕯|𝕕|𝔻|𝔡|𝔇|𝓭|𝓓|𝒹|𝒟|𝒅|𝑫|𝑑|𝐷|𝐝|𝐃|ꭰ|ꓷ|||ⓓ|Ⓓ|⒟|∂|ↁ||",
"||₫|ḓ|Ḓ|ḑ|Ḑ|ḏ|Ḏ|ḍ|Ḍ|ḋ|Ḋ|ᵈ|ᴰ|ᴅ|ᗬ||ᗡ||ᕲ|||Ꮄ||໓|๔|ծ|ժ|ԃ|ԁ|ɗ|ɖ|ƌ|Ɗ|đ|Đ|ď|Ď|Ð|d|:regional_indicator_d:"
].join("|"),
e: [
"ミ|||ﻉ|🇪|🅴|🅔|🄴|𝞷|𝞢|𝞔|𝝽|𝝨|𝝚|𝝃|𝜮|𝜠|𝜉|𝛴|𝛦|𝛏|𝚺|𝚬|𝚎|𝙴|𝙚|𝙀|𝘦|𝘌|𝗲|𝗘|𝖾|𝖤|𝖊|𝕰|𝕖|𝔼|𝔢|𝔈|𝓮|𝓔|𝒆|𝑬|𝑒|𝐸",
"𝐞|𝐄|𐐩|𐐁|𐊆|ꮛ|ꭼ||ꞓ|ꝫ|ꓱ||乇|㉫|ⵉ|ⴺ||ⳍ|ⲉ|ⓔ|Ⓔ|⒠||⋴|∑|∊|∈|∃||⅀||||ℇ|€|ₑ|Ὲ|ὲ|Ἕ|Ἔ|Ἓ|Ἒ|Ἑ|Ἐ|ἕ",
"ἔ|ἓ|ἒ|ἑ|ἐ|ệ|Ệ|ễ|Ễ|ể|Ể|ề|Ề|ế|Ế|ẽ|Ẽ|ẻ|Ẻ|Ẹ|ḝ|Ḝ|ḛ|Ḛ|ḙ|Ḙ|ḗ|Ḗ|ḕ|Ḕ|ᵉ|ᴱ|ᴈ|ᴇ|ᘿ|ᗴ|Ꮛ||ჳ|ཇ|ԑ|Ԑ|ӡ|ә|Ә|ҿ|ҽ",
"є|э|з|е|Е|ϵ|ξ|ε|έ|Σ|Ξ|Ε|ʒ|ɜ|ɛ|ə|ɘ|Ɇ|ȝ|ǝ|ƺ|Ʃ|Ɛ|Ə|Ǝ|ě|Ě|ę|Ę|ė|Ė|ĕ|Ĕ|ē|Ē|ë|ê|é|è|Ë|Ê|É|È|£|e|:regional_indicator_e:"
].join("|"),
f: [
"||ךּ|🇫|🅵|🅕|🄵|𝟋|𝚏|𝙵|𝙛|𝙁|𝘧|𝘍|𝗳|𝗙|𝖿|𝖥|𝖋|𝕱|𝕗|𝔽|𝔣|𝔉|𝓯|𝓕|𝒻|𝒇|𝑭|𝑓|𝐹|𝐟|𝐅|𐊥|𐊇||ꟻ|||ꜰ|ꓞ||千|ⓕ|Ⓕ",
"⒡|Ⅎ||℉|₣||ḟ|Ḟ|ᶠ|ᖷ|ᖵ||Ꮈ|ན|ғ|ϝ|Ϝ|ʄ|ɟ|ƒ|Ƒ|ſ|f|:regional_indicator_f:"
].join("|"), // conflicts with T: Ŧ
g: [
"||ﻮ|פֿ|𠂎|🇬|🅶|🅖|🄶|𝚐|𝙶|𝙜|𝙂|𝘨|𝘎|𝗴|𝗚|𝗀|𝖦|𝖌|𝕲|𝕘|𝔾|𝔤|𝔊|𝓰|𝓖|𝒢|𝒈|𝑮|𝑔|𝐺|𝐠|𝐆|ꮐ||ⓖ|Ⓖ|⒢|⅁||₲|ḡ",
"Ḡ||ᵍ|ᴳ|ᘜ|ᏻ||Ꮹ||Ꮆ|ງ|ق|ց|ԍ|Ԍ|Б|ʛ|ɢ|ɡ|ɠ|ɓ|ǵ|Ǵ|ǫ|ǧ|Ǧ|Ǥ|ƃ|ģ|Ģ|ġ|Ġ|ğ|Ğ|ĝ|Ĝ|g|:regional_indicator_g:"
].join("|"),
h: [
"||🇭|🅷|🅗|🄷|𝞖|𝝜|𝜢|𝛨|𝚮|𝚑|𝙷|𝙝|𝙃|𝘩|𝘏|𝗵|𝗛|𝗁|𝖧|𝖍|𝕳|𝕙|𝔥|𝓱|𝓗|𝒽|𝒉|𝑯|𝐻|𝐡|𝐇|𐋏|ꮋ||卄|ん||Ⱨ|ⓗ",
"Ⓗ|⒣||||𝑖||ₕ|ῌ|Ὴ|ᾟ|ᾞ|ᾝ|ᾜ|ᾛ|ᾚ|ᾙ|ᾘ|Ἧ|Ἦ|Ἥ|Ἤ|Ἣ|Ἢ|Ἡ|Ἠ|ẖ|ḫ|Ḫ|ḩ|Ḩ|ḧ|Ḧ|ḥ|Ḥ|ḣ|Ḣ|ᴴ||Ᏺ|||ዠ|ዞ",
"հ|ԋ|Ԋ|Ӊ|ӈ|һ|ђ|н|Н|Ћ|Η|Ή|ʱ|ʰ|ʜ|ɧ|ɦ|ɥ|Ƕ|ħ|Ħ|ĥ|Ĥ|h|:regional_indicator_h:"
].join("|"),
i: [
"ノ|||||ﺁ|🇮|🅸|🅘|🄸|𝚒|𝙸|𝙞|𝙄|𝘪|𝘐|𝗶|𝗜|𝗂|𝖨|𝖎|𝕴|𝕚|𝕀|𝔦|𝓲|𝓘|𝒾|𝒊|𝑰|𝑗|𝑖|𝐼|𝐢|𝐈|𐌠|𐌉|𐊊||||丨||||ⓘ|Ⓘ|⒤||",
"|||||||ⁱ|Ὶ|Ῑ|Ῐ|ῗ|ῖ|ῒ|ῑ|ῐ|ὶ|Ἷ|Ἶ|Ἵ|Ἴ|Ἳ|Ἲ|Ἱ|Ἰ|ἷ|ἶ|ἵ|ἴ|ἳ|ἲ|ἱ|ἰ|ị|Ị|ỉ|Ỉ|ḯ|Ḯ|ḭ|Ḭ|ᶤ|ᵢ|ᴵ||ᓰ|Ꮖ||ར",
"เ|ߊ|۱|ٱ|١|ا|أ|آ|ו|׀|ӏ|ї|і|І|ϊ|ι|ί|Ι|ΐ|ɪ|ɨ|ǐ|Ǐ|ǃ|Ɨ|ł|ı|İ|į|Į|ĭ|Ĭ|ī|Ī|ĩ|Ĩ|ï|î|í|ì|Ï|Î|Í|Ì|¡|i",
"\\|\\ǀ|:regional_indicator_i:"
].join("|"),
j: [
"フ|||ﻝ|🇯|🅹|🅙|🄹|𝚓|𝙹|𝙟|𝙅|𝘫|𝘑|𝗷|𝗝|𝗃|𝖩|𝖏|𝕵|𝕛|𝕁|𝔧|𝔍|𝓳|𝓙|𝒿|𝒥|𝒋|𝑱|𝐽|𝐣|𝐉|ꭻ||ꞁ||ⱼ|ⓙ|Ⓙ|⒥||ᴶ|ᴊ|ᒚ|ᒎ||Ꮰ||ว",
"ڶ|ل|ز|נ|ן|ј|Ј|ϳ|Ϳ|ʲ|ʝ|Ɉ|ǰ|ĵ|Ĵ|j|:regional_indicator_j:"
].join("|"),
k: [
"||🇰|🅺|🅚|🄺|𝟆|𝞳|𝞙|𝞌|𝝹|𝝟|𝝒|𝜿|𝜥|𝜘|𝜅|𝛫|𝛞|𝛋|𝚱|𝚔|𝙺|𝙠|𝙆|𝘬|𝘒|𝗸|𝗞|𝗄|𝖪|𝖐|𝕶|𝕜|𝕂|𝔨|𝔎|𝓴|𝓚|𝓀|𝒦|𝒌|𝑲",
"𝑘|𝐾|𝐤|𝐊|𐒼|ꮶ|Ꝁ||ⲕ||ⓚ|Ⓚ|⒦|⋊||₭|ₖ|ḵ|Ḵ|ḳ|Ḳ|ḱ|Ḱ|ᵏ|ᴷ|ᴋ||ᖽᐸ||ӄ|Ӄ|Ҡ|ҟ|Ҝ|қ|к|К|Ќ|ϰ|ϗ|κ|Κ|ʞ|ƙ|ĸ",
"ķ|Ķ|k|:regional_indicator_k:"
].join("|"),
l: [
"レ||||ﺂ|🇱|🅻|🅛|🄻|𝚕|𝙻|𝙡|𝙇|𝘭|𝘓|𝗹|𝗟|𝗅|𝖫|𝖑|𝕷|𝕝|𝕃|𝔩|𝔏|𝓵|𝓛|𝓁|𝒍|𝑳|𝑙|𝐿|𝐥|𝐋|𐑃|𐐛|ꮮ|Ꝉ||ㄥ|し|ⳑ||Ⱡ|ⓛ|Ⓛ|⒧",
"|⅃|⅂|||ₗ|ḽ|Ḽ|ḻ|Ḻ|ḹ|Ḹ|ḷ|Ḷ|ᴸ|ᒺ|||Ꮭ|Ꮁ|ᄂ|Ӏ|ˡ|ʟ|ʆ|ʅ|ɭ|ɫ|ƪ|Ɩ|ł|Ł|ŀ|Ŀ|ľ|Ľ|ļ|Ļ|ĺ|Ĺ|l|:regional_indicator_l:"
].join("|"),
m: [
"ᄊ|||🇲|🅼|🅜|🄼|𝞛|𝝡|𝜧|𝛭|𝚳|𝚖|𝙼|𝙢|𝙈|𝘮|𝘔|𝗺|𝗠|𝗆|𝖬|𝖒|𝕸|𝕞|𝕄|𝔪|𝔐|𝓶|𝓜|𝓂|𝒎|𝑴|𝑚|𝑀|𝐦|𝐌|𐌑",
"𐊰|ꮇ||爪||ⓜ|Ⓜ|⒨|||₥|ₘ|ṃ|Ṃ|ṁ|Ṁ|ḿ|Ḿ|ᵐ|ᴹ|ᴍ|៣||ᘻ|||๓|Ӎ|м|М|ϻ|Ϻ|Μ|ʍ|ɱ|ɯ|m|:regional_indicator_m:"
].join("|"),
n: [
"||🇳|🅽|🅝|🄽|𝞜|𝝢|𝜨|𝛮|𝚴|𝚗|𝙽|𝙣|𝙉|𝘯|𝘕|𝗻|𝗡|𝗇|𝖭|𝖓|𝕹|𝕟|𝔫|𝔑|𝓷|𝓝|𝓃|𝒩|𝒏|𝑵|𝑛|𝑁|𝐧|𝐍|𐑍|𐐥|ꓵ||刀|几",
"Ⲡ||ⓝ|Ⓝ|⒩|⋂|∏|ℿ||₦|ₙ|ⁿ|ῇ|ῆ|ῄ|ῃ|ῂ|ᾗ|ᾖ|ᾕ|ᾔ|ᾓ|ᾒ|ᾑ|ᾐ|ὴ|ἧ|ἦ|ἥ|ἤ|ἣ|ἢ|ἡ|ἠ|ṋ|Ṋ|ṉ|Ṉ|ṇ|Ṇ|ṅ|Ṅ|ᶰ|ᴺ|ᴎ|ហ|\\/\\\\/",
"ᘉ|ᑎ|Ꮑ|ቡ|በ|ຖ|ภ|ก|מ|ռ|ո|ղ|Ռ|Ո|ӣ|ѝ|й|и|П|Й|И|Ѝ|Ϟ|η|ή|Π|Ν|ͷ|Ͷ|ɴ|ɳ|ɲ|Ǹ|ƞ|Ɲ|ŋ|ʼn|ň|Ň|ņ|Ņ|ń|Ń|ñ|Ñ|n|:regional_indicator_n:"
].join("|"),
o: [
"||||||||||||||🇴|🅾|🅞|🄾|𝞼|𝞸|𝞞|𝞂|𝝾|𝝤|𝝈|𝝄|𝜪|𝜎|𝜊|𝛰|𝛔|𝛐|𝚶|𝚘|𝙾|𝙤|𝙊|𝘰|𝘖|𝗼|𝗢|𝗈|𝖮|𝖔|𝕺",
"𝕠|𝕆|𝔬|𝔒|𝓸|𝓞|𝒪|𝒐|𝑶|𝑜|𝑂|𝐨|𝐎|𐓪|𐓃|𐓂|𐐬|𐐄|𐊫|𐊒||Ꙩ||㊉|ㄖ|の||ⵙ||||⨀|✿|☉|ⓞ|Ⓞ|⒪|⍥|⊙|∅||ₒ",
"Ὼ|Ὸ|ᾯ|ᾮ|ᾭ|ᾬ|ᾫ|ᾪ|ᾩ|ᾨ|ὸ|Ὧ|Ὦ|Ὥ|Ὤ|Ὣ|Ὢ|Ὡ|Ὠ|Ὅ|Ὄ|Ὃ|Ὂ|Ὁ|Ὀ|ὅ|ὄ|ὃ|ὂ|ὁ|ὀ|ỡ|Ỡ|ở|Ở|ờ|Ờ|ớ|Ớ|ộ|Ộ|Ỗ|ổ|Ổ|ồ|Ồ|ố|Ố|ỏ|Ỏ",
"ọ|Ọ|ṓ|Ṓ|ṑ|Ṑ|ṏ|Ṏ|ṍ|Ṍ|ð|ᵒ|ᴼ|||ᗝ|ᓍ|Ꮎ|Ꭷ|||||||๏|ට||||||||||||߀|۵|۝|ە|ہ|ھ|٥|ه|ס|օ",
"Օ|Ө|ӧ|Ӧ|ѻ|о|Ф|О|ό|φ|σ|ο|θ|Ο|Θ|˚|ʘ|ǿ|Ǿ|ǒ|Ǒ|Ʊ|ơ|Ơ|ő|Ő|ŏ|Ŏ|ō|Ō|ø|ö|õ|ô|ó|ò|ð|Ø|Ö|Õ|Ô|Ó|Ò|º|°|o|♡|:regional_indicator_o:"
].join("|"),
p: [
"ア|||🇵|🅿|🅟|🄿|𝟈|𝞺|𝞠ϱ|𝞠|𝞎|𝞀|𝝦|𝝔|𝝆|𝜬|𝜚|𝜌|𝛲|𝛠|𝛒|𝚸|𝚙|𝙿|𝙥|𝙋|𝘱|𝘗|𝗽|𝗣|𝗉|𝖯|𝖕|𝕡|𝔭|𝓹|𝓟|𝓅|𝒫|𝒑|𝑷|𝑝|𝑃|𝐩|𝐏",
"𐓄|𐊕|ꮲ||卩|||Ᵽ|ⓟ|Ⓟ|⒫|||℘|₱|ₚ|‽|Ῥ|ῥ|ῤ|ṗ|Ṗ|ṕ|Ṕ|ᵖ|ᴾ|ᴩ|ᴘ|ᕵ||||ק|ք|բ|Ԁ|Ҏ|р|Р|ϸ|Ϸ|ϱ|ρ|Ρ|ƿ|Ƥ|þ|Þ|¶",
"p|:regional_indicator_p:"
].join("|"),
q: [
"||🇶|🆀|🅠|🅀|𝚚|𝚀|𝙦|𝙌|𝘲|𝘘|𝗾|𝗤|𝗊|𝖰|𝖖|𝕼|𝕢|𝔮|𝔔|𝓺|𝓠|𝓆|𝒬|𝒒|𝑸|𝑞|𝑄|𝐪|𝐐|𐌒|𐊭|ꟼ|Ꝗ|ゐ||ⓠ|Ⓠ|⒬||ợ|ᶐ|ᕴ",
"ᑫ|Ꭴ|๑|۹|ף|զ|գ|ԛ|Ҩ|ϥ|ϙ|Ϙ|Ω|ʠ|ɋ|Ɋ|ǭ|Ǭ|Ǫ|ƍ|q|:regional_indicator_q:"
].join("|"),
r: [
"||🇷|🆁|🅡|🅁|𝞒|𝝘|𝜞|𝛤|𝚪|𝚛|𝚁|𝙧|𝙍|𝘳|𝘙|𝗿|𝗥|𝗋|𝖱|𝖗|𝕽|𝕣|𝔯|𝓻|𝓡|𝓇|𝒓|𝑹|𝑟|𝑅|𝐫|𝐑|𐒴|ꮢ||ꭱ||||尺||Ɽ|ⓡ|Ⓡ|⒭|℞",
"|||ṟ|Ṟ|ṝ|Ṝ|ṛ|Ṛ|ṙ|Ṙ|ᵣ|ᴿ||ᴚ|ᴙ|ᚱ||||འ|ཞ|ર|ր|Ի|я|г|Я|ʳ|ʁ|ʀ|ɿ|ɾ|ɼ|ɹ|Ɍ|Ʀ|ř|Ř|ŗ|Ŗ|ŕ|Ŕ|®|r|:regional_indicator_r:"
].join("|"),
s: [
"|||ﮎ|🇸|🆂|🅢|🅂|𝚜|𝚂|𝙨|𝙎|𝘴|𝘚|𝘀|𝗦|𝗌|𝖲|𝖘|𝕾|𝕤|𝕊|𝔰|𝔖|𝓼|𝓢|𝓈|𝒮|𝒔|𝑺|𝑠|𝑆|𝐬|𝐒|𐑈|𐐠|𐊖|||ꙅ|||꒚|丂|ⓢ|Ⓢ|⒮|∫|₴",
"ₛ|ṩ|Ṩ|ṧ|Ṧ|ṥ|Ṥ|ṣ|Ṣ|ṡ|Ṡ|ᴤ|ᔕ|||Ꭶ|ร|ى|ֆ|Տ|ѕ|Ѕ|ϩ|ˢ|ʃ|ʂ|Ș|ƽ|ƨ|Ƨ|š|Š|ş|Ş|ŝ|Ŝ|ś|Ś|§|s|\\$|:regional_indicator_s:"
].join("|"),
t: [
"イ|ィ|||🇹|🆃|🅣|🅃|𝞽|𝚝|𝚃|𝙩|𝙏|𝘵|𝘛|𝘁|𝗧|𝗍|𝖳|𝖙|𝕿|𝕥|𝕋|𝔱|𝔗|𝓽|𝓣|𝓉|𝒯|𝒕|𝑻|𝑡|𝑇|𝐭|𝐓|𐌕|𐊱|𐊗|ꭲ||꓄|丅|ㄒ||Ⲅ||ⓣ|Ⓣ|⒯",
"⊥||ℾ|₮|ₜ|†|ẗ|ṱ|Ṱ|ṯ|Ṯ|ṭ|Ṭ|ṫ|Ṫ|ᵗ|ᵀ|ᴛ|ᖶ|ᒥ|Ꮦ|Ꮏ|Ꮁ||ኮ|ح|է|Շ|Ի|Ե|ҭ|т|Т|Г|ϯ|Ϯ|τ|π|Τ|Γ|Ͳ|ʈ|ʇ|ɬ|ȶ|ț|Ț|ǂ|Ʈ|Ƭ|ƫ|ƚ",
"ŧ|ť|Ť|ţ|Ţ|t|:regional_indicator_t:"
].join("|"), // conflicts with F: Ŧ
u: [
"||🇺|🆄|🅤|🅄|𝞵|𝝻|𝝁|𝜇|𝛍|𝚞|𝚄|𝙪|𝙐|𝘶|𝘜|𝘂|𝗨|𝗎|𝖴|𝖚|𝖀|𝕦|𝕌|𝔲|𝔘|𝓾|𝓤|𝓊|𝒰|𝒖|𝑼|𝑢|𝑈|𝐮|𝐔|𐓶|𐓎|||||ㄩ|ひ|ⓤ",
"Ⓤ|⒰|||∩|℧|ῧ|ῦ|ῢ|ῡ|ῠ|ὺ|ὗ|ὖ|ὕ|ὔ|ὓ|ὒ|ὑ|ὐ|ự|Ự|Ữ|ử|Ử|ừ|Ừ|ứ|Ứ|ủ|Ủ|ụ|Ụ|ṻ|Ṻ|ṹ|Ṹ|ṷ|Ṷ|ṵ|Ṵ|ṳ|Ṳ|ᵾ|ᵤ|ᵘ|ᵁ||ᘴ|ᘮ",
"ᓑ|ᑘ||ᐡ|Ꮼ|ሆ||ย|น|પ|և|ս|մ|Ս|Մ|Ц|ύ|ϋ|υ|μ|ΰ|ʋ|ʊ|Ʉ|Ȕ|ǜ|Ǜ|ǚ|Ǚ|ǘ|Ǘ|ǖ|Ǖ|ǔ|Ǔ|Ʊ|ư|Ư|ų|Ų|ű|Ű|ů|Ů|ŭ|Ŭ|ū|Ū|ũ|Ũ|û",
"ú|ù|Ü|Û|Ú|Ù|µ|u|:regional_indicator_u:"
].join("|"),
v: [
"||🇻|🆅|🅥|🅅|𝝼|𝝂|𝜈|𝛎|𝚟|𝚅|𝙫|𝙑|𝘷|𝘝|𝘃|𝗩|𝗏|𝖵|𝖛|𝖁|𝕧|𝕍|𝔳|𝔙|𝓿|𝓥|𝓋|𝒱|𝒗|𝑽|𝑣|𝑉|𝐯|𝐕|𐓘|𐒰|𐌡|𐊍|||ꓥ||ⴷ|ⱽ|ⓥ|Ⓥ",
"⒱|||√|||℣|ṿ|Ṿ|ṽ|Ṽ|ᵥ|ᵛ|ᴧ||ᐺ|ᐱ|||Ꮙ|ง|۸|۷|٨|٧|ש|ע|ט|Ѷ|ѵ|Ѵ|Л|ν|Λ|ʌ|ʋ|Ʌ|Ɣ|v|:regional_indicator_v:"
].join("|"),
w: [
"||🇼|🆆|🅦|🅆|𝟉|𝟂|𝞏|𝞈|𝝕|𝝎|𝜛|𝜔|𝜋|𝛡|𝛚|𝛑|𝚠|𝚆|𝙬|𝙒|𝘸|𝘞|𝘄|𝗪|𝗐|𝖶|𝖜|𝖂|𝕨|𝕎|𝔴|𝔚|𝔀|𝓦|𝓌|𝒲|𝒘|𝑾|𝑤",
"𝑊|𝐰|𝐖|𐓑||ꞷ|ꙍ||山|ⲱ|ⓦ|Ⓦ|⒲|⍵|ℼ|₩|ῷ|ῶ|ῴ|ῳ|ῲ|ẘ|ẉ|Ẉ|ẇ|Ẇ|ẅ|Ẅ|ẃ|Ẃ|ẁ|Ẁ|ᵂ||ᘺ|ᗯ|Ꮿ||Ꮚ|Ꮗ||ሠ|ཡ|ຟ|ฬ",
"ฝ|చ|ա|ԝ|Ԝ|ѡ|Ѡ|ш|Щ|ϖ|ώ|ω|ψ|ʷ|ʍ|ɯ|ŵ|Ŵ|w|:regional_indicator_w:"
].join("|"),
x: [
"メ|||אָ|אַ|🇽|🆇|🅧|🅇|𝟀|𝞆|𝝌|𝜒|𝛘|𝚡|𝚇|𝙭|𝙓|𝘹|𝘟|𝘅|𝗫|𝗑|𝖷|𝖝|𝖃|𝕩|𝕏|𝔵|𝔛|𝔁|𝓧|𝓍|𝒳|𝒙|𝑿|𝑥|𝑋|𝐱|𝐗|𐌢|𐌗|𐊴|𐊐|ꭕ|ꭓ|||꒼",
"乂|〤||ⲭ||||||ⓧ|Ⓧ|⒳|⌧|||ℵ|ₓ|ẍ|Ẍ|ẋ|Ẋ||||||ጀ|ჯ|א|Ӿ|Ӽ|ҳ|х|Х|Ж|χ|Χ|ˣ|ɤ|×|x|:regional_indicator_x:"
].join("|"),
y: [
"リ|||🇾|🆈|🅨|🅈|𝞬|𝝲|𝜸|𝛾|𝛄|𝚢|𝚈|𝙮|𝙔|𝘺|𝘠|𝘆|𝗬|𝗒|𝖸|𝖞|𝖄|𝕪|𝕐|𝔶|𝔜|𝔂|𝓨|𝓎|𝒴|𝒚|𝒀|𝑦|𝑌|𝐲|𝐘|𐊲|||ꐯ|ꌦ|ㄚ||ⓨ|Ⓨ|⒴",
"⅄||Ὺ|Ῡ|Ῠ|Ὗ|Ὕ|Ὓ|Ὑ|ỿ|ỹ|Ỹ|ỷ|Ỷ|ỵ|Ỵ|ỳ|Ỳ|ẙ|ẏ|Ẏ||ᖻ||||ฯ|ץ|վ|կ|Ӳ|Ӌ|ұ|ү|Ү|ч|у|У|Ў|ϔ|ϓ|ϒ|γ|Υ|Ύ|ˠ|ʸ|ʏ|ʎ|ɣ|Ɏ|Ƴ",
"Ÿ|ŷ|Ŷ|ÿ|ý|Ý|¥|y|:regional_indicator_y:"
].join("|"),
z: [
"||🇿|🆉|🅩|🅉|𝚣|𝚉|𝙯|𝙕|𝘻|𝘡|𝘇|𝗭|𝗓|𝖹|𝖟|𝖅|𝕫|𝔷|𝔃|𝓩|𝓏|𝒵|𝒛|𝒁|𝑧|𝑍|𝐳|𝐙|||乙|Ⱬ|☡|ⓩ|Ⓩ|⒵|||ẕ|Ẕ|ẓ|Ẓ|ẑ|Ẑ|ᶻ||ᙆ",
"ᘔ||ፚ|ຊ|չ|ζ|Ζ|ʑ|ʐ|ɀ|ȥ|ƹ|ƶ|Ƶ|ž|Ž|ż|Ż|ź|Ź|z|:regional_indicator_z:"
].join("|"),
" ": "\\s+", // multiple whitespace -> space
".": "",
",": "|",
"?": ""
};
}
static get REGEX() {
return {
invite: /((discord\s?\.\s?gg\s?\/)|(discord(app)?\s?\.\s?com\/invite\/))(\s*?[a-z0-9]+)/giu,
link: /(https?:\/\/(www\.)?)?(?<domain>([a-z0-9-]{1,63}\.)?([a-z0-9-]{2,63})(\.[a-z0-9-]{2,63})(\.[a-z0-9-]{2,63})?)(\/\S*)?/iu,
// eslint-disable-next-line camelcase
link_g: /(https?:\/\/(www\.)?)?(?<domain>([a-z0-9-]{1,63}\.)?([a-z0-9-]{2,63})(\.[a-z0-9-]{2,63})(\.[a-z0-9-]{2,63})?)(\/\S*)?/iug,
emoji: /<a?:[a-z0-9_-]{2,}:[0-9]{17,22}>/giu,
letters: Array.from(Object.entries(this.REPLACED_CHARS_PATTERNS)).reduce((obj, [key, val]) => {
// eslint-disable-next-line require-unicode-regexp
obj[key] = new RegExp(val, 'gm');
return obj;
}, {})
};
}
static normalize(content) {
if (typeof content !== 'string') throw new Error('Invalid input type, must be of type string');
if (!content || !content.length) return '';
//Zero width character (UTF-16 8206)
content = content.replace(//gu, '');
//Replace the weird letters with their normal text counterparts
// eslint-disable-next-line no-useless-escape
const match = (/[a-z0-9\w\(\)\.\\\/\?!]+/gimu).exec(content);
if (!(match && match[0].length === content.length)) {
for (const char of Object.keys(this.REGEX.letters)) {
content = content.replace(this.REGEX.letters[char], char);
}
}
//if (debug) console.log('weird char regex: ' + content);
//Remove duplicate characters
const words = content.split(' ');
for (let i = 0; i < words.length; i++) {
if (words[i].length === 0) {
words.splice(i, 1);
i--;
}
if (this.REGEX.link.test(words[i])) continue;
const letters = words[i].split('');
for (let j = 1; j < letters.length - 1; j++) {
if (letters[j - 1] === letters[j] && letters[j] === letters[j + 1]) {
letters.splice(j, 1);
j--;
}
}
words[i] = letters.join('');
}
content = words.join(' ');
//if (debug) console.log('dupes: ' + content);
return content;
}
/**
* Filter words explicitly
*
* @static
* @param {Array<String>} [words=[]]
* @param {Array<String>} [filterList=[]]
* @return {Object}
*/
static filterExplicit(words = [], filterList = []) {
for (const word of filterList) {
//Do it like this instead of regex so it doesn't match stuff like Scunthorpe with cunt
if (words.some((_word) => _word === word)) {
this.client.logger.debug(`\nMessage matched with "${word}" in the explicit list.\nFull content: ${content}`);
return {
match: word,
matched: true,
matcher: 'explicit',
_matcher: word,
type: 'explicit'
};
}
}
}
static global(content) {
content = this._links(content);
content = this._words(content);
return content;
}
static _links(content) {
if (!content || !content.length) return '';
const domains = this.filter.links;
const regex = this.REGEX.link;
// eslint-disable-next-line camelcase
const regex_g = this.REGEX.link_g;
const matches = content.match(regex_g);
//console.log(content.match(this.regex.link_g))
if (!matches) return content;
outer:
for (const match of matches) {
for (const domain of domains) {
if (match.includes(domain)) {
content = content.replace(match, '`<REDACTED: link (g)>`');
continue outer;
}
}
}
//console.log('[pre link filter]', content)
// for (const domain of domains) {
// while (regex.test(content) && content.includes(domain)) {
// const match = content.match(regex);
// content = content.replace(match[0], '`<REDACTED: link (g)>`');
// }
// }
//console.log('[post link filter]', content)
return content;
}
static _words(content) {
if (!content || !content.length) return '';
const regex = this.filter.words;
//console.log('[pre word filter]', content)
//console.log('[pre regex]:', content);
const words = content.replace('\n', '\n ').split(' ');
for (const reg of regex) {
if (reg.test(content)) {
console.log(reg);
content = content.replace(reg, '`<REDACTED: word (g)>`');
}
const text = content.replace(/\s/gu, '');
if (reg.test(text)) {
content = '`<REDACTED: message (g)>`';
}
}
//console.log('[pre similarity]:', content)
for (const word of words) {
if (!content.includes(word)) continue;
for (const _word of this.filter._words) {
if (similarity(word, _word) >= 0.93 - 0.15 * Math.log(word.length)) {
console.log('[similarity]:', word, _word);
content = content.replace(word, '`<REDACTED: word (g)>`');
}
}
}
//console.log('[post word filter]',content);
// if (regex.test(content)) {
// console.log('ping')
// const words = content.split(' ');
// console.log(words);
// for (const word of words) {
// const [match] = word.match(regex);
// console.log(match)
// if (match === word) content = content.replace(regex, '`<REDACTED: word>`');
// }
// //return content.replace(regex, '`<REDACTED: word>`');
// }
return content;
}
static invites(content) {
if (this.REGEX.invite.test(content)) return content.replace(this.REGEX.invite, '`<REDACTED: invite>`');
return content;
}
static links(content, options) {
//console.log('1',content)
const matches = content.match(this.REGEX.link_g);
//console.log(matches)
if (matches) {
for (const match of matches) {
let safe = false;
for (const domain of options.whitelist) {
if (match.includes(domain)) {
safe = true;
break;
}
}
if (!safe) content = content.replace(match, '`<REDACTED: link>`');
}
}
//console.log('2', content)
return content;
}
static words(content, filter) {
const { exact, inexact } = filter;
// console.log(exact);
// console.log(new RegExp(`(${exact.join('|')})`, 'giu'))
if (exact.length) content = content.replace(new RegExp(`(${exact.join('|')})`, 'giu'), '`<REDACTED: word>`');
if (inexact.length) {
for (const word of inexact) {
const words = content.replace('\n', '\n ').split(' ');
for (const _word of words) {
const sim = similarity(word, _word);
console.log(_word, word, sim, 0.95 - 0.15 * Math.log(_word.length));
if (sim >= 0.93 - 0.15 * Math.log(word.length)) {
console.log('[similarity]:', word, _word);
content = content.replace(_word, '`<REDACTED: word>`');
}
}
const text = content.replace(/\s/gu, '');
if (text.includes(word)) {
return '`<REDACTED: message>`';
}
}
}
return content;
}
static emojis(content) {
if (this.REGEX.emoji.test(content)) return content.replace(this.REGEX.emoji, '`<REDACTED: emoji>`');
return content;
}
};