diff --git a/CHANGELOG.md b/CHANGELOG.md index 3ecde4e..fbe2b71 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,16 @@ +### 0.19.1 +- Added: cleaning more than one of diacritic chars on `fix_diacritics`, props @languagetool-org +- Added: extra method for converting persian numbers back +- Added: fix another arabic kaf char on `fix_misc_non_persian_chars` +- Added: removes space before common domain tlds on `fix_spacing_for_punctuations` +- Added: replace comma between numbers to thousands separators on `fix_numeral_symbols` +- Added: support for man tan shan suffixes +- Changed: begin/end space cleanup after preservers +- Changed: extract diacritics fixes as new option: `fix_diacritics` +- Changed: yet another pattern for preserving URIs, ([ref](https://stackoverflow.com/a/6927878/)) +- Fixed: fix ha haye before other suffixes +- Fixed: support for more punctuation types after suffixes + ### 0.19.0 - Added: (undocumented) fix heh + ye, alternative to `fix_hamzeh` - Added: cleaning whitespace/zwnj between new-lines on `cleanup_spacing` diff --git a/lib/virastar.js b/lib/virastar.js index 24ea7bf..b47237e 100644 --- a/lib/virastar.js +++ b/lib/virastar.js @@ -1,5 +1,5 @@ /*! -* Virastar - v0.19.0 - 2020-02-03 +* Virastar - v0.19.1 - 2020-02-23 * https://github.com/brothersincode/virastar * Licensed: MIT */ diff --git a/lib/virastar.min.js b/lib/virastar.min.js index 6f4dbb4..065464b 100644 --- a/lib/virastar.min.js +++ b/lib/virastar.min.js @@ -1,6 +1 @@ -/*! -* Virastar - v0.19.0 - 2020-02-03 -* https://github.com/brothersincode/virastar -* Licensed: MIT -*/ -!function(e,r,n){"undefined"!=typeof module?module.exports=n():"function"==typeof define&&"object"==typeof define.amd?define(n):"undefined"!=typeof window?window[e]=n():r[e]=n()}("Virastar",this,function(){function e(r,n){if(!(this instanceof e))return new e(r,n);if(r=r||{},"object"==typeof r)this.opts=this.parseOptions(r);else if("string"==typeof r)return this.opts=this.parseOptions(n||{}),this.cleanup(r);return this}return e.prototype={defaults:{cleanup_begin_and_end:!0,cleanup_extra_marks:!0,cleanup_kashidas:!0,cleanup_line_breaks:!0,cleanup_rlm:!0,cleanup_spacing:!0,cleanup_zwnj:!0,decode_htmlentities:!0,fix_arabic_numbers:!0,fix_dashes:!0,fix_english_numbers:!0,fix_english_quotes_pairs:!0,fix_english_quotes:!0,fix_hamzeh:!0,fix_misc_non_persian_chars:!0,fix_numeral_symbols:!0,fix_perfix_spacing:!0,fix_persian_glyphs:!0,fix_punctuations:!0,fix_question_mark:!0,fix_spacing_for_braces_and_quotes:!0,fix_spacing_for_punctuations:!0,fix_suffix_misc:!0,fix_suffix_spacing:!0,fix_three_dots:!0,kashidas_as_parenthetic:!0,markdown_normalize_braces:!0,markdown_normalize_lists:!0,normalize_ellipsis:!0,normalize_eol:!0,preserve_braces:!1,preserve_brackets:!1,preserve_comments:!0,preserve_entities:!0,preserve_frontmatter:!0,preserve_HTML:!0,preserve_nbsps:!0,preserve_URIs:!0,skip_markdown_ordered_lists_numbers_conversion:!1},digits:"۱۲۳۴۵۶۷۸۹۰",entities:{"sbquo;":"‚","lsquo;":"‘","lsquor;":"‚","ldquo;":"“","ldquor;":"„","rdquo;":"”","rdquor;":"”","rsquo;":"’","rsquor;":"’","apos;":"'","QUOT;":'"',QUOT:'"',"quot;":'"',quot:'"',"zwj;":"‍","ZWNJ;":"‌","zwnj;":"‌","shy;":"­"},glyphs:{"‌ه":"ﻫ","ی‌":"ﻰﻲ","ﺃ":"ﺄﺃ","ﺁ":"ﺁﺂ","ﺇ":"ﺇﺈ","ا":"ﺎا","ب":"ﺏﺐﺑﺒ","پ":"ﭖﭗﭘﭙ","ت":"ﺕﺖﺗﺘ","ث":"ﺙﺚﺛﺜ","ج":"ﺝﺞﺟﺠ","چ":"ﭺﭻﭼﭽ","ح":"ﺡﺢﺣﺤ","خ":"ﺥﺦﺧﺨ","د":"ﺩﺪ","ذ":"ﺫﺬ","ر":"ﺭﺮ","ز":"ﺯﺰ","ژ":"ﮊﮋ","س":"ﺱﺲﺳﺴ","ش":"ﺵﺶﺷﺸ","ص":"ﺹﺺﺻﺼ","ض":"ﺽﺾﺿﻀ","ط":"ﻁﻂﻃﻄ","ظ":"ﻅﻆﻇﻈ","ع":"ﻉﻊﻋﻌ","غ":"ﻍﻎﻏﻐ","ف":"ﻑﻒﻓﻔ","ق":"ﻕﻖﻗﻘ","ک":"ﮎﮏﮐﮑﻙﻚﻛﻜ","گ":"ﮒﮓﮔﮕ","ل":"ﻝﻞﻟﻠ","م":"ﻡﻢﻣﻤ","ن":"ﻥﻦﻧﻨ","ه":"ﻩﻪﻫﻬ","هٔ":"ﮤﮥ","و":"ﻭﻮ","ﺅ":"ﺅﺆ","ی":"ﯼﯽﯾﯿﻯﻰﻱﻲﻳﻴ","ئ":"ﺉﺊﺋﺌ","لا":"ﻼ","ﻹ":"ﻺ","ﻷ":"ﻸ","ﻵ":"ﻶ"},parseOptions:function(e){var r=Object.assign({},this.defaults);for(var n in r)e.hasOwnProperty(n)&&(r[n]=e[n]);return r},charReplace:function(e,r,n){var i=r.split(""),s=n.split("");for(var a in i)e=e.replace(new RegExp(i[a],"g"),s[a]);return e},arrReplace:function(e,r){for(var n in r)r.hasOwnProperty(n)&&(e=e.replace(new RegExp("["+r[n]+"]","g"),n));return e},cleanup:function(e,r){if("string"!=typeof e)throw new TypeError("Expected a String");if(!e.trim())return e;var n=this,i=r?n.parseOptions(r):n.opts;if(e+=" ",i.preserve_frontmatter){var s=[];e=e.replace(/^---[\S\s]*---\s*/g,function(e){return s.push(e),"__FRONTMATTER__PRESERVER__"})}if(i.preserve_HTML){var a=[];e=e.replace(/<\/?[a-z][^>]*?>/gi,function(e){return a.push(e),"__HTML__PRESERVER__"})}if(i.preserve_comments){var c=[];e=e.replace(//g,function(e){return c.push(e),"__COMMENT__PRESERVER__"})}if(i.preserve_brackets){var t=[];e=e.replace(/(\[.*?\])/g,function(e){return t.push(e),"__BRACKETS__PRESERVER__"})}if(i.preserve_braces){var u=[];e=e.replace(/(\{.*?\})/g,function(e){return u.push(e),"__BRACES__PRESERVER__"})}if(i.preserve_URIs){var p=[];e=e.replace(/\[(.*?)\]\((.*?)\)/g,function(e,r,n){return p.push(n.trim()),"["+r+"](__MD_LINK__PRESERVER__)"});var _=[];e=e.replace(/(https?:\/\/(?:www\.|(?!www))[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s]{2,}|www\.[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s]{2,}|https?:\/\/(?:www\.|(?!www))[a-zA-Z0-9]+\.[^\s]{2,}|www\.[a-zA-Z0-9]+\.[^\s]{2,})/g,function(e){return _.push(e),"__URI__PRESERVER__"})}if(i.preserve_nbsps){var f=[];e=e.replace(/ | /gi,function(e){return f.push(e),"__NBSPS__PRESERVER__"})}if(i.decode_htmlentities&&(e=n.decodeHTMLEntities(e,i)),i.preserve_entities){var l=[];e=e.replace(/&(#?[^;\W]+;?)/g,function(e){return l.push(e),"__ENTITIES__PRESERVER__"})}return i.normalize_eol&&(e=n.normalizeEOL(e,i)),i.fix_dashes&&(e=n.fixDashes(e,i)),i.fix_three_dots&&(e=n.fixThreeDots(e,i)),i.normalize_ellipsis&&(e=n.normalizeEllipsis(e,i)),i.fix_english_quotes_pairs&&(e=n.fixEnglishQuotesPairs(e,i)),i.fix_english_quotes&&(e=n.fixEnglishQuotes(e,i)),i.fix_hamzeh?e=n.fixHamzeh(e,i):i.fix_suffix_spacing&&(e=n.fixSuffixSpacingHamzeh(e,i)),i.cleanup_rlm&&(e=n.cleanupRLM(e,i)),i.cleanup_zwnj&&(e=n.cleanupZWNJ(e,i)),i.fix_arabic_numbers&&(e=n.fixArabicNumbers(e,i)),e=e.replace(/(^|\s+)([[({"'“«]?)(\S+)([\])}"'”»]?)(?=($|\s+))/g,function(e,r,s,a,c,t){return a.match(/[a-zA-Z\-_]{2,}/g)?e:a.match(/%(?:\d+\$)?[+-]?(?:[ 0]|'.{1})?-?\d*(?:\.\d+)?[bcdeEufFgGosxX]/g)?e:a.match(/&#\d+;/g)?e:i.skip_markdown_ordered_lists_numbers_conversion&&(e+c+t).match(/(?:(?:\r?\n)|(?:\r\n?)|(?:^|\n))\d+\.\s/)?e:(i.fix_english_numbers&&(e=n.fixEnglishNumbers(e,i)),i.fix_numeral_symbols&&(e=n.fixNumeralSymbols(e,i)),i.fix_punctuations&&(e=n.fixPunctuations(e,i)),i.fix_persian_glyphs&&(e=n.fixPersianGlyphs(e,i)),i.fix_misc_non_persian_chars&&(e=n.fixMiscNonPersianChars(e,i)),i.fix_question_mark&&(e=n.fixQuestionMark(e,i)),e)}),i.fix_perfix_spacing&&(e=n.fixPerfixSpacing(e,i)),i.fix_suffix_spacing&&(e=n.fixSuffixSpacing(e,i)),i.fix_suffix_misc&&(e=n.fixSuffixMisc(e,i)),i.fix_spacing_for_braces_and_quotes&&(e=n.fixBracesSpacing(e,i)),i.cleanup_extra_marks&&(e=n.cleanupExtraMarks(e,i)),i.fix_spacing_for_punctuations&&(e=n.fixPunctuationSpacing(e,i)),i.kashidas_as_parenthetic&&(e=n.kashidasAsParenthetic(e,i)),i.cleanup_kashidas&&(e=n.cleanupKashidas(e,i)),i.markdown_normalize_braces&&(e=n.markdownNormalizeBraces(e,i)),i.markdown_normalize_lists&&(e=n.markdownNormalizeLists(e,i)),i.fix_spacing_for_braces_and_quotes&&(e=n.fixBracesSpacingInside(e,i)),i.cleanup_spacing&&(e=n.cleanupSpacing(e,i)),i.cleanup_line_breaks&&(e=n.cleanupLineBreaks(e,i)),e=i.cleanup_begin_and_end?n.cleanupBeginAndEnd(e,i):e.replace(/[ ]$/g,""),i.preserve_entities&&(e=e.replace(/__ENTITIES__PRESERVER__/g,function(){return l.shift()})),i.preserve_nbsps&&(e=e.replace(/__NBSPS__PRESERVER__/g,function(){return f.shift()})),i.preserve_URIs&&(e=e.replace(/__MD_LINK__PRESERVER__/g,function(){return p.shift()}),e=e.replace(/__URI__PRESERVER__/g,function(){return _.shift()})),i.preserve_braces&&(e=e.replace(/__BRACES__PRESERVER__/g,function(){return u.shift()})),i.preserve_brackets&&(e=e.replace(/__BRACKETS__PRESERVER__/g,function(){return t.shift()})),i.preserve_comments&&(e=e.replace(/__COMMENT__PRESERVER__/g,function(){return c.shift()})),i.preserve_HTML&&(e=e.replace(/__HTML__PRESERVER__/g,function(){return a.shift()})),i.preserve_frontmatter&&(e=e.replace(/__FRONTMATTER__PRESERVER__/g,function(){return s.shift()})),e},cleanupZWNJ:function(e,r){return e=e.replace(/\u00ad/g,"‌"),e=e.replace(/\u200c{2,}/g,"‌"),e=e.replace(/([۰-۹0-9إأةؤورزژاآدذ،؛,:«»\\\/@#$٪×*()ـ\-=|])\u200c/g,"$1"),e=e.replace(/\u200c([\u064e\u0650\u064f\u064b\u064d\u064c\u0651\u06c0])/g,"$1"),e=e.replace(/\u200c([\w])/g,"$1"),e=e.replace(/([\w])\u200c/g,"$1"),e=e.replace(/\u200c([\n\s[].،«»:()؛؟?;$!@-=+\\])/g,"$1"),e=e.replace(/([\n\s[.،«»:()؛؟?;$!@\-=+\\])\u200c/g,"$1"),e=e.replace(/(\s+)\u200c/g,"$1"),e=e.replace(/\u200c(\s+)/g,"$1"),e=e.replace(/(^\u200c|\u200c$)/gm,"")},decodeHTMLEntities:function(e,r){var n=this;return e.replace(/&(#?[^;\W]+;?)/g,function(e,r){var i;if(i=/^#(\d+);?$/.exec(r))return String.fromCharCode(parseInt(i[1],10));if(i=/^#[Xx]([A-Fa-f0-9]+);?/.exec(r))return String.fromCharCode(parseInt(i[1],16));var s=/;$/.test(r),a=s?r.replace(/;$/,""):r,c=n.entities[a]||s&&n.entities[r];return"number"==typeof c?String.fromCharCode(c):"string"==typeof c?c:"&"+r})},normalizeEOL:function(e,r){return e.replace(/(\r?\n)|(\r\n?)/g,"\n")},fixDashes:function(e,r){return e.replace(/-{3}/g,"—").replace(/-{2}/g,"–")},fixThreeDots:function(e,r){return e.replace(/\s*\.{3,}/g,"…")},normalizeEllipsis:function(e,r){return e.replace(/(…){2,}/g,"…")},fixEnglishQuotesPairs:function(e,r){return e.replace(/(“)(.+?)(”)/g,"«$2»")},fixEnglishQuotes:function(e,r){return e.replace(/(["'`]+)(.+?)(\1)/g,"«$2»")},fixHamzeh:function(e,r){return e.replace(/(\S)(ه[\s\u200c\u200e]+[یي])([\s\u200c\u200e])/g,"$1هٔ$3").replace(/(\S)(ه[\s\u200c\u200e]?\u0621)([\s\u200c\u200e])/g,"$1هٔ$3").replace(/(ۀ|هٓ)/g,"هٔ")},cleanupRLM:function(e,r){return e=e.replace(/([^a-zA-Z\-_])(\u200F)/g,"$1‌")},fixPersianGlyphs:function(e,r){return this.arrReplace(e,this.glyphs)},fixMiscNonPersianChars:function(e,r){return this.charReplace(e,"كيىۍېہە","کییییههه")},fixEnglishNumbers:function(e,r){return this.charReplace(e,"1234567890",this.digits)},fixArabicNumbers:function(e,r){return this.charReplace(e,"١٢٣٤٥٦٧٨٩٠",this.digits)},fixNumeralSymbols:function(e,r){return e.replace(new RegExp("(["+this.digits+"]) ?%","g"),"$1٪").replace(new RegExp("(["+this.digits+"])\\.(?=["+this.digits+"])","g"),"$1٫")},fixPunctuations:function(e,r){return this.charReplace(e,",;","،؛")},fixQuestionMark:function(e,r){return e.replace(/(\?)/g,"؟")},fixPerfixSpacing:function(e,r){return e.replace(/((\s|^)ن?می) /g,"$1‌").replace(/((\s|^)بی) /g,"$1‌")},fixSuffixSpacing:function(e,r){return e.replace(/ ((ام|ات|اش|ای|اید|ایم|اند)[\s.!؟?"'()[\]{}“”«»])/g,"‌$1").replace(/ (ها(ی)?[\s.!؟?"'()[\]{}“”«»])/g,"‌$1").replace(/ (تر((ی)|(ین))?[\s.!؟?"'()[\]{}“”«»])/g,"‌$1").replace(/ ((هایی|هایم|هایت|هایش|هایمان|هایتان|هایشان)[\s.!؟?"'()[\]{}“”«»])/g,"‌$1")},fixSuffixSpacingHamzeh:function(e,r){return e=e.replace(/(\S)(ه[\s\u200c]+[یي])([\s\u200c])/g,"$1ه‌ی$3"),e=e.replace(/(\S)(ه[\s\u200c]?\u0621)([\s\u200c])/g,"$1ه‌ی$3"),e=e.replace(/(\S)(ه[\s\u200c]?\u0654)([\s\u200c])/g,"$1ه‌ی$3")},fixSuffixMisc:function(e,r){return e.replace(/(\S)ه[\u200c\u200e][ئی]ی([\s\u200c\u200e])/g,"$1ه‌ای$2")},cleanupExtraMarks:function(e,r){return e.replace(/(!){2,}/g,"$1").replace(/(\u061F|\?){2,}/g,"$1").replace(/(!)([\s]*)([\u061F?])/g,"$3$1")},kashidasAsParenthetic:function(e,r){return e.replace(/(\s)\u0640+/g,"$1–").replace(/\u0640+(\s)/g,"–$1")},cleanupKashidas:function(e,r){return e.replace(/(\S)\u0640+(\S)/g,"$1$2").replace(/(\S)\u0640+(\S)/g,"$1$2")},fixPunctuationSpacing:function(e,r){return e.replace(/[ \t\u200c]*([:;,؛،.؟?!]{1})[ \t\u200c]*/g,"$1 ").replace(/([0-9۰-۹]+):\s+([0-9۰-۹]+)/g,"$1:$2").replace(/([0-9۰-۹]+)\. ([0-9۰-۹]+)/g,"$1.$2").replace(/([\u061F?!])\s+([\u061F?!])/g,"$1$2").replace(/!\s{1,}!/g,"!!").replace(/\?\s{1,}\?/g,"??").replace(/؟\s{1,}؟/g,"؟؟")},fixBracesSpacing:function(e,r){return e.replace(/[ \t\u200c]*(\()\s*([^)]+?)\s*?(\))[ \t\u200c]*/g," $1$2$3 ").replace(/[ \t\u200c]*(\[)\s*([^\]]+?)\s*?(\])[ \t\u200c]*/g," $1$2$3 ").replace(/[ \t\u200c]*(\{)\s*([^}]+?)\s*?(\})[ \t\u200c]*/g," $1$2$3 ").replace(/[ \t\u200c]*(“)\s*([^”]+?)\s*?(”)[ \t\u200c]*/g," $1$2$3 ").replace(/[ \t\u200c]*(«)\s*([^»]+?)\s*?(»)[ \t\u200c]*/g," $1$2$3 ")},fixBracesSpacingInside:function(e,r){return e.replace(/(\()\s*([^)]+?)\s*?(\))/g,"$1$2$3").replace(/(\[)\s*([^\]]+?)\s*?(\])/g,"$1$2$3").replace(/(\{)\s*([^}]+?)\s*?(\})/g,"$1$2$3").replace(/(“)\s*([^”]+?)\s*?(”)/g,"$1$2$3").replace(/(«)\s*([^»]+?)\s*?(»)/g,"$1$2$3")},markdownNormalizeBraces:function(e,r){return e.replace(/(\[.*?\])\s+(\(.*?\))/g,"$1$2").replace(/\(\(\s*(.*?)\s*\)\)/g,"(($1))").replace(/\[\[\s*(.*?)\s*\]\]/g,"[[$1]]").replace(/\{\{\s*(.*?)\s*\}\}/g,"{{$1}}").replace(/\{\{\{\s*(.*?)\s*\}\}\}/g,"{{{$1}}}").replace(/(\(\(.*\))\s+(\))/g,"$1$2").replace(/(\[\[.*\])\s+(\])/g,"$1$2").replace(/(\{\{.*\})\s+(\})/g,"$1$2")},markdownNormalizeLists:function(e,r){return e.replace(/((\n|^)\*.*?)\n+(?=\n\*)/g,"$1").replace(/((\n|^)-.*?)\n+(?=\n-)/g,"$1").replace(/((\n|^)#.*?)\n+(?=\n#)/g,"$1")},cleanupSpacing:function(e,r){return e.replace(/[ ]+/g," ").replace(/(\S)[ ]+([\u064e\u0650\u064f\u064b\u064d\u064C\u0651\u06C0])/g,"$1$2").replace(/\n[\s\u200c]*\n/g,"\n\n")},cleanupLineBreaks:function(e,r){return e.replace(/(\n{2,})/g,"\n\n")},cleanupBeginAndEnd:function(e,r){return e.replace(/([\n]+)[ \t\u200c]*/g,"$1").replace(/^[\s\u200c\u200e\u200f]+|[\s\u200c\u200e\u200f]+$/g,"")},flipPunctuations:function(e,r){var n=["-"],i=["!",".","،","…",'"'],s=[],a=[];e=this.fixThreeDots(e,r);for(var c=0;c]*?>/gi,function(e){return a.push(e)," __HTML__PRESERVER__ "})}if(i.preserve_comments){var c=[];e=e.replace(//g,function(e){return c.push(e)," __COMMENT__PRESERVER__ "})}if(i.preserve_brackets){var t=[];e=e.replace(/(\[.*?\])/g,function(e){return t.push(e)," __BRACKETS__PRESERVER__ "})}if(i.preserve_braces){var u=[];e=e.replace(/(\{.*?\})/g,function(e){return u.push(e)," __BRACES__PRESERVER__ "})}if(i.preserve_URIs){var p=[];e=e.replace(/\[(.*?)\]\((.*?)\)/g,function(e,r,n){return p.push(n.trim()),"["+r+"](__MD_LINK__PRESERVER__)"});var _=[];e=e.replace(/\b((?:[a-z][\w-]+:(?:\/{1,3}|[a-z0-9%])|www\d{0,3}[.]|[a-z0-9.-]+[.][a-z]{2,4}\/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()[\]{};:'".,<>?«»“”‘’]))/g,function(e){return _.push(e)," __URI__PRESERVER__ "})}if(i.preserve_nbsps){var f=[];e=e.replace(/ | /gi,function(e){return f.push(e)," __NBSPS__PRESERVER__ "})}if(i.decode_htmlentities&&(e=n.decodeHTMLEntities(e,i)),i.preserve_entities){var l=[];e=e.replace(/&(#?[^;\W]+;?)/g,function(e){return l.push(e)," __ENTITIES__PRESERVER__ "})}return i.normalize_eol&&(e=n.normalizeEOL(e,i)),i.fix_dashes&&(e=n.fixDashes(e,i)),i.fix_three_dots&&(e=n.fixThreeDots(e,i)),i.normalize_ellipsis&&(e=n.normalizeEllipsis(e,i)),i.fix_english_quotes_pairs&&(e=n.fixEnglishQuotesPairs(e,i)),i.fix_english_quotes&&(e=n.fixEnglishQuotes(e,i)),i.fix_hamzeh?e=n.fixHamzeh(e,i):i.fix_suffix_spacing&&(e=n.fixSuffixSpacingHamzeh(e,i)),i.cleanup_rlm&&(e=n.cleanupRLM(e,i)),i.cleanup_zwnj&&(e=n.cleanupZWNJ(e,i)),i.fix_arabic_numbers&&(e=n.fixArabicNumbers(e,i)),e=e.replace(/(^|\s+)([[({"'“«]?)(\S+)([\])}"'”»]?)(?=($|\s+))/g,function(e,r,s,a,c,t){return a.match(/[a-zA-Z\-_]{2,}/g)?e:a.match(/%(?:\d+\$)?[+-]?(?:[ 0]|'.{1})?-?\d*(?:\.\d+)?[bcdeEufFgGosxX]/g)?e:a.match(/&#\d+;/g)?e:i.skip_markdown_ordered_lists_numbers_conversion&&(e+c+t).match(/(?:(?:\r?\n)|(?:\r\n?)|(?:^|\n))\d+\.\s/)?e:(i.fix_english_numbers&&(e=n.fixEnglishNumbers(e,i)),i.fix_numeral_symbols&&(e=n.fixNumeralSymbols(e,i)),i.fix_punctuations&&(e=n.fixPunctuations(e,i)),i.fix_persian_glyphs&&(e=n.fixPersianGlyphs(e,i)),i.fix_misc_non_persian_chars&&(e=n.fixMiscNonPersianChars(e,i)),i.fix_question_mark&&(e=n.fixQuestionMark(e,i)),e)}),i.fix_perfix_spacing&&(e=n.fixPerfixSpacing(e,i)),i.fix_suffix_spacing&&(e=n.fixSuffixSpacing(e,i)),i.fix_suffix_misc&&(e=n.fixSuffixMisc(e,i)),i.fix_spacing_for_braces_and_quotes&&(e=n.fixBracesSpacing(e,i)),i.cleanup_extra_marks&&(e=n.cleanupExtraMarks(e,i)),i.fix_spacing_for_punctuations&&(e=n.fixPunctuationSpacing(e,i)),i.kashidas_as_parenthetic&&(e=n.kashidasAsParenthetic(e,i)),i.cleanup_kashidas&&(e=n.cleanupKashidas(e,i)),i.markdown_normalize_braces&&(e=n.markdownNormalizeBraces(e,i)),i.markdown_normalize_lists&&(e=n.markdownNormalizeLists(e,i)),i.fix_spacing_for_braces_and_quotes&&(e=n.fixBracesSpacingInside(e,i)),i.fix_diacritics&&(e=n.fixDiacritics(e,i)),i.cleanup_spacing&&(e=n.cleanupSpacing(e,i)),i.cleanup_line_breaks&&(e=n.cleanupLineBreaks(e,i)),i.preserve_entities&&(e=e.replace(/[ ]?__ENTITIES__PRESERVER__ /g,function(){return l.shift()})),i.preserve_nbsps&&(e=e.replace(/[ ]?__NBSPS__PRESERVER__ /g,function(){return f.shift()})),i.preserve_URIs&&(e=e.replace(/__MD_LINK__PRESERVER__/g,function(){return p.shift()}),e=e.replace(/[ ]?__URI__PRESERVER__ /g,function(){return _.shift()})),i.preserve_braces&&(e=e.replace(/[ ]?__BRACES__PRESERVER__ /g,function(){return u.shift()})),i.preserve_brackets&&(e=e.replace(/[ ]?__BRACKETS__PRESERVER__ /g,function(){return t.shift()})),i.preserve_comments&&(e=e.replace(/[ ]?__COMMENT__PRESERVER__ /g,function(){return c.shift()})),i.preserve_HTML&&(e=e.replace(/[ ]?__HTML__PRESERVER__ /g,function(){return a.shift()})),i.preserve_frontmatter&&(e=e.replace(/[ ]?__FRONTMATTER__PRESERVER__ /g,function(){return s.shift()})),e=i.cleanup_begin_and_end?n.cleanupBeginAndEnd(e,i):e.replace(/[ ]$/g,"")},cleanupZWNJ:function(e,r){return e=e.replace(/\u00ad/g,"‌"),e=e.replace(/\u200c{2,}/g,"‌"),e=e.replace(/([۰-۹0-9إأةؤورزژاآدذ،؛,:«»\\\/@#$٪×*()ـ\-=|])\u200c/g,"$1"),e=e.replace(/\u200c([\w])/g,"$1"),e=e.replace(/([\w])\u200c/g,"$1"),e=e.replace(/\u200c([\n\s[].،«»:()؛؟?;$!@-=+\\])/g,"$1"),e=e.replace(/([\n\s[.،«»:()؛؟?;$!@\-=+\\])\u200c/g,"$1"),e=e.replace(/(\s+)\u200c/g,"$1"),e=e.replace(/\u200c(\s+)/g,"$1"),e=e.replace(/(^\u200c|\u200c$)/gm,"")},decodeHTMLEntities:function(e,r){var n=this;return e.replace(/&(#?[^;\W]+;?)/g,function(e,r){var i;if(i=/^#(\d+);?$/.exec(r))return String.fromCharCode(parseInt(i[1],10));if(i=/^#[Xx]([A-Fa-f0-9]+);?/.exec(r))return String.fromCharCode(parseInt(i[1],16));var s=/;$/.test(r),a=s?r.replace(/;$/,""):r,c=n.entities[a]||s&&n.entities[r];return"number"==typeof c?String.fromCharCode(c):"string"==typeof c?c:"&"+r})},normalizeEOL:function(e,r){return e.replace(/(\r?\n)|(\r\n?)/g,"\n")},fixDashes:function(e,r){return e.replace(/-{3}/g,"—").replace(/-{2}/g,"–")},fixThreeDots:function(e,r){return e.replace(/\s*\.{3,}/g,"…")},normalizeEllipsis:function(e,r){return e.replace(/(…){2,}/g,"…")},fixEnglishQuotesPairs:function(e,r){return e.replace(/(“)(.+?)(”)/g,"«$2»")},fixEnglishQuotes:function(e,r){return e.replace(/(["'`]+)(.+?)(\1)/g,"«$2»")},fixHamzeh:function(e,r){return e.replace(/(\S)(ه[\s\u200c\u200e]+[یي])([\s\u200c\u200e])/g,"$1هٔ$3").replace(/(\S)(ه[\s\u200c\u200e]?\u0621)([\s\u200c\u200e])/g,"$1هٔ$3").replace(/(ۀ|هٓ)/g,"هٔ")},cleanupRLM:function(e,r){return e=e.replace(/([^a-zA-Z\-_])(\u200F)/g,"$1‌")},fixPersianGlyphs:function(e,r){return this.arrReplace(e,this.glyphs)},fixMiscNonPersianChars:function(e,r){return this.charReplace(e,"كڪيىۍېہە","ککییییههه")},fixEnglishNumbers:function(e,r){return this.charReplace(e,"1234567890",this.digits)},fixArabicNumbers:function(e,r){return this.charReplace(e,"١٢٣٤٥٦٧٨٩٠",this.digits)},convertPersianNumbers:function(e,r){return e.replace(/[\u0660-\u0669\u06f0-\u06f9]/g,function(e){return 15&e.charCodeAt(0)})},fixNumeralSymbols:function(e,r){return e.replace(new RegExp("(["+this.digits+"]) ?%","g"),"$1٪").replace(new RegExp("(["+this.digits+"])\\.(?=["+this.digits+"])","g"),"$1٫").replace(new RegExp("(["+this.digits+"])\\,(?=["+this.digits+"])","g"),"$1٬")},fixPunctuations:function(e,r){return this.charReplace(e,",;","،؛")},fixQuestionMark:function(e,r){return e.replace(/(\?)/g,"؟")},fixPerfixSpacing:function(e,r){return e.replace(/((\s|^)ن?می) /g,"$1‌").replace(/((\s|^)بی) /g,"$1‌")},fixSuffixSpacing:function(e,r){return e.replace(/ (ها(ی)?[\s.,;،؛!؟?"'()[\]{}“”«»])/g,"‌$1").replace(/ ((ام|ات|اش|ای|اید|ایم|اند|مان|تان|شان)[\s.,;،؛!؟?"'()[\]{}“”«»])/g,"‌$1").replace(/ (تر((ی)|(ین))?[\s.,;،؛!؟?"'()[\]{}“”«»])/g,"‌$1").replace(/ ((هایی|هایم|هایت|هایش|هایمان|هایتان|هایشان)[\s.,;،؛!؟?"'()[\]{}“”«»])/g,"‌$1")},fixSuffixSpacingHamzeh:function(e,r){return e=e.replace(/(\S)(ه[\s\u200c]+[یي])([\s\u200c])/g,"$1ه‌ی$3"),e=e.replace(/(\S)(ه[\s\u200c]?\u0621)([\s\u200c])/g,"$1ه‌ی$3"),e=e.replace(/(\S)(ه[\s\u200c]?\u0654)([\s\u200c])/g,"$1ه‌ی$3")},fixSuffixMisc:function(e,r){return e.replace(/(\S)ه[\u200c\u200e][ئی]ی([\s\u200c\u200e])/g,"$1ه‌ای$2")},cleanupExtraMarks:function(e,r){return e.replace(/(!){2,}/g,"$1").replace(/(\u061F|\?){2,}/g,"$1").replace(/(!)([\s]*)([\u061F?])/g,"$3$1")},kashidasAsParenthetic:function(e,r){return e.replace(/(\s)\u0640+/g,"$1–").replace(/\u0640+(\s)/g,"–$1")},cleanupKashidas:function(e,r){return e.replace(/(\S)\u0640+(\S)/g,"$1$2").replace(/(\S)\u0640+(\S)/g,"$1$2")},fixPunctuationSpacing:function(e,r){return e.replace(/[ \t\u200c]*([:;,؛،.؟?!]{1})[ \t\u200c]*/g,"$1 ").replace(/([0-9۰-۹]+):\s+([0-9۰-۹]+)/g,"$1:$2").replace(/([0-9۰-۹]+)\. ([0-9۰-۹]+)/g,"$1.$2").replace(/([\w\-_]+)\. (ir|com|org|net|info|edu|me)(\s)/g,"$1.$2$3").replace(/([\u061F?!])\s+([\u061F?!])/g,"$1$2").replace(/!\s{1,}!/g,"!!").replace(/\?\s{1,}\?/g,"??").replace(/؟\s{1,}؟/g,"؟؟")},fixBracesSpacing:function(e,r){return e.replace(/[ \t\u200c]*(\()\s*([^)]+?)\s*?(\))[ \t\u200c]*/g," $1$2$3 ").replace(/[ \t\u200c]*(\[)\s*([^\]]+?)\s*?(\])[ \t\u200c]*/g," $1$2$3 ").replace(/[ \t\u200c]*(\{)\s*([^}]+?)\s*?(\})[ \t\u200c]*/g," $1$2$3 ").replace(/[ \t\u200c]*(“)\s*([^”]+?)\s*?(”)[ \t\u200c]*/g," $1$2$3 ").replace(/[ \t\u200c]*(«)\s*([^»]+?)\s*?(»)[ \t\u200c]*/g," $1$2$3 ")},fixBracesSpacingInside:function(e,r){return e.replace(/(\()\s*([^)]+?)\s*?(\))/g,"$1$2$3").replace(/(\[)\s*([^\]]+?)\s*?(\])/g,"$1$2$3").replace(/(\{)\s*([^}]+?)\s*?(\})/g,"$1$2$3").replace(/(“)\s*([^”]+?)\s*?(”)/g,"$1$2$3").replace(/(«)\s*([^»]+?)\s*?(»)/g,"$1$2$3")},markdownNormalizeBraces:function(e,r){return e.replace(/(\[.*?\])\s+(\(.*?\))/g,"$1$2").replace(/\(\(\s*(.*?)\s*\)\)/g,"(($1))").replace(/\[\[\s*(.*?)\s*\]\]/g,"[[$1]]").replace(/\{\{\s*(.*?)\s*\}\}/g,"{{$1}}").replace(/\{\{\{\s*(.*?)\s*\}\}\}/g,"{{{$1}}}").replace(/(\(\(.*\))\s+(\))/g,"$1$2").replace(/(\[\[.*\])\s+(\])/g,"$1$2").replace(/(\{\{.*\})\s+(\})/g,"$1$2")},markdownNormalizeLists:function(e,r){return e.replace(/((\n|^)\*.*?)\n+(?=\n\*)/g,"$1").replace(/((\n|^)-.*?)\n+(?=\n-)/g,"$1").replace(/((\n|^)#.*?)\n+(?=\n#)/g,"$1")},fixDiacritics:function(e,r){return e.replace(/\u200c([\u064e\u0650\u064f\u064b\u064d\u064c\u0651\u06c0])/g,"$1").replace(/(.*)([\u064e\u0650\u064f\u064b\u064d\u064c\u0651\u06c0]){2,}(.*)/,"$1$2$3").replace(/(\S)[ ]+([\u064e\u0650\u064f\u064b\u064d\u064C\u0651\u06C0])/g,"$1$2")},cleanupSpacing:function(e,r){return e.replace(/[ ]+/g," ").replace(/\n[\s\u200c]*\n/g,"\n\n")},cleanupLineBreaks:function(e,r){return e.replace(/(\n{2,})/g,"\n\n")},cleanupBeginAndEnd:function(e,r){return e.replace(/([\n]+)[ \t\u200c]*/g,"$1").replace(/^[\s\u200c\u200e\u200f]+|[\s\u200c\u200e\u200f]+$/g,"")},flipPunctuations:function(e,r){var n=["-"],i=["!",".","،","…",'"'],s=[],a=[];e=this.fixThreeDots(e,r);for(var c=0;c