From 6be73aaefab2b5ea8de73bf3233a6d869e31f7d9 Mon Sep 17 00:00:00 2001 From: Nasser Rafie Date: Mon, 3 Feb 2020 20:33:26 +0330 Subject: [PATCH] :package: 0.19.0 --- CHANGELOG.md | 27 +++++++++++++++++++++++++++ lib/virastar.js | 2 +- lib/virastar.min.js | 4 ++-- package.json | 2 +- 4 files changed, 31 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 635a0f4..3ecde4e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,30 @@ +### 0.19.0 +- Added: (undocumented) fix heh + ye, alternative to `fix_hamzeh` +- Added: cleaning whitespace/zwnj between new-lines on `cleanup_spacing` +- Added: new option `fix_numeral_symbols` to replace percent signs and decimal separators, props @ebraminio/persiantools +- Added: new option `fix_persian_glyphs` to replace glyph chars, props @ebraminio/persiantools +- Added: new option `fix_suffix_misc` to fix hamza with double yeh, props @ebraminio/persiantools +- Added: new option `markdown_normalize_braces` +- Added: new option `markdown_normalize_lists` +- Added: new option `preserve_frontmatter` to preserve frontmatter data +- Added: padding the end of string while cleanup +- Added: re-ordering extra marks: `!?` into `?!` +- Added: removing space between same marks +- Added: removing unnecessary zwnj on start/end of each line, props @ebraminio/persiantools +- Added: replacing more than one english question mark with just one +- Added: skip cleanup if text is empty or whitespace +- Added: storing markdown links separately to help space cleanup working +- Changed: deprecated `aggresive` option +- Changed: moved cleaning whitespaces before newlines to `cleanup_begin_and_end` +- Changed: new option `fix_spacing_for_punctuations` extracted from `fix_spacing_for_braces_and_quotes` +- Changed: simpler pattern for preserving URIs +- Changed: some options disabled by default: `preserve_braces`, `preserve_brackets`, `skip_markdown_ordered_lists_numbers_conversion` +- Fixed: account for punctuations, braces and quotes 
after suffixes +- Fixed: account for zwnj after yeh on fixing hamzeh +- Fixed: copy options object before parsing +- Fixed: putting back correct whitespace on cleaning zwnjs +- Fixed: unescaped char on space after dots in numbers + ### 0.18.0 - Added: new option `normalize_ellipsis` to replace more than one ellipsis with one - Added: convert all soft hyphens into zwnj, on `cleanup_zwnj` diff --git a/lib/virastar.js b/lib/virastar.js index e34a463..dba0716 100644 --- a/lib/virastar.js +++ b/lib/virastar.js @@ -1,5 +1,5 @@ /*! -* Virastar - v0.18.0 - 2019-05-11 +* Virastar - v0.19.0 - 2020-02-03 * https://github.com/brothersincode/virastar * Licensed: MIT */ diff --git a/lib/virastar.min.js b/lib/virastar.min.js index 6eccb3d..6f4dbb4 100644 --- a/lib/virastar.min.js +++ b/lib/virastar.min.js @@ -1,6 +1,6 @@ /*! -* Virastar - v0.18.0 - 2019-05-11 +* Virastar - v0.19.0 - 2020-02-03 * https://github.com/brothersincode/virastar * Licensed: MIT */ -!function(e,r,a){"undefined"!=typeof module?module.exports=a():"function"==typeof define&&"object"==typeof define.amd?define(a):"undefined"!=typeof window?window[e]=a():r[e]=a()}("Virastar",this,function(){function e(r,a){if(!(this instanceof e))return new e(r,a);if(r=r||{},"object"==typeof r)this.opts=this.parseOptions(r);else if("string"==typeof r)return this.opts=this.parseOptions(a||{}),this.cleanup(r);return this}return 
e.prototype={defaults:{normalize_eol:!0,decode_htmlentities:!0,fix_dashes:!0,fix_three_dots:!0,normalize_ellipsis:!0,fix_english_quotes_pairs:!0,fix_english_quotes:!0,fix_hamzeh:!0,cleanup_rlm:!0,cleanup_zwnj:!0,fix_spacing_for_braces_and_quotes:!0,fix_arabic_numbers:!0,fix_english_numbers:!0,fix_misc_non_persian_chars:!0,fix_punctuations:!0,fix_question_mark:!0,skip_markdown_ordered_lists_numbers_conversion:!0,fix_perfix_spacing:!0,fix_suffix_spacing:!0,aggresive:!0,kashidas_as_parenthetic:!0,cleanup_kashidas:!0,cleanup_extra_marks:!0,cleanup_spacing:!0,cleanup_line_breaks:!0,cleanup_begin_and_end:!0,preserve_HTML:!0,preserve_comments:!0,preserve_entities:!0,preserve_URIs:!0,preserve_nbsps:!0,preserve_brackets:!0,preserve_braces:!0},entities:{"sbquo;":"‚","lsquo;":"‘","lsquor;":"‚","ldquo;":"“","ldquor;":"„","rdquo;":"”","rdquor;":"”","rsquo;":"’","rsquor;":"’","apos;":"'","QUOT;":'"',QUOT:'"',"quot;":'"',quot:'"',"zwj;":"‍","ZWNJ;":"‌","zwnj;":"‌","shy;":"­"},numbersPersian:"۱۲۳۴۵۶۷۸۹۰",numbersArabic:"١٢٣٤٥٦٧٨٩٠",numbersEnglish:"1234567890",parseOptions:function(e){var r=this.defaults;for(var a in r)e.hasOwnProperty(a)&&(r[a]=e[a]);return r},charBatchReplace:function(e,r,a){var s=r.split(""),n=a.split("");for(var _ in s)e=e.replace(new RegExp(s[_],"g"),n[_]);return e},cleanup:function(e,r){if("string"!=typeof e)throw new TypeError("Expected a String");var a=this,s=r?a.parseOptions(r):a.opts;if(s.preserve_HTML){var n=[];e=e.replace(/<\/?[a-z][^>]*?>/gi,function(e){return n.push(e),"__HTML__PRESERVER__"})}if(s.preserve_comments){var _=[];e=e.replace(//g,function(e){return _.push(e),"__COMMENT__PRESERVER__"})}if(s.preserve_URIs){var i=[],t=new 
RegExp("([A-Za-z][A-Za-z0-9+\\-.]*):(?:(//)(?:((?:[A-Za-z0-9\\-._~!$&'()*+,;=:]|%[0-9A-Fa-f]{2})*)@)?((?:\\[(?:(?:(?:(?:[0-9A-Fa-f]{1,4}:){6}|::(?:[0-9A-Fa-f]{1,4}:){5}|(?:[0-9A-Fa-f]{1,4})?::(?:[0-9A-Fa-f]{1,4}:){4}|(?:(?:[0-9A-Fa-f]{1,4}:){0,1}[0-9A-Fa-f]{1,4})?::(?:[0-9A-Fa-f]{1,4}:){3}|(?:(?:[0-9A-Fa-f]{1,4}:){0,2}[0-9A-Fa-f]{1,4})?::(?:[0-9A-Fa-f]{1,4}:){2}|(?:(?:[0-9A-Fa-f]{1,4}:){0,3}[0-9A-Fa-f]{1,4})?::[0-9A-Fa-f]{1,4}:|(?:(?:[0-9A-Fa-f]{1,4}:){0,4}[0-9A-Fa-f]{1,4})?::)(?:[0-9A-Fa-f]{1,4}:[0-9A-Fa-f]{1,4}|(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?))|(?:(?:[0-9A-Fa-f]{1,4}:){0,5}[0-9A-Fa-f]{1,4})?::[0-9A-Fa-f]{1,4}|(?:(?:[0-9A-Fa-f]{1,4}:){0,6}[0-9A-Fa-f]{1,4})?::)|[Vv][0-9A-Fa-f]+\\.[A-Za-z0-9\\-._~!$&'()*+,;=:]+)\\]|(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)|(?:[A-Za-z0-9\\-._~!$&'()*+,;=]|%[0-9A-Fa-f]{2})*))(?::([0-9]*))?((?:/(?:[A-Za-z0-9\\-._~!$&'()*+,;=:@]|%[0-9A-Fa-f]{2})*)*)|/((?:(?:[A-Za-z0-9\\-._~!$&'()*+,;=:@]|%[0-9A-Fa-f]{2})+(?:/(?:[A-Za-z0-9\\-._~!$&'()*+,;=:@]|%[0-9A-Fa-f]{2})*)*)?)|((?:[A-Za-z0-9\\-._~!$&'()*+,;=:@]|%[0-9A-Fa-f]{2})+(?:/(?:[A-Za-z0-9\\-._~!$&'()*+,;=:@]|%[0-9A-Fa-f]{2})*)*)|)(?:\\?((?:[A-Za-z0-9\\-._~!$&'()*+,;=:@/?]|%[0-9A-Fa-f]{2})*))?(?:\\#((?:[A-Za-z0-9\\-._~!$&'()*+,;=:@/?]|%[0-9A-Fa-f]{2})*))?","g");e=e.replace(t,function(e){return i.push(e),"__URI__PRESERVER__"})}if(s.preserve_brackets){var u=[];e=e.replace(/(\[.*?\])/g,function(e){return u.push(e),"__BRACKETS__PRESERVER__"})}if(s.preserve_braces){var p=[];e=e.replace(/(\{.*?\})/g,function(e){return p.push(e),"__BRACES__PRESERVER__"})}if(s.preserve_nbsps){var c=[];e=e.replace(/ | /gi,function(e){return c.push(e),"__NBSPS__PRESERVER__"})}if(s.decode_htmlentities&&(e=a.decodeHTMLEntities(e,s)),s.preserve_entities){var l=[];e=e.replace(/&(#?[^;\W]+;?)/g,function(e){return l.push(e),"__ENTITIES__PRESERVER__"})}return 
s.normalize_eol&&(e=e.replace(/(\r?\n)|(\r\n?)/g,"\n")),s.fix_dashes&&(e=e.replace(/-{3}/g,"—"),e=e.replace(/-{2}/g,"–")),s.fix_three_dots&&(e=a.fixThreeDots(e,s)),s.normalize_ellipsis&&(e=a.normalizeEllipsis(e,s)),s.fix_english_quotes_pairs&&(e=e.replace(/(“)(.+?)(”)/g,"«$2»")),s.fix_english_quotes&&(e=e.replace(/(["'`]+)(.+?)(\1)/g,"«$2»")),s.fix_hamzeh&&(e=e.replace(/(\S)(ه[\s\u200C]+[یي])(\s)/g,"$1هٔ$3"),e=e.replace(/(\S)(ه[\s\u200C]?\u0621)(\s)/g,"$1هٔ$3")),s.cleanup_rlm&&(e=e.replace(/([^a-zA-Z\-_])(\u200F)/g,"$1‌")),s.cleanup_zwnj&&(e=a.cleanupZWNJ(e,s)),s.fix_arabic_numbers&&(e=a.charBatchReplace(e,a.numbersArabic,a.numbersPersian)),e=e.replace(/(^|\s+)([[({"'“«]?)(\S+)([\])}"'”»]?)(?=($|\s+))/g,function(e,r,n,_,i,t){return _.match(/[a-zA-Z\-_]{2,}/g)?e:_.match(/%(?:\d+\$)?[+-]?(?:[ 0]|'.{1})?-?\d*(?:\.\d+)?[bcdeEufFgGosxX]/g)?e:_.match(/&#\d+;/g)?e:s.skip_markdown_ordered_lists_numbers_conversion&&(e+i+t).match(/(?:(?:\r?\n)|(?:\r\n?)|(?:^|\n))\d+\.\s/)?e:(s.fix_english_numbers&&(e=a.charBatchReplace(e,a.numbersEnglish,a.numbersPersian)),s.fix_punctuations&&(e=a.charBatchReplace(e,"%,;","٪،؛")),s.fix_misc_non_persian_chars&&(e=e.replace(/ك/g,"ک").replace(/ي/g,"ی").replace(/ى/g,"ی").replace(/ۍ/g,"ی").replace(/ې/g,"ی").replace(/ہ/g,"ه").replace(/[ەھ]/g,"ه")),s.fix_question_mark&&(e=e.replace(/(\?)/g,"؟")),e)}),s.fix_perfix_spacing&&(e=e.replace(/((\s|^)ن?می)\u0020/g,"$1‌"),e=e.replace(/((\s|^)بی)\u0020/g,"$1‌")),s.fix_suffix_spacing&&(e=e.replace(/\u0020((ام|ات|اش|ای|اید|ایم|اند)\s)/g,"‌$1"),e=e.replace(/\u0020(ها(ی)?\s)/g,"‌$1"),e=e.replace(/\u0020(تر((ی)|(ین))?\s)/g,"‌$1"),e=e.replace(/\u0020((هایی|هایم|هایت|هایش|هایمان|هایتان|هایشان)\s)/g,"‌$1")),s.aggresive&&(s.cleanup_extra_marks&&(e=e.replace(/(!){2,}/g,"$1"),e=e.replace(/(\u061F){2,}/g,"$1")),s.kashidas_as_parenthetic&&(e=e.replace(/(\s)\u0640+/g,"$1–"),e=e.replace(/\u0640+(\s)/g,"–$1")),s.cleanup_kashidas&&(e=e.replace(/(\S)\u0640+(\S)/g,"$1$2"),e=e.replace(/(\S)\u0640+(\S)/g,"$1$2"))),s.fix_spacing_f
or_braces_and_quotes&&(e=e.replace(/[ \t\u200C]*(\()\s*([^)]+?)\s*?(\))[ \t\u200C]*/g," $1$2$3 "),e=e.replace(/[ \t\u200C]*(\[)\s*([^\]]+?)\s*?(\])[ \t\u200C]*/g," $1$2$3 "),e=e.replace(/[ \t\u200C]*(\{)\s*([^}]+?)\s*?(\})[ \t\u200C]*/g," $1$2$3 "),e=e.replace(/[ \t\u200C]*(“)\s*([^”]+?)\s*?(”)[ \t\u200C]*/g," $1$2$3 "),e=e.replace(/[ \t\u200C]*(«)\s*([^»]+?)\s*?(»)[ \t\u200C]*/g," $1$2$3 "),e=e.replace(/[ \t\u200C]*([:;,؛،.؟!]{1})[ \t\u200C]*/g,"$1 "),e=e.replace(/([0-9۰-۹]+):\s+([0-9۰-۹]+)/g,"$1:$2"),e=e.replace(/([0-9۰-۹]+). ([0-9۰-۹]+)/g,"$1.$2"),e=e.replace(/(\u061F|!)\s(\u061F|!)/g,"$1$2"),e=e.replace(/(\()\s*([^)]+?)\s*?(\))/g,"$1$2$3"),e=e.replace(/(\[)\s*([^\]]+?)\s*?(\])/g,"$1$2$3"),e=e.replace(/(\{)\s*([^}]+?)\s*?(\})/g,"$1$2$3"),e=e.replace(/(“)\s*([^”]+?)\s*?(”)/g,"$1$2$3"),e=e.replace(/(«)\s*([^»]+?)\s*?(»)/g,"$1$2$3")),s.cleanup_spacing&&(e=e.replace(/[ ]+/g," "),e=e.replace(/(.)\u0020+([\u064e\u0650\u064f\u064b\u064d\u064C\u0651\u06C0])/g,"$1$2"),e=e.replace(/([\n]+)[ \t\u200C]*/g,"$1")),s.cleanup_line_breaks&&(e=e.replace(/(\n{2,})/g,"\n\n")),s.cleanup_begin_and_end&&(e=e.replace(/^[\s\u200c\u200e\u200f]+|[\s\u200c\u200e\u200f]+$/g,"")),s.preserve_entities&&(e=e.replace(/__ENTITIES__PRESERVER__/g,function(){return l.shift()})),s.preserve_nbsps&&(e=e.replace(/__NBSPS__PRESERVER__/g,function(){return c.shift()})),s.preserve_braces&&(e=e.replace(/__BRACES__PRESERVER__/g,function(){return p.shift()})),s.preserve_brackets&&(e=e.replace(/__BRACKETS__PRESERVER__/g,function(){return u.shift()})),s.preserve_URIs&&(e=e.replace(/__URI__PRESERVER__/g,function(){return i.shift()})),s.preserve_comments&&(e=e.replace(/__COMMENT__PRESERVER__/g,function(){return _.shift()})),s.preserve_HTML&&(e=e.replace(/__HTML__PRESERVER__/g,function(){return n.shift()})),e},cleanupZWNJ:function(e,r){return 
e=e.replace(/\u00ad/g,"‌"),e=e.replace(/\u200C{2,}/g,"‌"),e=e.replace(/([۰-۹0-9إأةؤورزژاآدذ،؛,:«»\\\/@#$٪×*()ـ\-=|])\u200c/g,"$1"),e=e.replace(/\u200c([\u064e\u0650\u064f\u064b\u064d\u064C\u0651\u06C0])/g,"$1"),e=e.replace(/\u200c([\w])/g,"$1"),e=e.replace(/([\w])\u200c/g,"$1"),e=e.replace(/\u200c([\n\s[].،«»:()؛؟?;$!@-=+\\])/g,"$1"),e=e.replace(/([\n\s[.،«»:()؛؟?;$!@\-=+\\])\u200c/g,"$1"),e=e.replace(/\s+\u200C|\u200C\s+/g," ")},decodeHTMLEntities:function(e,r){var a=this;return e.replace(/&(#?[^;\W]+;?)/g,function(e,r){var s;if(s=/^#(\d+);?$/.exec(r))return String.fromCharCode(parseInt(s[1],10));if(s=/^#[Xx]([A-Fa-f0-9]+);?/.exec(r))return String.fromCharCode(parseInt(s[1],16));var n=/;$/.test(r),_=n?r.replace(/;$/,""):r,i=a.entities[_]||n&&a.entities[r];return"number"==typeof i?String.fromCharCode(i):"string"==typeof i?i:"&"+r})},fixThreeDots:function(e,r){return e.replace(/\s*\.{3,}/g,"…")},normalizeEllipsis:function(e,r){return e.replace(/(…){2,}/g,"…")},flipPunctuations:function(e,r){var a=["-"],s=["!",".","،","…",'"'],n=[],_=[];e=this.fixThreeDots(e,r);for(var i=0;i]*?>/gi,function(e){return a.push(e),"__HTML__PRESERVER__"})}if(i.preserve_comments){var c=[];e=e.replace(//g,function(e){return c.push(e),"__COMMENT__PRESERVER__"})}if(i.preserve_brackets){var t=[];e=e.replace(/(\[.*?\])/g,function(e){return t.push(e),"__BRACKETS__PRESERVER__"})}if(i.preserve_braces){var u=[];e=e.replace(/(\{.*?\})/g,function(e){return u.push(e),"__BRACES__PRESERVER__"})}if(i.preserve_URIs){var p=[];e=e.replace(/\[(.*?)\]\((.*?)\)/g,function(e,r,n){return p.push(n.trim()),"["+r+"](__MD_LINK__PRESERVER__)"});var _=[];e=e.replace(/(https?:\/\/(?:www\.|(?!www))[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s]{2,}|www\.[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s]{2,}|https?:\/\/(?:www\.|(?!www))[a-zA-Z0-9]+\.[^\s]{2,}|www\.[a-zA-Z0-9]+\.[^\s]{2,})/g,function(e){return _.push(e),"__URI__PRESERVER__"})}if(i.preserve_nbsps){var f=[];e=e.replace(/ | /gi,function(e){return 
f.push(e),"__NBSPS__PRESERVER__"})}if(i.decode_htmlentities&&(e=n.decodeHTMLEntities(e,i)),i.preserve_entities){var l=[];e=e.replace(/&(#?[^;\W]+;?)/g,function(e){return l.push(e),"__ENTITIES__PRESERVER__"})}return i.normalize_eol&&(e=n.normalizeEOL(e,i)),i.fix_dashes&&(e=n.fixDashes(e,i)),i.fix_three_dots&&(e=n.fixThreeDots(e,i)),i.normalize_ellipsis&&(e=n.normalizeEllipsis(e,i)),i.fix_english_quotes_pairs&&(e=n.fixEnglishQuotesPairs(e,i)),i.fix_english_quotes&&(e=n.fixEnglishQuotes(e,i)),i.fix_hamzeh?e=n.fixHamzeh(e,i):i.fix_suffix_spacing&&(e=n.fixSuffixSpacingHamzeh(e,i)),i.cleanup_rlm&&(e=n.cleanupRLM(e,i)),i.cleanup_zwnj&&(e=n.cleanupZWNJ(e,i)),i.fix_arabic_numbers&&(e=n.fixArabicNumbers(e,i)),e=e.replace(/(^|\s+)([[({"'“«]?)(\S+)([\])}"'”»]?)(?=($|\s+))/g,function(e,r,s,a,c,t){return a.match(/[a-zA-Z\-_]{2,}/g)?e:a.match(/%(?:\d+\$)?[+-]?(?:[ 0]|'.{1})?-?\d*(?:\.\d+)?[bcdeEufFgGosxX]/g)?e:a.match(/&#\d+;/g)?e:i.skip_markdown_ordered_lists_numbers_conversion&&(e+c+t).match(/(?:(?:\r?\n)|(?:\r\n?)|(?:^|\n))\d+\.\s/)?e:(i.fix_english_numbers&&(e=n.fixEnglishNumbers(e,i)),i.fix_numeral_symbols&&(e=n.fixNumeralSymbols(e,i)),i.fix_punctuations&&(e=n.fixPunctuations(e,i)),i.fix_persian_glyphs&&(e=n.fixPersianGlyphs(e,i)),i.fix_misc_non_persian_chars&&(e=n.fixMiscNonPersianChars(e,i)),i.fix_question_mark&&(e=n.fixQuestionMark(e,i)),e)}),i.fix_perfix_spacing&&(e=n.fixPerfixSpacing(e,i)),i.fix_suffix_spacing&&(e=n.fixSuffixSpacing(e,i)),i.fix_suffix_misc&&(e=n.fixSuffixMisc(e,i)),i.fix_spacing_for_braces_and_quotes&&(e=n.fixBracesSpacing(e,i)),i.cleanup_extra_marks&&(e=n.cleanupExtraMarks(e,i)),i.fix_spacing_for_punctuations&&(e=n.fixPunctuationSpacing(e,i)),i.kashidas_as_parenthetic&&(e=n.kashidasAsParenthetic(e,i)),i.cleanup_kashidas&&(e=n.cleanupKashidas(e,i)),i.markdown_normalize_braces&&(e=n.markdownNormalizeBraces(e,i)),i.markdown_normalize_lists&&(e=n.markdownNormalizeLists(e,i)),i.fix_spacing_for_braces_and_quotes&&(e=n.fixBracesSpacingInside(e,i)),i.cleanup_sp
acing&&(e=n.cleanupSpacing(e,i)),i.cleanup_line_breaks&&(e=n.cleanupLineBreaks(e,i)),e=i.cleanup_begin_and_end?n.cleanupBeginAndEnd(e,i):e.replace(/[ ]$/g,""),i.preserve_entities&&(e=e.replace(/__ENTITIES__PRESERVER__/g,function(){return l.shift()})),i.preserve_nbsps&&(e=e.replace(/__NBSPS__PRESERVER__/g,function(){return f.shift()})),i.preserve_URIs&&(e=e.replace(/__MD_LINK__PRESERVER__/g,function(){return p.shift()}),e=e.replace(/__URI__PRESERVER__/g,function(){return _.shift()})),i.preserve_braces&&(e=e.replace(/__BRACES__PRESERVER__/g,function(){return u.shift()})),i.preserve_brackets&&(e=e.replace(/__BRACKETS__PRESERVER__/g,function(){return t.shift()})),i.preserve_comments&&(e=e.replace(/__COMMENT__PRESERVER__/g,function(){return c.shift()})),i.preserve_HTML&&(e=e.replace(/__HTML__PRESERVER__/g,function(){return a.shift()})),i.preserve_frontmatter&&(e=e.replace(/__FRONTMATTER__PRESERVER__/g,function(){return s.shift()})),e},cleanupZWNJ:function(e,r){return e=e.replace(/\u00ad/g,"‌"),e=e.replace(/\u200c{2,}/g,"‌"),e=e.replace(/([۰-۹0-9إأةؤورزژاآدذ،؛,:«»\\\/@#$٪×*()ـ\-=|])\u200c/g,"$1"),e=e.replace(/\u200c([\u064e\u0650\u064f\u064b\u064d\u064c\u0651\u06c0])/g,"$1"),e=e.replace(/\u200c([\w])/g,"$1"),e=e.replace(/([\w])\u200c/g,"$1"),e=e.replace(/\u200c([\n\s[].،«»:()؛؟?;$!@-=+\\])/g,"$1"),e=e.replace(/([\n\s[.،«»:()؛؟?;$!@\-=+\\])\u200c/g,"$1"),e=e.replace(/(\s+)\u200c/g,"$1"),e=e.replace(/\u200c(\s+)/g,"$1"),e=e.replace(/(^\u200c|\u200c$)/gm,"")},decodeHTMLEntities:function(e,r){var n=this;return e.replace(/&(#?[^;\W]+;?)/g,function(e,r){var i;if(i=/^#(\d+);?$/.exec(r))return String.fromCharCode(parseInt(i[1],10));if(i=/^#[Xx]([A-Fa-f0-9]+);?/.exec(r))return String.fromCharCode(parseInt(i[1],16));var s=/;$/.test(r),a=s?r.replace(/;$/,""):r,c=n.entities[a]||s&&n.entities[r];return"number"==typeof c?String.fromCharCode(c):"string"==typeof c?c:"&"+r})},normalizeEOL:function(e,r){return e.replace(/(\r?\n)|(\r\n?)/g,"\n")},fixDashes:function(e,r){return 
e.replace(/-{3}/g,"—").replace(/-{2}/g,"–")},fixThreeDots:function(e,r){return e.replace(/\s*\.{3,}/g,"…")},normalizeEllipsis:function(e,r){return e.replace(/(…){2,}/g,"…")},fixEnglishQuotesPairs:function(e,r){return e.replace(/(“)(.+?)(”)/g,"«$2»")},fixEnglishQuotes:function(e,r){return e.replace(/(["'`]+)(.+?)(\1)/g,"«$2»")},fixHamzeh:function(e,r){return e.replace(/(\S)(ه[\s\u200c\u200e]+[یي])([\s\u200c\u200e])/g,"$1هٔ$3").replace(/(\S)(ه[\s\u200c\u200e]?\u0621)([\s\u200c\u200e])/g,"$1هٔ$3").replace(/(ۀ|هٓ)/g,"هٔ")},cleanupRLM:function(e,r){return e=e.replace(/([^a-zA-Z\-_])(\u200F)/g,"$1‌")},fixPersianGlyphs:function(e,r){return this.arrReplace(e,this.glyphs)},fixMiscNonPersianChars:function(e,r){return this.charReplace(e,"كيىۍېہە","کییییههه")},fixEnglishNumbers:function(e,r){return this.charReplace(e,"1234567890",this.digits)},fixArabicNumbers:function(e,r){return this.charReplace(e,"١٢٣٤٥٦٧٨٩٠",this.digits)},fixNumeralSymbols:function(e,r){return e.replace(new RegExp("(["+this.digits+"]) ?%","g"),"$1٪").replace(new RegExp("(["+this.digits+"])\\.(?=["+this.digits+"])","g"),"$1٫")},fixPunctuations:function(e,r){return this.charReplace(e,",;","،؛")},fixQuestionMark:function(e,r){return e.replace(/(\?)/g,"؟")},fixPerfixSpacing:function(e,r){return e.replace(/((\s|^)ن?می) /g,"$1‌").replace(/((\s|^)بی) /g,"$1‌")},fixSuffixSpacing:function(e,r){return e.replace(/ ((ام|ات|اش|ای|اید|ایم|اند)[\s.!؟?"'()[\]{}“”«»])/g,"‌$1").replace(/ (ها(ی)?[\s.!؟?"'()[\]{}“”«»])/g,"‌$1").replace(/ (تر((ی)|(ین))?[\s.!؟?"'()[\]{}“”«»])/g,"‌$1").replace(/ ((هایی|هایم|هایت|هایش|هایمان|هایتان|هایشان)[\s.!؟?"'()[\]{}“”«»])/g,"‌$1")},fixSuffixSpacingHamzeh:function(e,r){return e=e.replace(/(\S)(ه[\s\u200c]+[یي])([\s\u200c])/g,"$1ه‌ی$3"),e=e.replace(/(\S)(ه[\s\u200c]?\u0621)([\s\u200c])/g,"$1ه‌ی$3"),e=e.replace(/(\S)(ه[\s\u200c]?\u0654)([\s\u200c])/g,"$1ه‌ی$3")},fixSuffixMisc:function(e,r){return 
e.replace(/(\S)ه[\u200c\u200e][ئی]ی([\s\u200c\u200e])/g,"$1ه‌ای$2")},cleanupExtraMarks:function(e,r){return e.replace(/(!){2,}/g,"$1").replace(/(\u061F|\?){2,}/g,"$1").replace(/(!)([\s]*)([\u061F?])/g,"$3$1")},kashidasAsParenthetic:function(e,r){return e.replace(/(\s)\u0640+/g,"$1–").replace(/\u0640+(\s)/g,"–$1")},cleanupKashidas:function(e,r){return e.replace(/(\S)\u0640+(\S)/g,"$1$2").replace(/(\S)\u0640+(\S)/g,"$1$2")},fixPunctuationSpacing:function(e,r){return e.replace(/[ \t\u200c]*([:;,؛،.؟?!]{1})[ \t\u200c]*/g,"$1 ").replace(/([0-9۰-۹]+):\s+([0-9۰-۹]+)/g,"$1:$2").replace(/([0-9۰-۹]+)\. ([0-9۰-۹]+)/g,"$1.$2").replace(/([\u061F?!])\s+([\u061F?!])/g,"$1$2").replace(/!\s{1,}!/g,"!!").replace(/\?\s{1,}\?/g,"??").replace(/؟\s{1,}؟/g,"؟؟")},fixBracesSpacing:function(e,r){return e.replace(/[ \t\u200c]*(\()\s*([^)]+?)\s*?(\))[ \t\u200c]*/g," $1$2$3 ").replace(/[ \t\u200c]*(\[)\s*([^\]]+?)\s*?(\])[ \t\u200c]*/g," $1$2$3 ").replace(/[ \t\u200c]*(\{)\s*([^}]+?)\s*?(\})[ \t\u200c]*/g," $1$2$3 ").replace(/[ \t\u200c]*(“)\s*([^”]+?)\s*?(”)[ \t\u200c]*/g," $1$2$3 ").replace(/[ \t\u200c]*(«)\s*([^»]+?)\s*?(»)[ \t\u200c]*/g," $1$2$3 ")},fixBracesSpacingInside:function(e,r){return e.replace(/(\()\s*([^)]+?)\s*?(\))/g,"$1$2$3").replace(/(\[)\s*([^\]]+?)\s*?(\])/g,"$1$2$3").replace(/(\{)\s*([^}]+?)\s*?(\})/g,"$1$2$3").replace(/(“)\s*([^”]+?)\s*?(”)/g,"$1$2$3").replace(/(«)\s*([^»]+?)\s*?(»)/g,"$1$2$3")},markdownNormalizeBraces:function(e,r){return e.replace(/(\[.*?\])\s+(\(.*?\))/g,"$1$2").replace(/\(\(\s*(.*?)\s*\)\)/g,"(($1))").replace(/\[\[\s*(.*?)\s*\]\]/g,"[[$1]]").replace(/\{\{\s*(.*?)\s*\}\}/g,"{{$1}}").replace(/\{\{\{\s*(.*?)\s*\}\}\}/g,"{{{$1}}}").replace(/(\(\(.*\))\s+(\))/g,"$1$2").replace(/(\[\[.*\])\s+(\])/g,"$1$2").replace(/(\{\{.*\})\s+(\})/g,"$1$2")},markdownNormalizeLists:function(e,r){return e.replace(/((\n|^)\*.*?)\n+(?=\n\*)/g,"$1").replace(/((\n|^)-.*?)\n+(?=\n-)/g,"$1").replace(/((\n|^)#.*?)\n+(?=\n#)/g,"$1")},cleanupSpacing:function(e,r){return 
e.replace(/[ ]+/g," ").replace(/(\S)[ ]+([\u064e\u0650\u064f\u064b\u064d\u064C\u0651\u06C0])/g,"$1$2").replace(/\n[\s\u200c]*\n/g,"\n\n")},cleanupLineBreaks:function(e,r){return e.replace(/(\n{2,})/g,"\n\n")},cleanupBeginAndEnd:function(e,r){return e.replace(/([\n]+)[ \t\u200c]*/g,"$1").replace(/^[\s\u200c\u200e\u200f]+|[\s\u200c\u200e\u200f]+$/g,"")},flipPunctuations:function(e,r){var n=["-"],i=["!",".","،","…",'"'],s=[],a=[];e=this.fixThreeDots(e,r);for(var c=0;c