diff --git a/CHANGELOG.md b/CHANGELOG.md index fbdf299..b0a5a22 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,9 @@ +### 0.15.0 +- Added: support prefix: `bi*`, props @zoghal +- Added: support suffix: `*am`, `*at`, `*ash`, `*ei`, `*eid`, `*eem`, `*and`, props @zoghal +- Added: support suffix: `*hayee`, `*hayam`, `*hayat`, `*hayash`, `*hayetan`, `*hayeman`, `*hayeshan`, props @zoghal +- Fixed: check for space before suffix: `*tar`, `*tari`, `*tarin`, props @zoghal + ### 0.14.0 - Added: convert back quot/apos entities - Added: new option: `decode_htmlentities` diff --git a/lib/virastar.js b/lib/virastar.js index 457c67e..ec391c0 100644 --- a/lib/virastar.js +++ b/lib/virastar.js @@ -1,5 +1,5 @@ /*! -* Virastar - v0.14.0 - 2019-01-01 +* Virastar - v0.15.0 - 2019-01-27 * https://github.com/juvee/virastar * Licensed: MIT */ diff --git a/lib/virastar.min.js b/lib/virastar.min.js index b958a4b..8131603 100644 --- a/lib/virastar.min.js +++ b/lib/virastar.min.js @@ -1,7 +1,7 @@ -/*! -* Virastar - v0.14.0 - 2019-01-01 -* https://github.com/juvee/virastar -* Licensed: MIT -*/ - -!function(e,a,r){"undefined"!=typeof module?module.exports=r():"function"==typeof define&&"object"==typeof define.amd?define(r):"undefined"!=typeof window?window[e]=r():a[e]=r()}("Virastar",this,function(){function e(a,r){if(!(this instanceof e))return new e(a,r);if(a=a||{},"object"==typeof a)this.opts=this.parseOptions(a);else if("string"==typeof a)return this.opts=this.parseOptions(r||{}),this.cleanup(a);return this}return e.prototype.parseOptions=function(e){var 
a={normalize_eol:!0,decode_htmlentities:!0,fix_dashes:!0,fix_three_dots:!0,fix_english_quotes_pairs:!0,fix_english_quotes:!0,fix_hamzeh:!0,cleanup_rlm:!0,cleanup_zwnj:!0,fix_spacing_for_braces_and_quotes:!0,fix_numbersArabic:!0,fix_numbersEnglish:!0,fix_misc_non_persian_chars:!0,fix_question_mark:!0,skip_markdown_ordered_lists_numbers_conversion:!0,fix_perfix_spacing:!0,fix_suffix_spacing:!0,aggresive:!0,kashidas_as_parenthetic:!0,cleanup_kashidas:!0,cleanup_extra_marks:!0,cleanup_spacing:!0,cleanup_begin_and_end:!0,preserve_HTML:!0,preserve_URIs:!0,preserve_brackets:!0,preserve_braces:!0};for(var r in a)e.hasOwnProperty(r)&&(a[r]=e[r]);return a},e.prototype.cleanup=function(e,a){var r=a?this.parseOptions(a):this.opts,s="۱۲۳۴۵۶۷۸۹۰",_="١٢٣٤٥٦٧٨٩٠",n="1234567890",i=function(e,a,r){var s=a.split(""),_=r.split("");for(var n in s)e=e.replace(new RegExp(s[n],"g"),_[n]);return e};if(r.preserve_HTML){var p=[];e=e.replace(/(<[^<>]+>)/g,function(e){return p.push(e),"__HTML__PRESERVER__"})}if(r.preserve_URIs){var c=[],t=new 
RegExp("([A-Za-z][A-Za-z0-9+\\-.]*):(?:(//)(?:((?:[A-Za-z0-9\\-._~!$&'()*+,;=:]|%[0-9A-Fa-f]{2})*)@)?((?:\\[(?:(?:(?:(?:[0-9A-Fa-f]{1,4}:){6}|::(?:[0-9A-Fa-f]{1,4}:){5}|(?:[0-9A-Fa-f]{1,4})?::(?:[0-9A-Fa-f]{1,4}:){4}|(?:(?:[0-9A-Fa-f]{1,4}:){0,1}[0-9A-Fa-f]{1,4})?::(?:[0-9A-Fa-f]{1,4}:){3}|(?:(?:[0-9A-Fa-f]{1,4}:){0,2}[0-9A-Fa-f]{1,4})?::(?:[0-9A-Fa-f]{1,4}:){2}|(?:(?:[0-9A-Fa-f]{1,4}:){0,3}[0-9A-Fa-f]{1,4})?::[0-9A-Fa-f]{1,4}:|(?:(?:[0-9A-Fa-f]{1,4}:){0,4}[0-9A-Fa-f]{1,4})?::)(?:[0-9A-Fa-f]{1,4}:[0-9A-Fa-f]{1,4}|(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?))|(?:(?:[0-9A-Fa-f]{1,4}:){0,5}[0-9A-Fa-f]{1,4})?::[0-9A-Fa-f]{1,4}|(?:(?:[0-9A-Fa-f]{1,4}:){0,6}[0-9A-Fa-f]{1,4})?::)|[Vv][0-9A-Fa-f]+\\.[A-Za-z0-9\\-._~!$&'()*+,;=:]+)\\]|(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)|(?:[A-Za-z0-9\\-._~!$&'()*+,;=]|%[0-9A-Fa-f]{2})*))(?::([0-9]*))?((?:/(?:[A-Za-z0-9\\-._~!$&'()*+,;=:@]|%[0-9A-Fa-f]{2})*)*)|/((?:(?:[A-Za-z0-9\\-._~!$&'()*+,;=:@]|%[0-9A-Fa-f]{2})+(?:/(?:[A-Za-z0-9\\-._~!$&'()*+,;=:@]|%[0-9A-Fa-f]{2})*)*)?)|((?:[A-Za-z0-9\\-._~!$&'()*+,;=:@]|%[0-9A-Fa-f]{2})+(?:/(?:[A-Za-z0-9\\-._~!$&'()*+,;=:@]|%[0-9A-Fa-f]{2})*)*)|)(?:\\?((?:[A-Za-z0-9\\-._~!$&'()*+,;=:@/?]|%[0-9A-Fa-f]{2})*))?(?:\\#((?:[A-Za-z0-9\\-._~!$&'()*+,;=:@/?]|%[0-9A-Fa-f]{2})*))?","g");e=e.replace(t,function(e){return c.push(e),"__URI__PRESERVER__"})}if(r.preserve_brackets){var f=[];e=e.replace(/(\[.*?\])/g,function(e){return f.push(e),"__BRACKETS__PRESERVER__"})}if(r.preserve_braces){var u=[];e=e.replace(/(\{.*?\})/g,function(e){return u.push(e),"__BRACES__PRESERVER__"})}return r.normalize_eol&&(e=e.replace(/(\r?\n)|(\r\n?)/g,"\n")),r.decode_htmlentities&&(e=e.replace(/&#(\d+);/g,function(e,a){return 
String.fromCharCode(a)})),r.fix_dashes&&(e=e.replace(/-{3}/g,"—"),e=e.replace(/-{2}/g,"–")),r.fix_three_dots&&(e=e.replace(/\s*\.{3,}/g,"…")),r.fix_english_quotes_pairs&&(e=e.replace(/(“)(.+?)(”)/g,"«$2»")),r.fix_english_quotes&&(e=e.replace(/"/g,'"'),e=e.replace(/'/g,"'"),e=e.replace(/(["'`]+)(.+?)(\1)/g,"«$2»")),r.fix_hamzeh&&(e=e.replace(/(\S)(ه[\s\u200C]+[یي])(\s)/g,"$1هٔ$3")),r.cleanup_rlm&&(e=e.replace(/([^a-zA-Z\-_])(\u200F)/g,"$1‌")),r.cleanup_zwnj&&(e=e.replace(/\u200C{2,}/g,"‌"),e=e.replace(/([۰-۹0-9إأةؤورزژاآدذ،؛,:«»\\\/@#$٪×*()ـ\-=|])\u200c/g,"$1"),e=e.replace(/\u200c([\w])/g,"$1"),e=e.replace(/([\w])\u200c/g,"$1"),e=e.replace(/\u200c([\n\s[].،«»:()؛؟?;$!@-=+\\])/g,"$1"),e=e.replace(/([\n\s[.،«»:()؛؟?;$!@\-=+\\])\u200c/g,"$1"),e=e.replace(/\s+\u200C|\u200C\s+/g," ")),r.fix_numbersArabic&&(e=i(e,_,s)),e=e.replace(/(^|\s+)(\S+)(?=($|\s+))/g,function(e,a,_,p){return _.match(/[a-zA-Z\-_]{2,}/g)?e:_.match(/&#\d+;/g)?e:r.skip_markdown_ordered_lists_numbers_conversion&&(e+p).match(/(?:(?:\r?\n)|(?:\r\n?)|(?:^|\n))\d+\.\s/)?e:(r.fix_numbersEnglish&&(e=i(e,n,s)),r.fix_misc_non_persian_chars&&(e=i(e,",;كي%","،؛کی٪")),r.fix_question_mark&&(e=e.replace(/(\?)/g,"؟")),e)}),r.fix_perfix_spacing&&(e=e.replace(/((\s+|^)ن?می)\u0020/g,"$1‌")),r.fix_suffix_spacing&&(e=e.replace(/\u0020(تر(ی(ن)?)?|ها(ی)?\s+)/g,"‌$1")),r.aggresive&&(r.cleanup_extra_marks&&(e=e.replace(/(!){2,}/g,"$1"),e=e.replace(/(\u061F){2,}/g,"$1")),r.kashidas_as_parenthetic&&(e=e.replace(/(\s)\u0640+/g,"$1–"),e=e.replace(/\u0640+(\s)/g,"–$1")),r.cleanup_kashidas&&(e=e.replace(/(\S)\u0640+(\S)/g,"$1$2"))),r.fix_spacing_for_braces_and_quotes&&(e=e.replace(/[ \t\u200C]*(\()\s*([^)]+?)\s*?(\))[ \t\u200C]*/g," $1$2$3 "),e=e.replace(/[ \t\u200C]*(\[)\s*([^\]]+?)\s*?(\])[ \t\u200C]*/g," $1$2$3 "),e=e.replace(/[ \t\u200C]*(\{)\s*([^}]+?)\s*?(\})[ \t\u200C]*/g," $1$2$3 "),e=e.replace(/[ \t\u200C]*(“)\s*([^”]+?)\s*?(”)[ \t\u200C]*/g," $1$2$3 "),e=e.replace(/[ \t\u200C]*(«)\s*([^»]+?)\s*?(»)[ \t\u200C]*/g," $1$2$3 
"),e=e.replace(/[ \t\u200C]*([:;,؛،.؟!]{1})[ \t\u200C]*/g,"$1 "),e=e.replace(/([۰-۹]+):\s+([۰-۹]+)/g,"$1:$2"),e=e.replace(/(\u061F|!)\s(\u061F|!)/g,"$1$2"),e=e.replace(/(\()\s*([^)]+?)\s*?(\))/g,"$1$2$3"),e=e.replace(/(\[)\s*([^\]]+?)\s*?(\])/g,"$1$2$3"),e=e.replace(/(\{)\s*([^}]+?)\s*?(\})/g,"$1$2$3"),e=e.replace(/(“)\s*([^”]+?)\s*?(”)/g,"$1$2$3"),e=e.replace(/(«)\s*([^»]+?)\s*?(»)/g,"$1$2$3")),r.cleanup_spacing&&(e=e.replace(/[ ]+/g," "),e=e.replace(/([\n]+)[ \t\u200C]*/g,"$1")),r.cleanup_begin_and_end&&(e=e.replace(/^[\s\u200c]+|[\s\u200c]+$/g,"")),r.preserve_braces&&(e=e.replace(/__BRACES__PRESERVER__/g,function(){return u.shift()})),r.preserve_brackets&&(e=e.replace(/__BRACKETS__PRESERVER__/g,function(){return f.shift()})),r.preserve_URIs&&(e=e.replace(/__URI__PRESERVER__/g,function(){return c.shift()})),r.preserve_HTML&&(e=e.replace(/__HTML__PRESERVER__/g,function(){return p.shift()})),e},e}); +/*! +* Virastar - v0.15.0 - 2019-01-27 +* https://github.com/juvee/virastar +* Licensed: MIT +*/ + +!function(e,a,r){"undefined"!=typeof module?module.exports=r():"function"==typeof define&&"object"==typeof define.amd?define(r):"undefined"!=typeof window?window[e]=r():a[e]=r()}("Virastar",this,function(){function e(a,r){if(!(this instanceof e))return new e(a,r);if(a=a||{},"object"==typeof a)this.opts=this.parseOptions(a);else if("string"==typeof a)return this.opts=this.parseOptions(r||{}),this.cleanup(a);return this}return e.prototype.parseOptions=function(e){var 
a={normalize_eol:!0,decode_htmlentities:!0,fix_dashes:!0,fix_three_dots:!0,fix_english_quotes_pairs:!0,fix_english_quotes:!0,fix_hamzeh:!0,cleanup_rlm:!0,cleanup_zwnj:!0,fix_spacing_for_braces_and_quotes:!0,fix_numbersArabic:!0,fix_numbersEnglish:!0,fix_misc_non_persian_chars:!0,fix_question_mark:!0,skip_markdown_ordered_lists_numbers_conversion:!0,fix_perfix_spacing:!0,fix_suffix_spacing:!0,aggresive:!0,kashidas_as_parenthetic:!0,cleanup_kashidas:!0,cleanup_extra_marks:!0,cleanup_spacing:!0,cleanup_begin_and_end:!0,preserve_HTML:!0,preserve_URIs:!0,preserve_brackets:!0,preserve_braces:!0};for(var r in a)e.hasOwnProperty(r)&&(a[r]=e[r]);return a},e.prototype.cleanup=function(e,a){var r=a?this.parseOptions(a):this.opts,s="۱۲۳۴۵۶۷۸۹۰",_="١٢٣٤٥٦٧٨٩٠",n="1234567890",p=function(e,a,r){var s=a.split(""),_=r.split("");for(var n in s)e=e.replace(new RegExp(s[n],"g"),_[n]);return e};if(r.preserve_HTML){var i=[];e=e.replace(/(<[^<>]+>)/g,function(e){return i.push(e),"__HTML__PRESERVER__"})}if(r.preserve_URIs){var c=[],t=new 
RegExp("([A-Za-z][A-Za-z0-9+\\-.]*):(?:(//)(?:((?:[A-Za-z0-9\\-._~!$&'()*+,;=:]|%[0-9A-Fa-f]{2})*)@)?((?:\\[(?:(?:(?:(?:[0-9A-Fa-f]{1,4}:){6}|::(?:[0-9A-Fa-f]{1,4}:){5}|(?:[0-9A-Fa-f]{1,4})?::(?:[0-9A-Fa-f]{1,4}:){4}|(?:(?:[0-9A-Fa-f]{1,4}:){0,1}[0-9A-Fa-f]{1,4})?::(?:[0-9A-Fa-f]{1,4}:){3}|(?:(?:[0-9A-Fa-f]{1,4}:){0,2}[0-9A-Fa-f]{1,4})?::(?:[0-9A-Fa-f]{1,4}:){2}|(?:(?:[0-9A-Fa-f]{1,4}:){0,3}[0-9A-Fa-f]{1,4})?::[0-9A-Fa-f]{1,4}:|(?:(?:[0-9A-Fa-f]{1,4}:){0,4}[0-9A-Fa-f]{1,4})?::)(?:[0-9A-Fa-f]{1,4}:[0-9A-Fa-f]{1,4}|(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?))|(?:(?:[0-9A-Fa-f]{1,4}:){0,5}[0-9A-Fa-f]{1,4})?::[0-9A-Fa-f]{1,4}|(?:(?:[0-9A-Fa-f]{1,4}:){0,6}[0-9A-Fa-f]{1,4})?::)|[Vv][0-9A-Fa-f]+\\.[A-Za-z0-9\\-._~!$&'()*+,;=:]+)\\]|(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)|(?:[A-Za-z0-9\\-._~!$&'()*+,;=]|%[0-9A-Fa-f]{2})*))(?::([0-9]*))?((?:/(?:[A-Za-z0-9\\-._~!$&'()*+,;=:@]|%[0-9A-Fa-f]{2})*)*)|/((?:(?:[A-Za-z0-9\\-._~!$&'()*+,;=:@]|%[0-9A-Fa-f]{2})+(?:/(?:[A-Za-z0-9\\-._~!$&'()*+,;=:@]|%[0-9A-Fa-f]{2})*)*)?)|((?:[A-Za-z0-9\\-._~!$&'()*+,;=:@]|%[0-9A-Fa-f]{2})+(?:/(?:[A-Za-z0-9\\-._~!$&'()*+,;=:@]|%[0-9A-Fa-f]{2})*)*)|)(?:\\?((?:[A-Za-z0-9\\-._~!$&'()*+,;=:@/?]|%[0-9A-Fa-f]{2})*))?(?:\\#((?:[A-Za-z0-9\\-._~!$&'()*+,;=:@/?]|%[0-9A-Fa-f]{2})*))?","g");e=e.replace(t,function(e){return c.push(e),"__URI__PRESERVER__"})}if(r.preserve_brackets){var u=[];e=e.replace(/(\[.*?\])/g,function(e){return u.push(e),"__BRACKETS__PRESERVER__"})}if(r.preserve_braces){var f=[];e=e.replace(/(\{.*?\})/g,function(e){return f.push(e),"__BRACES__PRESERVER__"})}return r.normalize_eol&&(e=e.replace(/(\r?\n)|(\r\n?)/g,"\n")),r.decode_htmlentities&&(e=e.replace(/&#(\d+);/g,function(e,a){return 
String.fromCharCode(a)})),r.fix_dashes&&(e=e.replace(/-{3}/g,"—"),e=e.replace(/-{2}/g,"–")),r.fix_three_dots&&(e=e.replace(/\s*\.{3,}/g,"…")),r.fix_english_quotes_pairs&&(e=e.replace(/(“)(.+?)(”)/g,"«$2»")),r.fix_english_quotes&&(e=e.replace(/"/g,'"'),e=e.replace(/'/g,"'"),e=e.replace(/(["'`]+)(.+?)(\1)/g,"«$2»")),r.fix_hamzeh&&(e=e.replace(/(\S)(ه[\s\u200C]+[یي])(\s)/g,"$1هٔ$3")),r.cleanup_rlm&&(e=e.replace(/([^a-zA-Z\-_])(\u200F)/g,"$1‌")),r.cleanup_zwnj&&(e=e.replace(/\u200C{2,}/g,"‌"),e=e.replace(/([۰-۹0-9إأةؤورزژاآدذ،؛,:«»\\\/@#$٪×*()ـ\-=|])\u200c/g,"$1"),e=e.replace(/\u200c([\w])/g,"$1"),e=e.replace(/([\w])\u200c/g,"$1"),e=e.replace(/\u200c([\n\s[].،«»:()؛؟?;$!@-=+\\])/g,"$1"),e=e.replace(/([\n\s[.،«»:()؛؟?;$!@\-=+\\])\u200c/g,"$1"),e=e.replace(/\s+\u200C|\u200C\s+/g," ")),r.fix_numbersArabic&&(e=p(e,_,s)),e=e.replace(/(^|\s+)(\S+)(?=($|\s+))/g,function(e,a,_,i){return _.match(/[a-zA-Z\-_]{2,}/g)?e:_.match(/&#\d+;/g)?e:r.skip_markdown_ordered_lists_numbers_conversion&&(e+i).match(/(?:(?:\r?\n)|(?:\r\n?)|(?:^|\n))\d+\.\s/)?e:(r.fix_numbersEnglish&&(e=p(e,n,s)),r.fix_misc_non_persian_chars&&(e=p(e,",;كي%","،؛کی٪")),r.fix_question_mark&&(e=e.replace(/(\?)/g,"؟")),e)}),r.fix_perfix_spacing&&(e=e.replace(/((\s|^)ن?می)\u0020/g,"$1‌"),e=e.replace(/((\s|^)بی)\u0020/g,"$1‌")),r.fix_suffix_spacing&&(e=e.replace(/\u0020(ام|ات|اش|ای|اید|ایم|اند)\s/g,"‌$1"),e=e.replace(/\u0020(ها(ی)?\s)/g,"‌$1"),e=e.replace(/\u0020(تر((ی)|(ین))?\s)/g,"‌$1"),e=e.replace(/\u0020(هایی|هایم|هایت|هایش|هایمان|هایتان|هایشان)\s/g,"‌$1")),r.aggresive&&(r.cleanup_extra_marks&&(e=e.replace(/(!){2,}/g,"$1"),e=e.replace(/(\u061F){2,}/g,"$1")),r.kashidas_as_parenthetic&&(e=e.replace(/(\s)\u0640+/g,"$1–"),e=e.replace(/\u0640+(\s)/g,"–$1")),r.cleanup_kashidas&&(e=e.replace(/(\S)\u0640+(\S)/g,"$1$2"))),r.fix_spacing_for_braces_and_quotes&&(e=e.replace(/[ \t\u200C]*(\()\s*([^)]+?)\s*?(\))[ \t\u200C]*/g," $1$2$3 "),e=e.replace(/[ \t\u200C]*(\[)\s*([^\]]+?)\s*?(\])[ \t\u200C]*/g," $1$2$3 "),e=e.replace(/[ 
\t\u200C]*(\{)\s*([^}]+?)\s*?(\})[ \t\u200C]*/g," $1$2$3 "),e=e.replace(/[ \t\u200C]*(“)\s*([^”]+?)\s*?(”)[ \t\u200C]*/g," $1$2$3 "),e=e.replace(/[ \t\u200C]*(«)\s*([^»]+?)\s*?(»)[ \t\u200C]*/g," $1$2$3 "),e=e.replace(/[ \t\u200C]*([:;,؛،.؟!]{1})[ \t\u200C]*/g,"$1 "),e=e.replace(/([۰-۹]+):\s+([۰-۹]+)/g,"$1:$2"),e=e.replace(/(\u061F|!)\s(\u061F|!)/g,"$1$2"),e=e.replace(/(\()\s*([^)]+?)\s*?(\))/g,"$1$2$3"),e=e.replace(/(\[)\s*([^\]]+?)\s*?(\])/g,"$1$2$3"),e=e.replace(/(\{)\s*([^}]+?)\s*?(\})/g,"$1$2$3"),e=e.replace(/(“)\s*([^”]+?)\s*?(”)/g,"$1$2$3"),e=e.replace(/(«)\s*([^»]+?)\s*?(»)/g,"$1$2$3")),r.cleanup_spacing&&(e=e.replace(/[ ]+/g," "),e=e.replace(/([\n]+)[ \t\u200C]*/g,"$1")),r.cleanup_begin_and_end&&(e=e.replace(/^[\s\u200c]+|[\s\u200c]+$/g,"")),r.preserve_braces&&(e=e.replace(/__BRACES__PRESERVER__/g,function(){return f.shift()})),r.preserve_brackets&&(e=e.replace(/__BRACKETS__PRESERVER__/g,function(){return u.shift()})),r.preserve_URIs&&(e=e.replace(/__URI__PRESERVER__/g,function(){return c.shift()})),r.preserve_HTML&&(e=e.replace(/__HTML__PRESERVER__/g,function(){return i.shift()})),e},e}); diff --git a/package.json b/package.json index b9dd59d..d007632 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "virastar", - "version": "0.14.0", + "version": "0.15.0", "description": "cleanning up Persian text!", "author": { "name": "juvee"