From bcd570612d4c0a66037f4f2b63c6098562e05b59 Mon Sep 17 00:00:00 2001 From: Jason Lee Date: Mon, 27 Nov 2023 20:02:54 +0800 Subject: [PATCH] Add `space-backtick-quote` rule, default: on. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ```diff - 演示`code`代码 + 演示 `code` 代码 ``` Fix #153 --- autocorrect/.autocorrectrc.default | 2 ++ autocorrect/src/format.rs | 1 + autocorrect/src/rule/mod.rs | 3 +++ autocorrect/src/rule/word.rs | 38 +++++++++++++++++++++++++++++- 4 files changed, 43 insertions(+), 1 deletion(-) diff --git a/autocorrect/.autocorrectrc.default b/autocorrect/.autocorrectrc.default index 060767dc..e28f2065 100644 --- a/autocorrect/.autocorrectrc.default +++ b/autocorrect/.autocorrectrc.default @@ -7,6 +7,8 @@ rules: space-punctuation: 1 # Add space between brackets (), [] when near the CJK. space-bracket: 1 + # Add space around backtick-quoted `code` when near the CJK. + space-backtick-quote: 1 # Add space between dash `-` space-dash: 0 # Convert to fullwidth. diff --git a/autocorrect/src/format.rs b/autocorrect/src/format.rs index 1bfd0737..65c5b233 100644 --- a/autocorrect/src/format.rs +++ b/autocorrect/src/format.rs @@ -232,6 +232,7 @@ mod tests { "《腾讯》-发布-《新版》本微信" => "《腾讯》- 发布 -《新版》本微信", "“腾讯”-发布-“新版”本微信" => "“腾讯” - 发布 - “新版”本微信", "‘腾讯’-发布-‘新版’本微信" => "‘腾讯’ - 发布 - ‘新版’本微信", + "行内`code`代码" => "行内 `code` 代码", ]; assert_cases(cases); diff --git a/autocorrect/src/rule/mod.rs b/autocorrect/src/rule/mod.rs index 1d77e754..680e9a45 100644 --- a/autocorrect/src/rule/mod.rs +++ b/autocorrect/src/rule/mod.rs @@ -25,6 +25,8 @@ lazy_static! 
{ Rule::new("space-bracket", word::format_space_bracket), // Rule: space-dash Rule::new("space-dash", word::format_space_dash), + // Rule: space-backtick-quote + Rule::new("space-backtick-quote", word::format_space_backtick_quote), // Rule: fullwidth Rule::new("fullwidth", fullwidth::format), ]; @@ -200,6 +202,7 @@ mod tests { "space-punctuation", "space-bracket", "space-dash", + "space-backtick-quote", "fullwidth", "halfwidth-word", "halfwidth-punctuation", diff --git a/autocorrect/src/rule/word.rs b/autocorrect/src/rule/word.rs index 57c8c5bc..7427e265 100644 --- a/autocorrect/src/rule/word.rs +++ b/autocorrect/src/rule/word.rs @@ -32,6 +32,12 @@ lazy_static! { Strategery::new(r"[\]\)]", r"\p{CJK}"), ]; + static ref BACKTICK_QUOTE_STRATEGIES: Vec<Strategery> = vec![ + // Add space before and after backtick ` near the CJK + Strategery::new(r"\p{CJK}", r"`.+`"), + Strategery::new(r"`.+`", r"\p{CJK}"), + ]; + static ref DASH_RE : regex::Regex = regexp!(r"([\p{CJK}])[\-]([\p{CJK}])"); static ref NO_SPACE_FULLWIDTH_STRATEGIES: Vec<Strategery> = vec![ @@ -75,6 +81,14 @@ pub fn format_space_dash(input: &str) -> String { .to_string() } +pub fn format_space_backtick_quote(input: &str) -> String { + let mut out = String::from(input); + BACKTICK_QUOTE_STRATEGIES + .iter() + .for_each(|s| out = s.format(&out)); + out +} + pub fn format_no_space_fullwidth(input: &str) -> String { let mut out = String::from(input); @@ -103,7 +117,7 @@ pub fn format_no_space_fullwidth_quote(input: &str) -> String { #[cfg(test)] mod tests { - use crate::rule::word::format_space_dash; + use crate::rule::word::{format_space_backtick_quote, format_space_dash}; #[test] fn test_format_space_bracket() { @@ -112,4 +126,26 @@ mod tests { assert_eq!(format_space_dash("你好-world"), "你好-world"); assert_eq!(format_space_dash("hello-world"), "hello-world"); } + + #[test] + fn test_format_space_backtick_quote() { + assert_eq!(format_space_backtick_quote("代码`code`"), "代码 `code`"); + assert_eq!( + 
format_space_backtick_quote("代码`code`代码"), + "代码 `code` 代码" + ); + assert_eq!( + format_space_backtick_quote("`code`代码`code`"), + "`code` 代码 `code`" + ); + assert_eq!( + format_space_backtick_quote("`code`hello`code`"), + "`code`hello`code`" + ); + + assert_eq!(format_space_backtick_quote("```rs"), "```rs"); + assert_eq!(format_space_backtick_quote("``代码第1行"), "``代码第1行"); + assert_eq!(format_space_backtick_quote("`代码第1行"), "`代码第1行"); + assert_eq!(format_space_backtick_quote("代码第2行`"), "代码第2行`"); + } }