Skip to content

Commit

Permalink
Update to ZH normalizer regex
Browse the repository at this point in the history
* Update chinese-reading-normalizer.test.js

3 symbols added:

'
’
-

Signed-off-by: shiki-tm <[email protected]>

* Update chinese.js

3 symbols added:
'’-

Signed-off-by: shiki-tm <[email protected]>

* Update regex pattern for chinese.js

Signed-off-by: shiki-tm <[email protected]>

* revert change

* readd separators, escape quote

---------

Signed-off-by: shiki-tm <[email protected]>
Co-authored-by: Cashew <[email protected]>

<rikaitan.link>ZjI1Nzc1Y2Q2OTQ2ODUwOTE4ODZiZTYzM2EwNTkwOTE3NmI5MWI2NAo=</rikaitan.link>
  • Loading branch information
jason-ojisan committed Oct 13, 2024
1 parent 0ed4045 commit c4d87f9
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 1 deletion.
2 changes: 1 addition & 1 deletion ext/js/language/zh/chinese.js
Original file line number Diff line number Diff line change
Expand Up @@ -71,5 +71,5 @@ export function isCodePointChinese(codePoint) {

// Diff hunk from ext/js/language/zh/chinese.js (1 addition, 1 deletion); the +/- markers
// were lost in this rendering, so the old and new versions of the line appear back to back.
// normalizePinyin canonicalizes a pinyin reading: Unicode NFC normalization, lowercasing,
// then removal of separator characters.
/** @type {import('language').ReadingNormalizer} */
export function normalizePinyin(str) {
// Old line (presumably the one deleted by this commit): strips whitespace, '・', ':' and the sequence '//'.
return str.normalize('NFC').toLowerCase().replace(/[\s・:]|\/\//g, '');
// New line (presumably the one added by this commit): additionally strips "'", '’' and '-'.
// The trailing '-' inside the character class is a literal hyphen, not a range.
// NOTE(review): read literally, this second return is unreachable; only one of the two lines exists in the actual file.
return str.normalize('NFC').toLowerCase().replace(/[\s・:'’-]|\/\//g, '');
}
3 changes: 3 additions & 0 deletions test/language/chinese-reading-normalizer.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@ const tests = [
['wán:zhěng', 'wánzhěng'],
['fān・yì', 'fānyì'],
['fān//yì', 'fānyì'],
['fān’yì', 'fānyì'],
['fān\'yì', 'fānyì'],
['fān-yì', 'fānyì'],
];

describe('Normalize Pinyin', () => {
Expand Down

0 comments on commit c4d87f9

Please sign in to comment.