diff --git a/README.md b/README.md index 7a9eaddd..fa52ca6c 100644 --- a/README.md +++ b/README.md @@ -211,6 +211,7 @@ A few notes are in order regarding this data structure: | cjy-Latn | Jin (Wiktionary) | | cmn-Hans | Mandarin (Simplified)\* | | cmn-Hant | Mandarin (Traditional)\*| +| cmn-Latn | Mandarin (Pinyin)\* | | ckb-Arab | Sorani | | csb-Latn | Kashubian | | deu-Latn | German | diff --git a/epitran/data/map/cjy-Latn.csv b/epitran/data/map/cjy-Latn.csv index 7c614f93..310d73a3 100644 --- a/epitran/data/map/cjy-Latn.csv +++ b/epitran/data/map/cjy-Latn.csv @@ -34,13 +34,13 @@ e,ɤ ue,u̯ɤ ie,i̯e ye,y̯e -ai,a̯i +ai,ai̯ uai,u̯ai̯ ei,e̯i ui,u̯ei̯ -au,a̯u +au,au̯ iau,i̯au̯ -ou,əu +ou,əu̯ iou,i̯əu̯ an,æ̃ uan,u̯æ̃ @@ -51,14 +51,14 @@ ang,aŋ iang,i̯aŋ uang,u̯aŋ eng,ə̃ŋ -ing,ĩŋ +ing,ĩŋ ung,ũŋ yng,ỹŋ ah,aʔ iah,i̯aʔ uah,u̯aʔ eh,əʔ -ieh,iəʔ +ieh,i̯əʔ ueh,u̯əʔ yeh,y̯əʔ rr,z̩ɻ @@ -86,9 +86,9 @@ onr,ɒ̃ɻ ionr,i̯ɒ̃ɻ uonr,u̯ɒ̃ɻ engr,ʌ̃ɻ -ingr,ĩɻ -ungr,ũɻ -yngr,ỹɻ +ingr,ĩɻ +ungr,ũɻ +yngr,ỹɻ ahr,ɐɻ iahr,i̯ɐɻ uahr,u̯ɐɻ diff --git a/epitran/data/map/cmn-Latn.csv b/epitran/data/map/cmn-Latn.csv new file mode 100644 index 00000000..505fc19f --- /dev/null +++ b/epitran/data/map/cmn-Latn.csv @@ -0,0 +1,153 @@ +Orth,Phon +b,p +p,pʰ +m,m +f,f +d,t +t,tʰ +n,n +l,l +g,k +k,kʰ +h,x +j,t͡ɕ +ju,t͡ɕy +q,t͡ɕʰ +qu,t͡ɕʰy +quan,t͡ɕʰy̯ɛn +x,ɕ +xu,ɕy +zh,ʈ͡ʂ +zhi,ʈ͡ʂz̩ +ch,ʈ͡ʂʰ +chi,ʈ͡ʂʰz̩ +sh,ʂ +shi,ʂz̩ +r,ʐ +ri,ʐ̩ +z,t͡s +zi,t͡sz̩ +c,t͡sʰ +ci,t͡sʰz̩ +s,s +si,sz̩ +yi,i +wu,u +yu,y +i,i +u,u +ü,y +a,a +ya,i̯ä +ia,i̯ä +o,o +wo,u̯ɔ +uo,u̯ɔ +bo,pu̯ɔ +po,pʰu̯ɔ +mo,mu̯ɔ +fo,fu̯ɔ +e,ɤ +ie,i̯ɛ +ye,i̯ɛ +ai,ai̯ +uai,u̯ai̯ +wai,u̯ai̯ +ei,ei̯ +wei,u̯ei̯ +ui,u̯ei̯ +ao,au̯ +iao,i̯au̯ +yao,i̯au̯ +ou,oʊ̯ +you,i̯oʊ̯ +iu,i̯oʊ̯ +an,an +yan,i̯ɛn +yuan,y̯ɛn +ian,i̯ɛn +en,ən +yin,in +in,in +un,u̯ən +ang,aŋ +iang,i̯aŋ +eng,ɤŋ +ying,iŋ +ing,iŋ +ong,ʊŋ +er,ɻ̩ +w,u̯ +y,i̯ +yue,y̯ɛ +̄,˥ +́,˧˥ +̌,˨˩ +̀,˥˩ +yuanr,y̯ɑɻ +iangr,i̯ɑ̃ɻ +yangr,i̯ɑ̃ɻ +uangr,u̯ɑ̃ɻ +wangr,u̯ɑ̃ɻ +yingr,i̯ɤ̃ɻ +wengr,ʊ̃ɻ +iongr,i̯ʊ̃ɻ +yongr,i̯ʊ̃ɻ 
+yang,i̯ɑŋ +uang,u̯ɑŋ +wang,u̯ɑŋ +weng,u̯əŋ +iong,i̯ʊŋ +yong,i̯ʊŋ +ianr,i̯ɑɻ +yanr,i̯ɑɻ +uair,u̯ɑɻ +wair,u̯ɑɻ +uanr,u̯ɑɻ +wanr,u̯ɑɻ +iaor,i̯aʊ̯ɻʷ +yaor,i̯aʊ̯ɻʷ +üanr,y̯ɑɻ +vanr,y̯ɑɻ +angr,ɑ̃ɻ +yuer,y̯ɛɻ +weir,u̯əɻ +wenr,u̯əɻ +your,i̯ɤʊ̯ɻʷ +yinr,iə̯ɻ +yunr,yə̯ɻ +engr,ɤ̃ɻ +ingr,iɤ̯̃ɻ +ongr,ʊ̃ɻ +air,ɑɻ +anr,ɑɻ +iar,i̯ɑɻ +yar,i̯ɑɻ +uar,u̯ɑɻ +war,u̯ɑɻ +aor,aʊ̯ɻʷ +ier,i̯ɛɻ +yer,i̯ɛɻ +uor,u̯ɔɻ +wor,u̯ɔɻ +üer,y̯ɛɻ +ver,y̯ɛɻ +eir,əɻ +enr,əɻ +uir,u̯əɻ +unr,u̯əɻ +our,ɤʊ̯ɻʷ +iur,i̯ɤʊ̯ɻʷ +inr,iə̯ɻ +ünr,yə̯ɻ +vnr,yə̯ɻ +yir,iə̯ɻ +wur,uɻʷ +yur,yə̯ɻ +yor,i̯ɔɻ +ar,ɑɻ +or,u̯ɔɻ +ir,iə̯ɻ +ur,uɻʷ +ür,yə̯ɻ +vr,yə̯ɻ diff --git a/epitran/data/map/gan-Latn.csv b/epitran/data/map/gan-Latn.csv index 4ac2ecbe..376c53a2 100644 --- a/epitran/data/map/gan-Latn.csv +++ b/epitran/data/map/gan-Latn.csv @@ -12,7 +12,7 @@ s,s j,t͡ɕ q,t͡ɕʰ x,ɕ -ny,n̠ʲ +ny,ɲ g,k k,kʰ ng,ŋ @@ -33,11 +33,11 @@ ie,i̯e ue,u̯e ye,y̯e eo,ɵ -ai,a̯i +ai,ai̯ uai,u̯ai̯ -oi,o̯i -ei,e̯i -fi,fɨi +oi,oi̯ +ei,ei̯ +fi,fɨ̯i ui,u̯i au,a̯u eu,ɛ̯u @@ -64,12 +64,12 @@ ong,ɔŋ iong,i̯ɔŋ uong,u̯ɔŋ ung,uŋ -iung,iuŋ +iung,i̯uŋ at,at uat,u̯at ot,ɵt -uot,uɵt -yot,yɵt +uot,u̯ɵt +yot,y̯ɵt et,ɛt iet,i̯ɛt uet,u̯ɛt @@ -87,7 +87,7 @@ uh,uʔ iuh,i̯uʔ Bm,m̩ Bn,n̩ -Bng,ŋ̍ +Bng,ŋ̩ 1,˦˨ 2,˨˦ 3,˨˩˧ diff --git a/epitran/data/map/hak-Latn.csv b/epitran/data/map/hak-Latn.csv index 290cabbc..3ae74bb0 100644 --- a/epitran/data/map/hak-Latn.csv +++ b/epitran/data/map/hak-Latn.csv @@ -9,7 +9,6 @@ th,tʰ n,n l,l k,k -ki,k kh,kʰ ng,ŋ ngi,ɲ @@ -33,11 +32,11 @@ o,o io,i̯o Bm,m̩ Bn,n̩ -Bng,ŋ̍ +Bng,ŋ̩ ai,a̯i iai,i̯ai̯ oai,u̯ai̯ -oi,o̯i +oi,oi̯ ioi,i̯oi̯ ui,u̯i iui,i̯ui̯ @@ -88,7 +87,7 @@ iot,i̯ot ut,ut iut,i̯ut ak,ak -iak,iak +iak,i̯ak oak,u̯ak ok,ok iok,i̯ok diff --git a/epitran/data/map/hsn-Latn.csv b/epitran/data/map/hsn-Latn.csv index e38f8622..cac0b00f 100644 --- a/epitran/data/map/hsn-Latn.csv +++ b/epitran/data/map/hsn-Latn.csv @@ -16,7 +16,7 @@ r,ʐ j,t͡ɕ q,t͡ɕʰ x,ɕ -ny,n̠ʲ +ny,ɲ g,k k,kʰ ng,ŋ @@ -38,13 +38,13 @@ o,o io,i̯o e,ɤ̞ ue,u̯ɤ̞ -ai,a̯i +ai,ai̯ uai,u̯ai̯ 
yai,y̯ai̯ -ei,e̯̞i +ei,e̞i̯ uei,u̯e̞i̯ yei,y̯e̞i̯ -au,ɒ̯u +au,ɒu̯ iau,i̯ɒu̯ ou,ə̯u iou,i̯əu̯ diff --git a/epitran/data/map/nan-Latn.csv b/epitran/data/map/nan-Latn.csv index df063b2b..ecf791bf 100644 --- a/epitran/data/map/nan-Latn.csv +++ b/epitran/data/map/nan-Latn.csv @@ -22,6 +22,7 @@ ng,ŋ h,h a,a e,e +ee,ɛ er,ɘ or,ɤ o,ɤ @@ -30,6 +31,7 @@ ong,ɔŋ om,ɔm op,ɔp ok,ɔk +iok,i̯ɔk i,i ir,ɨ u,u @@ -38,8 +40,8 @@ au,au̯ ia,i̯a ian,i̯ɛn iat,i̯ɛt -eng,iə̯ŋ -ek,iə̯k +eng,i̯əŋ +ek,i̯ək ior,i̯ɤ io,i̯ɤ iong,i̯ɔŋ @@ -62,7 +64,7 @@ oaⁿ,ũ̯ã uiⁿ,ũ̯ĩ iauⁿ,ĩ̯ãũ̯ oaiⁿ,ũ̯ãĩ̯ -Bng,ŋ̍ +Bng,ŋ̩ Bm,m̩ ́,˥˩ ̀,˧˩ diff --git a/epitran/data/map/wuu-Latn.csv b/epitran/data/map/wuu-Latn.csv index 691760dc..d521e0d7 100644 --- a/epitran/data/map/wuu-Latn.csv +++ b/epitran/data/map/wuu-Latn.csv @@ -20,8 +20,8 @@ z,z̥ j,t͡ɕ q,t͡ɕʰ jj,d̥͡ʑ -ny,n̠ʲ -'ny,ʔn̠ʲ +ny,ɲ +'ny,ʔɲ x,ɕ xx,ʑ̥ k,k @@ -32,9 +32,9 @@ ng,ŋ h,h hh,ɦ mm,m̩ -ngg,ŋ̍ +ngg,ŋ̩ 'mm,ʔm̩ -'ngg,ʔŋ̍ +'ngg,ʔŋ̩ a,a̱ o,o au,ɔ @@ -42,37 +42,37 @@ eu,ɜ e,e̞ oe,ø i,i -ia,ia̱ -io,io -iau,iɔ -ieu,iɜ +ia,i̯a̱ +io,i̯o +iau,i̯ɔ +ieu,i̯ɜ u,v̩ʷ -ua,ɯa̱ -ue,ɯe̞ +ua,ɯa̱̯ +ue,ɯe̞̯ uoe,v̩ʷø y,y -yoe,yø -an,ã +yoe,y̯ø +an,ã aan,ɑ̃ en,əɲ on,ʊŋ aq,a̱ʔ oq,ʊʔ eq,əʔ -ian,iã -iaan,iɑ̃ +ian,i̯ã +iaan,i̯ɑ̃ in,ɪɲ ion,i̯ʊŋ -iaq,ia̱ʔ +iaq,i̯a̱ʔ ioq,i̯ʊʔ iq,i̯ɪʔ -uan,ɯã -uaan,ɯɑ̃ +uan,ɯã̯ +uaan,ɯã̯ un,ɯə̯ɲ -uaq,ɯa̱ʔ +uaq,ɯa̯̱ʔ ueq,ɯə̯ʔ yn,ʏɲ -yq,ɥ̯ɪʔ +yq,y̯ɪʔ er,əɻ r,z̩ 1,˥˧ @@ -80,3 +80,4 @@ r,z̩ 3,˨˧ 4,˥ 5,˩˨ +0, diff --git a/epitran/data/map/yue-Latn.csv b/epitran/data/map/yue-Latn.csv index 9c6178ae..2b83722e 100644 --- a/epitran/data/map/yue-Latn.csv +++ b/epitran/data/map/yue-Latn.csv @@ -19,24 +19,27 @@ c,t͡sʰ s,s j,j aa,aː -aai,aːj -aau,aːw +aai,aːi̯ +aau,aːu̯ a,ɐ +ai,ɐi̯ +au,ɐu̯ e,ɛː -ei,ej -eu,ɛːw +ei,ei̯ +eu,ɛːu̯ i,iː +iu,iːu̯ ing,ɪŋ ik,ɪk o,ɔː -oi,ɔːɥ -ou,ow +oi,ɔːi̯ +ou,ou̯ u,uː -ui,uːɥ +ui,uːy̯ ung,ʊŋ uk,ʊk eo,ɵ -eoi,ɵɥ +eoi,ɵy̯ oe,œː yu,yː 1,˥ @@ -44,4 +47,4 @@ yu,yː 3,˧ 4,˨˩ 5,˩˧ -6,˨ \ No newline at end of file +6,˨ diff --git 
a/epitran/data/post/cmn-Latn.txt b/epitran/data/post/cmn-Latn.txt new file mode 100644 index 00000000..e69de29b diff --git a/epitran/data/post/nan-Latn.txt b/epitran/data/post/nan-Latn.txt index cbe7300f..6de15494 100644 --- a/epitran/data/post/nan-Latn.txt +++ b/epitran/data/post/nan-Latn.txt @@ -5,9 +5,19 @@ h -> ʔ / _ # h -> ʔ / _ ˥ # +% neutral tone. create artificial marker that prevents the following rules from applying +% if the word already had a tone, it is unaffected +-- -> 輕 / # _ +(?P<sw1>輕)(?P<sw2>[^輕]+) -> 0 / _ + % the following rules rely on the tone already being moved to the end of the word % ends in h,p,t,k and does not have a tone diacritic - 4th tone +% and does not start with -- 0 -> ˧ / [ʔptk] _ # % if no tone diacritic and does not end in a stop - 1st tone -0 -> ˥ / ([^˩˨˧˦˥]) _ # +% ensure it is not neutral tone either (i.e. beginning with --) +0 -> ˥ / ([^˩˨˧˦˥輕]) _ # + +% remove artificial neutral tone marker +輕 -> 0 / _ # diff --git a/epitran/data/post/yue-Latn.txt b/epitran/data/post/yue-Latn.txt index 99d5177c..89640d47 100644 --- a/epitran/data/post/yue-Latn.txt +++ b/epitran/data/post/yue-Latn.txt @@ -1,6 +1,9 @@ +% note: any word-final rules need to +% account for the tones already being word-final in Jyutping + % Deaspirate word-finally -ʰ -> 0 / _ # +ʰ -> 0 / _ [˩˨˧˦˥]+ # % Make isolated nasals syllabic diff --git a/epitran/data/pre/cmn-Latn.txt b/epitran/data/pre/cmn-Latn.txt new file mode 100644 index 00000000..b9aa45f8 --- /dev/null +++ b/epitran/data/pre/cmn-Latn.txt @@ -0,0 +1,2 @@ +% Move tones to end of word because they break diphthongs +(?P<sw1>[̄́̌̀]+)(?P<sw2>\w+) -> 0 / _\b diff --git a/epitran/data/pre/hak-Latn.txt b/epitran/data/pre/hak-Latn.txt index 1d7b7ecb..d8907b76 100644 --- a/epitran/data/pre/hak-Latn.txt +++ b/epitran/data/pre/hak-Latn.txt @@ -21,6 +21,6 @@ y -> i / # _ % add extra i because the i should not be lost as a vowel but is also used to condition the consonant in the map chi -> chii / # _ -chhi -> chii / # _ +chhi -> 
chhii / # _ si -> sii / # _ ngi -> ngii / # _