-
Notifications
You must be signed in to change notification settings - Fork 1
/
pinyin_to_ipa_converter.js
107 lines (96 loc) · 3.3 KB
/
pinyin_to_ipa_converter.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
(function(main, global) {
(typeof exports == "object" ? exports : global).PinyinToIPA = main();
})(function() {
var regex = /(?:([zcs]h?|[bpmfdtnlgkhjqxr])((?:(?:(?:[aoe]|i[ao]?|ua)ng|(?:a|e|[iuü]a?)n)(?![aoeiuü])|a[oi]?|ou?|ei?|i(?:ao?|e|u)?|u(?:[oe]|a?i?)|üe?))|((?:(?:(?:[wy]?a|y?o|w?e|yi)ng|(?:w?[ae]|y(?:a|i|ua?))n)(?![aoeiuü])|a[oi]?|ou?|ei?|w(?:ai?|ei|o|u)|y(?:ao?|e|i|ou?|ue?))))(r(?![aoeiuü]))?([1-4])?/g;
var initialList = {
b: "p",
p: "pʰ",
m: "m",
f: "f",
d: "t",
t: "tʰ",
n: "n",
l: "l",
g: "k",
k: "kʰ",
h: "x",
j: "t͡ɕ",
q: "t͡ɕʰ",
x: "ɕ",
zh: "ʈ͡ʂ",
ch: "ʈ͡ʂʰ",
sh: "ʂ",
r: "ɻ",
z: "t͡s",
c: "t͡sʰ",
s: "s"
};
var finalList = [
["a", "a", "ä", "ɑ˞"],
["ai", "ai", "aɪ̯", "ɑ˞"],
["an", "an", "an", "ɑ˞"],
["ang", "ang", "ɑŋ", "ɑ̃˞"],
["ao", "ao", "ɑʊ̯", "ɑʊ̯˞"],
["e", "e", "ɤ", "ɤ˞"],
["ei", "ei", "eɪ̯", "ɚ"],
["en", "en", "ən", "ɚ"],
["eng", "eng", "əŋ", "ɚ̃"],
["i", "yi", "i", "iɚ̯"],
["ia", "ya", "jä", "jɑ˞"],
["ian", "yan", "jɛn", "jɑ˞"],
["iang", "yang", "jɑŋ", "jɑ̃˞"],
["iao", "yao", "jɑʊ̯", "jɑʊ̯˞"],
["ie", "ye", "jɛ", "jɛ˞"],
["in", "yin", "in", "iɚ̯"],
["ing", "ying", "iŋ", "iɚ̯̃"],
[null, "yo", "jɔ", "jɔ˞"],
["iong", "yong", "jʊŋ", "jʊ̃˞"],
["iu", "you", "joʊ̯", "joʊ̯˞"],
["o", null, "wɔ", "wɔ˞"],
[null, "o", "ɔ", "ɔ˞"],
["ong", "ong", "ʊŋ", "ʊ̃˞"],
["ou", "ou", "oʊ̯", "oʊ̯˞"],
["u", "wu", "u", "u˞"],
["ua", "wa", "wä", "wɑ˞"],
["uai", "wai", "waɪ̯", "wɑ˞"],
["uan", "wan", "wan", "wɑ˞"],
["uang", "wang", "wɑŋ", "wɑ̃˞"],
[null, "weng", "wəŋ", "wɚ̃"],
["ui", "wei", "weɪ̯", "wɚ"],
["un", "wen", "wən", "wɚ"],
["uo", "wo", "wɔ", "wɔ˞"],
["ü", "yu", "y", "yɚ̯"],
["üan", "yuan", "ɥɛn", "ɥɑ˞"],
["üe", "yue", "ɥɛ", "ɥɛ˞"],
["ün", "yun", "yn", "yɚ̯"]
];
return function(pinyin) {
var output = [];
pinyin = pinyin.normalize("NFD").toLowerCase().replace(/v|ü/g, "ü");
var pinyin_notone = pinyin.replace(/[̄́̌̀]/g, "");
var pinyin_normalized = pinyin.normalize("NFC");
pinyin_notone.replace(regex, function(match, initial, onsetFinal, zeroOnsetFinal, erhua, tone, index) {
if (/^[jqx]$/.test(initial)) onsetFinal = onsetFinal.replace(/^u(a?n|e)?$/, "ü$1");
if (onsetFinal == "ue") onsetFinal = "üe";
var syllable = zeroOnsetFinal
? finalList.find(function(element) {
return element[1] == zeroOnsetFinal;
})[2 + !!erhua]
: initial == "r" && onsetFinal == "i"
? "ɻ̩" + (erhua ? "ɚ̯" : "")
: initialList[initial] + (function(zcs) {
return zcs && onsetFinal == "i"
? (zcs[1] ? "ɻ̩" : "ɹ̩") + (erhua ? "ɚ̯" : "")
: finalList.find(function(element) {
return element[0] == onsetFinal;
})[2 + !!erhua];
})(initial.match(/^[zcs](h)?$/));
output.push((function(searchTone) {
return (searchTone = searchTone ? "̄́̌̀".indexOf(searchTone[0]) + 1 : tone)
? syllable.replace(/([aäɑɔoəɚɤeɛiʊuy]̃?|[ɹɻ]̩)/, "$1" + "́̌̀̂".charAt(searchTone - 1))
: syllable;
})(pinyin_normalized.substr(index, match.length).normalize("NFD").match(/[̄́̌̀]/)));
});
return output.join("<span>.</span><wbr>");
};
}, this);