forked from gokcehan/lf
-
Notifications
You must be signed in to change notification settings - Fork 0
/
diacritics.go
57 lines (46 loc) · 1.6 KB
/
diacritics.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
package main
import (
"strconv"
"unicode"
)
// normMap maps a rune to the diacritic-free rune that replaces it.
// It is allocated and filled by init (through appendTransliterate) and is
// consulted by removeDiacritics; runes without an entry are left as they are.
var normMap map[rune]rune
// init allocates normMap and loads the built-in transliteration tables into
// it, in the order they are listed below.
func init() {
	normMap = make(map[rune]rune)

	// Each table is a pair of parallel strings: the i-th rune of base is
	// replaced by the i-th rune of norm.
	tables := []struct {
		base string
		norm string
	}{
		{
			// (Not only) European.
			base: "ěřůøĉĝĥĵŝŭèùÿėįųāēīūļķņģőűëïąćęłńśźżõșțčďĺľňŕšťýžéíñóúüåäöçîşûğăâđêôơưáàãảạ",
			norm: "eruocghjsueuyeiuaeiulkngoueiacelnszzostcdllnrstyzeinouuaaocisugaadeoouaaaaa",
		},
		{
			// Vietnamese.
			base: "áạàảãăắặằẳẵâấậầẩẫéẹèẻẽêếệềểễiíịìỉĩoóọòỏõôốộồổỗơớợờởỡúụùủũưứựừửữyýỵỳỷỹđ",
			norm: "aaaaaaaaaaaaaaaaaeeeeeeeeeeeiiiiiioooooooooooooooooouuuuuuuuuuuyyyyyyd",
		},
	}

	for _, table := range tables {
		appendTransliterate(table.base, table.norm)
	}
}
// appendTransliterate registers a transliteration table in normMap.
//
// base and norm are parallel strings: the i-th rune of base is replaced by the
// i-th rune of norm. For every pair whose base rune has a distinct upper-case
// form, that upper-case rune is registered as well and mapped to the
// upper-case form of the replacement, so letter case survives normalization.
//
// normMap must already be allocated. The function panics when base and norm
// do not contain the same number of runes; the tables are compile-time
// constants, so a mismatch is a programmer error.
func appendTransliterate(base, norm string) {
	baseRunes := []rune(base)
	normRunes := []rune(norm)
	if len(baseRunes) != len(normRunes) {
		panic("Base and normalized strings have different length: base=" + strconv.Itoa(len(baseRunes)) + ", norm=" + strconv.Itoa(len(normRunes))) // programmer error in constant length
	}

	for i, baseRune := range baseRunes {
		normRune := normRunes[i]
		normMap[baseRune] = normRune

		// unicode.ToUpper returns its argument unchanged for runes that have
		// no distinct upper-case form. Writing the upper-case entry in that
		// case would overwrite the mapping stored just above with an
		// upper-cased replacement, so only add it for a genuinely new key.
		if baseUpper := unicode.ToUpper(baseRune); baseUpper != baseRune {
			normMap[baseUpper] = unicode.ToUpper(normRune)
		}
	}
}
// removeDiacritics returns baseString with every rune that has an entry in
// normMap replaced by its mapped rune; all other runes are copied unchanged.
//
// Letter case is preserved, not folded: normMap holds separate upper-case
// entries that map to upper-case replacements, so a caller that needs
// case-insensitive matching has to lower-case the result itself.
func removeDiacritics(baseString string) string {
	// len counts bytes, which is an upper bound on the number of runes, so the
	// slice never has to grow while appending.
	normalizedRunes := make([]rune, 0, len(baseString))
	for _, baseRune := range baseString {
		if normRune, ok := normMap[baseRune]; ok {
			baseRune = normRune
		}
		normalizedRunes = append(normalizedRunes, baseRune)
	}
	return string(normalizedRunes)
}