-
Notifications
You must be signed in to change notification settings - Fork 1
/
joinkatakanaoovplugin.go
104 lines (93 loc) · 2.47 KB
/
joinkatakanaoovplugin.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
package gosudachi
import (
"fmt"
"github.com/msnoigrs/gosudachi/dictionary"
)
// JoinKatakanaOovPluginConfig holds the user-supplied settings for
// JoinKatakanaOovPlugin. Both fields are pointers so that "not set" (nil)
// can be told apart from an explicit value.
type JoinKatakanaOovPluginConfig struct {
// OovPOS is the part-of-speech specification that SetUp passes to
// grammar.GetPartOfSpeechId. It is required: SetUp fails when it is nil
// or empty.
OovPOS *[]string
// MinLength is optional. SetUp uses 1 when it is nil and rejects
// negative values.
MinLength *int
}
// JoinKatakanaOovPlugin is a path-rewriting plugin that concatenates runs of
// adjacent katakana nodes into a single OOV node (see Rewrite).
type JoinKatakanaOovPlugin struct {
// config is the pending configuration; SetUp reads it and sets it to nil
// once it has been applied successfully.
config *JoinKatakanaOovPluginConfig
// oovPosId is the part-of-speech id resolved in SetUp and handed to
// ConcatenateOov in Rewrite.
oovPosId int16
// minLength is the code point count below which a katakana node triggers
// joining in Rewrite even when it is not OOV.
minLength int
}
// NewJoinKatakanaOovPlugin returns a plugin that holds the given config.
// A nil config is replaced by an empty JoinKatakanaOovPluginConfig so the
// returned plugin always has a non-nil config.
func NewJoinKatakanaOovPlugin(config *JoinKatakanaOovPluginConfig) *JoinKatakanaOovPlugin {
	plugin := &JoinKatakanaOovPlugin{config: config}
	if plugin.config == nil {
		plugin.config = new(JoinKatakanaOovPluginConfig)
	}
	return plugin
}
// GetConfigStruct returns the plugin's config as an interface value holding
// a *JoinKatakanaOovPluginConfig. When the plugin currently has no config,
// a fresh empty one is allocated, stored on the plugin, and returned.
func (p *JoinKatakanaOovPlugin) GetConfigStruct() interface{} {
	if p.config != nil {
		return p.config
	}
	p.config = new(JoinKatakanaOovPluginConfig)
	return p.config
}
// SetUp validates the plugin configuration and applies it to the plugin.
//
// It resolves config.OovPOS to a part-of-speech id through
// grammar.GetPartOfSpeechId and stores the minimum length (1 when
// config.MinLength is nil).
//
// An error is returned when:
//   - the plugin has no config, or OovPOS is nil or empty;
//   - the id returned by grammar.GetPartOfSpeechId is negative;
//   - MinLength is set and negative.
//
// On success the config is set to nil. Because of that, SetUp may be entered
// with a nil config (for example when it is called a second time); this is
// reported as a missing oovPOS rather than dereferencing the nil pointer.
func (p *JoinKatakanaOovPlugin) SetUp(grammar *dictionary.Grammar) error {
	cfg := p.config
	// A nil config carries no oovPOS, so treat it like the unset case
	// instead of panicking.
	if cfg == nil || cfg.OovPOS == nil || len(*cfg.OovPOS) == 0 {
		return fmt.Errorf("JoinKatakanaOovPlugin: oovPOS is not specified")
	}
	p.oovPosId = grammar.GetPartOfSpeechId(*cfg.OovPOS)
	if p.oovPosId < 0 {
		return fmt.Errorf("JoinKatakanaOovPlugin: oovPOS is invalid")
	}
	minLength := 1
	if cfg.MinLength != nil {
		minLength = *cfg.MinLength
		if minLength < 0 {
			return fmt.Errorf("JoinKatakanaOovPlugin: minLength is negative")
		}
	}
	p.minLength = minLength
	// The settings have been copied onto the plugin; drop the config.
	p.config = nil
	return nil
}
// isShorter reports whether the code point count of the span
// [node.Begin, node.End), as given by text.CodePointCount, is strictly less
// than length.
func isShorter(length int, text *InputText, node *LatticeNode) bool {
	count := text.CodePointCount(node.Begin, node.End)
	return count < length
}
// isKatakanaNode reports whether the character category types returned by
// GetCharCategoryTypes for node contain every bit of dictionary.KATAKANA.
func isKatakanaNode(text *InputText, node *LatticeNode) bool {
	masked := GetCharCategoryTypes(text, node) & dictionary.KATAKANA
	return masked == dictionary.KATAKANA
}
// canOovBowNode reports whether the character category types returned by
// GetCharCategoryTypes for node do NOT contain every bit of
// dictionary.NOOOVBOW.
// NOTE(review): presumably NOOOVBOW marks characters that must not begin an
// OOV word — confirm against the dictionary package.
func canOovBowNode(text *InputText, node *LatticeNode) bool {
	flags := GetCharCategoryTypes(text, node)
	forbidden := flags&dictionary.NOOOVBOW == dictionary.NOOOVBOW
	return !forbidden
}
// Rewrite walks path and joins runs of adjacent katakana nodes.
//
// A node triggers joining when it is katakana and is either OOV or shorter
// than the configured minimum length. The run is extended over the
// neighbouring katakana nodes on both sides, leading nodes rejected by
// canOovBowNode are dropped from the front, and if more than one node
// remains the range [begin, end) is handed to ConcatenateOov with the
// plugin's part-of-speech id.
//
// Any error from ConcatenateOov is returned prefixed with the plugin name.
func (p *JoinKatakanaOovPlugin) Rewrite(text *InputText, path *[]*LatticeNode, lattice *Lattice) error {
	for i := 0; i < len(*path); i++ {
		cur := (*path)[i]
		if !(cur.IsOov || isShorter(p.minLength, text, cur)) {
			continue
		}
		if !isKatakanaNode(text, cur) {
			continue
		}

		// Extend the run to the left while the preceding node is katakana.
		begin := i
		for begin > 0 && isKatakanaNode(text, (*path)[begin-1]) {
			begin--
		}

		// Extend the run to the right; end is exclusive.
		end := i + 1
		for end < len(*path) && isKatakanaNode(text, (*path)[end]) {
			end++
		}

		// Skip leading nodes that canOovBowNode rejects.
		for begin != end && !canOovBowNode(text, (*path)[begin]) {
			begin++
		}

		// Joining needs at least two nodes.
		if end-begin <= 1 {
			continue
		}
		if _, err := ConcatenateOov(path, begin, end, p.oovPosId, lattice); err != nil {
			return fmt.Errorf("JoinKatakanaOovPlugin: %s", err)
		}
		// NOTE(review): presumably the joined node now sits at index begin
		// and the node after it already ended the katakana run, so scanning
		// resumes at begin+2 after the loop's i++ — confirm against
		// ConcatenateOov.
		i = begin + 1
	}
	return nil
}