-
Notifications
You must be signed in to change notification settings - Fork 0
/
local_api_gretel_configured_treebanks.go
175 lines (149 loc) · 5.13 KB
/
local_api_gretel_configured_treebanks.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
// +build !nodbxml
package main
import (
"encoding/json"
"fmt"
"strings"
)
// gretelTreebankComponent is the JSON representation of one component of a
// treebank. In this file one component is emitted per dact file of a corpus.
type gretelTreebankComponent struct {
// ID identifies the component; serialized as "id".
ID string `json:"id"`
// Description is free-form descriptive text; serialized as "description".
Description string `json:"description"`
// Title is the display title; serialized as "title".
Title string `json:"title"`
// Sentences and Words are interface{} because they hold either a count or
// the placeholder string "?" when the count is not known.
Sentences interface{} `json:"sentences"` // number if known, else "?"
Words interface{} `json:"words"` // number if known, else "?"
}
// gretelTreebankMetadata is the JSON representation of one metadata field
// definition of a treebank.
type gretelTreebankMetadata struct {
// Field is serialized as "field".
// NOTE(review): presumably the name of the metadata field — confirm against the client.
Field string `json:"field"`
Type string `json:"type"` // 'text' | 'int' | 'date',
Facet string `json:"facet"` // 'checkbox' | 'slider' | 'range' | 'dropdown',
// Show is serialized as "show".
Show bool `json:"show"`
// NOTE(review): the two lines below look like optional fields of the
// client-side type that have no Go counterpart in this struct — confirm.
//minValue?: number | Date,
//maxValue?: number | Date
}
// gretelTreebank is the JSON representation of a single treebank (corpus):
// its components keyed by component id, a description, a title and the list of
// metadata field definitions.
type gretelTreebank struct {
// Components maps a component id to its description; serialized as "components".
Components map[string]gretelTreebankComponent `json:"components"`
// Description is serialized as "description".
Description string `json:"description"`
// Title is serialized as "title".
Title string `json:"title"`
// Metadata is serialized as "metadata". A nil slice would be encoded as
// JSON null, a non-nil empty slice as [].
Metadata []gretelTreebankMetadata `json:"metadata"`
}
// gretelConfiguredTreebanksResponse maps a string key to a gretelTreebank.
// NOTE(review): judging by the name this is the shape of the JSON document
// produced by api_gretel_configured_treebanks (treebank id -> treebank), but
// that function builds a plain map[string]gretelTreebank — confirm intent.
type gretelConfiguredTreebanksResponse map[string]gretelTreebank
// gretelDactDisplayName derives a display name for a dact file from its path.
//
// The path is split on '/', '\\' and '.', and the second-to-last field is
// returned, i.e. the part of the file name directly in front of the final
// extension ("/data/corpus.dact" -> "corpus"). Paths that yield fewer than two
// fields used to cause an index-out-of-range panic; they now yield the only
// field if there is one, and fallback otherwise.
func gretelDactDisplayName(path, fallback string) string {
	fields := strings.FieldsFunc(path, func(c rune) bool {
		return c == '/' || c == '\\' || c == '.'
	})
	switch n := len(fields); {
	case n >= 2:
		return fields[n-2]
	case n == 1:
		return fields[0]
	default:
		return fallback
	}
}

// api_gretel_configured_treebanks writes a JSON object describing every
// treebank in q.prefixes to q.w.
//
// Each treebank is keyed by its id and lists one component per dact file
// returned by getDactFiles. Sentence and word counts are only reported when the
// treebank consists of exactly one dact file and the count in q.lines / q.words
// is positive; in every other case the placeholder "?" is sent, because the
// totals cannot be attributed to individual components.
//
// Treebanks for which getDactFiles returns an error are left out of the
// response. If encoding fails, gretelSendErr is called and nothing else is
// written.
func api_gretel_configured_treebanks(q *Context) {
	q.w.Header().Set("Access-Control-Allow-Origin", "*")

	treebanks := make(gretelConfiguredTreebanksResponse)
	for id := range q.prefixes {
		dactfiles, errval := getDactFiles(id)
		if errval != nil {
			// Best effort: a treebank whose dact files cannot be listed is
			// omitted rather than failing the whole request.
			// TODO error handling
			continue
		}

		// The counts are per treebank, not per dact file, so they are the
		// same for every component and can be determined once up front.
		var sentences, words interface{} = "?", "?"
		if len(dactfiles) == 1 {
			if q.lines[id] > 0 {
				sentences = q.lines[id]
			}
			if q.words[id] > 0 {
				words = q.words[id]
			}
		}

		components := make(map[string]gretelTreebankComponent, len(dactfiles))
		for _, dactfile := range dactfiles {
			name := gretelDactDisplayName(dactfile.path, dactfile.id)
			components[dactfile.id] = gretelTreebankComponent{
				ID:          dactfile.id,
				Description: name,
				Title:       name,
				Sentences:   sentences,
				Words:       words,
			}
		}

		treebanks[id] = gretelTreebank{
			Components:  components,
			Description: q.desc[id],
			Title:       id,
			// Non-nil empty slice so the field is encoded as [] and not null.
			Metadata: make([]gretelTreebankMetadata, 0),
		}
	}

	rbyte, errval := json.Marshal(treebanks)
	if gretelSendErr("Error encoding response", q, errval) {
		return
	}

	q.w.Header().Set("Content-Type", "application/json; charset=utf-8")
	q.w.Header().Set("Cache-Control", "no-cache")
	q.w.Header().Add("Pragma", "no-cache")
	fmt.Fprint(q.w, string(rbyte))
}
// The alternative version below is nicer: it subdivides the sentences based on
// their metadata and returns those subdivisions as components.
// However, when requests come in to retrieve results, we don't know which dact file they came from.
// So to use it, gretel_results would need to be edited to reverse-map that information,
// which is more work than we have time for at the moment.
// func api_gretel_configured_treebanks_with_components(q *Context) {
// treebanks := make(map[string]Treebank)
// for id := range q.prefixes {
// treebanks[id] = Treebank{
// // TODO list .dact files as components. retrieve number of sentences and words from the file if possible?
// // might be a simple query we can do to get this.
// Components: make(map[string]TreebankComponent),
// Description: q.desc[id],
// Title: id,
// Metadata: make([]TreebankMetadata, 0),
// }
// // extract components (if possible, we use the 'source' metadata field for this, and just count occurrences to figure out the size of the subcorpora)
// rows, errval := q.db.Query(fmt.Sprintf(
// `SELECT text, n
// FROM
// %s_c_%s_mval as mv
// LEFT JOIN
// %s_c_%s_midx as mid
// ON mv.id = mid.id
// WHERE
// mid.type = 'text' AND mid.name = 'source'`,
// Cfg.Prefix,
// id,
// Cfg.Prefix,
// id))
// knownSentences := 0
// if !logerr(errval) { // metadata tables exist
// for rows.Next() {
// var source string
// var sentenceCount int
// errval = rows.Scan(&source, &sentenceCount)
// if logerr(errval) {
// rows.Close()
// break
// }
// knownSentences += sentenceCount
// treebanks[id].Components[source] = TreebankComponent{
// ID: source,
// Description: "",
// Sentences: sentenceCount,
// Title: source,
// // can't retrieve from sql db it seems?
// // well, we could, I think, but it would require some smart decoding and summing of the mark column in the _deprel table
// Words: "?",
// }
// }
// }
// if q.lines[id]-knownSentences > 0 {
// treebanks[id].Components["unknown"] = TreebankComponent{
// ID: "unknown",
// Description: "Sentences of unknown origin",
// Sentences: q.lines[id] - knownSentences, // the rest?
// Title: "unknown",
// Words: "?",
// }
// }
// }
// // TODO error handling
// rbyte, errval := json.Marshal(treebanks)
// if logerr(errval) {
// return
// }
// q.w.Header().Set("Content-Type", "application/json; charset=utf-8")
// q.w.Header().Set("Cache-Control", "no-cache")
// q.w.Header().Add("Pragma", "no-cache")
// fmt.Fprint(q.w, string(json.RawMessage(rbyte)[:]))
// }