-
Notifications
You must be signed in to change notification settings - Fork 49
/
summary_data.py
35 lines (26 loc) · 1.21 KB
/
summary_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import os
import pickle
# Directory (relative to the current working directory) that holds the
# pickled ATIS files loaded further down in this script.
DATA_DIR = "data/raw_data/ms-cntk-atis"
# Print the directory's entries as a quick check of which files are present.
print(os.listdir(DATA_DIR))
def load_ds(fname='atis.train.pkl'):
    """Unpickle a dataset file and print a short size summary.

    The file at ``fname`` must unpickle to a two-item ``(ds, dicts)`` pair
    where ``ds['query']``, ``dicts['token_ids']``, ``dicts['slot_ids']`` and
    ``dicts['intent_ids']`` all support ``len()``; those four lengths are
    printed after a "Done loading" line.

    NOTE: ``pickle.load`` can execute arbitrary code while unpickling, so
    only point this at files from a trusted source.

    Args:
        fname: Path of the pickle file to read.

    Returns:
        The ``(ds, dicts)`` pair exactly as stored in the file.
    """
    with open(fname, 'rb') as handle:
        payload = pickle.load(handle)
    ds, dicts = payload

    print('Done loading: ', fname)

    # (format template, container, key) -- looked up lazily, one row at a
    # time, so output order and failure points match a sequence of prints.
    summary_rows = (
        (' samples: {:4d}', ds, 'query'),
        (' vocab_size: {:4d}', dicts, 'token_ids'),
        (' slot count: {:4d}', dicts, 'slot_ids'),
        (' intent count: {:4d}', dicts, 'intent_ids'),
    )
    for template, container, key in summary_rows:
        print(template.format(len(container[key])))

    return ds, dicts
# Load both splits. Each pickle carries its own lookup tables; the training
# file's tables stay bound to `dicts` because the preview below decodes
# training samples. The test file's tables get their own name so they no
# longer overwrite the training ones.
train_ds, dicts = load_ds(os.path.join(DATA_DIR, 'atis.train.pkl'))
test_ds, test_dicts = load_ds(os.path.join(DATA_DIR, 'atis.test.pkl'))

# Forward lookup tables for tokens, slot labels and intent labels.
t2i, s2i, in2i = map(dicts.get, ['token_ids', 'slot_ids', 'intent_ids'])
# Inverted tables (value -> key), used to turn stored values back into text.
i2t, i2s, i2in = ({value: key for key, value in table.items()}
                  for table in (t2i, s2i, in2i))

query, slots, intent = map(train_ds.get,
                           ['query', 'slot_labels', 'intent_labels'])

# Preview at most the first five samples; min() keeps a split with fewer
# than five samples from raising IndexError.
for i in range(min(5, len(query))):
    # Header line: sample index, decoded intent, and the decoded query text.
    print('{:4d}:{:>15}: {}'.format(i, i2in[intent[i][0]],
                                    ' '.join(map(i2t.get, query[i]))))
    # One line per token, pairing it with the slot label at the same position.
    for token_id, slot_id in zip(query[i], slots[i]):
        print('{:>33} {:>40}'.format(i2t[token_id], i2s[slot_id]))
    print('*' * 74)