-
Notifications
You must be signed in to change notification settings - Fork 5
/
testing.py
77 lines (57 loc) · 2.71 KB
/
testing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import unittest
from data_loading.data_loader import DataLoader
from data_loading.data_utils import pickle_call
import numpy as np
class DataLoadingTest(unittest.TestCase):
def test_generator_data_length_fast_text_SNLI_in_dict(self):
dl = DataLoader(embeddings_initial='FastText-Crawl', embedding_loading='in_dict')
# dl = DataLoader(embeddings_initial='FastText', embedding_loading='load_dict',
# embedding_params={'first_time_emb_load': False})
dl.load('data/pickles/')
dl.get_all_and_dump('data/pickles/')
gen = dl.get_generator(drop_last=False, initialize=True)
tr = dl.get_train_data()
nr_data_points = 0
# short analysis if the amount of data yielded equals the total amount of data points in the training set.
# TLDC; yes it does
while True:
data, batch = gen.next()
if data is None:
break
nr_data_points += len(data)
self.assertEqual(nr_data_points, len(tr))
def test_generator_data_length_Polyglot_SNLI_in_dict(self):
dl = DataLoader(embeddings_initial='Polyglot', embedding_loading='in_dict')
# dl = DataLoader(embeddings_initial='FastText', embedding_loading='load_dict',
# embedding_params={'first_time_emb_load': False})
dl.load('data/pickles/')
dl.get_all_and_dump('data/pickles/')
gen = dl.get_generator(drop_last=False, initialize=True)
tr = dl.get_train_data()
nr_data_points = 0
# short analysis if the amount of data yielded equals the total amount of data points in the training set.
# TLDC; yes it does
while True:
data, batch = gen.next()
if data is None:
break
nr_data_points += len(data)
self.assertEqual(nr_data_points, len(tr))
def test_loaded_polyglot_embeddings(self):
data = pickle_call('data/embeddings/polyglot-en.pkl')
dl = DataLoader(embeddings_initial='Polyglot', embedding_loading='in_dict')
dl.load('data/pickles/')
dl.get_all_and_dump('data/pickles/')
all_true = None
for i in range(len(data[0])):
term = data[0][i]
embedding = data[1][i]
if term in dl.embedding.vocab_dict:
position = dl.embedding.vocab_dict[term]
stored_embedding = dl.embedding.embeddings.weight[position].data.numpy()
if all_true is None:
all_true = np.array_equal(embedding, stored_embedding)
else:
all_true = all_true and np.array_equal(embedding, stored_embedding)
self.assertTrue(all_true)
# Run the suite only when this file is executed as a script. Without the
# guard, merely importing the module (e.g. during test discovery) would run
# the tests and exit the interpreter.
if __name__ == '__main__':
    unittest.main()