from __future__ import print_function
import os, math
import torch
from tree import Tree
from vocab import Vocab


# loading GLOVE word vectors
# if .pth file is found, will load that
# else will load from .txt file & save
def load_word_vectors(path):
    if os.path.isfile(path+'.pth') and os.path.isfile(path+'.vocab'):
        print('==> File found, loading to memory')
        vectors = torch.load(path+'.pth')
        vocab = Vocab(filename=path+'.vocab')
        return vocab, vectors
    # saved file not found, read from txt file
    # and create tensors for word vectors
    print('==> File not found, preparing, be patient')
    count = sum(1 for line in open(path+'.txt'))
    with open(path+'.txt', 'r') as f:
        contents = f.readline().rstrip('\n').split(' ')
        dim = len(contents[1:])
    words = [None]*count
    vectors = torch.zeros(count, dim)
    with open(path+'.txt', 'r') as f:
        idx = 0
        for line in f:
            contents = line.rstrip('\n').split(' ')
            words[idx] = contents[0]
            vectors[idx] = torch.Tensor(list(map(float, contents[1:])))
            idx += 1
    with open(path+'.vocab', 'w') as f:
        for word in words:
            f.write(word+'\n')
    vocab = Vocab(filename=path+'.vocab')
    torch.save(vectors, path+'.pth')
    return vocab, vectors
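
# Usage sketch, kept as comments so importing this module stays side-effect free.
# The path prefix below is hypothetical; it should point at a GloVe dump named
# <prefix>.txt, and the function caches <prefix>.pth and <prefix>.vocab beside it:
#
#   glove_vocab, glove_emb = load_word_vectors('data/glove/glove.840B.300d')
#   print(glove_emb.size())   # torch.Size([N, 300]), N = number of lines in the .txt file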

# write unique words from a set of files to a new file
def build_vocab(filenames, vocabfile):
    vocab = set()
    for filename in filenames:
        with open(filename, 'r') as f:
            for line in f:
                tokens = line.rstrip('\n').split(' ')
                vocab |= set(tokens)
    with open(vocabfile, 'w') as f:
        for token in vocab:
            f.write(token+'\n')
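
# Usage sketch (the file names are hypothetical): gather every distinct token from
# the tokenised splits into one vocabulary file, then load it with the Vocab class
# imported above:
#
#   build_vocab(['data/sick/train/a.toks', 'data/sick/dev/a.toks'], 'data/sick/vocab.txt')
#   vocab = Vocab(filename='data/sick/vocab.txt')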

# map a scalar label in [1, num_classes] to a sparse target distribution over
# the classes, splitting mass between the two nearest integer classes
def map_label_to_target(label, num_classes):
    # torch.zeros, not torch.Tensor: the latter returns uninitialised memory
    target = torch.zeros(1, num_classes)
    ceil = int(math.ceil(label))
    floor = int(math.floor(label))
    if ceil == floor:
        target[0][floor-1] = 1
    else:
        target[0][floor-1] = ceil - label
        target[0][ceil-1] = label - floor
    return target
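
# Worked example with num_classes = 5: a score of 3.6 lies between classes 3 and 4,
# so the mass is split by distance to each; an integer score maps to a one-hot row.
#
#   map_label_to_target(3.6, 5)  ->  [[0.0, 0.0, 0.4, 0.6, 0.0]]
#   map_label_to_target(2.0, 5)  ->  [[0.0, 1.0, 0.0, 0.0, 0.0]]
#
# The original score can be recovered as the dot product with [1, 2, 3, 4, 5].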

def map_label_to_target_sentiment(label, num_classes=0, fine_grain=False):
    # num_classes and fine_grain are not used yet; labels are already
    # preprocessed into integer class indices, so just wrap the value
    target = torch.LongTensor(1)
    target[0] = int(label)
    return target
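
# Example: map_label_to_target_sentiment(2) returns a LongTensor holding [2],
# i.e. the class index is passed through unchanged for a cross-entropy style loss.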

# print the size of every parameter tensor in a model and the total element count
def count_param(model):
    print('_param count_')
    params = list(model.parameters())
    sum_param = 0
    for p in params:
        sum_param += p.numel()
        print(p.size())
    # emb_sum = params[0].numel()
    # sum_param -= emb_sum
    print('sum', sum_param)
    print('____________')

# recursively print a tree, one node index per line, indented by '| ' per level of depth
def print_tree(tree, level):
    indent = ''
    for i in range(level):
        indent += '| '
    line = indent + str(tree.idx)
    print(line)
    for i in range(tree.num_children):  # range, not Python-2-only xrange
        print_tree(tree.children[i], level+1)
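
# Usage sketch, assuming the Tree class imported above exposes an `idx` field,
# an add_child() method, and the num_children / children members used here:
#
#   root, child = Tree(), Tree()
#   root.idx, child.idx = 0, 1
#   root.add_child(child)
#   print_tree(root, 0)   # prints '0' then '| 1'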