#!/usr/bin/env python
# -*- coding: utf-8 -*-
import re
import nltk
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.autograd as autograd
import numpy as np
# hyphen escaped so it is removed literally rather than forming a character
# range; raw string avoids invalid-escape warnings
bioclean = lambda t: re.sub(r'[.,?;*!%^&_+():\-\[\]{}]', '', t.replace('"', '').replace('/', '').replace('\\', '').replace("'", '').strip().lower()).split()
softmax = lambda z: np.exp(z) / np.sum(np.exp(z))
stopwords = nltk.corpus.stopwords.words("english")
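# Sent_Posit_Drmm_Modeler: a JPDRMM / POSIT-DRMM style ranker. Each sentence of
# a document is scored against the question through three similarity views
# (exact-match one-hot, context-insensitive word embeddings, context-sensitive
# trigram-convolved embeddings); the pooled scores are combined with external
# sentence and document features into sentence and document relevance scores.
# The code targets the old torch.autograd.Variable API.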
class Sent_Posit_Drmm_Modeler(nn.Module):
def __init__(self,
embedding_dim = 30,
k_for_maxpool = 5,
sentence_out_method = 'MLP',
k_sent_maxpool = 1
):
super(Sent_Posit_Drmm_Modeler, self).__init__()
self.k = k_for_maxpool
self.k_sent_maxpool = k_sent_maxpool
self.doc_add_feats = 11
self.sent_add_feats = 10
#
self.embedding_dim = embedding_dim
self.sentence_out_method = sentence_out_method
# to create q weights
self.init_context_module()
self.init_question_weight_module()
self.init_mlps_for_pooled_attention()
self.init_sent_output_layer()
self.init_doc_out_layer()
# doc loss func
self.margin_loss = nn.MarginRankingLoss(margin=1.0)
if(use_cuda):
self.margin_loss = self.margin_loss.cuda()
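    # NOTE: init_mesh_module() is never called from __init__, so self.mesh_h0
    # and self.mesh_gru only exist if it is invoked explicitly before
    # apply_mesh_gru() or get_mesh_rep() are used.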
def init_mesh_module(self):
self.mesh_h0 = autograd.Variable(torch.randn(1, 1, self.embedding_dim))
self.mesh_gru = nn.GRU(self.embedding_dim, self.embedding_dim)
if(use_cuda):
self.mesh_h0 = self.mesh_h0.cuda()
self.mesh_gru = self.mesh_gru.cuda()
def init_context_module(self):
self.trigram_conv_1 = nn.Conv1d(self.embedding_dim, self.embedding_dim, 3, padding=2, bias=True)
self.trigram_conv_activation_1 = torch.nn.Sigmoid()
if(use_cuda):
self.trigram_conv_1 = self.trigram_conv_1.cuda()
self.trigram_conv_activation_1 = self.trigram_conv_activation_1.cuda()
def init_question_weight_module(self):
self.q_weights_mlp = nn.Linear(self.embedding_dim+1, 1, bias=True)
if(use_cuda):
self.q_weights_mlp = self.q_weights_mlp.cuda()
def init_mlps_for_pooled_attention(self):
self.linear_per_q1 = nn.Linear(3 * 3, 8, bias=True)
self.my_relu1 = torch.nn.LeakyReLU(negative_slope=0.1)
self.linear_per_q2 = nn.Linear(8, 1, bias=True)
if(use_cuda):
self.linear_per_q1 = self.linear_per_q1.cuda()
self.linear_per_q2 = self.linear_per_q2.cuda()
self.my_relu1 = self.my_relu1.cuda()
def init_sent_output_layer(self):
if(self.sentence_out_method == 'MLP'):
self.sent_out_layer_1 = nn.Linear(self.sent_add_feats+1, 8, bias=False)
self.sent_out_activ_1 = torch.nn.LeakyReLU(negative_slope=0.1)
self.sent_out_layer_2 = nn.Linear(8, 1, bias=False)
if(use_cuda):
self.sent_out_layer_1 = self.sent_out_layer_1.cuda()
self.sent_out_activ_1 = self.sent_out_activ_1.cuda()
self.sent_out_layer_2 = self.sent_out_layer_2.cuda()
else:
self.sent_res_h0 = autograd.Variable(torch.randn(2, 1, 5))
self.sent_res_bigru = nn.GRU(input_size=self.sent_add_feats+1, hidden_size=5, bidirectional=True, batch_first=False)
self.sent_res_mlp = nn.Linear(10, 1, bias=False)
if(use_cuda):
self.sent_res_h0 = self.sent_res_h0.cuda()
self.sent_res_bigru = self.sent_res_bigru.cuda()
self.sent_res_mlp = self.sent_res_mlp.cuda()
def init_doc_out_layer(self):
self.final_layer_1 = nn.Linear(self.doc_add_feats+self.k_sent_maxpool, 8, bias=True)
self.final_activ_1 = torch.nn.LeakyReLU(negative_slope=0.1)
self.final_layer_2 = nn.Linear(8, 1, bias=True)
self.oo_layer = nn.Linear(2, 1, bias=True)
if(use_cuda):
self.final_layer_1 = self.final_layer_1.cuda()
self.final_activ_1 = self.final_activ_1.cuda()
self.final_layer_2 = self.final_layer_2.cuda()
self.oo_layer = self.oo_layer.cuda()
def my_hinge_loss(self, positives, negatives, margin=1.0):
delta = negatives - positives
loss_q_pos = torch.sum(F.relu(margin + delta), dim=-1)
return loss_q_pos
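    # NOTE: the GRU context path below relies on self.context_gru,
    # self.context_gru_activation and (in do_for_one_doc_bigru) self.context_h0,
    # none of which are created in this file; only the convolutional context
    # path is usable as-is.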
def apply_context_gru(self, the_input, h0):
output, hn = self.context_gru(the_input.unsqueeze(1), h0)
output = self.context_gru_activation(output)
out_forward = output[:, 0, :self.embedding_dim]
out_backward = output[:, 0, self.embedding_dim:]
output = out_forward + out_backward
res = output + the_input
return res, hn
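    # Residual trigram convolution: Conv1d with kernel 3 and padding 2 lengthens
    # the sequence by two positions, average pooling with kernel 3 and stride 1
    # restores the original length, and the input is added back as a residual.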
def apply_context_convolution(self, the_input, the_filters, activation):
conv_res = the_filters(the_input.transpose(-2,-1))
if(activation is not None):
conv_res = activation(conv_res)
conv_res = F.avg_pool1d(conv_res, kernel_size=3, stride=1)
conv_res = conv_res.transpose(-1, -2)
conv_res = conv_res + the_input
return conv_res.squeeze(0)
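    # Pairwise cosine similarity: flatten A and B to 2D and return the
    # (rows_of_A x rows_of_B) matrix of cosine similarities.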
def my_cosine_sim(self, A, B):
Ar = A.reshape(-1, A.size()[-1])
Br = B.reshape(-1, B.size()[-1])
A_mag = torch.norm(Ar, 2, dim=-1)
B_mag = torch.norm(Br, 2, dim=-1)
num = torch.mm(Ar, Br.transpose(-1, -2))
den = torch.mm(A_mag.unsqueeze(-1), B_mag.unsqueeze(-1).transpose(-1,-2))
dist_mat = num / den
return dist_mat
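    # k-max pooling per query term: emit [row maximum, mean of the top-k values,
    # mean over the whole row] for every row of the similarity matrix.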
def pooling_method(self, sim_matrix):
sorted_res = torch.sort(sim_matrix, -1)[0] # sort the input minimum to maximum
k_max_pooled = sorted_res[:,-self.k:] # select the last k of each instance in our data
average_k_max_pooled = k_max_pooled.sum(-1)/float(self.k) # average these k values
the_maximum = k_max_pooled[:, -1] # select the maximum value of each instance
the_average_over_all = sorted_res.sum(-1)/float(sim_matrix.size(1)) # add average of all elements as long sentences might have more matches
the_concatenation = torch.stack([the_maximum, average_k_max_pooled, the_average_over_all], dim=-1) # concatenate maximum value and average of k-max values
return the_concatenation # return the concatenation
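    # Fuse the three pooled similarity views with a small MLP per query term,
    # weight each term's score by q_weights, and average into one scalar.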
def get_output(self, input_list, weights):
temp = torch.cat(input_list, -1)
lo = self.linear_per_q1(temp)
lo = self.my_relu1(lo)
lo = self.linear_per_q2(lo)
lo = lo.squeeze(-1)
lo = lo * weights
sr = lo.sum(-1) / lo.size(-1)
return sr
def apply_sent_res_bigru(self, the_input):
output, hn = self.sent_res_bigru(the_input.unsqueeze(1), self.sent_res_h0)
output = self.sent_res_mlp(output)
return output.squeeze(-1).squeeze(-1)
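    # Score every sentence of one document: compute the three similarity views
    # against the question, pool them, append the resulting emission to the
    # sentence's external features, and run the sentence output layer.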
def do_for_one_doc_cnn(self, doc_sents_embeds, sents_af, question_embeds, q_conv_res_trigram, q_weights, k2):
res = []
for i in range(len(doc_sents_embeds)):
sent_embeds = autograd.Variable(torch.FloatTensor(doc_sents_embeds[i]), requires_grad=False)
gaf = autograd.Variable(torch.FloatTensor(sents_af[i]), requires_grad=False)
if(use_cuda):
sent_embeds = sent_embeds.cuda()
gaf = gaf.cuda()
conv_res = self.apply_context_convolution(sent_embeds, self.trigram_conv_1, self.trigram_conv_activation_1)
#
sim_insens = self.my_cosine_sim(question_embeds, sent_embeds).squeeze(0)
sim_oh = (sim_insens > (1 - (1e-3))).float()
sim_sens = self.my_cosine_sim(q_conv_res_trigram, conv_res).squeeze(0)
#
insensitive_pooled = self.pooling_method(sim_insens)
sensitive_pooled = self.pooling_method(sim_sens)
oh_pooled = self.pooling_method(sim_oh)
#
sent_emit = self.get_output([oh_pooled, insensitive_pooled, sensitive_pooled], q_weights)
sent_add_feats = torch.cat([gaf, sent_emit.unsqueeze(-1)])
res.append(sent_add_feats)
res = torch.stack(res)
if(self.sentence_out_method == 'MLP'):
res = self.sent_out_layer_1(res)
res = self.sent_out_activ_1(res)
res = self.sent_out_layer_2(res).squeeze(-1)
else:
res = self.apply_sent_res_bigru(res)
# ret = self.get_max(res).unsqueeze(0)
ret = self.get_kmax(res, k2)
return ret, res
def do_for_one_doc_bigru(self, doc_sents_embeds, sents_af, question_embeds, q_conv_res_trigram, q_weights, k2):
res = []
hn = self.context_h0
for i in range(len(doc_sents_embeds)):
sent_embeds = autograd.Variable(torch.FloatTensor(doc_sents_embeds[i]), requires_grad=False)
gaf = autograd.Variable(torch.FloatTensor(sents_af[i]), requires_grad=False)
if(use_cuda):
sent_embeds = sent_embeds.cuda()
gaf = gaf.cuda()
conv_res, hn = self.apply_context_gru(sent_embeds, hn)
#
sim_insens = self.my_cosine_sim(question_embeds, sent_embeds).squeeze(0)
sim_oh = (sim_insens > (1 - (1e-3))).float()
sim_sens = self.my_cosine_sim(q_conv_res_trigram, conv_res).squeeze(0)
#
insensitive_pooled = self.pooling_method(sim_insens)
sensitive_pooled = self.pooling_method(sim_sens)
oh_pooled = self.pooling_method(sim_oh)
#
sent_emit = self.get_output([oh_pooled, insensitive_pooled, sensitive_pooled], q_weights)
sent_add_feats = torch.cat([gaf, sent_emit.unsqueeze(-1)])
res.append(sent_add_feats)
res = torch.stack(res)
if(self.sentence_out_method == 'MLP'):
res = self.sent_out_layer_1(res)
res = self.sent_out_activ_1(res)
res = self.sent_out_layer_2(res).squeeze(-1)
else:
res = self.apply_sent_res_bigru(res)
# ret = self.get_max(res).unsqueeze(0)
ret = self.get_kmax(res, k2)
res = torch.sigmoid(res)
return ret, res
def get_max(self, res):
return torch.max(res)
def get_kmax(self, res, k):
res = torch.sort(res,0)[0]
res = res[-k:].squeeze(-1)
if(len(res.size())==0):
res = res.unsqueeze(0)
if(res.size()[0] < k):
to_concat = torch.zeros(k - res.size()[0])
if(use_cuda):
to_concat = to_concat.cuda()
res = torch.cat([res, to_concat], -1)
return res
def get_max_and_average_of_k_max(self, res, k):
k_max_pooled = self.get_kmax(res, k)
average_k_max_pooled = k_max_pooled.sum()/float(k)
the_maximum = k_max_pooled[-1]
        the_concatenation = torch.cat([the_maximum.unsqueeze(0), average_k_max_pooled.unsqueeze(0)])
return the_concatenation
def get_average(self, res):
res = torch.sum(res) / float(res.size()[0])
return res
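    # NOTE: min_max_norm is not defined anywhere in this file, so
    # get_maxmin_max raises AttributeError if called.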
def get_maxmin_max(self, res):
res = self.min_max_norm(res)
res = torch.max(res)
return res
def apply_mesh_gru(self, mesh_embeds):
mesh_embeds = autograd.Variable(torch.FloatTensor(mesh_embeds), requires_grad=False)
if(use_cuda):
mesh_embeds = mesh_embeds.cuda()
output, hn = self.mesh_gru(mesh_embeds.unsqueeze(1), self.mesh_h0)
return output[-1,0,:]
def get_mesh_rep(self, meshes_embeds, q_context):
meshes_embeds = [self.apply_mesh_gru(mesh_embeds) for mesh_embeds in meshes_embeds]
meshes_embeds = torch.stack(meshes_embeds)
sim_matrix = self.my_cosine_sim(meshes_embeds, q_context).squeeze(0)
max_sim = torch.sort(sim_matrix, -1)[0][:, -1]
output = torch.mm(max_sim.unsqueeze(0), meshes_embeds)[0]
return output
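    # Forward pass for one question/document pair. This batched variant is a
    # work in progress: the shape prints below were left in while adapting the
    # per-document helpers to batched tensors.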
def emit_one(
self, doc_sents_embeds, doc_mask, question_embeds,
question_mask, q_idfs, sent_external, doc_external
):
doc_sents_embeds = autograd.Variable(torch.FloatTensor(doc_sents_embeds),requires_grad=False)
doc_mask = autograd.Variable(torch.FloatTensor(doc_mask), requires_grad=False)
question_embeds = autograd.Variable(torch.FloatTensor(question_embeds), requires_grad=False)
question_mask = autograd.Variable(torch.FloatTensor(question_mask), requires_grad=False)
q_idfs = autograd.Variable(torch.FloatTensor(q_idfs), requires_grad=False)
doc_external = autograd.Variable(torch.FloatTensor(doc_external), requires_grad=False)
sent_external = autograd.Variable(torch.FloatTensor(sent_external), requires_grad=False)
if(use_cuda):
doc_mask = doc_mask.cuda()
question_mask = question_mask.cuda()
sent_external = sent_external.cuda()
doc_external = doc_external.cuda()
q_idfs = q_idfs.cuda()
question_embeds = question_embeds.cuda()
doc_sents_embeds = doc_sents_embeds.cuda()
#
# print(question_embeds.size())
q_context = self.apply_context_convolution(question_embeds, self.trigram_conv_1, self.trigram_conv_activation_1)
# print(q_context.size())
#
q_weights = torch.cat([q_context, q_idfs.unsqueeze(-1)], -1)
q_weights = self.q_weights_mlp(q_weights).squeeze(-1)
q_weights = F.softmax(q_weights, dim=-1)
# print(q_weights.size())
#
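        # debug: inspect tensor shapes while porting to batched inputs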
print(question_embeds.size())
print(doc_sents_embeds.size())
reshaped = doc_sents_embeds.reshape(-1, doc_sents_embeds.size()[-2], doc_sents_embeds.size()[-1])
print(reshaped.size())
s_context = self.apply_context_convolution(reshaped, self.trigram_conv_1, self.trigram_conv_activation_1)
print(q_context.size())
print(s_context.size())
similarity = self.my_cosine_sim(q_context, s_context)
print(similarity.size())
# s_context = s_context.reshape_as(doc_sents_embeds)
# print(s_context.size())
#
        good_out, gs_emits = self.do_for_one_doc_cnn(doc_sents_embeds, sent_external, question_embeds, q_context, q_weights, self.k_sent_maxpool)
#
        good_out_pp = torch.cat([good_out, doc_external], -1)
#
final_good_output = self.final_layer_1(good_out_pp)
final_good_output = self.final_activ_1(final_good_output)
final_good_output = self.final_layer_2(final_good_output)
#
gs_emits = gs_emits.unsqueeze(-1)
gs_emits = torch.cat([gs_emits, final_good_output.unsqueeze(-1).expand_as(gs_emits)], -1)
gs_emits = self.oo_layer(gs_emits).squeeze(-1)
gs_emits = torch.sigmoid(gs_emits)
#
return final_good_output, gs_emits
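# Smoke test with random inputs: build a model and push one random batch through
# emit_one. Shapes here are illustrative placeholders, not real pipeline output.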
use_cuda = False
b_size = 10
max_sents = 20
max_toks = 100
embedding_dim = 30
k_for_maxpool = 5
model = Sent_Posit_Drmm_Modeler(embedding_dim=embedding_dim, k_for_maxpool=k_for_maxpool)
model.train()
doc_batch = np.random.randn(b_size, max_sents, max_toks, embedding_dim)
doc_mask = np.ones(shape= (b_size, max_sents, max_toks, embedding_dim))
doc_external = np.random.randn(b_size, model.doc_add_feats)
sent_external = np.random.randn(b_size, max_sents, model.sent_add_feats)
quest_batch = np.random.randn(b_size, max_toks, embedding_dim)
quest_mask = np.ones(shape= (b_size, max_toks, embedding_dim))
q_idfs = np.random.randn(b_size, max_toks)
model.emit_one(
doc_sents_embeds = doc_batch,
doc_mask = doc_mask,
question_embeds = quest_batch,
question_mask = quest_mask,
q_idfs = q_idfs,
sent_external = sent_external,
doc_external = doc_external
)