-
Notifications
You must be signed in to change notification settings - Fork 0
/
calculate_similarity_metric.py
55 lines (51 loc) · 1.33 KB
/
calculate_similarity_metric.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
import pickle as pk
# Similarity matrix: SM[a][b] is the spaCy similarity between verb a and verb b,
# where a and b are line indices into 'new_verb_count_result.csv'.
SM = None
# Flip this switch to True to rebuild the matrix with spaCy and refresh the
# on-disk cache; leave it False to load the previously pickled matrix.
if False:
    import spacy

    nlp = spacy.load('en')
    # The second comma-separated field of every line is taken as the verb.
    with open('new_verb_count_result.csv', 'r') as f:
        verbList = [line.split(',')[1].strip() for line in f]
    # Parse each verb once up front. Calling nlp() on both operands inside the
    # nested loop would run the pipeline 2*n*n times instead of n times.
    docs = [nlp(u'%s' % verb) for verb in verbList]
    SM = []
    for a, doc_a in enumerate(docs):
        print(a)  # progress indicator: index of the row being computed
        SM.append([doc_a.similarity(doc_b) for doc_b in docs])
    # Pickle streams are bytes: binary mode is required on Python 3 and avoids
    # newline translation on Windows.
    with open('pickled-data', 'wb') as f:
        pk.dump(SM, f)
else:
    # SECURITY: pickle.load can execute arbitrary code. 'pickled-data' must be
    # a cache written by this script, never a file from an untrusted source.
    # NOTE(review): a cache written earlier in text mode on Windows may need to
    # be regenerated before it loads in binary mode - confirm.
    with open('pickled-data', 'rb') as f:
        SM = pk.load(f)
# Optional visualisation (disabled): render the similarity matrix SM as a
# heat map with a colour bar. Change the condition to True to show it.
if False:
    import numpy as np
    from matplotlib import cm
    import matplotlib.pyplot as plt

    figure, axes = plt.subplots()
    heatmap = axes.imshow(
        np.array(SM),
        cmap=cm.coolwarm,
        interpolation='nearest',
    )
    colorbar = figure.colorbar(heatmap, ticks=[0, 0.5, 1])
    # The colour bar is vertical, so its tick labels live on the y axis.
    colorbar.ax.set_yticklabels(['< 0', '0.5', '> 1'])
    plt.show()
# Report every unordered pair of distinct verbs whose similarity in SM is
# strictly greater than 0.4, one "verb verb score" line per pair.
if True:
    # Re-read the verbs (second comma-separated field of each line) so that
    # the row/column indices of SM can be mapped back to words.
    with open('new_verb_count_result.csv', 'r') as f:
        verbList = [line.split(',')[1].strip() for line in f]
    vl = list(enumerate(verbList))
    vl_dict = dict(vl)  # line index -> verb
    total = len(vl)
    for a in range(total):
        # Start at a + 1: each pair is visited once and the diagonal is skipped.
        for b in range(a + 1, total):
            score = SM[a][b]
            if not score > 0.4:
                continue
            print('%s %s %s' % (vl_dict[a], vl_dict[b], score))