-
Notifications
You must be signed in to change notification settings - Fork 1
/
common.py
137 lines (120 loc) · 4.59 KB
/
common.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
import collections
import datetime
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot
import time
import pandas
import os
from pandas.tools.plotting import table
# Result record returned by compute_accuracy: overall accuracy (acc) plus the
# ppv and npv ratios, which compute_accuracy sets to None when there were no
# positive / negative predictions to divide by.
Accuracy = collections.namedtuple('Accuracy', 'acc ppv npv')
# strftime pattern used by timeit for the timestamp in its start message.
DATE_FORMAT_STRING = '%Y-%m-%d %H:%M:%S'
def timeit(func):
    """Decorator that reports when ``func`` starts and how long it ran.

    Before the call it prints a banner with the function name and the current
    local time formatted with ``DATE_FORMAT_STRING``; after the call returns
    it prints the elapsed wall-clock time truncated to whole seconds.  If
    ``func`` raises, the exception propagates and no duration is printed.

    Args:
        func: the callable to wrap.

    Returns:
        A wrapper that forwards all positional and keyword arguments to
        ``func`` and returns its result unchanged.
    """
    # Local import: the module does not import functools at file level.
    import functools

    # functools.wraps copies __name__/__doc__ onto the wrapper so decorated
    # functions stay identifiable in tracebacks, help() and logs.
    @functools.wraps(func)
    def wrapper(*args, **kwds):
        start = time.time()
        print('Measure times for function: {} ({})'.format(func.__name__, datetime.datetime.now().strftime(DATE_FORMAT_STRING)))
        ret = func(*args, **kwds)
        end = time.time()
        print('Total running time of {} in seconds: {}'.format(func.__name__, int(end - start)))
        return ret
    return wrapper
def compute_accuracy(prediction, real, corpus=None, debug=False):
    """Score binary predictions against the real labels.

    Each pair ``prediction[i]`` / ``real[i]`` is converted with ``int`` and
    compared.  A matching pair counts as a true negative when the predicted
    value is 0 and as a true positive otherwise.

    Args:
        prediction: indexable sequence of predicted labels.
        real: indexable sequence of real labels, same length as
            ``prediction``.
        corpus: optional indexable sequence parallel to ``prediction``; only
            read when ``debug`` is true, to print the misclassified item.
        debug: when true and ``corpus`` is given, print every mismatch.

    Returns:
        ``Accuracy(acc, ppv, npv)`` where ``acc`` is the fraction of matching
        pairs, ``ppv`` is true positives divided by the number of predictions
        equal to 1, and ``npv`` is true negatives divided by the number of
        predictions equal to 0.  A ratio whose denominator is zero is
        reported as ``None``; for empty input all three fields are ``None``.

    Raises:
        ValueError: if ``prediction`` and ``real`` differ in length.
    """
    if len(prediction) != len(real):
        raise ValueError('prediction {} and real {} length should by equal'.format(len(prediction), len(real)))

    total = len(prediction)
    if total == 0:
        # Nothing to score: every ratio is undefined, so follow the same
        # "None means undefined" convention already used for ppv/npv.
        return Accuracy(None, None, None)

    # Denominators are counted on the raw values (not int()-converted), as
    # the ratios are defined over predictions that compare equal to 1 / 0.
    num_of_pred_pos = sum(1 for x in prediction if x == 1)
    num_of_pred_neg = sum(1 for x in prediction if x == 0)

    correct = 0
    true_positive = 0
    true_negative = 0
    # Index-based access is kept on purpose so prediction, real and corpus
    # are all addressed the same way.
    for i in range(total):
        p = int(prediction[i])
        r = int(real[i])
        if p == r:
            correct += 1
            if p == 0:
                true_negative += 1
            else:
                true_positive += 1
        elif debug and corpus is not None:
            # Print false-positives and false-negatives.  Single-argument
            # print(...) produces the same output on Python 2 and 3.
            print("Real: %s, Prediction: %s" % (r, p))
            print("Tweet is: %s" % corpus[i])
            print('')

    npv = float(true_negative) / num_of_pred_neg if num_of_pred_neg else None
    ppv = float(true_positive) / num_of_pred_pos if num_of_pred_pos else None
    return Accuracy(float(correct) / total, ppv, npv)
def plot(xs, ys, colors, x_label, y_label, title, func_labels=None, x_scale=None, legend_location=None, save=None):
    """Draw one curve per ``(x, y, color)`` triple on a single axes.

    Args:
        xs: sequence of x-value sequences, one per curve.
        ys: sequence of y-value sequences, parallel to ``xs``.
        colors: sequence of matplotlib format/color strings, parallel to
            ``xs``.
        x_label: label for the x axis.
        y_label: label for the y axis.
        title: figure title.
        func_labels: optional sequence of legend labels, parallel to ``xs``;
            when given, a legend is drawn.
        x_scale: optional x-axis scale passed to ``set_xscale``.
        legend_location: legend ``loc`` value; defaults to ``'best'``.
        save: optional output path.  When given the figure is written there
            (missing parent directories are created); otherwise the figure is
            shown interactively.

    Raises:
        OSError: if the parent directory of ``save`` cannot be created.
    """
    f, ax = matplotlib.pyplot.subplots(1)
    try:
        labelled_lines = []
        for i, (x, y, color) in enumerate(zip(xs, ys, colors)):
            if func_labels is not None:
                # ax.plot returns a list of line objects; collect them flat.
                labelled_lines.extend(ax.plot(x, y, color, label=func_labels[i]))
            else:
                ax.plot(x, y, color)

        # Pad the visible y-range by 10% but clamp it to [0, 1].  Skipped
        # when there is no data, since min()/max() of nothing would raise.
        y_values = [value for series in ys for value in series]
        if y_values:
            ax.set_ylim(max(0, min(y_values) * 0.9), min(1, max(y_values) * 1.1))

        ax.set_xlabel(x_label)
        ax.set_ylabel(y_label)
        if x_scale is not None:
            ax.set_xscale(x_scale)
        if labelled_lines:
            legend_location = legend_location if legend_location is not None else 'best'
            ax.legend(labelled_lines, [line.get_label() for line in labelled_lines], loc=legend_location)
        f.suptitle(title, fontsize=14, fontweight='bold')

        if save is not None:
            print('Saving graph into: {}'.format(save))
            target_dir = os.path.dirname(save)
            # dirname is '' for a bare file name; nothing to create then.
            if target_dir and not os.path.isdir(target_dir):
                try:
                    os.makedirs(target_dir)
                except OSError:
                    # Tolerate a concurrent creation, surface real failures.
                    if not os.path.isdir(target_dir):
                        raise
            # Save this figure explicitly instead of pyplot's current figure.
            f.savefig(save)
        else:
            matplotlib.pyplot.show()
    finally:
        # Always release the figure, even if saving or showing failed.
        matplotlib.pyplot.close(f)
def plot_table(title, cells, column_names, row_names, save=None):
    """Render ``cells`` as a titled table figure and print it as a DataFrame.

    Args:
        title: figure title.
        cells: table contents, passed to ``pandas.DataFrame``.
        column_names: column headers; must match the number of columns.
        row_names: row headers; must match the number of rows.
        save: optional output path.  When given the figure is written there
            (missing parent directories are created); otherwise the figure is
            shown interactively.

    Raises:
        OSError: if the parent directory of ``save`` cannot be created.
    """
    f, ax = matplotlib.pyplot.subplots(1)
    try:
        # Hide the axes decorations so only the table itself is visible.
        ax.xaxis.set_visible(False)
        ax.yaxis.set_visible(False)
        ax.set_frame_on(False)
        f.suptitle(title, fontsize=14, fontweight='bold')

        df = pandas.DataFrame(cells)
        df.columns = column_names
        df.index = row_names

        tab = table(
            ax=ax,
            data=df,
            colLabels=column_names,
            rowLabels=row_names,
            loc='upper right',
        )
        # Fix the font size instead of letting matplotlib shrink it to fit.
        tab.auto_set_font_size(False)
        tab.set_fontsize(12)
        print(df)

        if save is not None:
            print('Saving table into: {}'.format(save))
            target_dir = os.path.dirname(save)
            # dirname is '' for a bare file name; nothing to create then.
            if target_dir and not os.path.isdir(target_dir):
                try:
                    os.makedirs(target_dir)
                except OSError:
                    # Tolerate a concurrent creation, surface real failures.
                    if not os.path.isdir(target_dir):
                        raise
            # Save this figure explicitly instead of pyplot's current figure.
            f.savefig(save)
        else:
            matplotlib.pyplot.show()
    finally:
        # Always release the figure, even if saving or showing failed.
        matplotlib.pyplot.close(f)
def max_specific_accuracy(accuracies):
    """Return ``(index, value)`` of the largest score in ``accuracies``.

    The search starts from a baseline of 0, so only scores strictly greater
    than 0 can win; on ties the earliest index is kept.  ``None`` entries
    (undefined scores) are skipped rather than compared, because comparing
    ``None`` with a number raises ``TypeError`` on Python 3.

    Args:
        accuracies: iterable of numeric scores, possibly containing ``None``.

    Returns:
        Tuple ``(max_idx, max_acc)``.  ``(0, 0)`` when the input is empty or
        no entry exceeds 0.
    """
    max_acc = 0
    max_idx = 0
    for i, acc in enumerate(accuracies):
        if acc is None:
            continue
        if acc > max_acc:
            max_acc = acc
            max_idx = i
    return max_idx, max_acc
def max_accuracy(accuracies):
    """Locate the best entry of ``accuracies`` for each metric.

    Args:
        accuracies: records exposing ``acc``, ``ppv`` and ``npv`` attributes.

    Returns:
        Tuple ``(acc_idx, ppv_idx, npv_idx)`` with the index that
        ``max_specific_accuracy`` selects for each metric, in that order.
    """
    winners = []
    for metric in ('acc', 'ppv', 'npv'):
        scores = [getattr(entry, metric) for entry in accuracies]
        # Only the position of the best score is reported, not its value.
        best_idx, _ = max_specific_accuracy(scores)
        winners.append(best_idx)
    return tuple(winners)
def best_feature_names(named_features, name, bools):
    """List the ``__name__`` of each feature whose matching flag is truthy.

    Args:
        named_features: mapping from a name to a sequence of feature objects
            that have a ``__name__`` attribute.
        name: key selecting the feature sequence in ``named_features``.
        bools: flags paired positionally with the selected features; pairing
            stops at the shorter of the two sequences.

    Returns:
        List of feature names, in their original order.
    """
    chosen = []
    for feature, keep in zip(named_features[name], bools):
        if keep:
            chosen.append(feature.__name__)
    return chosen