# logistic_regression_functions.py

from sklearn.linear_model import LogisticRegression
from equation_parser import *
from inequalities import *
from equation_parser_extension import *
import numpy as np
import torch

# Allowed failure probability of the behavioral constraint.
delta = 0.05
# Concentration inequality used to form the confidence interval.
ineq = Inequality.HOEFFDING_INEQUALITY
# Constraint in reverse Polish notation; it parses to
# |TP(1) - TP(0)| - 0.25 * TP(1).
rev_polish_notation = "TP(1) TP(0) - abs 0.25 TP(1) * -"
# Fraction of the data used as the candidate set.
candidate_ratio = 0.40

def predict(theta, theta1, X):
    """
    Predict function for logistic regression; replace it with the predict
    function of a user-defined model. It currently implements

        1 / (1 + e^{-(X . theta + theta1)})

    :param theta: The fitted coefficient vector of the model
    :param theta1: The fitted intercept of the model
    :param X: The features of the dataset
    :return: A tensor with the probability of label 1 for each example
    """
    if theta is None or theta1 is None:
        return torch.ones(len(X))
    return torch.pow(
        torch.add(
            torch.exp(
                torch.mul(-1,
                          torch.add(torch.matmul(torch.tensor(X), theta),
                                    theta1))),
            1),
        -1)
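
# Sanity check (an illustrative sketch, not part of the pipeline): the chain
# of torch ops above is exactly the logistic sigmoid, so for non-None
# parameters predict(theta, theta1, X) should agree with
#
#     torch.sigmoid(torch.matmul(torch.tensor(X), theta) + theta1)
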
def fHat(theta, theta1, X, Y):
    """
    The main objective function, to be changed by the user as needed.
    It currently implements the negative log loss of the model.
    :param theta: The fitted coefficient vector of the model
    :param theta1: The fitted intercept of the model
    :param X: The features of the dataset
    :param Y: The true labels of the dataset
    :return: The negative log loss
    """
    pred = predict(theta, theta1, X)
    # Stack P(Y=0) and P(Y=1) column-wise. Note that CrossEntropyLoss applies
    # log-softmax to its inputs, so these probabilities are treated as
    # two-class scores rather than used directly.
    predicted_Y = torch.stack([torch.sub(1, pred), pred], dim=1)
    loss = torch.nn.CrossEntropyLoss()
    return -loss(predicted_Y, torch.tensor(Y).long())
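
# For reference, a direct negative log-likelihood on the probabilities
# themselves would look like the sketch below (this is not what fHat
# computes, since CrossEntropyLoss re-normalizes its inputs):
#
#     y = torch.tensor(Y).bool()
#     nll = -torch.mean(torch.log(torch.where(y, pred, 1 - pred)))
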
def simple_logistic(X, Y):
    """
    Runs plain logistic regression. Replace this function to plug in a
    user-defined model.
    :param X: The features of the dataset
    :param Y: The true labels of the dataset
    :return: The theta values (parameters) of the model as a pair of tensors
        (coefficients, intercept), or (None, None) if fitting fails
    """
    try:
        reg = LogisticRegression(solver='lbfgs').fit(X, Y)
        # Return the full coefficient vector instead of hard-coding five
        # features, and the intercept as a one-element tensor.
        theta = torch.tensor(reg.coef_[0], requires_grad=True)
        theta1 = torch.tensor(np.array([reg.intercept_[0]]), requires_grad=True)
        return theta, theta1
    except Exception as e:
        print("Exception in simple_logistic:", e)
        # Return a pair so callers can always unpack the result.
        return None, None
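
# Typical usage (a sketch; X and Y are any array-likes sklearn accepts):
#
#     theta, theta1 = simple_logistic(X, Y)
#     probs = predict(theta, theta1, X)
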
def eval_ghat(theta, theta1, X, Y, T, seldonian_type):
    """
    Evaluates ghat, the upper confidence bound on the constraint, on the
    safety set for the chosen Seldonian variant.
    """
    if seldonian_type == "base":
        return eval_ghat_base(theta, theta1, X, Y, T, False)
    elif seldonian_type == "mod":
        return eval_ghat_base(theta, theta1, X, Y, T, True)
    elif seldonian_type == "bound":
        return eval_ghat_extend(theta, theta1, X, Y, T, True, False, False)
    elif seldonian_type == "const":
        return eval_ghat_extend(theta, theta1, X, Y, T, False, True, False)
    elif seldonian_type == "opt":
        return eval_ghat_extend(theta, theta1, X, Y, T, True, True, True)
    raise ValueError("Unknown seldonian_type: " + str(seldonian_type))

def ghat(theta, theta1, X, Y, T, candidate_ratio, seldonian_type):
    """
    Computes ghat during candidate selection, inflating the bound by the
    safety/candidate split implied by candidate_ratio, for the chosen
    Seldonian variant.
    """
    if seldonian_type == "base":
        return ghat_base(theta, theta1, X, Y, T, True, candidate_ratio, False)
    elif seldonian_type == "mod":
        return ghat_base(theta, theta1, X, Y, T, True, candidate_ratio, True)
    elif seldonian_type == "bound":
        return ghat_extend(theta, theta1, X, Y, T, True, candidate_ratio, True, False, False)
    elif seldonian_type == "const":
        return ghat_extend(theta, theta1, X, Y, T, True, candidate_ratio, False, True, False)
    elif seldonian_type == "opt":
        return ghat_extend(theta, theta1, X, Y, T, True, candidate_ratio, True, True, True)
    raise ValueError("Unknown seldonian_type: " + str(seldonian_type))

def ghat_base(theta, theta1, X, Y, T, predict_bound, candidate_ratio, modified_h):
    pred = predict(theta, theta1, X)
    # Build the expression tree for the constraint and take the upper end of
    # its confidence interval.
    r = construct_expr_tree_base(rev_polish_notation)
    cand_safe_ratio = None
    if candidate_ratio:
        # Ratio of safety-set size to candidate-set size.
        cand_safe_ratio = (1 - candidate_ratio) / candidate_ratio
    _, u = eval_expr_tree_conf_interval_base(t_node=r, Y=Y, predicted_Y=pred, T=T,
                                             delta=delta, inequality=ineq,
                                             candidate_safety_ratio=cand_safe_ratio,
                                             predict_bound=predict_bound,
                                             modified_h=modified_h)
    return u

def eval_ghat_base(theta, theta1, X, Y, T, modified_h):
    return ghat_base(theta, theta1, X, Y, T, False, None, modified_h)

def ghat_extend(theta, theta1, X, Y, T, predict_bound, candidate_ratio,
                check_bound, check_const, modified_h):
    pred = predict(theta, theta1, X)
    r = construct_expr_tree(rev_polish_notation, delta,
                            check_bound=check_bound, check_constant=check_const)
    cand_safe_ratio = None
    if candidate_ratio:
        cand_safe_ratio = (1 - candidate_ratio) / candidate_ratio
    _, u = eval_expr_tree_conf_interval(t_node=r, Y=Y, predicted_Y=pred, T=T,
                                        inequality=ineq,
                                        candidate_safety_ratio=cand_safe_ratio,
                                        predict_bound=predict_bound,
                                        modified_h=modified_h)
    return u


def eval_ghat_extend(theta, theta1, X, Y, T, check_bound, check_const, modified_h):
    return ghat_extend(theta, theta1, X, Y, T, False, None, check_bound, check_const, modified_h)
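
# Minimal smoke test on synthetic data. An illustrative sketch only: it
# exercises the sklearn/torch paths (simple_logistic, predict, fHat), not
# the equation_parser confidence-bound machinery.
if __name__ == "__main__":
    rng = np.random.default_rng(0)
    X_demo = rng.normal(size=(100, 5))
    Y_demo = (X_demo[:, 0] + 0.5 * X_demo[:, 1] > 0).astype(int)
    theta, theta1 = simple_logistic(X_demo, Y_demo)
    if theta is not None:
        print("fHat (negative log loss):",
              fHat(theta, theta1, X_demo, Y_demo).item())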