-
Notifications
You must be signed in to change notification settings - Fork 3
/
AdaBoostBinary.py
112 lines (95 loc) · 3.39 KB
/
AdaBoostBinary.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.base import clone
from sklearn.ensemble import AdaBoostClassifier
class AdaBoostBinaryClassifier(object):
    '''
    AdaBoost classifier for binary labels encoded as 0 / 1, boosting
    depth-1 decision trees (decision stumps).

    INPUT:
    - n_estimators (int)
      * The number of estimators to use in boosting
      * Default: 50
    - learning_rate (float)
      * Scales the weight given to every estimator
      * Lower learning rate means more accurate decision boundary,
        but slower to converge
      * Default: 1

    Attributes filled in by fit():
    - estimators_: list of the fitted stumps, in boosting order
    - estimator_weight_: numpy array with one weight per stump
    '''

    def __init__(self,
                 n_estimators=50,
                 learning_rate=1):
        self.base_estimator = DecisionTreeClassifier(max_depth=1)
        self.n_estimator = n_estimators
        self.learning_rate = learning_rate

        # Will be filled-in in the fit() step
        self.estimators_ = []
        # Builtin float: the np.float alias no longer exists in recent NumPy.
        self.estimator_weight_ = np.zeros(self.n_estimator, dtype=float)

    def fit(self, x, y):
        '''
        INPUT:
        - x: 2d numpy array, feature matrix
        - y: numpy array, labels
        OUTPUT:
        - self, so that calls can be chained

        Build the estimators for the AdaBoost estimator.
        '''
        x = np.asarray(x)
        y = np.asarray(y)

        # Start from a clean state so that calling fit() twice does not
        # stack the new stumps on top of the ones from the previous call.
        self.estimators_ = []
        self.estimator_weight_ = np.zeros(self.n_estimator, dtype=float)

        # Every sample starts with the same weight.
        sample_weight = np.ones(x.shape[0]) / x.shape[0]

        for round_idx in range(self.n_estimator):
            estimator, sample_weight, estimator_weight = \
                self._boost(x, y, sample_weight)
            self.estimators_.append(estimator)
            self.estimator_weight_[round_idx] = estimator_weight
        return self

    @staticmethod
    def _estimator_weight(err, learning_rate=1):
        '''
        INPUT:
        - err: float, weighted error rate of one estimator (0 to 1)
        - learning_rate: float, multiplier applied to the weight
        OUTPUT:
        - float, learning_rate * log((1 - err) / err)

        Turn a weighted error rate into a finite estimator weight.
        '''
        # Keep err strictly inside (0, 1): a perfect (err == 0) or always
        # wrong (err == 1) stump would otherwise produce +/- infinity.
        eps = np.finfo(float).eps
        err = np.clip(err, eps, 1.0 - eps)
        return learning_rate * np.log((1.0 - err) / err)

    def _boost(self, x, y, sample_weight):
        '''
        INPUT:
        - x: 2d numpy array, feature matrix
        - y: numpy array, labels
        - sample_weight: numpy array
        OUTPUT:
        - estimator: DecisionTreeClassifier
        - sample_weight: numpy array (updated weights, summing to 1)
        - estimator_weight: float (weight of estimator)

        Go through one iteration of the AdaBoost algorithm. Build one estimator.
        '''
        estimator = clone(self.base_estimator)
        estimator.fit(x, y, sample_weight=sample_weight)

        # 1.0 where the stump is wrong, 0.0 where it is right.
        missed = (estimator.predict(x) != np.asarray(y)).astype(float)
        err = np.dot(sample_weight, missed) / np.sum(sample_weight)
        alpha = self._estimator_weight(err, self.learning_rate)

        # Up-weight the misclassified samples, then renormalize so that the
        # weights cannot grow without bound over many boosting rounds.
        new_sample_weight = sample_weight * np.exp(alpha * missed)
        new_sample_weight = new_sample_weight / np.sum(new_sample_weight)
        return estimator, new_sample_weight, alpha

    def predict(self, x):
        '''
        INPUT:
        - x: 2d numpy array, feature matrix
        OUTPUT:
        - labels: numpy array of predictions (0 or 1)

        Raises ValueError when called before fit().
        '''
        if not self.estimators_:
            raise ValueError('This classifier is not fitted yet; '
                             'call fit() before predict().')

        votes = []
        for estimator in self.estimators_:
            pred = estimator.predict(x)
            # Map label 0 to -1 so that the weighted sum acts as a vote.
            votes.append(np.where(pred == 0, -1, pred))

        weighted_vote = np.dot(self.estimator_weight_, np.array(votes))
        # Positive vote -> 1; negative vote or an exact tie -> 0.
        return (weighted_vote > 0).astype(float)

    def score(self, x, y):
        '''
        INPUT:
        - x: 2d numpy array, feature matrix
        - y: numpy array, labels
        OUTPUT:
        - score: float (accuracy score between 0 and 1)
        '''
        # accuracy = (TP + TN) / n, i.e. the share of matching labels
        return float(np.mean(self.predict(x) == np.asarray(y)))

    def sklearn_AdaBoostClassifier(self, x_train, y_train, x_test, y_test):
        '''
        INPUT:
        - x_train, y_train: feature matrix and labels used for fitting
        - x_test, y_test: feature matrix and labels used for scoring
        OUTPUT:
        - score: float, value returned by the scikit-learn model's score()

        Fit scikit-learn's AdaBoostClassifier with the same base estimator,
        number of estimators and learning rate, as a reference result.
        '''
        # n_estimators and learning_rate go by keyword: recent scikit-learn
        # releases accept only the base estimator positionally.
        model = AdaBoostClassifier(self.base_estimator,
                                   n_estimators=self.n_estimator,
                                   learning_rate=self.learning_rate)
        model.fit(x_train, y_train)
        return model.score(x_test, y_test)