-
Notifications
You must be signed in to change notification settings - Fork 0
/
SVM-e8.py
110 lines (81 loc) · 3.08 KB
/
SVM-e8.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
'''
Name: Kevin Chen
Professor: Haim Schweitzer
Due Date: 8/2/2020
Project 3 - Exponential 8
Python 3.7.6
'''
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import StratifiedKFold, GridSearchCV
from sklearn.svm import SVC
from sklearn.utils import resample
import numpy as np
# ====================================
# STEP 1: read the training and testing data.
# Do not change any code of this step.
# specify path to training data and testing data
train_x_location = "x_train8.csv"
train_y_location = "y_train8.csv"
test_x_location = "x_test.csv"
test_y_location = "y_test.csv"
print("Reading training data")
x_train = np.loadtxt(train_x_location, dtype="uint8", delimiter=",")
y_train = np.loadtxt(train_y_location, dtype="uint8", delimiter=",")
m, n = x_train.shape # m training examples, each with n features
m_labels, = y_train.shape # m2 examples, each with k labels
l_min = y_train.min()
assert m_labels == m, "x_train and y_train should have same length."
assert l_min == 0, "each label should be in the range 0 - k-1."
k = y_train.max()+1
print(m, "examples,", n, "features,", k, "categories.")
print("Reading testing data")
x_test = np.loadtxt(test_x_location, dtype="uint8", delimiter=",")
y_test = np.loadtxt(test_y_location, dtype="uint8", delimiter=",")
m_test, n_test = x_test.shape
m_test_labels, = y_test.shape
l_min = y_train.min()
assert m_test_labels == m_test, "x_test and y_test should have same length."
assert n_test == n, "train and x_test should have same number of features."
print(m_test, "test examples.")
# ====================================
# STEP 2: pre processing
# Please modify the code in this step.
print("Pre processing data")
np.nan_to_num(x_train)
np.nan_to_num(x_test)
# Adding normalized noise to the training data
x_train_noise = x_train.copy()
np.random.seed(1)
x_train_noise = x_train_noise + np.random.normal(0, 0.1, x_train.shape)
y_train_noise = y_train.copy()
x_train = np.append(x_train, x_train_noise, axis=0)
y_train = np.append(y_train, y_train_noise, axis=0)
# Scaling the data
min_max_scaler = MinMaxScaler()
x_train = min_max_scaler.fit_transform(x_train)
x_test = min_max_scaler.transform(x_test)
# ====================================
# STEP 3: train model.
# Please modify the code in this step.
print("---train")
C_range = np.logspace(-2, 10, 10)
gamma_range = np.logspace(-9, 3, 10)
param_grid = dict(
gamma=gamma_range, C=C_range)
cv = StratifiedKFold(n_splits=5, shuffle=True)
grid = GridSearchCV(SVC(kernel="rbf"),
param_grid=param_grid, cv=cv, n_jobs=-1, scoring="balanced_accuracy")
grid.fit(x_train, y_train)
print("The best parameters are %s with a score of %0.2f"
% (grid.best_params_, grid.best_score_))
model = SVC(kernel="rbf", C=grid.best_params_[
"C"], gamma=grid.best_params_["gamma"], cache_size=2000)
model.fit(x_train, y_train)
# ====================================
# STEP3: evaluate model
# Don"t modify the code below.
print("---evaluate")
print(" number of support vectors: ", model.n_support_)
acc = model.score(x_test, y_test)
#print(acc)
print("acc:", acc)