Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Exercise1 #30

Open
wants to merge 12 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
97 changes: 97 additions & 0 deletions ML_1.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
import numpy as np
import torch
from torch.autograd import Variable
import pandas as pd

#过采样函数
def oversampling(data):
data0 = data[data[:,0]==0]
data1 = data[data[:,0]==1]#0、1分开
size0 = data0.shape[0]
size1 = data1.shape[0]
if size0 >= size1:#多的减少的
index= np.random.choice(size1,size0-size1,replace=False,p=None)#随机采样
data1 = np.concatenate((data1, data1[index,:]), axis=0)#合并
else:
index= np.random.choice(size0,size1-size0,replace=False,p=None)#随机采样
data0 = np.concatenate((data0, data0[index,:]), axis=0)#合并
x = np.concatenate((data0[:, 1:31], data1[:, 1:31]), axis=0)#合并
y = np.concatenate((data0[:, 0:1], data1[:, 0:1]), axis=0)#合并
return x, y

#对数据进行预处理
def pre_process(dataset):
data = dataset.iloc[:, 1:32].values
data[data=='M'] = 1
data[data=='B'] = 0 #MB分类
data = data.astype(np.float64)
x,y = oversampling(data)
size = x.shape[0]
index = np.random.permutation(size) #随机排列
x = x[index]
y = y[index]
mu = np.mean(x, axis=0)
sigma = np.std(x, axis=0)
x=(x - mu)/sigma
split = int(len(y)*0.3) #0.3
x_train = x[:split]
y_train = y[:split]
x_test = x[split:]
y_test = y[split:]
return x_train, y_train, x_test, y_test

#精确度计算
def cal_acc(x, y, y_pred):
size = x.shape[0]
y_pred = np.array([0 if y_pred[i] < 0.5 else 1 for i in range(size)])
acc = np.mean([1 if y[i] == y_pred[i] else 0 for i in range(size)])
return acc

#模型
class Model(torch.nn.Module):
def __init__(self):
super(Model, self).__init__()
self.linear = torch.nn.Linear(30, 1)

def forward(self, x):
y_pred = torch.sigmoid(self.linear(x))
return y_pred

#主体
dataset= pd.read_csv('./data.csv')
x_train, y_train, x_test, y_test = pre_process(dataset)#处理
x_train = Variable(torch.Tensor(x_train))
y_train = Variable(torch.Tensor(y_train))
x_test = Variable(torch.Tensor(x_test))
y_test = Variable(torch.Tensor(y_test))

model = Model()
criterion = torch.nn.BCELoss(reduction="mean")#损失(二分类交叉熵)
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)#优化器

#进行循环训练
i=50 #隔50个打印
j=1 #计数器
print('每',i,'个epoch一组')

for epoch in range(1000): #1000个
optimizer.zero_grad()
y_pred = model(x_train)
loss = criterion(y_pred, y_train)
loss.backward() #反向传播
optimizer.step() #更新优化器
if epoch % i == 0:
print('第',j,'组','损失loss:', loss.data.item(), '精确度accuracy:', cal_acc(x_train, y_train, y_pred))
j=j+1

y_test_pred = model(x_test)
test_acc = cal_acc(x_test, y_test, y_test_pred)
print('Test总精确度:', test_acc)

for f in model.parameters():
print('具体数据:')
print(f.data)
print(f.grad)



6 changes: 4 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# SES2020spring
# Exercise 1

## Another day
# The code is in ML_1.py, please let data.csv and ML_1.py in the same file.

# A result for reference only is shown in figure named result.
Loading