forked from ouprince/SVD
-
Notifications
You must be signed in to change notification settings - Fork 0
/
svd.py
69 lines (60 loc) · 2.56 KB
/
svd.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
# -*- coding:utf-8 -*-
# SVD matrix factorization program
# Algorithm: gradient descent + weight regularization
import sys, os
import numpy as np
class SVD(object):
    """Low-rank factorization of a partially observed user-item matrix.

    The matrix ``UI`` (shape ``U x I``) is approximated by ``X . Y`` where
    ``X`` has shape ``U x K`` and ``Y`` has shape ``K x I``. The factors are
    fitted with batch gradient descent plus a weight (L2-style) penalty.
    Entries of ``UI`` equal to ``-1`` are treated as unknown and do not
    contribute to the error or to the gradients.

    Args:
        UI: 2-D array-like of ratings; ``-1`` marks an unknown entry.
        learning_rate: Step size of the gradient descent.
        lambdax: Regularization weight applied to ``X`` and ``Y``.
        K: Inner dimension of the factorization.
    """

    # Sentinel value that marks an unknown entry in the UI matrix.
    UNKNOWN = -1

    def __init__(self, UI, learning_rate=0.005, lambdax=0.5, K=100):
        # Accept any 2-D array-like (nested lists included), not only ndarray.
        self.UI = np.asarray(UI)
        if self.UI.ndim != 2:
            raise ValueError("UI must be a 2-D matrix")
        self.learning_rate = learning_rate
        self.lambdax = lambdax
        self.K = K
        self.U, self.I = self.UI.shape
        self.X = np.random.randn(self.U, self.K)  # X factor, U x K
        self.Y = np.random.randn(self.K, self.I)  # Y factor, K x I
        self.res = None  # X . Y, filled in by train()

    def train(self, epochs=100):
        """Run ``epochs`` gradient-descent steps and store ``X . Y`` in ``res``.

        Each epoch prints the sum of squared errors (SSE) over the known
        entries. Whenever the SSE grows compared with the previous epoch,
        ``self.learning_rate`` is multiplied by 0.8 (the change persists on
        the instance).
        """
        # Use the instance's own matrix; a module-level `UI` may not exist.
        observed = self.UI != self.UNKNOWN
        previous_sse = None
        for epoch in range(epochs):
            # Error matrix with unknown entries zeroed, so they drop out of
            # both the SSE and the gradient sums below.
            residual = np.where(observed, self.UI - np.dot(self.X, self.Y), 0.0)
            sse = float(np.sum(residual ** 2))
            print("After %d epochs, The SSE = %.6f" % (epoch, sse))
            if previous_sse is not None and sse > previous_sse:
                self.learning_rate = 0.8 * self.learning_rate
            previous_sse = sse
            # Error terms, averaged over the number of items (for X) and the
            # number of users (for Y), exactly as in the per-element formula:
            #   grad_X[u, k] = sum_i residual[u, i] * Y[k, i] / I
            #   grad_Y[k, i] = sum_u residual[u, i] * X[u, k] / U
            grad_X = np.dot(residual, self.Y.T) / float(self.I)
            grad_Y = np.dot(self.X.T, residual) / float(self.U)
            # Both steps are computed from the current factors before either
            # factor is updated.
            delta_X = (self.lambdax * self.X - grad_X) * self.learning_rate
            delta_Y = (self.lambdax * self.Y - grad_Y) * self.learning_rate
            # Gradient descent step.
            self.X = self.X - delta_X
            self.Y = self.Y - delta_Y
        self.res = np.dot(self.X, self.Y)

    def predict(self, user, item):
        """Return the reconstructed rating of ``user`` (row) for ``item`` (column).

        Raises:
            RuntimeError: If ``train`` has not been called yet.
        """
        if self.res is None:
            raise RuntimeError("svd is not trained yet ... ")
        return self.res[user, item]
if __name__ == "__main__":
    # Demo: factorize a small 3 x 4 rating matrix; -1 marks an unknown entry.
    UI = np.array(
        [
            [-1, 0, 0.5, 0.8],
            [0.5, 0.2, -1, 0.1],
            [0.4, 0.3, 0.3, -1],
        ]
    )
    svd = SVD(UI)
    svd.train()
    # Print the reconstructed matrix X . Y.
    print(svd.res)