-
Notifications
You must be signed in to change notification settings - Fork 1
/
svd_movielens.py
136 lines (92 loc) · 2.73 KB
/
svd_movielens.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
import csv
import numpy as np
import pandas as pd
def SVD(M, dim=600):
    """Compute a rank-limited singular value decomposition of ``M``.

    The factors are obtained from the eigen-decompositions of the two
    symmetric matrices ``M @ M.T`` (for ``U`` and the singular values) and
    ``M.T @ M`` (for ``V``).

    Args:
        M: 2-D array-like of shape ``(n, m)``.
        dim: Maximum number of components to keep. It is clamped to
            ``min(n, m)`` so small matrices no longer raise ``IndexError``.
            Defaults to 600, the value that used to be hard-coded.

    Returns:
        A tuple ``(U, sigma, V)`` where ``U`` has shape ``(n, k)``,
        ``sigma`` has shape ``(k,)`` (descending order) and ``V`` has shape
        ``(m, k)``, with ``k = min(dim, n, m)``. All three are rounded to
        two decimals.

    Raises:
        ValueError: If ``M`` is not two-dimensional.

    Side effects:
        Prints the share of the singular-value sum covered by the kept
        components (labelled "% variance of data" in the message).

    Note:
        ``U`` and ``V`` come from independent eigen-decompositions, so the
        signs of corresponding columns are not guaranteed to agree.
    """
    M = np.asarray(M, dtype=float)
    if M.ndim != 2:
        raise ValueError("SVD expects a 2-D matrix")
    Mt = np.transpose(M)

    # Never request more components than the matrix can provide.
    dim = max(0, min(int(dim), M.shape[0], M.shape[1]))

    # M @ Mt is symmetric, so eigh is the appropriate routine: it returns
    # real eigenvalues and orthonormal eigenvectors (eig may return complex
    # values for the same input).
    eigenvalue, eigenvec = np.linalg.eigh(np.dot(M, Mt))

    # Indirect sort: the same index order is reused for the eigenvectors.
    sortindex = eigenvalue.argsort()[::-1]
    eigenvalue = eigenvalue[sortindex]

    # Singular values are the square roots of the eigenvalues; abs() guards
    # against tiny negative values caused by floating-point round-off.
    sigma = np.around(np.sqrt(np.abs(eigenvalue)), decimals=2)
    totalsigma = np.sum(sigma, dtype=float)
    sumsigma = np.sum(sigma[:dim], dtype=float)

    # An all-zero matrix has no singular-value mass; avoid dividing by zero.
    preserved = (sumsigma / totalsigma) * 100 if totalsigma else 0.0
    print('We have', dim, 'components preserving', preserved, '% variance of data')
    sigma = sigma[:dim]

    # U: eigenvectors of M @ Mt, ordered like sigma and cut to n x dim.
    U = np.around(eigenvec[:, sortindex][:, :dim], decimals=2)

    # V: eigenvectors of Mt @ M, ordered by descending eigenvalue, m x dim.
    eigenvalue, eigenvec = np.linalg.eigh(np.dot(Mt, M))
    sortindex = eigenvalue.argsort()[::-1]
    V = np.around(eigenvec[:, sortindex][:, :dim], decimals=2)

    return U, sigma, V
def query(q, V):
    """Project a ratings vector onto the column space of ``V`` and back.

    Computes ``(q @ V) @ V.T``: the vector is first mapped onto the columns
    of ``V`` and then mapped back to its original length.

    Args:
        q: 1-D array-like whose length equals the number of rows of ``V``.
        V: 2-D array whose columns are the component vectors.

    Returns:
        A NumPy array of the same length as ``q``.
    """
    # Coordinates of q with respect to the columns of V.
    coords = np.dot(q, V)
    # Map those coordinates back to the original space.
    return np.dot(coords, np.transpose(V))
# Load the movie titles used to label the recommendations.
print('Movie Recommender using SVD')
# movienames[k] is the title of the movie whose id is k + 1.
# NOTE(review): the MovieLens 100K u.item file is believed to be Latin-1
# encoded; decoding it with a UTF-8 default fails on accented titles.
# Confirm against the dataset README.
with open('u.item', 'r', encoding='latin-1', newline='') as fileh:
    movienames = [row[1] for row in csv.reader(fileh, delimiter='|') if row]

num_users = 943
num_movies = 1682

# Build the user x movie rating matrix M; 0 means "not rated".
# Ids in u.data are 1-based, hence the -1 on both indices.
M = np.zeros((num_users, num_movies))
with open('u.data', 'r', newline='') as fp2:
    for row in csv.reader(fp2, delimiter='\t'):
        if row:
            M[int(row[0]) - 1, int(row[1]) - 1] = float(row[2])

U, sigma, V = SVD(M)

# Predict ratings for one user.
print("Enter userid (1-943)")
uid = int(input())
# Reject ids outside the matrix; 0 or a negative id would otherwise
# silently wrap around to another user's row.
if not 1 <= uid <= num_users:
    raise SystemExit(
        "userid must be between 1 and %d, got %d" % (num_users, uid)
    )
q = M[uid - 1]
predict = query(q, V)

# Movie column indices ordered from highest to lowest predicted score.
idx = predict.argsort()[::-1]

# Number of movies to display; could be taken from user input instead.
nm = 10
# Keep only movies the user has not rated. Slicing bounds the result, so a
# user with fewer than nm unrated movies no longer causes an IndexError.
unrated = [j for j in idx if q[j] == 0][:nm]
# j is a 0-based column index: the movie id is j + 1 and its title is
# movienames[j].
mr = [[int(j) + 1, movienames[j]] for j in unrated]

print("\n\nRecommended movies for UserID", uid, '\n')
df = pd.DataFrame(mr, columns=['MovieID', 'MovieName'])
print(df)