import time

import numpy as np
import scipy.sparse as sparse
from scipy.sparse.linalg import spsolve
def load_matrix(filename, num_users, num_items):
    """Load tab-separated (user, item, count) triples into a scaled sparse matrix.

    Every line of ``filename`` must contain exactly three tab-separated
    fields: an integer user index, an integer item index and a numeric count.
    Rows whose user or item index falls outside ``[0, num_users)`` /
    ``[0, num_items)`` are skipped. Non-zero counts are written into a dense
    ``num_users x num_items`` array, which is then multiplied by
    ``alpha = (number of cells never assigned a non-zero count) / (sum of
    loaded counts)`` and converted to CSR format.

    Progress and timing messages are printed to standard output.

    Args:
        filename: Path of the text file to read.
        num_users: Number of rows of the resulting matrix.
        num_items: Number of columns of the resulting matrix.

    Returns:
        A ``scipy.sparse.csr_matrix`` of shape ``(num_users, num_items)``
        holding the alpha-scaled counts. When no non-zero count was loaded
        (for example an empty file) the matrix is all zeros.

    Raises:
        ValueError: If a line does not split into three tab-separated fields
            or a field cannot be converted to ``int`` / ``float``.
    """
    t0 = time.time()
    counts = np.zeros((num_users, num_items))
    total = 0.0
    num_zeros = num_users * num_items

    # enumerate() over a file object yields (line_number, line_text) pairs.
    # The ``with`` block guarantees the handle is closed even if parsing fails.
    with open(filename, 'r') as handle:
        for i, line in enumerate(handle):
            user, item, count = line.strip().split('\t')
            user = int(user)
            item = int(item)
            count = float(count)
            # Reject negative ids as well: NumPy would silently wrap them
            # around to the end of the array.
            if not 0 <= user < num_users:
                continue
            if not 0 <= item < num_items:
                continue
            if count != 0:
                # NOTE: a repeated (user, item) pair overwrites the cell but
                # is still added to ``total`` and subtracted from
                # ``num_zeros`` each time it appears.
                counts[user, item] = count
                total += count
                num_zeros -= 1
            if i % 100000 == 0:
                print('loaded %i counts...' % i)

    # Guard the division: with nothing loaded the matrix is all zeros anyway,
    # so any scaling factor yields the same (empty) result.
    alpha = num_zeros / total if total else 0.0
    print('alpha %.2f' % alpha)
    counts *= alpha
    counts = sparse.csr_matrix(counts)
    t1 = time.time()
    print('Finished loading matrix in %f seconds' % (t1 - t0))
    return counts
class ImplicitMF(object):
    """Matrix factorization for implicit-feedback counts via alternating least squares.

    The model keeps one latent vector per user and per item. Training
    alternates between solving for all user vectors with the item vectors
    held fixed, and solving for all item vectors with the user vectors held
    fixed. Progress messages are printed to standard output.
    """

    def __init__(self, counts, num_factors=40, num_iterations=30,
                 reg_param=0.8):
        """Store the training data and hyper-parameters.

        Args:
            counts: 2-D matrix of (already scaled) counts with users as rows
                and items as columns; anything accepted by
                ``scipy.sparse.csr_matrix`` that exposes ``.shape`` works.
            num_factors: Length of each latent vector.
            num_iterations: Number of alternating sweeps run by
                ``train_model``.
            reg_param: Coefficient of the identity matrix added to each
                linear system.
        """
        self.counts = counts
        self.num_users = counts.shape[0]
        self.num_items = counts.shape[1]
        self.num_factors = num_factors
        self.num_iterations = num_iterations
        self.reg_param = reg_param

    def train_model(self):
        """Randomly initialise the factors and run the alternating sweeps.

        Sets ``self.user_vectors`` (``num_users x num_factors``) and
        ``self.item_vectors`` (``num_items x num_factors``).
        """
        self.user_vectors = np.random.normal(size=(self.num_users,
                                                   self.num_factors))
        self.item_vectors = np.random.normal(size=(self.num_items,
                                                   self.num_factors))

        # ``range`` works on both Python 2 and 3; the iteration count is small.
        for i in range(self.num_iterations):
            t0 = time.time()
            print('Solving for user vectors...')
            self.user_vectors = self.iteration(
                True, sparse.csr_matrix(self.item_vectors))
            print('Solving for item vectors...')
            self.item_vectors = self.iteration(
                False, sparse.csr_matrix(self.user_vectors))
            t1 = time.time()
            print('iteration %i finished in %f seconds' % (i + 1, t1 - t0))

    def iteration(self, user, fixed_vecs):
        """Solve one half-sweep: all user vectors or all item vectors.

        Args:
            user: Truthy to solve for user vectors (``fixed_vecs`` are then
                the item vectors); falsy to solve for item vectors.
            fixed_vecs: The vectors held fixed, one per row, as a sparse or
                dense 2-D matrix with ``num_factors`` columns.

        Returns:
            A dense ``numpy.ndarray`` with one solved vector per row
            (``num_users`` rows when ``user`` is truthy, else ``num_items``).
        """
        fixed_vecs = sparse.csr_matrix(fixed_vecs)
        # Orient the counts so that row i always belongs to the i-th unknown;
        # transposing once avoids a column slice of a CSR matrix per item.
        oriented = sparse.csr_matrix(self.counts if user else self.counts.T)

        num_solve = self.num_users if user else self.num_items
        num_fixed = fixed_vecs.shape[0]
        YTY = fixed_vecs.T.dot(fixed_vecs)
        eye = sparse.eye(num_fixed)
        lambda_eye = self.reg_param * sparse.eye(self.num_factors)
        solve_vecs = np.zeros((num_solve, self.num_factors))

        t = time.time()
        for i in range(num_solve):
            counts_i = oriented[i].toarray().ravel()
            # The original comments describe the confidence as
            # c_ui = 1 + alpha * r_ui. Instead of forming Y'CuY directly the
            # code uses Y'CuY = Y'Y + Y'(Cu - I)Y: Cu is diagonal, so Cu - I
            # is simply the diagonal matrix of the scaled counts.
            CuI = sparse.diags(counts_i, 0)
            # Binary preference: 1 where a count was observed, 0 elsewhere.
            pu = (counts_i != 0).astype(np.float64)
            YTCuIY = fixed_vecs.T.dot(CuI).dot(fixed_vecs)
            # Dense 1-D right-hand side, so spsolve returns a 1-D solution.
            YTCupu = fixed_vecs.T.dot(CuI + eye).dot(pu)
            xu = spsolve(YTY + YTCuIY + lambda_eye, YTCupu)
            solve_vecs[i] = xu
            if i % 1000 == 0:
                print('Solved %i vecs in %d seconds' % (i, time.time() - t))
                t = time.time()

        return solve_vecs
# Alternating Least Squares (ALS) for Implicit Feedback Datasets: mathematical derivation and Python implementation
# Tags: sequence, continue, csdn, operation, operator, utilize, imp, its, i++