From d999f868b62f7e86f66d7803fd9433415ea196c9 Mon Sep 17 00:00:00 2001 From: ivagliano Date: Fri, 7 Jun 2019 14:14:47 +0200 Subject: [PATCH 1/7] Using average item embeddings instead of user embeddings --- irgan/cf_gan.py | 14 +++++++------- irgan/dis_model.py | 9 +++++++-- irgan/gen_model.py | 23 +++++++++++++++++++---- 3 files changed, 33 insertions(+), 13 deletions(-) diff --git a/irgan/cf_gan.py b/irgan/cf_gan.py index 943a32f..5610723 100644 --- a/irgan/cf_gan.py +++ b/irgan/cf_gan.py @@ -91,9 +91,9 @@ def generate_for_d(self, filename, condition_data=None): pos = self.user_pos_train[u] if self.conditions: c_batch = [c[u, :] for c in condition_data] - rating = self.generator.all_rating(u, c_batch) + rating = self.generator.all_rating(self.user_pos_train[u], c_batch) else: - rating = self.generator.all_rating(u) + rating = self.generator.all_rating(self.user_pos_train[u]) rating = rating.detach_().cpu().numpy() rating = np.array(rating[0]) / 0.2 # Temperature exp_rating = np.exp(rating) @@ -155,9 +155,9 @@ def fit(self, X, y=None, condition_data=None): for u in set(input_user): raw_c_batch.append(c[u]) c_batch.append(np.asarray(raw_c_batch).repeat(list(user_cnt.values()), axis=0)) - D_loss = self.discriminator(input_user, input_item, input_label, c_batch) + D_loss = self.discriminator(self.user_pos_train[input_user], input_item, input_label, c_batch) else: - D_loss = self.discriminator(input_user, input_item, input_label) + D_loss = self.discriminator(self.user_pos_train[input_user], input_item, input_label) self.discriminator.step(D_loss) index += self.batch_size @@ -172,9 +172,9 @@ def fit(self, X, y=None, condition_data=None): if use_condition: c_batch = [c[u] for c in condition_data] - rating = self.generator.all_logits(u, c_batch) + rating = self.generator.all_logits(self.user_pos_train[u], c_batch) else: - rating = self.generator.all_logits(u) + rating = self.generator.all_logits(self.user_pos_train[u]) rating = rating.detach_().cpu().numpy() exp_rating = 
np.exp(rating) prob = exp_rating / np.sum(exp_rating) # prob is generator distribution p_\theta @@ -229,7 +229,7 @@ def predict(self, X, condition_data=None): c_batch = [c[index:index + batch_size] for c in condition_data] index += batch_size - user_batch_rating = self.generator.all_rating(user_batch, c_batch, impose_dim=1) + user_batch_rating = self.generator.all_rating(X[user_batch], c_batch, impose_dim=1) user_batch_rating = user_batch_rating.detach_().cpu().numpy() for user_batch_rating_uid in zip(user_batch_rating, user_batch): diff --git a/irgan/dis_model.py b/irgan/dis_model.py index d267b09..f12c8bb 100644 --- a/irgan/dis_model.py +++ b/irgan/dis_model.py @@ -45,8 +45,13 @@ def __init__(self, itemNum, userNum, emb_dim, lamda, param=None, initdelta=0.05, self.D_item_bias = self.D_item_bias.cuda() self.l2l = self.l2l.cuda() - def pre_logits(self, input_user, input_item, condition_data=None): - u_embedding = self.D_user_embeddings[input_user, :] + def pre_logits(self, user_pos, input_item, condition_data=None): + # u_embedding = self.D_user_embeddings[input_user, :] + u_embedding = torch.zeros(self.emb_dim, dtype=torch.float32) + for u in user_pos: + for i in u: + u_embedding[u].add(self.G_item_embeddings[i]) + u_embedding[u] /= len(user_pos) if self.conditions: # In generator need to use dimension 0 in discriminator 1 so by default 0 (given in condition creation) # and here we use one through the dim parameter diff --git a/irgan/gen_model.py b/irgan/gen_model.py index f568ca4..b95e9d9 100644 --- a/irgan/gen_model.py +++ b/irgan/gen_model.py @@ -43,8 +43,19 @@ def __init__(self, itemNum, userNum, emb_dim, lamda, param=None, initdelta=0.05, self.G_item_embeddings = self.G_item_embeddings.cuda() self.G_item_bias = self.G_item_bias.cuda() - def all_rating(self, user_index, condition_data=None, impose_dim=None): - u_embedding = self.G_user_embeddings[user_index, :] + def all_rating(self, user_pos, condition_data=None, impose_dim=None): + # u_embedding = 
self.G_user_embeddings[user_index, :] + if impose_dim or impose_dim == 0: + u_embedding = torch.zeros(self.emb_dim, dtype=torch.float32) + for i in user_pos: + u_embedding.add(self.G_item_embeddings[i]) + u_embedding /= len(user_pos) + else: + u_embedding = torch.zeros([len(user_pos), self.emb_dim], dtype=torch.float32) + for u in user_pos: + for i in u: + u_embedding[u].add(self.G_item_embeddings[i]) + u_embedding[u] /= len(user_pos) item_embeddings = self.G_item_embeddings if self.conditions: @@ -54,8 +65,12 @@ def all_rating(self, user_index, condition_data=None, impose_dim=None): all_rating = torch.mm(u_embedding.view(-1, 5), item_embeddings.t()) + self.G_item_bias return all_rating - def all_logits(self, user_index, condition_data=None): - u_embedding = self.G_user_embeddings[user_index] + def all_logits(self, user_pos, condition_data=None): + # u_embedding = self.G_user_embeddings[user_index] + u_embedding = torch.zeros(self.emb_dim, dtype=torch.float32) + for i in user_pos: + u_embedding.add(self.G_item_embeddings[i]) + u_embedding /= len(user_pos) if self.conditions: u_embedding = self.conditions.encode_impose(u_embedding, condition_data) From b9a0373dcc61485e2288f6bf9e0cff2d709d57e3 Mon Sep 17 00:00:00 2001 From: ivagliano Date: Fri, 7 Jun 2019 14:29:30 +0200 Subject: [PATCH 2/7] Minor fix --- irgan/gen_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/irgan/gen_model.py b/irgan/gen_model.py index b95e9d9..4f6c015 100644 --- a/irgan/gen_model.py +++ b/irgan/gen_model.py @@ -45,7 +45,7 @@ def __init__(self, itemNum, userNum, emb_dim, lamda, param=None, initdelta=0.05, def all_rating(self, user_pos, condition_data=None, impose_dim=None): # u_embedding = self.G_user_embeddings[user_index, :] - if impose_dim or impose_dim == 0: + if impose_dim == None or impose_dim == 0: u_embedding = torch.zeros(self.emb_dim, dtype=torch.float32) for i in user_pos: u_embedding.add(self.G_item_embeddings[i]) From 
4759860383d59653f97303706e24ea2131728293 Mon Sep 17 00:00:00 2001 From: ivagliano Date: Fri, 7 Jun 2019 14:38:34 +0200 Subject: [PATCH 3/7] using cuda for u_embedding if available --- irgan/dis_model.py | 2 ++ irgan/gen_model.py | 6 ++++++ 2 files changed, 8 insertions(+) diff --git a/irgan/dis_model.py b/irgan/dis_model.py index f12c8bb..a278e0c 100644 --- a/irgan/dis_model.py +++ b/irgan/dis_model.py @@ -48,6 +48,8 @@ def __init__(self, itemNum, userNum, emb_dim, lamda, param=None, initdelta=0.05, def pre_logits(self, user_pos, input_item, condition_data=None): # u_embedding = self.D_user_embeddings[input_user, :] u_embedding = torch.zeros(self.emb_dim, dtype=torch.float32) + if torch.cuda.is_available(): + u_embedding = u_embedding.cuda() for u in user_pos: for i in u: u_embedding[u].add(self.G_item_embeddings[i]) diff --git a/irgan/gen_model.py b/irgan/gen_model.py index 4f6c015..1f28885 100644 --- a/irgan/gen_model.py +++ b/irgan/gen_model.py @@ -47,11 +47,15 @@ def all_rating(self, user_pos, condition_data=None, impose_dim=None): # u_embedding = self.G_user_embeddings[user_index, :] if impose_dim == None or impose_dim == 0: u_embedding = torch.zeros(self.emb_dim, dtype=torch.float32) + if torch.cuda.is_available(): + u_embedding = u_embedding.cuda() for i in user_pos: u_embedding.add(self.G_item_embeddings[i]) u_embedding /= len(user_pos) else: u_embedding = torch.zeros([len(user_pos), self.emb_dim], dtype=torch.float32) + if torch.cuda.is_available(): + u_embedding = u_embedding.cuda() for u in user_pos: for i in u: u_embedding[u].add(self.G_item_embeddings[i]) @@ -68,6 +72,8 @@ def all_rating(self, user_pos, condition_data=None, impose_dim=None): def all_logits(self, user_pos, condition_data=None): # u_embedding = self.G_user_embeddings[user_index] u_embedding = torch.zeros(self.emb_dim, dtype=torch.float32) + if torch.cuda.is_available(): + u_embedding = u_embedding.cuda() for i in user_pos: u_embedding.add(self.G_item_embeddings[i]) u_embedding /= 
len(user_pos) From 50e544c57531ecf31afa9fde03a07e62e6a2ac17 Mon Sep 17 00:00:00 2001 From: ivagliano Date: Fri, 7 Jun 2019 14:49:45 +0200 Subject: [PATCH 4/7] Converting input_users in list of items per user --- irgan/cf_gan.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/irgan/cf_gan.py b/irgan/cf_gan.py index 5610723..68e71da 100644 --- a/irgan/cf_gan.py +++ b/irgan/cf_gan.py @@ -155,9 +155,11 @@ def fit(self, X, y=None, condition_data=None): for u in set(input_user): raw_c_batch.append(c[u]) c_batch.append(np.asarray(raw_c_batch).repeat(list(user_cnt.values()), axis=0)) - D_loss = self.discriminator(self.user_pos_train[input_user], input_item, input_label, c_batch) + D_loss = self.discriminator([self.user_pos_train[u] for u in input_user], + input_item, input_label, c_batch) else: - D_loss = self.discriminator(self.user_pos_train[input_user], input_item, input_label) + D_loss = self.discriminator([self.user_pos_train[u] for u in input_user], + input_item, input_label) self.discriminator.step(D_loss) index += self.batch_size From 24e01d812931e222d4b959ad0326f4245397de4e Mon Sep 17 00:00:00 2001 From: ivagliano Date: Fri, 7 Jun 2019 15:00:02 +0200 Subject: [PATCH 5/7] Minor fixes --- irgan/dis_model.py | 4 ++-- irgan/gen_model.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/irgan/dis_model.py b/irgan/dis_model.py index a278e0c..9508db3 100644 --- a/irgan/dis_model.py +++ b/irgan/dis_model.py @@ -52,8 +52,8 @@ def pre_logits(self, user_pos, input_item, condition_data=None): u_embedding = u_embedding.cuda() for u in user_pos: for i in u: - u_embedding[u].add(self.G_item_embeddings[i]) - u_embedding[u] /= len(user_pos) + u_embedding[u].add(self.D_item_embeddings[i]) + u_embedding[u] /= len(u) if self.conditions: # In generator need to use dimension 0 in discriminator 1 so by default 0 (given in condition creation) # and here we use one through the dim parameter diff --git a/irgan/gen_model.py b/irgan/gen_model.py 
index 1f28885..93d3cd1 100644 --- a/irgan/gen_model.py +++ b/irgan/gen_model.py @@ -59,7 +59,7 @@ def all_rating(self, user_pos, condition_data=None, impose_dim=None): for u in user_pos: for i in u: u_embedding[u].add(self.G_item_embeddings[i]) - u_embedding[u] /= len(user_pos) + u_embedding[u] /= len(u) item_embeddings = self.G_item_embeddings if self.conditions: From c257b4b90eac1e6904b84cfe73647d8659723128 Mon Sep 17 00:00:00 2001 From: ivagliano Date: Fri, 7 Jun 2019 15:51:28 +0200 Subject: [PATCH 6/7] Adapt also the reward of discriminator to use avg item embeddings instead of user embeddings --- irgan/cf_gan.py | 2 +- irgan/dis_model.py | 12 +++++++++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/irgan/cf_gan.py b/irgan/cf_gan.py index 68e71da..b80f950 100644 --- a/irgan/cf_gan.py +++ b/irgan/cf_gan.py @@ -192,7 +192,7 @@ def fit(self, X, y=None, condition_data=None): ########################################################################### # Get reward and adapt it with importance sampling ########################################################################### - reward = self.discriminator.get_reward(u, sample) + reward = self.discriminator.get_reward(self.user_pos_train[u], sample) reward = reward.detach_().cpu().numpy() * prob[sample] / pn[sample] ########################################################################### # Update G diff --git a/irgan/dis_model.py b/irgan/dis_model.py index 9508db3..68d6958 100644 --- a/irgan/dis_model.py +++ b/irgan/dis_model.py @@ -47,7 +47,7 @@ def __init__(self, itemNum, userNum, emb_dim, lamda, param=None, initdelta=0.05, def pre_logits(self, user_pos, input_item, condition_data=None): # u_embedding = self.D_user_embeddings[input_user, :] - u_embedding = torch.zeros(self.emb_dim, dtype=torch.float32) + u_embedding = torch.zeros([len(user_pos), self.emb_dim], dtype=torch.float32) if torch.cuda.is_available(): u_embedding = u_embedding.cuda() for u in user_pos: @@ -72,8 +72,14 @@ def 
forward(self, input_user, input_item, pred_data_label, condition_data=None): + self.lamda * (self.l2l(self.D_user_embeddings) + self.l2l(self.D_item_embeddings) + self.l2l(self.D_item_bias)) return loss - def get_reward(self, user_index, sample): - u_embedding = self.D_user_embeddings[user_index, :] + def get_reward(self, user_pos, sample): + # u_embedding = self.D_user_embeddings[user_index, :] + u_embedding = torch.zeros(self.emb_dim, dtype=torch.float32) + if torch.cuda.is_available(): + u_embedding = u_embedding.cuda() + for i in user_pos: + u_embedding.add(self.D_item_embeddings[i]) + u_embedding /= len(user_pos) item_embeddings = self.D_item_embeddings[sample, :] D_item_bias = self.D_item_bias[sample] From 4600e3efe5987f137b1434002f13cd68c9147781 Mon Sep 17 00:00:00 2001 From: ivagliano Date: Tue, 11 Jun 2019 16:47:04 +0200 Subject: [PATCH 7/7] Fix errors on using average item embeddings instead of user embeddings in IRGAN --- irgan/cf_gan.py | 11 +++++++---- irgan/dis_model.py | 6 +++--- irgan/gen_model.py | 6 +++--- 3 files changed, 13 insertions(+), 10 deletions(-) diff --git a/irgan/cf_gan.py b/irgan/cf_gan.py index b80f950..d0bc121 100644 --- a/irgan/cf_gan.py +++ b/irgan/cf_gan.py @@ -205,9 +205,9 @@ def fit(self, X, y=None, condition_data=None): reward = torch.tensor(reward) if use_condition: c_batch = [c[u] for c in condition_data] - G_loss = self.generator(u, sample, reward, c_batch) + G_loss = self.generator(self.user_pos_train[u], sample, reward, c_batch) else: - G_loss = self.generator(u, sample, reward) + G_loss = self.generator(self.user_pos_train[u], sample, reward) self.generator.step(G_loss) if self.verbose: @@ -229,9 +229,11 @@ def predict(self, X, condition_data=None): user_batch = test_users[index:index + batch_size] if use_condition: c_batch = [c[index:index + batch_size] for c in condition_data] - index += batch_size + user_batch_rating = self.generator.all_rating([X[u] for u in user_batch], c_batch, impose_dim=1) + else: + 
user_batch_rating = self.generator.all_rating([X[u] for u in user_batch], impose_dim=1) - user_batch_rating = self.generator.all_rating(X[user_batch], c_batch, impose_dim=1) + index += batch_size user_batch_rating = user_batch_rating.detach_().cpu().numpy() for user_batch_rating_uid in zip(user_batch_rating, user_batch): @@ -346,6 +348,7 @@ def main(): user_num = evaluate.train_set.size()[0] + evaluate.test_set.size()[0] item_num = evaluate.train_set.size()[1] models = [IRGANRecommender(user_num, item_num, g_epochs=1, d_epochs=1, n_epochs=1, conditions=CONDITIONS)] + # models = [IRGANRecommender(user_num, item_num, g_epochs=1, d_epochs=1, n_epochs=1, conditions=None)] evaluate(models) diff --git a/irgan/dis_model.py b/irgan/dis_model.py index 68d6958..ea71581 100644 --- a/irgan/dis_model.py +++ b/irgan/dis_model.py @@ -50,10 +50,10 @@ def pre_logits(self, user_pos, input_item, condition_data=None): u_embedding = torch.zeros([len(user_pos), self.emb_dim], dtype=torch.float32) if torch.cuda.is_available(): u_embedding = u_embedding.cuda() - for u in user_pos: + for idx,u in enumerate(user_pos): for i in u: - u_embedding[u].add(self.D_item_embeddings[i]) - u_embedding[u] /= len(u) + u_embedding[idx].add(self.D_item_embeddings[i]) + u_embedding[idx] /= len(u) if self.conditions: # In generator need to use dimension 0 in discriminator 1 so by default 0 (given in condition creation) # and here we use one through the dim parameter diff --git a/irgan/gen_model.py b/irgan/gen_model.py index 93d3cd1..1cea4bb 100644 --- a/irgan/gen_model.py +++ b/irgan/gen_model.py @@ -56,10 +56,10 @@ def all_rating(self, user_pos, condition_data=None, impose_dim=None): u_embedding = torch.zeros([len(user_pos), self.emb_dim], dtype=torch.float32) if torch.cuda.is_available(): u_embedding = u_embedding.cuda() - for u in user_pos: + for idx,u in enumerate(user_pos): for i in u: - u_embedding[u].add(self.G_item_embeddings[i]) - u_embedding[u] /= len(u) + 
u_embedding[idx].add(self.G_item_embeddings[i]) + u_embedding[idx] /= len(u) item_embeddings = self.G_item_embeddings if self.conditions: