Commit

feat: implement validate and evaluate methods in Trainer and add weight decay config #12
twndus committed May 13, 2024
1 parent 2ad1e11 commit 73f8ac6
Showing 5 changed files with 172 additions and 11 deletions.
2 changes: 1 addition & 1 deletion loss.py
@@ -24,4 +24,4 @@ def __init__(self):
 
     def forward(self, positive_preds, negative_preds):
         difference = positive_preds - negative_preds
-        return -torch.log(self.sigmoid(difference)).mean()
+        return torch.mean(-torch.log(self.sigmoid(difference)))
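
The two return statements are equivalent: both average -log(sigmoid(pos - neg)) over the batch, i.e. the standard BPR objective. A minimal standalone sketch of the loss and its usage, assuming the class above is an nn.Module named BPRLoss whose __init__ sets self.sigmoid = nn.Sigmoid() (the constructor body is not shown in this hunk):

import torch
import torch.nn as nn

class BPRLoss(nn.Module):
    # standalone copy of the module above; the __init__ body is assumed
    def __init__(self):
        super().__init__()
        self.sigmoid = nn.Sigmoid()

    def forward(self, positive_preds, negative_preds):
        difference = positive_preds - negative_preds
        return torch.mean(-torch.log(self.sigmoid(difference)))

# usage: the loss shrinks as positive scores exceed negative scores
pos_scores = torch.tensor([2.0, 1.5, 0.3])
neg_scores = torch.tensor([0.1, -0.2, 0.0])
loss = BPRLoss()(pos_scores, neg_scores)  # scalar tensor
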
8 changes: 6 additions & 2 deletions models/mf.py
@@ -3,17 +3,21 @@
 
 from models.base_model import BaseModel
 
+from loguru import logger
 class MatrixFactorization(BaseModel):
 
     def __init__(self, cfg, num_users, num_items):
         super().__init__()
         self.user_embedding = nn.Embedding(num_users, cfg.embed_size, dtype=torch.float32)
         self.item_embedding = nn.Embedding(num_items, cfg.embed_size, dtype=torch.float32)
         self._init_weights()
 
     def _init_weights(self):
         for child in self.children():
             if isinstance(child, nn.Embedding):
-                nn.init.normal_(child.weights)
+                nn.init.normal_(child.weight)
 
     def forward(self, user_id, item_id):
-        return torch.matmul(self.user_embedding(user_id), self.item_embedding(item_id).T)
+        user_emb = self.user_embedding(user_id)
+        item_emb = self.item_embedding(item_id)
+        return torch.sum(user_emb * item_emb, dim=1)
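
The new forward scores each (user, item) pair with a dot product of its embeddings, returning one score per row of the batch instead of the full user-by-item matrix that torch.matmul produced. A rough usage sketch, assuming only that cfg exposes embed_size (the real cfg comes from OmegaConf elsewhere in the repo):

import torch
from types import SimpleNamespace

from models.mf import MatrixFactorization

cfg = SimpleNamespace(embed_size=16)  # assumed minimal stand-in for the OmegaConf config
model = MatrixFactorization(cfg, num_users=100, num_items=50)

user_ids = torch.tensor([0, 1, 2])    # batch of users
item_ids = torch.tensor([10, 3, 7])   # one candidate item per user
scores = model(user_ids, item_ids)    # shape (3,): one dot-product score per (user, item) pair
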
11 changes: 6 additions & 5 deletions train.py
@@ -27,24 +27,24 @@ def main(cfg: OmegaConf):
         raise ValueError()
 
     df = data_pipeline.preprocess()
-    train_data, valid_data, test_data = data_pipeline.split(df)
-    logger.info(train_data)
 
     if cfg.model_name in ('CDAE', ):
+        train_data, valid_data, test_data = data_pipeline.split(df)
         train_dataset = CDAEDataset(train_data, 'train', neg_times=cfg.neg_times)
         valid_dataset = CDAEDataset(valid_data, 'valid', neg_times=cfg.neg_times)
         test_dataset = CDAEDataset(test_data, 'test')
     elif cfg.model_name == 'MF':
+        train_data, valid_data, valid_eval_data, test_eval_data = data_pipeline.split(df)
         train_dataset = MFDataset(train_data, 'train', num_items=data_pipeline.num_items)
         valid_dataset = MFDataset(valid_data, 'valid', num_items=data_pipeline.num_items)
-        test_dataset = MFDataset(test_data, 'test')
     else:
         raise ValueError()
 
     # negative sampling using pos_samples still needs to be done
     train_dataloader = DataLoader(train_dataset, batch_size=cfg.batch_size, shuffle=cfg.shuffle)
     valid_dataloader = DataLoader(valid_dataset, batch_size=cfg.batch_size, shuffle=cfg.shuffle)
-    test_dataloader = DataLoader(test_dataset, batch_size=cfg.batch_size)
+    if cfg.model_name != 'MF':
+        test_dataloader = DataLoader(test_dataset, batch_size=cfg.batch_size)
 
     if cfg.model_name in ('CDAE', ):
         trainer = CDAETrainer(cfg, len(df.columns)-1, len(train_dataset))
@@ -53,7 +53,8 @@ def main(cfg: OmegaConf):
         trainer.evaluate(test_dataloader)
     elif cfg.model_name in ('MF', ):
         trainer = MFTrainer(cfg, data_pipeline.num_items, data_pipeline.num_users)
-        trainer.run(train_dataloader, valid_dataloader)
+        trainer.run(train_dataloader, valid_dataloader, valid_eval_data)
+        trainer.evaluate(test_eval_data, 'test')
 
 if __name__ == '__main__':
     main()
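
For MF, trainer.run and trainer.evaluate now receive valid_eval_data and test_eval_data instead of dataloaders. MFTrainer.evaluate below iterates eval_data.iterrows() and reads row['pos_items'] and row['mask_items'], so the expected input is roughly a user-indexed DataFrame; the exact frame returned by data_pipeline.split for MF is not shown in this commit, so the example below is only an assumed illustration of that shape:

import pandas as pd

# hypothetical eval frame in the shape MFTrainer.evaluate consumes:
# index = user_id, pos_items = held-out positives, mask_items = items already seen in training
valid_eval_data = pd.DataFrame(
    {
        'pos_items': [[3, 7], [1], [5, 9]],
        'mask_items': [[0, 2], [4, 6], [8]],
    },
    index=[0, 1, 2],
)
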
6 changes: 3 additions & 3 deletions trainers/base_trainer.py
@@ -32,11 +32,11 @@ def _model(self, model_name: str) -> Module:
             logger.error(f"Not implemented model: {model_name}")
             raise NotImplementedError(f"Not implemented model: {model_name}")
 
-    def _optimizer(self, optimizer_name: str, model: Module, learning_rate: float) -> Optimizer:
+    def _optimizer(self, optimizer_name: str, model: Module, learning_rate: float, weight_decay: float=0) -> Optimizer:
         if optimizer_name.lower() == 'adam':
-            return Adam(model.parameters(), lr=learning_rate)
+            return Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
         elif optimizer_name.lower() == 'adamw':
-            return AdamW(model.parameters(), lr=learning_rate)
+            return AdamW(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
         else:
             logger.error(f"Optimizer Not Exists: {optimizer_name}")
             raise NotImplementedError(f"Optimizer Not Exists: {optimizer_name}")
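
With the extra parameter, an L2 penalty can flow from the config into the optimizer; MFTrainer below calls self._optimizer(self.cfg.optimizer, self.model, self.cfg.lr, self.cfg.weight_decay). A sketch of what the factory reduces to for the 'adam' case, with a stand-in model and made-up hyperparameter values:

import torch.nn as nn
from torch.optim import Adam

model = nn.Linear(8, 1)  # stand-in model for illustration
optimizer = Adam(model.parameters(), lr=1e-3, weight_decay=1e-5)  # weight_decay now configurable, defaults to 0
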
156 changes: 156 additions & 0 deletions trainers/mf_trainer.py
@@ -0,0 +1,156 @@
import numpy as np
import pandas as pd
from tqdm import tqdm

import torch
import torch.nn as nn
from torch import Tensor
from torch.utils.data import DataLoader
from torch.optim import Optimizer

from loguru import logger
from omegaconf.dictconfig import DictConfig

from models.mf import MatrixFactorization
from .base_trainer import BaseTrainer
from metric import *
from loss import BPRLoss

class MFTrainer(BaseTrainer):
def __init__(self, cfg: DictConfig, num_items: int, num_users: int) -> None:
super().__init__(cfg)
self.num_items = num_items
self.num_users = num_users
self.model = MatrixFactorization(self.cfg, num_users, num_items).to(self.device)
self.optimizer: Optimizer = self._optimizer(self.cfg.optimizer, self.model, self.cfg.lr, self.cfg.weight_decay)
self.loss = self._loss()

def _loss(self):
return BPRLoss()

def run(self, train_dataloader: DataLoader, valid_dataloader: DataLoader, valid_eval_data: pd.DataFrame):
logger.info(f"[Trainer] run...")

best_valid_loss: float = 1e+6
best_valid_precision_at_k: float = .0
best_valid_recall_at_k: float = .0
best_valid_map_at_k: float = .0
best_valid_ndcg_at_k: float = .0
best_epoch: int = 0
endurance: int = 0

# train
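        # each epoch: BPR loss on the train/valid loaders, ranking metrics on the eval frame, early stop on valid loss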
for epoch in range(self.cfg.epochs):
train_loss: float = self.train(train_dataloader)
valid_loss: float = self.validate(valid_dataloader)
(valid_precision_at_k,
valid_recall_at_k,
valid_map_at_k,
valid_ndcg_at_k) = self.evaluate(valid_eval_data, 'valid')
logger.info(f'''\n[Trainer] epoch: {epoch} > train loss: {train_loss:.4f} /
valid loss: {valid_loss:.4f} /
precision@K : {valid_precision_at_k:.4f} /
Recall@K: {valid_recall_at_k:.4f} /
MAP@K: {valid_map_at_k:.4f} /
NDCG@K: {valid_ndcg_at_k:.4f}''')

# update model
if best_valid_loss > valid_loss:
logger.info(f"[Trainer] update best model...")
best_valid_loss = valid_loss
best_valid_precision_at_k = valid_precision_at_k
                best_valid_recall_at_k = valid_recall_at_k
best_valid_ndcg_at_k = valid_ndcg_at_k
best_valid_map_at_k = valid_map_at_k
best_epoch = epoch
endurance = 0

# TODO: add mlflow

torch.save(self.model.state_dict(), f'{self.cfg.model_dir}/best_model.pt')
else:
endurance += 1
if endurance > self.cfg.patience:
logger.info(f"[Trainer] ealry stopping...")
break


def train(self, train_dataloader: DataLoader) -> float:
self.model.train()
train_loss = 0
for data in tqdm(train_dataloader):
user_id, pos_item, neg_item = data['user_id'].to(self.device), data['pos_item'].to(self.device), \
data['neg_item'].to(self.device)
pos_pred = self.model(user_id, pos_item)
neg_pred = self.model(user_id, neg_item)

self.optimizer.zero_grad()
loss = self.loss(pos_pred, neg_pred)
loss.backward()
self.optimizer.step()

train_loss += loss.item()

return train_loss

    def validate(self, valid_dataloader: DataLoader) -> float:
self.model.eval()
valid_loss = 0
actual, predicted = [], []
for data in tqdm(valid_dataloader):
user_id, pos_item, neg_item = data['user_id'].to(self.device), data['pos_item'].to(self.device), \
data['neg_item'].to(self.device)
pos_pred = self.model(user_id, pos_item)
neg_pred = self.model(user_id, neg_item)

loss = self.loss(pos_pred, neg_pred)

valid_loss += loss.item()

return valid_loss

def evaluate(self, eval_data: pd.DataFrame, mode='valid') -> tuple:

self.model.eval()
actual, predicted = [], []
item_input = torch.tensor([item_id for item_id in range(self.num_items)]).to(self.device)
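        # score the full item catalog for each user, mask items seen in training, then take the top_n ranking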
for user_id, row in tqdm(eval_data.iterrows(), total=eval_data.shape[0]):
pred = self.model(torch.tensor([user_id,]*self.num_items).to(self.device), item_input)
batch_predicted = \
self._generate_top_k_recommendation(pred, row['mask_items'])
actual.append(row['pos_items'])
predicted.append(batch_predicted)

test_precision_at_k = precision_at_k(actual, predicted, self.cfg.top_n)
test_recall_at_k = recall_at_k(actual, predicted, self.cfg.top_n)
test_map_at_k = map_at_k(actual, predicted, self.cfg.top_n)
test_ndcg_at_k = ndcg_at_k(actual, predicted, self.cfg.top_n)

if mode == 'test':
logger.info(f'''\n[Trainer] Test >
precision@{self.cfg.top_n} : {test_precision_at_k:.4f} /
Recall@{self.cfg.top_n}: {test_recall_at_k:.4f} /
MAP@{self.cfg.top_n}: {test_map_at_k:.4f} /
NDCG@{self.cfg.top_n}: {test_ndcg_at_k:.4f}''')

return (test_precision_at_k,
test_recall_at_k,
test_map_at_k,
test_ndcg_at_k)

    def _generate_top_k_recommendation(self, pred: Tensor, mask_items) -> np.ndarray:

# mask to train items
pred = pred.cpu().detach().numpy()
pred[mask_items] = 0

# find the largest topK item indexes by user
        topn_index = np.argpartition(pred, -self.cfg.top_n)[-self.cfg.top_n:]
# take probs from predictions using above indexes
topn_prob = np.take_along_axis(pred, topn_index, axis=0)
# sort topK probs and find their indexes
sorted_indices = np.argsort(-topn_prob)
# apply sorted indexes to item indexes to get sorted topK item indexes by user
topn_index_sorted = np.take_along_axis(topn_index, sorted_indices, axis=0)

return topn_index_sorted
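
_generate_top_k_recommendation uses np.argpartition to pull out the top_n scores without fully sorting the catalog, then orders only that slice. A small standalone walkthrough of the same numpy steps, with made-up scores and top_n:

import numpy as np

top_n = 3
pred = np.array([0.1, 0.9, 0.4, 0.8, 0.2, 0.7])  # hypothetical scores for 6 items
mask_items = [1]                                  # item 1 was seen during training

pred[mask_items] = 0                                        # mask out training items, as in the trainer
topn_index = np.argpartition(pred, -top_n)[-top_n:]         # indices of the 3 largest scores, unordered
topn_prob = np.take_along_axis(pred, topn_index, axis=0)    # their scores
sorted_indices = np.argsort(-topn_prob)                     # order that slice, descending
topn_index_sorted = np.take_along_axis(topn_index, sorted_indices, axis=0)
print(topn_index_sorted)                                    # [3 5 2]
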
