refactor: validate model and training process #16
twndus committed May 22, 2024
1 parent 8412151 commit d5da30f
Showing 5 changed files with 34 additions and 40 deletions.
8 changes: 4 additions & 4 deletions configs/train_config.yaml
@@ -8,16 +8,16 @@ log_dir: logs/
sweep: False

# wandb config
-wandb: False # True/ False
+wandb: True # True/ False
project: YelpRecommendation
notes: "..."
tags: [test, yelp, cdae]

# train config
device: cuda # cpu
-epochs: 10
+epochs: 100
batch_size: 32
-lr: 0.001
+lr: 0.0001
optimizer: adam # adamw
loss_name: bpr # bpr # pointwise # bce
patience: 5
@@ -38,5 +38,5 @@ weight_decay: 0 #1e-5

model_name: DCN
hidden_dims: [1024, 1024]
-cross_orders: 6
+cross_orders: 1 #6
embed_size: 64
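
Net effect of the config changes: wandb logging is switched on, training runs 10× longer at a 10× smaller learning rate, and the cross network is cut from six orders to one. A minimal sketch of how these keys are consumed, assuming only the @hydra.main entry point that train.py already uses:

import hydra
from omegaconf import DictConfig

@hydra.main(version_base=None, config_path="configs", config_name="train_config")
def main(cfg: DictConfig) -> None:
    # Hydra parses configs/train_config.yaml into a dot-accessible object.
    print(cfg.epochs, cfg.lr, cfg.cross_orders)  # -> 100 0.0001 1

if __name__ == "__main__":
    main()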
10 changes: 6 additions & 4 deletions data/datasets/mf_data_pipeline.py
@@ -24,11 +24,13 @@ def split(self, df):

for _, user_df in df.groupby('user_id'):
if self.cfg.loss_name == 'pointwise':
-user_train_df, user_test_df = train_test_split(user_df, test_size=.2, stratify=user_df['rating'])
-user_train_df, user_valid_df = train_test_split(user_train_df, test_size=.25, stratify=user_train_df['rating'])
+user_train_df, user_test_df = train_test_split(
+    user_df, test_size=.2, random_state=self.cfg.seed, stratify=user_df['rating'])
+user_train_df, user_valid_df = train_test_split(
+    user_train_df, test_size=.25, random_state=self.cfg.seed, stratify=user_train_df['rating'])
else:
-user_train_df, user_test_df = train_test_split(user_df, test_size=.2)
-user_train_df, user_valid_df = train_test_split(user_train_df, test_size=.25)
+user_train_df, user_test_df = train_test_split(user_df, test_size=.2, random_state=self.cfg.seed)
+user_train_df, user_valid_df = train_test_split(user_train_df, test_size=.25, random_state=self.cfg.seed)
train_df.append(user_train_df)
valid_df.append(user_valid_df)
test_df.append(user_test_df)
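
Every train_test_split call now receives random_state=self.cfg.seed; the .2 then .25 proportions give each user a 60/20/20 train/valid/test split. A toy, repo-independent sketch of what the seed buys, namely identical splits on every run:

import pandas as pd
from sklearn.model_selection import train_test_split

user_df = pd.DataFrame({'item_id': range(10)})
seed = 42  # stands in for self.cfg.seed

a, _ = train_test_split(user_df, test_size=.2, random_state=seed)
b, _ = train_test_split(user_df, test_size=.2, random_state=seed)
assert a.index.equals(b.index)  # reproducible: same rows selected each time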
11 changes: 6 additions & 5 deletions models/dcn.py
@@ -2,6 +2,7 @@
import torch.nn as nn

from models.base_model import BaseModel
+from loguru import logger

class DCN(BaseModel):
def __init__(self, cfg, num_users, num_items, attributes_count: list):
@@ -21,7 +22,7 @@ def __init__(self, cfg, num_users, num_items, attributes_count: list):

def _deep(self):
deep = nn.Sequential()
-for idx in range(len(self.hidden_dims)-1): #
+for idx in range(len(self.hidden_dims)-1):
deep.append(nn.Linear(self.hidden_dims[idx], self.hidden_dims[idx+1]))
deep.append(nn.ReLU())
return deep
@@ -34,10 +35,10 @@ def _cross(self):
def _init_weights(self):
for child in self.children():
if isinstance(child, nn.Embedding):
-nn.init.xavier_uniform_(child.weight)
+nn.init.kaiming_normal_(child.weight)
elif isinstance(child, nn.Linear):
-nn.init.xavier_uniform_(child.weight)
-nn.init.uniform_(child.bias)
+nn.init.kaiming_normal_(child.weight)
+nn.init.zeros_(child.bias)

def forward(self, user_id, item_id, *attributes):
user_emb = self.user_embedding(user_id)
@@ -54,7 +55,7 @@ def forward(self, user_id, item_id, *attributes):
input_x = torch.cat([self.deep(input_x), self._forward_cross(input_x)], dim=1)

return torch.sigmoid(self.output_layer(input_x))

def _forward_cross(self, x):
prev_x = x
for weight, bias in zip(self.cross_weights, self.cross_bias):
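
Besides deleting a stray trailing comment, the commit switches weight initialization from Xavier-uniform to Kaiming-normal and zeroes the linear biases, a better fit for the ReLU stack built in _deep. For context, a standalone sketch of the cross interaction that _forward_cross appears to implement; shapes are assumed here, and the repo's exact parameter layout may differ:

import torch

batch, dim = 4, 8
num_orders = 1  # cross_orders: 1 after this commit
x0 = torch.randn(batch, dim)
weights = [torch.randn(dim, 1) for _ in range(num_orders)]
biases = [torch.zeros(dim) for _ in range(num_orders)]

x = x0
for w, b in zip(weights, biases):
    # x_{l+1} = x0 * (x_l w) + b + x_l: explicit feature crosses plus a residual term
    x = x0 * (x @ w) + b + x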
2 changes: 2 additions & 0 deletions train.py
@@ -81,11 +81,13 @@ def train(cfg, args):#train_dataset, valid_dataset, test_dataset, model_info):
elif cfg.model_name in ('MF', ):
trainer = MFTrainer(cfg, args.model_info['num_items'], args.model_info['num_users'])
trainer.run(train_dataloader, valid_dataloader, args.valid_eval_data)
+trainer.load_best_model()
trainer.evaluate(args.test_eval_data, 'test')
elif cfg.model_name in ('DCN', ):
trainer = DCNTrainer(cfg, args.model_info['num_items'], args.model_info['num_users'],
args.data_pipeline.item2attributes, args.data_pipeline.attributes_count)
trainer.run(train_dataloader, valid_dataloader, args.valid_eval_data)
+trainer.load_best_model()
trainer.evaluate(args.test_eval_data, 'test')

@hydra.main(version_base=None, config_path="configs", config_name="train_config")
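
Both model branches now reload the best checkpoint before scoring the test set, so reported test metrics come from the best validation epoch (under the patience: 5 early stopping in the config) rather than whatever epoch training stopped on. A hedged sketch of what load_best_model() is assumed to do; the function signature and path below are hypothetical, and the real logic lives in the trainer classes:

import torch

def load_best_model(model: torch.nn.Module, ckpt_path: str, device: str) -> torch.nn.Module:
    # Restore the weights saved when validation last improved
    # (hypothetical checkpoint path; the repo tracks this internally).
    model.load_state_dict(torch.load(ckpt_path, map_location=device))
    return model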
43 changes: 16 additions & 27 deletions trainers/dcn_trainer.py
@@ -82,18 +82,12 @@ def train(self, train_dataloader: DataLoader) -> float:
for data in tqdm(train_dataloader):
user_id, pos_item, neg_item = data['user_id'].to(self.device), data['pos_item'].to(self.device), \
data['neg_item'].to(self.device)
-# pos_item_categories, pos_item_statecity, neg_item_categories, neg_item_statecity = \
-#     data['pos_item_categories'].to(self.device), data['pos_item_statecity'].to(self.device), \
-#     data['neg_item_categories'].to(self.device), data['neg_item_statecity'].to(self.device)

pos_item_categories = torch.tensor([self.item2attributes[item.item()]['categories'] for item in data['pos_item']]).to(self.device)
pos_item_statecity = torch.tensor([self.item2attributes[item.item()]['statecity'] for item in data['pos_item']]).to(self.device)
neg_item_categories = torch.tensor([self.item2attributes[item.item()]['categories'] for item in data['neg_item']]).to(self.device)
neg_item_statecity = torch.tensor([self.item2attributes[item.item()]['statecity'] for item in data['neg_item']]).to(self.device)

-# logger.info(f"pos_categories: {torch.equal(pos_item_categories, torch.tensor([self.item2attributes[item.item()]['categories'] for item in data['pos_item']]).to(self.device))}")
-# logger.info(f"pos_statecity: {torch.equal(pos_item_statecity, torch.tensor([self.item2attributes[item.item()]['statecity'] for item in data['pos_item']]).to(self.device))}")
-# logger.info(f"neg_categories: {torch.equal(neg_item_categories, torch.tensor([self.item2attributes[item.item()]['categories'] for item in data['neg_item']]).to(self.device))}")
-# logger.info(f"neg_statecity: {torch.equal(neg_item_statecity, torch.tensor([self.item2attributes[item.item()]['statecity'] for item in data['neg_item']]).to(self.device))}")
pos_pred = self.model(user_id, pos_item, pos_item_categories, pos_item_statecity)
neg_pred = self.model(user_id, neg_item, neg_item_categories, neg_item_statecity)
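
The dead commented-out code, both the tensor moves and the tensor-equality sanity logs, is deleted; attribute tensors are now built per batch by a Python list comprehension over item2attributes. As a sketch only (not what this commit does), the same lookup can be precomputed once into an attribute table and gathered per batch:

import torch

def build_attr_table(item2attributes: dict, num_items: int, key: str) -> torch.Tensor:
    # One row per item id; assumes every item has the attribute under `key`.
    return torch.tensor([item2attributes[i][key] for i in range(num_items)])

# categories = build_attr_table(item2attributes, num_items, 'categories')
# pos_item_categories = categories[data['pos_item']].to(device)  # indexed gather per batch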

@@ -112,9 +106,6 @@ def validate(self, valid_dataloader: DataLoader) -> tuple[float]:
for data in tqdm(valid_dataloader):
user_id, pos_item, neg_item = data['user_id'].to(self.device), data['pos_item'].to(self.device), \
data['neg_item'].to(self.device)
-# pos_item_categories, pos_item_statecity, neg_item_categories, neg_item_statecity = \
-#     data['pos_item_categories'].to(self.device), data['pos_item_statecity'].to(self.device), \
-#     data['neg_item_categories'].to(self.device), data['neg_item_statecity'].to(self.device)
pos_item_categories = torch.tensor([self.item2attributes[item.item()]['categories'] for item in data['pos_item']]).to(self.device)
pos_item_statecity = torch.tensor([self.item2attributes[item.item()]['statecity'] for item in data['pos_item']]).to(self.device)
neg_item_categories = torch.tensor([self.item2attributes[item.item()]['categories'] for item in data['neg_item']]).to(self.device)
@@ -132,33 +123,32 @@ def validate(self, valid_dataloader: DataLoader) -> tuple[float]:
def evaluate(self, eval_data: pd.DataFrame, mode='valid') -> tuple:
self.model.eval()
actual, predicted = [], []
logger.info(f"Before inference #0: {torch.cuda.memory_allocated(self.device)} allocated and {torch.cuda.memory_reserved(self.device)} reserved")
item_input = torch.tensor([item_id for item_id in range(self.num_items)], dtype=torch.int32).to(self.device)
-# item_categories = torch.tensor([self.item2attributes[item]['categories'] for item in range(self.num_items)], dtype=torch.int32).to(self.device)
-# item_statecity = torch.tensor([self.item2attributes[item]['statecity'] for item in range(self.num_items)], dtype=torch.int32).to(self.device)
-chunk_size = 32 # self.cfg.batch_size
-# logger.info(f"Before inference #1: {torch.cuda.memory_allocated(self.device)} allocated and {torch.cuda.memory_reserved(self.device)} reserved")
-torch.cuda.empty_cache()
-# logger.info(f"Before inference #2: {torch.cuda.memory_allocated(self.device)} allocated and {torch.cuda.memory_reserved(self.device)} reserved")
-for user_id, row in tqdm(eval_data[:10].iterrows(), total=eval_data.shape[0]):
+chunk_size = self.cfg.batch_size
+
+# for efficient learning
+if mode == 'valid':
+    eval_data = eval_data[:1000]
+
+for user_id, row in tqdm(eval_data.iterrows(), total=eval_data.shape[0]):
pred = []
for idx in range(0, eval_data.shape[0], chunk_size):
chunk_item_input = item_input[idx:idx+chunk_size]
-chunk_item_categories = torch.tensor([self.item2attributes[item]['categories'] for item in range(idx, min(self.num_items, idx+chunk_size))], dtype=torch.int32).to(self.device)
-chunk_item_statecity = torch.tensor([self.item2attributes[item]['statecity'] for item in range(idx, min(self.num_items, idx+chunk_size))], dtype=torch.int32).to(self.device)
-# print(f"{chunk_size}, {chunk_item_input.size()}, {chunk_item_categories.size()}, {chunk_item_statecity.size()}")
-# logger.info(f"{torch.cuda.memory_allocated(self.device)} allocated and {torch.cuda.memory_reserved(self.device)} reserved")
+chunk_item_categories = torch.tensor([
+    self.item2attributes[item]['categories'] for item in range(idx, min(self.num_items, idx+chunk_size))], dtype=torch.int32).to(self.device)
+chunk_item_statecity = torch.tensor([
+    self.item2attributes[item]['statecity'] for item in range(idx, min(self.num_items, idx+chunk_size))], dtype=torch.int32).to(self.device)

-chunk_pred: Tensor = self.model(torch.tensor([user_id,]*len(chunk_item_input), dtype=torch.int32).to(self.device), chunk_item_input, chunk_item_categories, chunk_item_statecity)
+chunk_pred: Tensor = self.model(
+    torch.tensor([user_id,]*len(chunk_item_input), dtype=torch.int32).to(self.device), chunk_item_input, chunk_item_categories, chunk_item_statecity)
pred.extend(chunk_pred.detach().cpu().numpy())

-# torch.cuda.empty_cache()
-# pred = self.model(torch.tensor([user_id,]*self.num_items).to(self.device), item_input, item_categories, item_statecity)
batch_predicted = \
self._generate_top_k_recommendation(np.array(pred).reshape(-1), row['mask_items'])
actual.append(row['pos_items'])
predicted.append(batch_predicted)

-logger.info(f'0 users predicted: {predicted[0]} actual: {actual[0]}')
test_precision_at_k = precision_at_k(actual, predicted, self.cfg.top_n)
test_recall_at_k = recall_at_k(actual, predicted, self.cfg.top_n)
test_map_at_k = map_at_k(actual, predicted, self.cfg.top_n)
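
For reference, a hedged sketch of the metric contract used here (the repo ships its own implementations elsewhere): actual and predicted are per-user lists of item ids, and predictions are truncated to the top k.

import numpy as np

def precision_at_k(actual: list, predicted: list, k: int) -> float:
    # Fraction of the top-k recommendations that are true positives, averaged over users.
    return float(np.mean([len(set(a) & set(p[:k])) / k for a, p in zip(actual, predicted)]))

def recall_at_k(actual: list, predicted: list, k: int) -> float:
    # Fraction of each user's positives recovered in the top k, averaged over users.
    return float(np.mean([len(set(a) & set(p[:k])) / max(len(a), 1) for a, p in zip(actual, predicted)]))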
@@ -178,8 +168,7 @@ def evaluate(self, eval_data: pd.DataFrame, mode='valid') -> tuple:

def _generate_top_k_recommendation(self, pred: np.ndarray, mask_items) -> tuple[list]:
# mask to train items
-# pred = pred.cpu().detach().numpy()
-pred[mask_items] = -3.40282e+38 # finfo(float32)
+pred[mask_items] = 0 # sigmoid

# find the largest topK item indexes by user
topn_index = np.argpartition(pred, -self.cfg.top_n)[-self.cfg.top_n:]
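
The sentinel change is sound because the model ends in a sigmoid: every score lies in (0, 1), so overwriting seen items with 0 already guarantees they lose to any live score, and the float32-minimum constant becomes unnecessary. A small self-contained sketch of the masking plus argpartition top-n used above:

import numpy as np

pred = np.array([0.9, 0.2, 0.8, 0.55])  # sigmoid scores for 4 items
mask_items = np.array([0])              # item 0 appeared in training
pred[mask_items] = 0                    # safe floor: sigmoid outputs are > 0

top_n = 2
topn_index = np.argpartition(pred, -top_n)[-top_n:]     # unordered top-n
topn_index = topn_index[np.argsort(-pred[topn_index])]  # sort by score, descending
print(topn_index)  # [2 3]: masked item 0 excluded despite the highest raw score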
