From 950313a806dd732ac52fb9a8447e0d35f376aaf9 Mon Sep 17 00:00:00 2001
From: GiancarloCroce
Date: Mon, 11 Mar 2024 20:19:12 +0100
Subject: [PATCH] release MixTCRpred_v1.0

---
 MixTCRpred.py                           |  3 +-
 src/imgt_ref_seq/get_CDR12_fromVgene.py |  2 ++
 src/models.py                           | 43 +++++++++++++------------
 3 files changed, 26 insertions(+), 22 deletions(-)

diff --git a/MixTCRpred.py b/MixTCRpred.py
index ab62fda..2199ad5 100644
--- a/MixTCRpred.py
+++ b/MixTCRpred.py
@@ -8,7 +8,8 @@
 import configparser
 import pandas as pd
 
-path_pretrained_models = './pretrained_models'
+path_pretrained_models = '/home/giancarlo/Documents/lab_work/MixTCRpred/pretrained_models'
+#path_pretrained_models = './pretrained_models'
 
 
 if __name__ == '__main__':
diff --git a/src/imgt_ref_seq/get_CDR12_fromVgene.py b/src/imgt_ref_seq/get_CDR12_fromVgene.py
index 4548b5e..17ddce9 100644
--- a/src/imgt_ref_seq/get_CDR12_fromVgene.py
+++ b/src/imgt_ref_seq/get_CDR12_fromVgene.py
@@ -22,6 +22,7 @@
     name = seq_record.id
     vgene = name.split("|")[1]
     seq = seq_record.seq
+    break
     cdr1 = str(seq[26:38]).replace("-","")
     cdr2 = str(seq[55:65]).replace("-","")
     all_cdr1.append(cdr1)
@@ -62,6 +63,7 @@
     name = seq_record.id
     vgene = name.split("|")[1]
     seq = seq_record.seq
+    print(len(seq))
     cdr1 = str(seq[26:38]).replace("-","")
     cdr2 = str(seq[55:65]).replace("-","")
     all_cdr1.append(cdr1)
diff --git a/src/models.py b/src/models.py
index 6e80dd9..60184ea 100644
--- a/src/models.py
+++ b/src/models.py
@@ -37,7 +37,7 @@ def __init__(self, vocab_size, embedding_dim, hidden_dim, num_heads, num_layers
         self.scale = torch.sqrt(torch.FloatTensor([self.embedding_dim]))
         self.embedding_pos_epi = PositionWiseEmbedding(self.vocab_size, self.embedding_dim, self.padding[0])
         self.embedding_pos_TRA = PositionWiseEmbedding(self.vocab_size, self.embedding_dim, self.padding[1] + 2*self.padding[-1])
-        self.embedding_pos_TRB = PositionWiseEmbedding(self.vocab_size, self.embedding_dim, self.padding[1] + 2*self.padding[-1])
+        self.embedding_pos_TRB = PositionWiseEmbedding(self.vocab_size, self.embedding_dim, self.padding[2] + 2*self.padding[-1])
         # Transformer - Encoder
         self.transformer_encoder = TransformerEncoder(num_layers=num_layers, input_dim=embedding_dim, dim_feedforward=hidden_dim, num_heads=num_heads, dropout=dropout)
         ### Output classifier
@@ -131,11 +131,11 @@ def training_step(self, batch, batch_idx):
         preds = self.forward(inp_data, mask = True)
         loss = self.loss_function(preds, labels)
         self.log('train_loss', loss)
-        #compute auc
-        fpr, tpr, threshold = metrics.roc_curve(labels.cpu().numpy(), preds.data[:,1].cpu().numpy())
-        AUC = metrics.auc(fpr, tpr)
-        #print("AUC_train:{0}".format(AUC))
-        self.log('train_auc', AUC)
+        ##compute auc
+        #fpr, tpr, threshold = metrics.roc_curve(labels.cpu().numpy(), preds.data[:,1].cpu().numpy())
+        #AUC = metrics.auc(fpr, tpr)
+        ##print("AUC_train:{0}".format(AUC))
+        #self.log('train_auc', AUC)
         return loss
     def validation_step(self, batch, batch_idx):
         inp_data = batch[1]
@@ -143,11 +143,11 @@
         preds = self.forward(inp_data, mask = True)
         loss = self.loss_function(preds, labels)
         self.log('val_loss', loss)
-        #compute auc
-        fpr, tpr, threshold = metrics.roc_curve(labels.cpu().numpy(), preds.data[:,1].cpu().numpy())
-        AUC = metrics.auc(fpr, tpr)
-        #print("AUC_train:{0}".format(AUC))
-        self.log('val_auc', AUC)
+        ##compute auc
+        #fpr, tpr, threshold = metrics.roc_curve(labels.cpu().numpy(), preds.data[:,1].cpu().numpy())
+        #AUC = metrics.auc(fpr, tpr)
+        ##print("AUC_train:{0}".format(AUC))
+        #self.log('val_auc', AUC)
         return loss
     def test_step(self, batch, batch_idx):
         test_seq = batch[0]
@@ -156,6 +156,7 @@
         preds = self.forward(inp_data, mask = True)
         loss = self.loss_function(preds, labels)
         self.prob.extend(preds.data[:,1].cpu().numpy())
+        #self.prob.extend(preds.data.cpu().numpy())
         self.test_tp.extend(labels.cpu().numpy())
         self.test_seq.extend(test_seq)
 
@@ -188,7 +189,7 @@ def __init__(self, vocab_size, embedding_dim, hidden_dim, num_heads, num_layers
         self.embedding_pos_cdr12= PositionWiseEmbedding(self.vocab_size, self.embedding_dim, 4*self.padding[-1])
         ########## TEST ############3
         self.embedding_pos_TRA = PositionWiseEmbedding(self.vocab_size, self.embedding_dim, self.padding[1] + 2*self.padding[-1])
-        self.embedding_pos_TRB = PositionWiseEmbedding(self.vocab_size, self.embedding_dim, self.padding[1] + 2*self.padding[-1])
+        self.embedding_pos_TRB = PositionWiseEmbedding(self.vocab_size, self.embedding_dim, self.padding[2] + 2*self.padding[-1])
         # Transformer - Encoder
         self.transformer_encoder = TransformerEncoder(num_layers=num_layers,
                                                       input_dim=embedding_dim,
@@ -344,10 +345,10 @@ def training_step(self, batch, batch_idx):
         loss = self.loss_function(preds, labels)
         self.log('train_loss', loss)
         #compute auc
-        fpr, tpr, threshold = metrics.roc_curve(labels.cpu().numpy(), preds.data[:,1].cpu().numpy())
-        AUC = metrics.auc(fpr, tpr)
-        #print("AUC_train:{0}".format(AUC))
-        self.log('train_auc', AUC)
+        #fpr, tpr, threshold = metrics.roc_curve(labels.cpu().numpy(), preds.data[:,1].cpu().numpy())
+        #AUC = metrics.auc(fpr, tpr)
+        ##print("AUC_train:{0}".format(AUC))
+        #self.log('train_auc', AUC)
         return loss
     def validation_step(self, batch, batch_idx):
         inp_data = batch[1]
@@ -355,11 +356,11 @@
         preds = self.forward(inp_data, mask = True)
         loss = self.loss_function(preds, labels)
         self.log('val_loss', loss)
-        #compute auc
-        fpr, tpr, threshold = metrics.roc_curve(labels.cpu().numpy(), preds.data[:,1].cpu().numpy())
-        AUC = metrics.auc(fpr, tpr)
-        #print("AUC_train:{0}".format(AUC))
-        self.log('val_auc', AUC)
+        ##compute auc
+        #fpr, tpr, threshold = metrics.roc_curve(labels.cpu().numpy(), preds.data[:,1].cpu().numpy())
+        #AUC = metrics.auc(fpr, tpr)
+        ##print("AUC_train:{0}".format(AUC))
+        #self.log('val_auc', AUC)
         return loss
     def test_step(self, batch, batch_idx):
         test_seq = batch[0]
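
Two review notes on this patch; the sketches below are illustrations only, not part of the commit.

1. MixTCRpred.py now hard-codes path_pretrained_models to an absolute path under
/home/giancarlo and comments out the relative default, so any other checkout will
fail to locate the models. Likewise, in get_CDR12_fromVgene.py the added "break"
exits the parsing loop before the CDR1/CDR2 extraction below it ever runs, and
print(len(seq)) reads like a debug trace; both look like development leftovers.
A minimal, location-independent way to set the path, assuming pretrained_models/
sits next to MixTCRpred.py as in the repository layout:

    import os

    # Resolve pretrained_models relative to this file rather than to the
    # current working directory or a hard-coded home directory, so the
    # lookup works from any checkout and any invocation directory.
    path_pretrained_models = os.path.join(
        os.path.dirname(os.path.abspath(__file__)),
        'pretrained_models',
    )

2. The models.py hunks size the TRB positional embedding by padding[2]
(presumably the TRB CDR3 maximum length, by analogy with padding[0] for the
epitope and padding[1] for TRA) instead of reusing padding[1], and they disable
per-batch AUC logging by commenting it out, plausibly because sklearn's
roc_curve is undefined when a batch contains only one class. If per-batch AUC
is still wanted, one option is to guard the computation rather than remove it;
safe_batch_auc below is a hypothetical helper, not part of MixTCRpred:

    import numpy as np
    from sklearn import metrics

    def safe_batch_auc(labels, scores):
        """Return the ROC AUC for one batch, or None if only one class is
        present; roc_curve needs both positive and negative labels, which
        a small batch is not guaranteed to contain."""
        labels = np.asarray(labels)
        scores = np.asarray(scores)
        if np.unique(labels).size < 2:
            return None
        fpr, tpr, _ = metrics.roc_curve(labels, scores)
        return metrics.auc(fpr, tpr)

    # Sketch of use inside training_step / validation_step:
    #   auc = safe_batch_auc(labels.cpu().numpy(), preds.data[:, 1].cpu().numpy())
    #   if auc is not None:
    #       self.log('train_auc', auc)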