Commit

Updated Roberta Model scripts for gpu runs (#26)
* Updated Roberta Model scripts for gpu runs

* Updated scipy version test fix

---------

Co-authored-by: falgun malhotra <[email protected]>
advaithsrao and FalgunMalhotra authored Nov 6, 2023
1 parent fbfdadc commit 1fffd31
Showing 4 changed files with 366 additions and 239 deletions.
16 changes: 13 additions & 3 deletions detector/modeler.py
@@ -1,4 +1,6 @@
import os
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:512"

import shutil
import pandas as pd
import numpy as np
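
For context, PYTORCH_CUDA_ALLOC_CONF configures PyTorch's CUDA caching allocator; max_split_size_mb:512 prevents the allocator from splitting cached blocks larger than 512 MB, which can reduce fragmentation-related out-of-memory errors during long GPU runs. The value is read when the allocator initializes, so the script sets it before anything touches CUDA memory. A minimal standalone sketch of the same pattern:

import os
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:512"  # read when the CUDA allocator initializes

import torch  # imported after the env var so the allocator configuration is picked up

if torch.cuda.is_available():
    x = torch.zeros(1, device='cuda')  # first CUDA allocation; the split-size cap now applies
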
@@ -16,7 +18,8 @@ def __init__(
        learning_rate=2e-5,
        epsilon=1e-8,
        num_epochs=40,
        batch_size=128
        batch_size=128,
        device=None
    ):
        self.num_labels = num_labels
        self.path = path
@@ -25,8 +28,14 @@ def __init__(
        self.epsilon = epsilon
        self.num_epochs = num_epochs
        self.batch_size = batch_size
        self.device = device

        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        if not self.device and torch.cuda.is_available():
            self.device = 'cuda'
        elif not self.device:
            self.device = 'cpu'

        self.device = torch.device(self.device)
        self.tokenizer = RobertaTokenizer.from_pretrained(self.model_name)

        if self.path != '':
@@ -275,4 +284,5 @@ def accuracy(

        pred_flat = np.argmax(preds, axis=1).flatten()
        labels_flat = labels.flatten()
        return np.sum(pred_flat == labels_flat) / len(labels_flat)
        return np.sum(pred_flat == labels_flat) / len(labels_flat)
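
A minimal sketch of the device-resolution behavior added to the constructor above, pulled out into a standalone helper for clarity (the name resolve_device is illustrative, not part of the repository):

import torch

def resolve_device(device=None):
    # Mirror of the constructor logic: an explicit device wins, otherwise
    # fall back to CUDA when available, else CPU.
    if not device and torch.cuda.is_available():
        device = 'cuda'
    elif not device:
        device = 'cpu'
    return torch.device(device)

# resolve_device()       -> cuda if a GPU is visible, else cpu
# resolve_device('cpu')  -> cpu, even on a GPU machine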

19 changes: 12 additions & 7 deletions pipelines/roberta_trainer.py
@@ -31,6 +31,7 @@ def parse_args():
parser.add_argument("--num_labels", "-l", type=int, default=2, help="Number of labels")
parser.add_argument("--num_epochs", "-e", type=int, default=40, help="Number of epochs")
parser.add_argument("--batch_size", "-b", type=int, default=128, help="Batch size")
parser.add_argument("--device", "-d", type=str, default='cpu', help="Device to train the model on: 'cpu', 'cuda' or 'gpu'")
return parser.parse_args()

def load_data():
@@ -106,9 +107,9 @@ def data_split(data):
        ]
        train['Split'] = 'Train'
    else:
        train = data[data['Split'] == 'Train'].head(50)
        gold_fraud = data[data['Split'] == 'Gold Fraud'].head(50)
        sanity = data[data['Split'] == 'Sanity'].head(50)
        train = data[data['Split'] == 'Train']
        gold_fraud = data[data['Split'] == 'Gold Fraud']
        sanity = data[data['Split'] == 'Sanity']
    return train, sanity, gold_fraud

def train_model(train_data, hyper_params):
@@ -197,19 +198,22 @@ def dump_logs_to_wandb(hyper_params, f1_scores, true_pred_map):
if __name__ == '__main__':
    # Parse the arguments
    args = parse_args()

    device = args.device
    device = device if device != 'gpu' else 'cuda'

    # Define model hyperparameters
    hyper_params = {
        'num_labels': args.num_labels,
        'num_epochs': args.num_epochs,
        'batch_size': args.batch_size
        'batch_size': args.batch_size,
        'device': args.device,
    }

    # Log in to Weights and Biases
    wandbdict = {
        'key': os.getenv('WANDB_API_KEY'),
        'entity': os.getenv('WANDB_ENTITY'),
        'project': os.getenv('WANDB_PROJECT')
        'project': os.getenv('WANDB_PROJECT'),
    }
    wandb.login(key=wandbdict['key'])
    run = wandb.init(project=wandbdict['project'], entity=wandbdict['entity'])
@@ -242,4 +246,5 @@ def dump_logs_to_wandb(hyper_params, f1_scores, true_pred_map):
    dump_logs_to_wandb(hyper_params, f1_scores, true_pred_map)

    # Close the Weights and Biases run
    run.finish()
    run.finish()
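
As a usage sketch of the new flag: the RobertaModel class name and import path below are assumptions for illustration, while the argparse option, the 'gpu' to 'cuda' normalization, and the hyper_params keys come from this commit.

# python pipelines/roberta_trainer.py --device gpu --batch_size 64
# python pipelines/roberta_trainer.py -d cpu

from detector.modeler import RobertaModel  # assumed module path and class name

device = 'gpu'
device = device if device != 'gpu' else 'cuda'  # same normalization as in the script above

model = RobertaModel(
    num_labels=2,
    num_epochs=40,
    batch_size=128,
    device=device,  # 'cpu' or 'cuda'; None lets the constructor auto-detect
)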

