From f6b00b1fa7f50da8124af122964036a9751c3b2c Mon Sep 17 00:00:00 2001
From: Advaith Rao
Date: Sun, 26 Nov 2023 00:16:18 -0500
Subject: [PATCH] Moved use_aug from hyperparameters to separate variable

---
 pipelines/distilbert_trainer.py | 9 ++++-----
 pipelines/roberta_trainer.py    | 9 ++++-----
 pipelines/svm_trainer.py        | 9 ++++-----
 3 files changed, 12 insertions(+), 15 deletions(-)

diff --git a/pipelines/distilbert_trainer.py b/pipelines/distilbert_trainer.py
index 2d08156..c2f6056 100644
--- a/pipelines/distilbert_trainer.py
+++ b/pipelines/distilbert_trainer.py
@@ -1,4 +1,4 @@
-#usage: python3 -m pipelines.distilbert_trainer --num_epochs 20 --batch_size 8 --num_labels 2 --device 'cuda' --save_path '/tmp' --model_name 'distilbert-base-uncased' --use_aug True
+#usage: python3 -m pipelines.distilbert_trainer --num_epochs 20 --batch_size 8 --num_labels 2 --device 'cuda' --save_path '/tmp' --model_name 'distilbert-base-uncased' --use_aug 'True'
 
 import sys
 sys.path.append('..')
@@ -125,7 +125,7 @@ def data_split(data):
 
     return train, sanity, gold_fraud
 
-def train_model(train_data, hyper_params):
+def train_model(train_data, hyper_params, use_aug=False):
     run = wandb.init(config=hyper_params)
 
     model = DistilbertModel(**hyper_params)
@@ -133,7 +133,7 @@ def train_model(train_data, hyper_params):
     # train_data = train_data[~((train_data['Label'] == 1) & (train_data['Body'].str.split().str.len() < 4))]
     # train_data = train_data.reset_index(drop=True)
 
-    if hyper_params['use_aug']:
+    if use_aug:
         augmentor = Augmentor()
 
         train_body, train_labels = augmentor(
@@ -256,7 +256,6 @@ def dump_logs_to_wandb(hyper_params, f1_scores, save_path):
         'num_epochs': args.num_epochs,
         'batch_size': args.batch_size,
         'device': args.device,
-        'use_aug': args.use_aug,
     }
 
     # Log in to Weights and Biases
@@ -289,7 +288,7 @@ def dump_logs_to_wandb(hyper_params, f1_scores, save_path):
     train_data, sanity_data, gold_fraud_data = data_split(data)
 
     # Train the model
-    model = train_model(train_data, hyper_params)
+    model = train_model(train_data, hyper_params, use_aug=args.use_aug)
 
     # Test the model
     f1_scores = test_model(train_data, sanity_data, gold_fraud_data, save_path)
diff --git a/pipelines/roberta_trainer.py b/pipelines/roberta_trainer.py
index 2bedd25..032b2d5 100644
--- a/pipelines/roberta_trainer.py
+++ b/pipelines/roberta_trainer.py
@@ -1,4 +1,4 @@
-#usage: python3 -m pipelines.roberta_trainer --num_epochs 20 --batch_size 8 --num_labels 2 --device 'cuda' --save_path '/tmp' --model_name 'roberta-base' --use_aug True
+#usage: python3 -m pipelines.roberta_trainer --num_epochs 20 --batch_size 8 --num_labels 2 --device 'cuda' --save_path '/tmp' --model_name 'roberta-base' --use_aug 'True'
 
 import sys
 sys.path.append('..')
@@ -125,11 +125,11 @@ def data_split(data):
 
     return train, sanity, gold_fraud
 
-def train_model(train_data, hyper_params):
+def train_model(train_data, hyper_params, use_aug=False):
     run = wandb.init(config=hyper_params)
     model = RobertaModel(**hyper_params)
 
-    if hyper_params['use_aug']:
+    if use_aug:
         augmentor = Augmentor()
 
         train_body, train_labels = augmentor(
@@ -252,7 +252,6 @@ def dump_logs_to_wandb(hyper_params, f1_scores, save_path):
         'num_epochs': args.num_epochs,
         'batch_size': args.batch_size,
         'device': args.device,
-        'use_aug': args.use_aug,
     }
 
     # Log in to Weights and Biases
@@ -285,7 +284,7 @@ def dump_logs_to_wandb(hyper_params, f1_scores, save_path):
    train_data, sanity_data, gold_fraud_data = data_split(data)
 
     # Train the model
-    model = train_model(train_data, hyper_params)
+    model = train_model(train_data, hyper_params, use_aug=args.use_aug)
 
     # Test the model
     f1_scores = test_model(train_data, sanity_data, gold_fraud_data, save_path)
diff --git a/pipelines/svm_trainer.py b/pipelines/svm_trainer.py
index 83e6fe3..1cc39fe 100644
--- a/pipelines/svm_trainer.py
+++ b/pipelines/svm_trainer.py
@@ -1,4 +1,4 @@
-#usage: python3 -m pipelines.svm_trainer --num_labels 2 --C 10 --kernel 'rbf' --save_path '/tmp/model' --use_aug True
+#usage: python3 -m pipelines.svm_trainer --num_labels 2 --C 10 --kernel 'rbf' --save_path '/tmp/model' --use_aug 'True'
 
 import sys
 sys.path.append('..')
@@ -123,7 +123,7 @@ def data_split(data):
 
     return train, sanity, gold_fraud
 
-def train_model(train_data, hyper_params):
+def train_model(train_data, hyper_params, use_aug=False):
     run = wandb.init(config=hyper_params)
 
     model = SVMModel(**hyper_params)
@@ -131,7 +131,7 @@ def train_model(train_data, hyper_params):
     # train_data = train_data[~((train_data['Label'] == 1) & (train_data['Body'].str.split().str.len() < 4))]
     # train_data = train_data.reset_index(drop=True)
 
-    if hyper_params['use_aug']:
+    if use_aug:
         augmentor = Augmentor()
 
         train_body, train_labels = augmentor(
@@ -240,7 +240,6 @@ def dump_logs_to_wandb(hyper_params, f1_scores, save_path):
         'num_labels': args.num_labels,
         'C': args.C,
         'kernel': args.kernel,
-        'use_aug': args.use_aug,
     }
 
     # Log in to Weights and Biases
@@ -273,7 +272,7 @@ def dump_logs_to_wandb(hyper_params, f1_scores, save_path):
     train_data, sanity_data, gold_fraud_data = data_split(data)
 
     # Train the model
-    model = train_model(train_data, hyper_params)
+    model = train_model(train_data, hyper_params, use_aug=args.use_aug)
 
     # Test the model
     f1_scores = test_and_save_model(train_data, sanity_data, gold_fraud_data, save_path)
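
Note on the new flag handling (a reviewer sketch, not part of the patch): the updated usage
comments quote the value (--use_aug 'True'), which suggests use_aug reaches argparse as a
string. The patch does not show the add_argument call, but if it relies on type=bool, then
bool('False') is still truthy and the flag could never be turned off from the command line.
A minimal way to make the flag unambiguous is shown below; str2bool and the parser lines are
illustrative assumptions, not code from this repo:

    import argparse

    def str2bool(value):
        # Map common true/false spellings to a real bool; reject anything else.
        if isinstance(value, bool):
            return value
        if value.lower() in ('true', '1', 'yes'):
            return True
        if value.lower() in ('false', '0', 'no'):
            return False
        raise argparse.ArgumentTypeError(f"expected a boolean, got {value!r}")

    parser = argparse.ArgumentParser()
    parser.add_argument('--use_aug', type=str2bool, default=False)
    args = parser.parse_args(['--use_aug', 'True'])
    assert args.use_aug is True

Passing use_aug as an explicit keyword argument also keeps it out of hyper_params, which these
trainers splat directly into the model constructors (e.g. DistilbertModel(**hyper_params));
presumably the motivation here is that leaving it in the dict would hand each model an
unexpected keyword. One side effect worth noting: since run = wandb.init(config=hyper_params),
use_aug is no longer recorded in the wandb config, so runs won't log whether augmentation was
used.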