Moved use_aug from hyperparameters to separate variable
advaithsrao committed Nov 26, 2023
1 parent 1c4900d commit f6b00b1
Showing 3 changed files with 12 additions and 15 deletions.
pipelines/distilbert_trainer.py: 9 changes (4 additions, 5 deletions)
@@ -1,4 +1,4 @@
-#usage: python3 -m pipelines.distilbert_trainer --num_epochs 20 --batch_size 8 --num_labels 2 --device 'cuda' --save_path '/tmp' --model_name 'distilbert-base-uncased' --use_aug True
+#usage: python3 -m pipelines.distilbert_trainer --num_epochs 20 --batch_size 8 --num_labels 2 --device 'cuda' --save_path '/tmp' --model_name 'distilbert-base-uncased' --use_aug 'True'
import sys
sys.path.append('..')

@@ -125,15 +125,15 @@ def data_split(data):

    return train, sanity, gold_fraud

-def train_model(train_data, hyper_params):
+def train_model(train_data, hyper_params, use_aug=False):
    run = wandb.init(config=hyper_params)
    model = DistilbertModel(**hyper_params)

    # #drop train examples with Label=1 and Body less than 4 words
    # train_data = train_data[~((train_data['Label'] == 1) & (train_data['Body'].str.split().str.len() < 4))]
    # train_data = train_data.reset_index(drop=True)

-    if hyper_params['use_aug']:
+    if use_aug:
        augmentor = Augmentor()

        train_body, train_labels = augmentor(
@@ -256,7 +256,6 @@ def dump_logs_to_wandb(hyper_params, f1_scores, save_path):
    'num_epochs': args.num_epochs,
    'batch_size': args.batch_size,
    'device': args.device,
-    'use_aug': args.use_aug,
}

# Log in to Weights and Biases
@@ -289,7 +288,7 @@ def dump_logs_to_wandb(hyper_params, f1_scores, save_path):
train_data, sanity_data, gold_fraud_data = data_split(data)

# Train the model
-model = train_model(train_data, hyper_params)
+model = train_model(train_data, hyper_params, use_aug=args.use_aug)

# Test the model
f1_scores = test_model(train_data, sanity_data, gold_fraud_data, save_path)
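A likely motivation for moving the flag, visible in the hunk above: the trainer unpacks hyper_params straight into the model constructor (DistilbertModel(**hyper_params)), so an extra use_aug key would be forwarded as a keyword argument the constructor may not accept, and it would also land in the wandb.init(config=hyper_params) log as if it were a model hyperparameter. A minimal sketch of the failure mode, assuming a constructor that enumerates its keyword arguments; DemoModel and its parameters are illustrative, not the repository's actual signature:

class DemoModel:
    # Illustrative constructor: accepts only genuine hyperparameters.
    def __init__(self, num_epochs, batch_size, device):
        self.num_epochs = num_epochs
        self.batch_size = batch_size
        self.device = device

hyper_params = {
    'num_epochs': 20,
    'batch_size': 8,
    'device': 'cuda',
    'use_aug': True,  # extra key, not a constructor parameter
}

model = DemoModel(**hyper_params)
# TypeError: __init__() got an unexpected keyword argument 'use_aug'

Passing use_aug to train_model as its own keyword argument keeps hyper_params limited to values every consumer of the dict understands.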
pipelines/roberta_trainer.py: 9 changes (4 additions, 5 deletions)
@@ -1,4 +1,4 @@
-#usage: python3 -m pipelines.roberta_trainer --num_epochs 20 --batch_size 8 --num_labels 2 --device 'cuda' --save_path '/tmp' --model_name 'roberta-base' --use_aug True
+#usage: python3 -m pipelines.roberta_trainer --num_epochs 20 --batch_size 8 --num_labels 2 --device 'cuda' --save_path '/tmp' --model_name 'roberta-base' --use_aug 'True'
import sys
sys.path.append('..')

@@ -125,11 +125,11 @@ def data_split(data):

    return train, sanity, gold_fraud

-def train_model(train_data, hyper_params):
+def train_model(train_data, hyper_params, use_aug=False):
    run = wandb.init(config=hyper_params)
    model = RobertaModel(**hyper_params)

-    if hyper_params['use_aug']:
+    if use_aug:
        augmentor = Augmentor()

        train_body, train_labels = augmentor(
@@ -252,7 +252,6 @@ def dump_logs_to_wandb(hyper_params, f1_scores, save_path):
    'num_epochs': args.num_epochs,
    'batch_size': args.batch_size,
    'device': args.device,
-    'use_aug': args.use_aug,
}

# Log in to Weights and Biases
@@ -285,7 +284,7 @@ def dump_logs_to_wandb(hyper_params, f1_scores, save_path):
train_data, sanity_data, gold_fraud_data = data_split(data)

# Train the model
-model = train_model(train_data, hyper_params)
+model = train_model(train_data, hyper_params, use_aug=args.use_aug)

# Test the model
f1_scores = test_model(train_data, sanity_data, gold_fraud_data, save_path)
pipelines/svm_trainer.py: 9 changes (4 additions, 5 deletions)
@@ -1,4 +1,4 @@
-#usage: python3 -m pipelines.svm_trainer --num_labels 2 --C 10 --kernel 'rbf' --save_path '/tmp/model' --use_aug True
+#usage: python3 -m pipelines.svm_trainer --num_labels 2 --C 10 --kernel 'rbf' --save_path '/tmp/model' --use_aug 'True'
import sys
sys.path.append('..')

@@ -123,15 +123,15 @@ def data_split(data):

    return train, sanity, gold_fraud

-def train_model(train_data, hyper_params):
+def train_model(train_data, hyper_params, use_aug=False):
    run = wandb.init(config=hyper_params)
    model = SVMModel(**hyper_params)

    # #drop train examples with Label=1 and Body less than 4 words
    # train_data = train_data[~((train_data['Label'] == 1) & (train_data['Body'].str.split().str.len() < 4))]
    # train_data = train_data.reset_index(drop=True)

-    if hyper_params['use_aug']:
+    if use_aug:
        augmentor = Augmentor()

        train_body, train_labels = augmentor(
@@ -240,7 +240,6 @@ def dump_logs_to_wandb(hyper_params, f1_scores, save_path):
    'num_labels': args.num_labels,
    'C': args.C,
    'kernel': args.kernel,
-    'use_aug': args.use_aug,
}

# Log in to Weights and Biases
@@ -273,7 +272,7 @@ def dump_logs_to_wandb(hyper_params, f1_scores, save_path):
train_data, sanity_data, gold_fraud_data = data_split(data)

# Train the model
-model = train_model(train_data, hyper_params)
+model = train_model(train_data, hyper_params, use_aug=args.use_aug)

# Test the model
f1_scores = test_and_save_model(train_data, sanity_data, gold_fraud_data, save_path)
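The updated usage lines quote the flag value (--use_aug 'True'), which suggests the argument reaches the script as a string. How the string is converted to a boolean is not shown in this diff; note that argparse's type=bool is a classic pitfall here, because bool('False') is True (any non-empty string is truthy). A defensive sketch under that assumption, with str2bool a hypothetical helper rather than code from this repository:

import argparse

def str2bool(value: str) -> bool:
    # Map 'True'/'true'/'1'/'yes' to True and everything else to False,
    # avoiding the type=bool pitfall where bool('False') evaluates to True.
    return str(value).strip().lower() in ('true', '1', 'yes')

parser = argparse.ArgumentParser()
parser.add_argument('--use_aug', type=str2bool, default=False)

args = parser.parse_args(['--use_aug', 'True'])
assert args.use_aug is True

args = parser.parse_args(['--use_aug', 'False'])
assert args.use_aug is False  # type=bool would wrongly yield True here

With a conversion like this in place, train_model(train_data, hyper_params, use_aug=args.use_aug) receives a real boolean in all three trainers.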
