
Commit ae52140

Changed loss calculations and accuracy function for bert models
advaithsrao committed Nov 26, 2023
1 parent 4252b71 commit ae52140
Showing 1 changed file with 30 additions and 82 deletions.
112 changes: 30 additions & 82 deletions detector/modeler.py
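
In outline: this commit drops the hand-rolled class-weighted BCE loss in favor of the loss the Hugging Face model computes itself when labels are passed in, and moves prediction and accuracy from numpy to torch. A minimal sketch of the adopted pattern, assuming the transformers BertForSequenceClassification API (the model name and inputs below are illustrative, not from the repository):

    import torch
    from transformers import BertForSequenceClassification, BertTokenizer

    model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=2)
    tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

    batch = tokenizer(["an example email body"], return_tensors="pt", padding=True)
    labels = torch.tensor([1])

    # With labels supplied, the forward pass returns the cross-entropy
    # loss first and the raw logits second.
    outputs = model(batch["input_ids"], attention_mask=batch["attention_mask"], labels=labels)
    loss = outputs[0]    # scalar training loss
    logits = outputs[1]  # shape (batch_size, num_labels)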
@@ -127,17 +127,9 @@ def train(
 
         # Initialize variables for early stopping
         best_validation_loss = float("inf")
-        patience = 3 # Number of epochs to wait for improvement
+        patience = 5 # Number of epochs to wait for improvement
         wait = 0
 
-        class_weights = compute_class_weight('balanced', classes=np.unique(label), y=label)
-        class_weights = torch.tensor(class_weights, dtype=torch.float32).to(self.device)
-
-        # Define the loss function with class weights
-        # loss_function = torch.nn.CrossEntropyLoss(weight=class_weights)
-        pos_weight = torch.tensor(class_weights[1], dtype=torch.float32).to(self.device)
-        loss_function = torch.nn.BCEWithLogitsLoss(pos_weight=pos_weight)
-
         for epoch in range(self.num_epochs):
             print(f'{"="*20} Epoch {epoch + 1}/{self.num_epochs} {"="*20}')
 
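For contrast, the block removed above built a class-weighted binary cross-entropy by hand. A self-contained sketch of that removed pattern, with toy labels standing in for the real training data:

    import numpy as np
    import torch
    import torch.nn.functional as F
    from sklearn.utils.class_weight import compute_class_weight

    label = np.array([0, 0, 0, 1])  # imbalanced toy labels

    # 'balanced' weights come out as n_samples / (n_classes * class_count),
    # here [0.67, 2.0], so the rare positive class is up-weighted.
    class_weights = compute_class_weight('balanced', classes=np.unique(label), y=label)
    pos_weight = torch.tensor(class_weights[1], dtype=torch.float32)
    loss_function = torch.nn.BCEWithLogitsLoss(pos_weight=pos_weight)

    logits = torch.randn(4, 2)  # stand-in model outputs
    targets = F.one_hot(torch.tensor(label), num_classes=2).float()
    loss = loss_function(logits, targets)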
@@ -152,25 +144,18 @@ def train(
 
                 # Forward pass
                 outputs = self.model(b_input_ids, attention_mask=b_input_mask, labels=b_labels)
-                logits = outputs.logits
-
-                # sigmoid_output = torch.sigmoid(logits[:, 1])
-
-                # # Thresholding to convert probabilities to binary values (0 or 1)
-                # binary_output = (sigmoid_output > 0.5)
-
-                # # Convert labels to one-hot encoding
-                b_labels_one_hot = F.one_hot(b_labels, num_classes=2).float()
-
-                # Calculate the loss using the weighted loss function
-                loss = loss_function(logits, b_labels_one_hot)
+                loss = outputs[0]
+                logits = outputs[1]
 
                 total_train_loss += loss.item()
 
                 # Backward pass
                 loss.backward()
 
                 # Update the model parameters
                 optimizer.step()
 
                 # Update the learning rate
                 scheduler.step()
 
                 if step % 100 == 0 and step != 0:
@@ -194,19 +179,9 @@ def train(
 
                 with torch.no_grad():
                     outputs = self.model(b_input_ids, attention_mask=b_input_mask, labels=b_labels)
-                    # loss = outputs[0]
-                    logits = outputs.logits
-
-                    # sigmoid_output = torch.sigmoid(logits[:, 1])
-
-                    # # Thresholding to convert probabilities to binary values (0 or 1)
-                    # binary_output = (sigmoid_output > 0.5)
-
-                    # # Convert labels to one-hot encoding
-                    b_labels_one_hot = F.one_hot(b_labels, num_classes=2).float()
-
-                    # Calculate the loss using the weighted loss function
-                    loss = loss_function(logits, b_labels_one_hot)
+                    loss = outputs[0]
+                    logits = outputs[1]
 
                 total_eval_loss += loss.item()
                 logits = logits.detach().cpu().numpy()
                 label_ids = b_labels.detach().cpu().numpy()
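
Note that the unchanged context lines above still move logits and labels to numpy, while the reworked accuracy helper later in this diff calls torch.max and therefore expects tensors. Assuming those numpy arrays are what reaches the helper, a caller would first need to convert back, roughly:

    import numpy as np
    import torch

    logits = np.array([[2.0, -1.0], [0.3, 0.8]])  # numpy logits from the eval loop
    label_ids = np.array([0, 1])

    preds_t = torch.from_numpy(logits)       # back to tensors for the torch-based helper
    labels_t = torch.from_numpy(label_ids)
    _, pred_classes = torch.max(preds_t, dim=1)
    acc = torch.sum(pred_classes == labels_t).item() / len(pred_classes)  # 1.0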
@@ -286,16 +261,15 @@ def predict(
 
             with torch.no_grad():
                 outputs = self.model(b_input_ids, attention_mask=b_input_mask)
-                logits = outputs.logits
+                loss = outputs[0]
+                logits = outputs[1]
 
-            logits = logits.detach().cpu().numpy()
+            _, prediction= torch.max(logits, dim=1)
 
-            # Apply a threshold (e.g., 0.5) to convert logits to class predictions
-            class_predictions = np.argmax(logits, axis=1)
-            predictions.extend(class_predictions.tolist())
+            predictions.extend(prediction.cpu().numpy().tolist())
 
         return predictions
 
     def save_model(
         self,
         path: str
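
The new predict path reads class ids straight off the logits with torch.max, which returns a (values, indices) pair; the indices are the argmax classes, matching what np.argmax produced before. A toy illustration with invented values:

    import torch

    logits = torch.tensor([[1.2, -0.4], [-2.0, 0.5]])  # fake batch of class scores
    _, prediction = torch.max(logits, dim=1)           # index of the larger logit per row
    predictions = prediction.cpu().numpy().tolist()    # [0, 1]

One caveat worth flagging: when no labels are passed, a Hugging Face classification model returns no loss, so whether outputs[0] and outputs[1] really line up as loss and logits in this predict path depends on the model's return format.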
@@ -327,9 +301,9 @@ def accuracy(
             float: The accuracy of the model.
         """
 
-        pred_flat = np.argmax(preds, axis=1).flatten()
-        labels_flat = labels.flatten()
-        return np.sum(pred_flat == labels_flat) / len(labels_flat)
+        _, preds = torch.max(preds, dim=1)
+
+        return torch.tensor(torch.sum(preds == labels).item() / len(preds))
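
The reworked helper takes the argmax class per row and returns the fraction of matches as a tensor. A quick worked check with invented values:

    import torch

    preds = torch.tensor([[0.1, 0.9], [1.5, -0.2], [0.0, 2.0]])  # logits
    labels = torch.tensor([1, 0, 0])

    _, pred_classes = torch.max(preds, dim=1)  # tensor([1, 0, 1])
    acc = torch.tensor(torch.sum(pred_classes == labels).item() / len(pred_classes))
    # acc == tensor(0.6667): two of the three predictions match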


class DistilbertModel:
@@ -434,17 +408,9 @@ def train(
 
         # Initialize variables for early stopping
         best_validation_loss = float("inf")
-        patience = 3 # Number of epochs to wait for improvement
+        patience = 5 # Number of epochs to wait for improvement
         wait = 0
 
-        class_weights = compute_class_weight('balanced', classes=np.unique(label), y=label)
-        class_weights = torch.tensor(class_weights, dtype=torch.float32).to(self.device)
-
-        # Define the loss function with class weights
-        # loss_function = torch.nn.CrossEntropyLoss(weight=class_weights)
-        pos_weight = torch.tensor(class_weights[1], dtype=torch.float32).to(self.device)
-        loss_function = torch.nn.BCEWithLogitsLoss(pos_weight=pos_weight)
-
         for epoch in range(self.num_epochs):
             print(f'{"="*20} Epoch {epoch + 1}/{self.num_epochs} {"="*20}')
 
@@ -459,18 +425,8 @@ def train(
 
                 # Forward pass
                 outputs = self.model(b_input_ids, attention_mask=b_input_mask, labels=b_labels)
-                logits = outputs.logits
-
-                # sigmoid_output = torch.sigmoid(logits[:, 1])
-
-                # # Thresholding to convert probabilities to binary values (0 or 1)
-                # binary_output = (sigmoid_output > 0.5)
-
-                # # Convert labels to one-hot encoding
-                b_labels_one_hot = F.one_hot(b_labels, num_classes=2).float()
-
-                # Calculate the loss using the weighted loss function
-                loss = loss_function(logits, b_labels_one_hot)
+                loss = outputs[0]
+                logits = outputs[1]
 
                 total_train_loss += loss.item()
 
@@ -479,6 +435,8 @@ def train(
 
                 # Update the model parameters
                 optimizer.step()
+
+                # Update the learning rate
                 scheduler.step()
 
                 if step % 100 == 0 and step != 0:
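
The lines added here document the step order: the optimizer updates the weights, then the scheduler advances the learning-rate schedule, once per batch. A minimal sketch of that loop shape, assuming transformers' get_linear_schedule_with_warmup (the actual scheduler setup is outside this diff):

    import torch
    from transformers import get_linear_schedule_with_warmup

    model = torch.nn.Linear(4, 2)  # stand-in for the classifier
    optimizer = torch.optim.AdamW(model.parameters(), lr=2e-5)
    scheduler = get_linear_schedule_with_warmup(
        optimizer, num_warmup_steps=0, num_training_steps=100)

    for step in range(3):
        loss = model(torch.randn(8, 4)).sum()  # dummy forward pass
        loss.backward()
        optimizer.step()    # update the model parameters
        scheduler.step()    # then step the learning rate
        optimizer.zero_grad()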
@@ -502,18 +460,9 @@ def train(
 
                 with torch.no_grad():
                     outputs = self.model(b_input_ids, attention_mask=b_input_mask, labels=b_labels)
-                    logits = outputs.logits
-
-                    # sigmoid_output = torch.sigmoid(logits[:, 1])
-
-                    # # Thresholding to convert probabilities to binary values (0 or 1)
-                    # binary_output = (sigmoid_output > 0.5)
-
-                    # # Convert labels to one-hot encoding
-                    b_labels_one_hot = F.one_hot(b_labels, num_classes=2).float()
-
-                    # Calculate the loss using the weighted loss function
-                    loss = loss_function(logits, b_labels_one_hot)
+                    loss = outputs[0]
+                    logits = outputs[1]
 
                 total_eval_loss += loss.item()
                 logits = logits.detach().cpu().numpy()
                 label_ids = b_labels.detach().cpu().numpy()
@@ -593,16 +542,15 @@ def predict(
 
             with torch.no_grad():
                 outputs = self.model(b_input_ids, attention_mask=b_input_mask)
-                logits = outputs.logits
+                loss = outputs[0]
+                logits = outputs[1]
 
-            logits = logits.detach().cpu().numpy()
+            _, prediction= torch.max(logits, dim=1)
 
-            # Apply a threshold (e.g., 0.5) to convert logits to class predictions
-            class_predictions = np.argmax(logits, axis=1)
-            predictions.extend(class_predictions.tolist())
+            predictions.extend(prediction.cpu().numpy().tolist())
 
         return predictions
 
     def save_model(
         self,
         path: str
@@ -634,9 +582,9 @@ def accuracy(
             float: The accuracy of the model.
         """
 
-        pred_flat = np.argmax(preds, axis=1).flatten()
-        labels_flat = labels.flatten()
-        return np.sum(pred_flat == labels_flat) / len(labels_flat)
+        _, preds = torch.max(preds, dim=1)
+
+        return torch.tensor(torch.sum(preds == labels).item() / len(preds))


class SVMModel:
