From b7cd9fa39247651f62eb9cd1317068faf622308f Mon Sep 17 00:00:00 2001
From: Elizaveta Sivak <30319803+lisasivak@users.noreply.github.com>
Date: Tue, 30 Apr 2024 15:10:10 +0200
Subject: [PATCH 1/3] Update score.py

update the formula to account for missing values in predictions
---
 score.py | 33 +++++++++++++++++++++++++--------
 1 file changed, 25 insertions(+), 8 deletions(-)

diff --git a/score.py b/score.py
index e294c41..9a6dc75 100644
--- a/score.py
+++ b/score.py
@@ -13,6 +13,17 @@
 The predictions need to be in a separate file with two columns
 (nomem_encr, prediction).
 
+Update from April 30:
+Starting from the second intermediate leaderboard, we use this updated `score.py` script.
+When calculating recall, we now take into account not only the cases when a predicted value was available (i.e., not missing) but all cases in the holdout set.
+Specifically, in the updated script, we divide the number of true positives by the total number of positive cases in the ground truth data
+(i.e., the number of people who actually had a new child), rather than by the sum of true positives and false negatives.
+This change only matters if there are missing values in predictions.
+We made this change to avoid a situation where a model makes very accurate predictions for only a small number of cases
+(where the remaining cases were not predicted because of missing values on predictor variables),
+yet gets the same result as a model that makes similar accurate predictions but for all cases.
+Commented lines of code were part of our original scoring function.
+
 """
 
 import sys
@@ -55,26 +66,32 @@ def score(prediction_path, ground_truth_path, output):
         merged_df
     )
 
-    # Calculate true positives, false positives, and false negatives
+    # Calculate true positives and false positives
     true_positives = len(
         merged_df[(merged_df["prediction"] == 1) & (merged_df["new_child"] == 1)]
     )
     false_positives = len(
         merged_df[(merged_df["prediction"] == 1) & (merged_df["new_child"] == 0)]
     )
-    false_negatives = len(
-        merged_df[(merged_df["prediction"] == 0) & (merged_df["new_child"] == 1)]
-    )
+    #false_negatives = len(
+    #    merged_df[(merged_df["prediction"] == 0) & (merged_df["new_child"] == 1)]
+    #)
+    # Calculate the actual number of positive instances (N of people who actually had a new child) for calculating recall
+    n_all_positive_instances = len(merged_df[merged_df["new_child"] == 1])
+
 
     # Calculate precision, recall, and F1 score
     try:
         precision = true_positives / (true_positives + false_positives)
     except ZeroDivisionError:
         precision = 0
-    try:
-        recall = true_positives / (true_positives + false_negatives)
-    except ZeroDivisionError:
-        recall = 0
+    #try:
+    #recall = true_positives / (true_positives + false_negatives)
+    #except ZeroDivisionError:
+    #recall = 0
+
+    recall = true_positives / n_all_positive_instances
+
     try:
         f1_score = 2 * (precision * recall) / (precision + recall)
     except ZeroDivisionError:
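For readers skimming the series: a minimal sketch of what PATCH 1/3 changes, on hypothetical toy data (this snippet is not part of score.py, and it assumes the merge keeps every holdout row, as the April 30 note above describes). A model that predicts only 4 of 10 actual positives, all correctly, scores recall 1.0 under the old formula but 0.4 under the new one:

    # Toy illustration of the recall change; hypothetical data, not part of score.py.
    import pandas as pd

    # Ground truth: 10 people who all actually had a new child.
    ground_truth = pd.DataFrame({"nomem_encr": range(10), "new_child": [1] * 10})
    # Predictions exist for only 4 people (all correct); the rest are missing.
    predictions = pd.DataFrame({"nomem_encr": range(4), "prediction": [1] * 4})

    # Assumed join: keep all holdout rows, so missing predictions become NaN.
    merged_df = ground_truth.merge(predictions, on="nomem_encr", how="left")

    true_positives = len(
        merged_df[(merged_df["prediction"] == 1) & (merged_df["new_child"] == 1)]
    )
    # NaN == 0 is False, so unscored positives never register as false negatives.
    false_negatives = len(
        merged_df[(merged_df["prediction"] == 0) & (merged_df["new_child"] == 1)]
    )
    n_all_positive_instances = len(merged_df[merged_df["new_child"] == 1])

    print(true_positives / (true_positives + false_negatives))  # old recall: 1.0
    print(true_positives / n_all_positive_instances)            # new recall: 0.4

Under the old denominator the six unscored positives simply vanish, so the sparse model looks perfect; the new denominator charges it for every positive case in the holdout set.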
From 14887500ad7c45b090b29a41ab68572394f82a6a Mon Sep 17 00:00:00 2001
From: Jeroen
Date: Thu, 9 May 2024 07:53:48 +0200
Subject: [PATCH 2/3] Removed commented code
---
 score.py | 11 ++---------
 1 file changed, 2 insertions(+), 9 deletions(-)

diff --git a/score.py b/score.py
index 9a6dc75..a8a6b08 100644
--- a/score.py
+++ b/score.py
@@ -73,25 +73,18 @@ def score(prediction_path, ground_truth_path, output):
     false_positives = len(
         merged_df[(merged_df["prediction"] == 1) & (merged_df["new_child"] == 0)]
     )
-    #false_negatives = len(
-    #    merged_df[(merged_df["prediction"] == 0) & (merged_df["new_child"] == 1)]
-    #)
     # Calculate the actual number of positive instances (N of people who actually had a new child) for calculating recall
     n_all_positive_instances = len(merged_df[merged_df["new_child"] == 1])
-    
+
 
     # Calculate precision, recall, and F1 score
     try:
         precision = true_positives / (true_positives + false_positives)
     except ZeroDivisionError:
         precision = 0
-    #try:
-    #recall = true_positives / (true_positives + false_negatives)
-    #except ZeroDivisionError:
-    #recall = 0
 
     recall = true_positives / n_all_positive_instances
-    
+
     try:
         f1_score = 2 * (precision * recall) / (precision + recall)
     except ZeroDivisionError:

From 1cfade12763e4c7296d86f0ce69a4f624b33f92f Mon Sep 17 00:00:00 2001
From: Jeroen
Date: Thu, 9 May 2024 07:59:45 +0200
Subject: [PATCH 3/3] Ensure no crash in scoring when 0 n_all_positive_instances
---
 score.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/score.py b/score.py
index a8a6b08..23041cb 100644
--- a/score.py
+++ b/score.py
@@ -83,7 +83,10 @@ def score(prediction_path, ground_truth_path, output):
     except ZeroDivisionError:
         precision = 0
 
-    recall = true_positives / n_all_positive_instances
+    try:
+        recall = true_positives / n_all_positive_instances
+    except ZeroDivisionError:
+        recall = 0
 
     try:
         f1_score = 2 * (precision * recall) / (precision + recall)
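A standalone check of the PATCH 3/3 guard, with hypothetical counts (not part of score.py): true_positives and n_all_positive_instances come from len(), so they are plain Python ints, and dividing by a zero count raises ZeroDivisionError rather than yielding NaN. A holdout file with no positive cases would therefore have crashed the unguarded line:

    # Hypothetical edge case: a holdout set containing no positive cases.
    true_positives = 0
    n_all_positive_instances = 0

    try:
        recall = true_positives / n_all_positive_instances
    except ZeroDivisionError:
        recall = 0  # same fallback the guarded precision already uses

    print(recall)  # 0, instead of an unhandled ZeroDivisionError

With this patch, precision, recall, and the F1 score all fall back to 0 on degenerate inputs instead of aborting the scoring run.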