From b7cd9fa39247651f62eb9cd1317068faf622308f Mon Sep 17 00:00:00 2001
From: Elizaveta Sivak <30319803+lisasivak@users.noreply.github.com>
Date: Tue, 30 Apr 2024 15:10:10 +0200
Subject: [PATCH 1/3] Update score.py

update the formula to account for missing values in predictions
---
 score.py | 33 +++++++++++++++++++++++++--------
 1 file changed, 25 insertions(+), 8 deletions(-)

diff --git a/score.py b/score.py
index e294c41..9a6dc75 100644
--- a/score.py
+++ b/score.py
@@ -13,6 +13,17 @@
 The predictions need to be in a separate file with two columns
 (nomem_encr, prediction).
 
+Update from April 30:
+Starting from the second intermediate leaderboard, we use this updated `score.py` script.
+When calculating recall, we now take into account not only the cases when a predicted value was available (i.e., not missing) but all cases in the holdout set.
+Specifically, in the updated script, we divide the number of true positives by the total number of positive cases in the ground truth data
+(i.e., the number of people who actually had a new child), rather than by the sum of true positives and false negatives.
+This change only matters if there are missing values in predictions.
+We made this change to avoid a situation where a model makes very accurate predictions for only a small number of cases
+(where the remaining cases were not predicted because of missing values on predictor variables),
+yet gets the same result as a model that makes similar accurate predictions but for all cases.
+Commented lines of code were part of our original scoring function.
+
 """
 
 import sys
@@ -55,26 +66,32 @@ def score(prediction_path, ground_truth_path, output):
         merged_df
     )
 
-    # Calculate true positives, false positives, and false negatives
+    # Calculate true positives and false positives
     true_positives = len(
         merged_df[(merged_df["prediction"] == 1) & (merged_df["new_child"] == 1)]
     )
     false_positives = len(
         merged_df[(merged_df["prediction"] == 1) & (merged_df["new_child"] == 0)]
     )
-    false_negatives = len(
-        merged_df[(merged_df["prediction"] == 0) & (merged_df["new_child"] == 1)]
-    )
+    #false_negatives = len(
+    #    merged_df[(merged_df["prediction"] == 0) & (merged_df["new_child"] == 1)]
+    #)
+    # Calculate the actual number of positive instances (N of people who actually had a new child) for calculating recall
+    n_all_positive_instances = len(merged_df[merged_df["new_child"] == 1])
+
 
     # Calculate precision, recall, and F1 score
     try:
         precision = true_positives / (true_positives + false_positives)
     except ZeroDivisionError:
         precision = 0
-    try:
-        recall = true_positives / (true_positives + false_negatives)
-    except ZeroDivisionError:
-        recall = 0
+    #try:
+    #recall = true_positives / (true_positives + false_negatives)
+    #except ZeroDivisionError:
+    #recall = 0
+
+    recall = true_positives / n_all_positive_instances
+
     try:
         f1_score = 2 * (precision * recall) / (precision + recall)
     except ZeroDivisionError:
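For readers skimming the series: a minimal sketch of what PATCH 1/3 changes, on hypothetical toy data (this snippet is not part of score.py, and it assumes the merge keeps every holdout row, as the April 30 note above describes). A model that predicts only 4 of 10 actual positives, all correctly, scores recall 1.0 under the old formula but 0.4 under the new one:

    # Toy illustration of the recall change; hypothetical data, not part of score.py.
    import pandas as pd

    # Ground truth: 10 people who all actually had a new child.
    ground_truth = pd.DataFrame({"nomem_encr": range(10), "new_child": [1] * 10})
    # Predictions exist for only 4 people (all correct); the rest are missing.
    predictions = pd.DataFrame({"nomem_encr": range(4), "prediction": [1] * 4})

    # Assumed join: keep all holdout rows, so missing predictions become NaN.
    merged_df = ground_truth.merge(predictions, on="nomem_encr", how="left")

    true_positives = len(
        merged_df[(merged_df["prediction"] == 1) & (merged_df["new_child"] == 1)]
    )
    # NaN == 0 is False, so unscored positives never register as false negatives.
    false_negatives = len(
        merged_df[(merged_df["prediction"] == 0) & (merged_df["new_child"] == 1)]
    )
    n_all_positive_instances = len(merged_df[merged_df["new_child"] == 1])

    print(true_positives / (true_positives + false_negatives))  # old recall: 1.0
    print(true_positives / n_all_positive_instances)            # new recall: 0.4

Under the old denominator the six unscored positives simply vanish, so the sparse model looks perfect; the new denominator charges it for every positive case in the holdout set.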
From 14887500ad7c45b090b29a41ab68572394f82a6a Mon Sep 17 00:00:00 2001
From: Jeroen
Date: Thu, 9 May 2024 07:53:48 +0200
Subject: [PATCH 2/3] Removed commented code
---
 score.py | 11 ++---------
 1 file changed, 2 insertions(+), 9 deletions(-)

diff --git a/score.py b/score.py
index 9a6dc75..a8a6b08 100644
--- a/score.py
+++ b/score.py
@@ -73,25 +73,18 @@ def score(prediction_path, ground_truth_path, output):
     false_positives = len(
         merged_df[(merged_df["prediction"] == 1) & (merged_df["new_child"] == 0)]
     )
-    #false_negatives = len(
-    #    merged_df[(merged_df["prediction"] == 0) & (merged_df["new_child"] == 1)]
-    #)
     # Calculate the actual number of positive instances (N of people who actually had a new child) for calculating recall
     n_all_positive_instances = len(merged_df[merged_df["new_child"] == 1])
-    
+
 
     # Calculate precision, recall, and F1 score
     try:
         precision = true_positives / (true_positives + false_positives)
     except ZeroDivisionError:
         precision = 0
-    #try:
-    #recall = true_positives / (true_positives + false_negatives)
-    #except ZeroDivisionError:
-    #recall = 0
 
     recall = true_positives / n_all_positive_instances
-    
+
     try:
         f1_score = 2 * (precision * recall) / (precision + recall)
     except ZeroDivisionError:

From 1cfade12763e4c7296d86f0ce69a4f624b33f92f Mon Sep 17 00:00:00 2001
From: Jeroen
Date: Thu, 9 May 2024 07:59:45 +0200
Subject: [PATCH 3/3] Ensure no crash in scoring when 0 n_all_positive_instances
---
 score.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/score.py b/score.py
index a8a6b08..23041cb 100644
--- a/score.py
+++ b/score.py
@@ -83,7 +83,10 @@ def score(prediction_path, ground_truth_path, output):
     except ZeroDivisionError:
         precision = 0
 
-    recall = true_positives / n_all_positive_instances
+    try:
+        recall = true_positives / n_all_positive_instances
+    except ZeroDivisionError:
+        recall = 0
 
     try:
         f1_score = 2 * (precision * recall) / (precision + recall)
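A standalone check of the PATCH 3/3 guard, with hypothetical counts (not part of score.py): true_positives and n_all_positive_instances come from len(), so they are plain Python ints, and dividing by a zero count raises ZeroDivisionError rather than yielding NaN. A holdout file with no positive cases would therefore have crashed the unguarded line:

    # Hypothetical edge case: a holdout set containing no positive cases.
    true_positives = 0
    n_all_positive_instances = 0

    try:
        recall = true_positives / n_all_positive_instances
    except ZeroDivisionError:
        recall = 0  # same fallback the guarded precision already uses

    print(recall)  # 0, instead of an unhandled ZeroDivisionError

With this patch, precision, recall, and the F1 score all fall back to 0 on degenerate inputs instead of aborting the scoring run.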