From 175092f6ba2de5939f72165addbe2ad079b0d5fe Mon Sep 17 00:00:00 2001
From: hjabbot <42661345+hjabbot@users.noreply.github.com>
Date: Tue, 3 Sep 2024 10:04:56 +0100
Subject: [PATCH 1/8] Generalising splitting conditions between curl and dmag,
 deprecating Reynolds

---
 .../dataloaders/vector/abstract_vector.py     | 31 ++++++++++++-------
 1 file changed, 19 insertions(+), 12 deletions(-)

diff --git a/meshiphi/dataloaders/vector/abstract_vector.py b/meshiphi/dataloaders/vector/abstract_vector.py
index ed89a642..29c3becd 100644
--- a/meshiphi/dataloaders/vector/abstract_vector.py
+++ b/meshiphi/dataloaders/vector/abstract_vector.py
@@ -558,20 +558,27 @@ def get_hom_condition(self, bounds, splitting_conds, agg_type='MEAN', data=None)
             # To allow multiple modes of splitting, chuck them in the splitting conditions
             # Split if magnitude of curl(data) is larger than threshold 
             if 'curl' in splitting_conds:
-                curl = self.calc_curl(bounds)
-                if np.abs(curl) > splitting_conds['curl']:
-                    hom_type =  'HET'
+                flow = self.calc_curl(bounds, collapse=False)
+                sc = splitting_conds['curl']
             # Split if max magnitude(any_vector - ave_vector) is larger than threshold
-            if 'dmag' in splitting_conds:
-                dmag = self.calc_dmag(bounds)
-                if np.abs(dmag) > splitting_conds['dmag']:
-                    hom_type = 'HET'
+            elif 'dmag' in splitting_conds:
+                flow = self.calc_dmag(bounds, collapse=False)
+                sc = splitting_conds['dmag']
+
+            if isinstance(flow, type(np.nan)) and np.isnan(flow):
+                return "CLR"
+            num_over_threshold = (flow > sc['threshold']).sum()
+            frac_over_threshold = num_over_threshold / flow.size
+
+            if   frac_over_threshold <= sc['lower_bound']: 
+                hom_type = "CLR"
+            elif frac_over_threshold >= sc['upper_bound']:
+                if splitting_conds['split_lock'] == True:
+                    hom_type = "HOM"
+                else: 
+                    hom_type = "CLR"
+            else: hom_type = "HET"
                 
-            # Split if Reynolds number is larger than threshold
-            if 'reynolds' in splitting_conds:        
-                reynolds = self.calc_reynolds_number(bounds)
-                if reynolds > splitting_conds['reynolds']:
-                    hom_type = 'HET'
 
         logging.debug(f"\thom_condition for attribute: '{self.data_name}' in bounds:'{bounds}' returned '{hom_type}'")
         

From 278c73e83d4dfcb29b8151c08e3104f2c1e7012c Mon Sep 17 00:00:00 2001
From: hjabbot <42661345+hjabbot@users.noreply.github.com>
Date: Tue, 3 Sep 2024 10:06:44 +0100
Subject: [PATCH 2/8] Fixing crash on MIN/MAX aggregation when NaN is returned

---
 meshiphi/dataloaders/vector/abstract_vector.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/meshiphi/dataloaders/vector/abstract_vector.py b/meshiphi/dataloaders/vector/abstract_vector.py
index 29c3becd..c0707478 100644
--- a/meshiphi/dataloaders/vector/abstract_vector.py
+++ b/meshiphi/dataloaders/vector/abstract_vector.py
@@ -422,10 +422,16 @@ def get_value_from_df(dps, variable_names, bounds, agg_type, skipna):
                 values = [data_count, data_count]
             elif agg_type == 'MIN':
                 index = dps['_magnitude'].idxmin(skipna=skipna)
-                values = [dps[name][index] for name in variable_names]
+                if ~np.isnan(index):
+                    values = [dps[name][index] for name in variable_names]
+                else:
+                    values = [np.nan for name in variable_names]
             elif agg_type == 'MAX':
                 index = dps['_magnitude'].idxmax(skipna=skipna)
-                values = [dps[name][index] for name in variable_names]
+                if ~np.isnan(index):
+                    values = [dps[name][index] for name in variable_names]
+                else:
+                    values = [np.nan for name in variable_names]
             elif agg_type == 'MEAN':
                 values = [dps[name].mean(skipna=skipna) for name in variable_names]
             elif agg_type == 'STD':

From 6ef536f6f5c22b04ae22658be3a559a89c4aebf1 Mon Sep 17 00:00:00 2001
From: hjabbot <42661345+hjabbot@users.noreply.github.com>
Date: Thu, 5 Sep 2024 13:00:33 +0100
Subject: [PATCH 3/8] Increment version number

---
 meshiphi/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/meshiphi/__init__.py b/meshiphi/__init__.py
index de93938d..13c67c61 100644
--- a/meshiphi/__init__.py
+++ b/meshiphi/__init__.py
@@ -1,4 +1,4 @@
-__version__ = "2.1.12"
+__version__ = "2.1.13"
 __description__ = "MeshiPhi: Earth's digital twin mapped on a non-uniform mesh"
 __license__ = "MIT"
 __author__ = "Autonomous Marine Operations Planning (AMOP) Team, AI Lab, British Antarctic Survey"

From 57f8f8f35481a270ce2e5f661ec976bdaa2f3873 Mon Sep 17 00:00:00 2001
From: hjabbot <42661345+hjabbot@users.noreply.github.com>
Date: Thu, 5 Sep 2024 13:01:20 +0100
Subject: [PATCH 4/8] Fixed bug where NaN values were being included in the
 calculation of frac_over_threshold

---
 meshiphi/dataloaders/scalar/abstract_scalar.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/meshiphi/dataloaders/scalar/abstract_scalar.py b/meshiphi/dataloaders/scalar/abstract_scalar.py
index 6f51f5ed..57f065c7 100644
--- a/meshiphi/dataloaders/scalar/abstract_scalar.py
+++ b/meshiphi/dataloaders/scalar/abstract_scalar.py
@@ -560,7 +560,11 @@ def get_hom_condition_from_xr(dps, splitting_conds):
             else:
                 # Determine fraction of datapoints over threshold value
                 num_over_threshold = np.count_nonzero(dps > splitting_conds['threshold'])
-                frac_over_threshold = num_over_threshold/dps.size
+                num_non_nan = np.count_nonzero(~np.isnan(dps))
+                if num_non_nan > 0:
+                    frac_over_threshold = num_over_threshold/num_non_nan
+                else:
+                    frac_over_threshold = 0
                 # Return homogeneity condition
                 if   frac_over_threshold <= splitting_conds['lower_bound']: hom_type = "CLR"
                 elif frac_over_threshold >= splitting_conds['upper_bound']: 

From 8b95b1a4d3b4249c379f46b7f04c3b00c14e654d Mon Sep 17 00:00:00 2001
From: hjabbot <42661345+hjabbot@users.noreply.github.com>
Date: Fri, 6 Sep 2024 09:32:57 +0100
Subject: [PATCH 5/8] Fixed dataframe version of bug where NaNs were counted
 in frac_over_threshold

---
 meshiphi/dataloaders/scalar/abstract_scalar.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/meshiphi/dataloaders/scalar/abstract_scalar.py b/meshiphi/dataloaders/scalar/abstract_scalar.py
index 57f065c7..36cb389c 100644
--- a/meshiphi/dataloaders/scalar/abstract_scalar.py
+++ b/meshiphi/dataloaders/scalar/abstract_scalar.py
@@ -524,7 +524,11 @@ def get_hom_condition_from_df(dps, splitting_conds):
             else:
                 # Determine fraction of datapoints over threshold value
                 num_over_threshold = dps[dps > splitting_conds['threshold']]
-                frac_over_threshold = num_over_threshold.shape[0]/dps.shape[0]
+                num_non_nan = np.count_nonzero(~np.isnan(dps))
+                if num_non_nan > 0:
+                    frac_over_threshold = num_over_threshold.shape[0]/num_non_nan
+                else:
+                    frac_over_threshold = 0
 
                 # Return homogeneity condition
                 if   frac_over_threshold <= splitting_conds['lower_bound']: hom_type = "CLR"

From 7cb91a6fb72633cf6d6f6a55af90d19fb9e0c6a6 Mon Sep 17 00:00:00 2001
From: hjabbot <42661345+hjabbot@users.noreply.github.com>
Date: Thu, 19 Sep 2024 11:43:48 +0100
Subject: [PATCH 6/8] Avoiding counting NaN values in frac_over_threshold for
 VectorDataLoader

---
 meshiphi/dataloaders/vector/abstract_vector.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/meshiphi/dataloaders/vector/abstract_vector.py b/meshiphi/dataloaders/vector/abstract_vector.py
index c0707478..b90260fe 100644
--- a/meshiphi/dataloaders/vector/abstract_vector.py
+++ b/meshiphi/dataloaders/vector/abstract_vector.py
@@ -574,7 +574,13 @@ def get_hom_condition(self, bounds, splitting_conds, agg_type='MEAN', data=None)
             if isinstance(flow, type(np.nan)) and np.isnan(flow):
                 return "CLR"
             num_over_threshold = (flow > sc['threshold']).sum()
-            frac_over_threshold = num_over_threshold / flow.size
+            
+            num_non_nan = np.count_nonzero(~np.isnan(flow))
+            if num_non_nan > 0:
+                frac_over_threshold = num_over_threshold/num_non_nan
+            else:
+                frac_over_threshold = 0
+
 
             if   frac_over_threshold <= sc['lower_bound']: 
                 hom_type = "CLR"

From f1c85e8a4eed998e333ab0b73c4c53d596b22c2d Mon Sep 17 00:00:00 2001
From: hjabbot <42661345+hjabbot@users.noreply.github.com>
Date: Thu, 19 Sep 2024 11:59:47 +0100
Subject: [PATCH 7/8] Fixing split lock key error in vector dataloader

---
 meshiphi/dataloaders/vector/abstract_vector.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/meshiphi/dataloaders/vector/abstract_vector.py b/meshiphi/dataloaders/vector/abstract_vector.py
index b90260fe..53dbf434 100644
--- a/meshiphi/dataloaders/vector/abstract_vector.py
+++ b/meshiphi/dataloaders/vector/abstract_vector.py
@@ -571,6 +571,9 @@ def get_hom_condition(self, bounds, splitting_conds, agg_type='MEAN', data=None)
                 flow = self.calc_dmag(bounds, collapse=False)
                 sc = splitting_conds['dmag']
 
+            if 'split_lock' not in sc:
+                sc['split_lock'] = False
+
             if isinstance(flow, type(np.nan)) and np.isnan(flow):
                 return "CLR"
             num_over_threshold = (flow > sc['threshold']).sum()
@@ -585,7 +588,7 @@ def get_hom_condition(self, bounds, splitting_conds, agg_type='MEAN', data=None)
             if   frac_over_threshold <= sc['lower_bound']: 
                 hom_type = "CLR"
             elif frac_over_threshold >= sc['upper_bound']:
-                if splitting_conds['split_lock'] == True:
+                if sc['split_lock'] == True:
                     hom_type = "HOM"
                 else: 
                     hom_type = "CLR"

From f8982e2072f7343888e38b1099b78172dc2ff465 Mon Sep 17 00:00:00 2001
From: hjabbot <42661345+hjabbot@users.noreply.github.com>
Date: Thu, 19 Sep 2024 12:44:05 +0100
Subject: [PATCH 8/8] Removing Reynolds- and divergence-based splitting

---
 .../dataloaders/vector/abstract_vector.py     | 88 +------------------
 1 file changed, 2 insertions(+), 86 deletions(-)

diff --git a/meshiphi/dataloaders/vector/abstract_vector.py b/meshiphi/dataloaders/vector/abstract_vector.py
index 53dbf434..26cc2220 100644
--- a/meshiphi/dataloaders/vector/abstract_vector.py
+++ b/meshiphi/dataloaders/vector/abstract_vector.py
@@ -925,89 +925,6 @@ def set_data_col_name_list(self, new_names):
         self.data_name_list = new_names
         return self.set_data_col_name(new_data_name)
 
-    def calc_reynolds_number(self, bounds):
-        '''
-        Calculates an approximate Reynolds number from the mean vector velocity
-        and cellbox size.
-        
-        CURRENTLY ASSUMES DENSITY AND VISCOSITY OF SEAWATER AT 4°C! 
-        WILL NEED MINOR REWORKING TO INCLUDE DIFFERENT FLUIDS
-        
-        Args:
-            bounds (Boundary): 
-                Cellbox boundary to calculate characteristic length from
-                
-        Returns:
-            float:
-                Reynolds number of cellbox
-        '''
-        # Extract the speed
-        velocity = self.get_value(bounds, agg_type='MEAN')
-        speed = np.linalg.norm(list(velocity.values())) # Calculates magnitude
-        # Extract the characteristic length
-        length = bounds.calc_size()
-        # Calculate the reynolds number and return
-        logging.warning("\tReynold number used for splitting, this function assumes properties of ocean water!")
-        return 1028 * 0.00167 * speed * length
-
-    def calc_divergence(self, bounds, data=None, collapse=True, agg_type='MAX'):
-        '''
-        Calculates the divergence of vectors in a cellbox
-        
-        Args:
-            bounds (Boundary):
-                Cellbox boundary in which all relevant vectors are contained
-            data (pd.DataFrame or xr.Dataset):
-                Dataset with 'lat' and 'long' columns/dimensions with vectors
-            collapes (bool): 
-                Flag determining whether to return an aggregated value, or a 
-                vector field (values for each individual vector).
-            agg_type (str):
-                Method of aggregation if collapsing value. 
-                Accepts 'MAX' or 'MEAN'
-        
-        Returns:
-            float or pd.DataFrame:
-                float value of aggregated div if collapse=True, or
-                pd.DataFrame of div vector field if collapse=False 
-
-        Raises:
-            ValueError: If agg_type is not 'MAX' or 'MEAN'
-        '''
-        if data is None:    dps = self.trim_datapoints(bounds, data=data)
-        else:               dps = data
-        
-        # Create a meshgrid of vectors from the data
-        vector_field = self._create_vector_meshgrid(dps, self.data_name_list)
-
-        # Get component values for each vector
-        fx, fy = vector_field[:, :, 0], vector_field[:, :, 1]
-        # If not enough datapoints to compute gradient
-        if 1 in fx.shape or 1 in fy.shape:
-            logging.debug('\tUnable to compute gradient across cell for divergence calculation')
-            div = np.nan
-        else:
-            # Compute partial derivatives
-            dfx_dy = np.gradient(fx, axis=1)
-            dfy_dx = np.gradient(fy, axis=0)
-            # Compute curl
-            div = dfy_dx + dfx_dy
-        
-        # If div is nan
-        if np.isnan(div).all():
-            logging.debug('\tAll NaN cellbox encountered')
-            return np.nan
-        # If want to collapse to max mag value, return scalar
-        elif collapse:   
-            if agg_type == 'MAX':       return max(np.nanmax(div), np.nanmin(div), key=abs)
-            elif agg_type == 'MEAN':    return np.nanmean(div)
-            else: 
-                raise ValueError(f"agg_type '{agg_type}' not understood! Requires 'MAX' or 'MEAN'")
-        # Else return field
-        else:
-            return div
-
-
     def calc_curl(self, bounds, data=None, collapse=True, agg_type='MAX'):
         '''
         Calculates the curl of vectors in a cellbox
@@ -1049,7 +966,7 @@ def calc_curl(self, bounds, data=None, collapse=True, agg_type='MAX'):
             # Compute curl
             curl = dfy_dx - dfx_dy
 
-        # If div is nan
+        # If curl is nan
         if np.isnan(curl).all():
             logging.debug('\tAll NaN cellbox encountered')
             return np.nan
@@ -1103,7 +1020,7 @@ def calc_dmag(self, bounds, data=None, collapse=True, agg_type='MEAN'):
         if len(d_mag) == 0:
             logging.debug('\tEmpty cellbox encountered')
             return np.nan
-        # If div is nan
+        # If d_mag is nan
         elif np.isnan(d_mag).all():
             logging.debug('\tAll NaN cellbox encountered')
             return np.nan
@@ -1116,7 +1033,6 @@ def calc_dmag(self, bounds, data=None, collapse=True, agg_type='MEAN'):
         # Else return field
         else:          return d_mag
 
-    
     @staticmethod
     def _create_vector_meshgrid(data, data_name_list):
         '''