From 712506065491e6f5a55fe7df2477796edd11746c Mon Sep 17 00:00:00 2001 From: Emma Ai Date: Wed, 27 Nov 2024 05:10:18 +0000 Subject: [PATCH 1/8] fix typo on class code in landcover level4 --- odc/stats/plugins/l34_utils/l4_natural_aquatic.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/odc/stats/plugins/l34_utils/l4_natural_aquatic.py b/odc/stats/plugins/l34_utils/l4_natural_aquatic.py index 1a7b9ad9..0bf9b956 100644 --- a/odc/stats/plugins/l34_utils/l4_natural_aquatic.py +++ b/odc/stats/plugins/l34_utils/l4_natural_aquatic.py @@ -302,15 +302,15 @@ def natural_auquatic_veg(l4, veg_cover, water_season): { "a": res, }, - name="mark_final", + name="mark_herbaceous", dtype="uint8", ) res = expr_eval( - "where((a>=253)&(a<=254), 58, a)", + "where((a>=253)&(a<=254), 56, a)", { "a": res, }, - name="mark_final", + name="mark_woody", dtype="uint8", ) From 2ddd9d1be5b83d87442ea575762455603a5af135 Mon Sep 17 00:00:00 2001 From: Emma Ai Date: Wed, 27 Nov 2024 11:51:24 +0000 Subject: [PATCH 2/8] classify landcover level34 by class def and code csv --- odc/stats/plugins/_registry.py | 1 - odc/stats/plugins/_utils.py | 98 ++++++ odc/stats/plugins/l34_utils/__init__.py | 0 .../plugins/l34_utils/l4_bare_gradation.py | 46 --- odc/stats/plugins/l34_utils/l4_cultivated.py | 98 ------ .../plugins/l34_utils/l4_natural_aquatic.py | 317 ------------------ odc/stats/plugins/l34_utils/l4_natural_veg.py | 133 -------- odc/stats/plugins/l34_utils/l4_surface.py | 37 -- odc/stats/plugins/l34_utils/l4_veg_cover.py | 90 ----- odc/stats/plugins/l34_utils/l4_water.py | 65 ---- .../plugins/l34_utils/l4_water_persistence.py | 56 ---- odc/stats/plugins/l34_utils/lc_level3.py | 75 ----- odc/stats/plugins/lc_level34.py | 138 ++++---- 13 files changed, 179 insertions(+), 975 deletions(-) delete mode 100644 odc/stats/plugins/l34_utils/__init__.py delete mode 100644 odc/stats/plugins/l34_utils/l4_bare_gradation.py delete mode 100644 odc/stats/plugins/l34_utils/l4_cultivated.py delete mode 100644 odc/stats/plugins/l34_utils/l4_natural_aquatic.py delete mode 100644 odc/stats/plugins/l34_utils/l4_natural_veg.py delete mode 100644 odc/stats/plugins/l34_utils/l4_surface.py delete mode 100644 odc/stats/plugins/l34_utils/l4_veg_cover.py delete mode 100644 odc/stats/plugins/l34_utils/l4_water.py delete mode 100644 odc/stats/plugins/l34_utils/l4_water_persistence.py delete mode 100644 odc/stats/plugins/l34_utils/lc_level3.py diff --git a/odc/stats/plugins/_registry.py b/odc/stats/plugins/_registry.py index 3f428d89..f9d8c649 100644 --- a/odc/stats/plugins/_registry.py +++ b/odc/stats/plugins/_registry.py @@ -40,7 +40,6 @@ def import_all(): # TODO: make that more automatic modules = [ "odc.stats.plugins.lc_treelite_cultivated", - "odc.stats.plugins.lc_level3", "odc.stats.plugins.lc_treelite_woody", "odc.stats.plugins.lc_tf_urban", "odc.stats.plugins.lc_level34", diff --git a/odc/stats/plugins/_utils.py b/odc/stats/plugins/_utils.py index 06f218a8..b4deab91 100644 --- a/odc/stats/plugins/_utils.py +++ b/odc/stats/plugins/_utils.py @@ -1,3 +1,5 @@ +import re +import operator import dask from osgeo import gdal, ogr, osr @@ -42,3 +44,99 @@ def rasterize_vector_mask( return dask.array.ones(dst_shape, name=False) return dask.array.from_array(mask.reshape(dst_shape), name=False) + + +OPERATORS = { + ">": operator.gt, + ">=": operator.ge, + "<": operator.lt, + "<=": operator.le, + "==": operator.eq, + "!=": operator.ne, +} + +BRACKETS = { + "[": operator.ge, # Inclusive lower bound + "(": operator.gt, # Exclusive lower bound + "]": operator.le, # Inclusive upper bound + ")": operator.lt, # Exclusive upper bound +} + + +def parse_rule(rule): + """ + Parse a single condition or range condition. + Supports range notations like '[]', '[)', '(]', and '()', + and treats standalone numbers as '=='. + """ + # Special case for 255 (rule doesn't apply) + if (rule == "255") | (rule == "nan"): + return None + + # Check for range conditions like '[a, b)' or '(a, b]' + range_pattern = r"([\[\(])(-?\d+\.?\d*),\s*(-?\d+\.?\d*)([\]\)])" + match = re.match(range_pattern, rule) + if match: + # Extract the bounds and the bracket types + lower_bracket, lower_value, upper_value, upper_bracket = match.groups() + return [ + (BRACKETS[lower_bracket], float(lower_value)), + (BRACKETS[upper_bracket], float(upper_value)), + ] + + ordered_operators = sorted(OPERATORS.items(), key=lambda x: -len(x[0])) + + # Single condition (no range notation, no explicit operator) + for op_str, op_func in ordered_operators: + if op_str in rule: + value = float(rule.replace(op_str, "").strip()) + return [(op_func, value)] + + # Default to equality (==) if no operator is found + return [(operator.eq, int(rule.strip()))] + + +def generate_numexpr_expressions(rules_df, final_class_column, previous): + """ + Generate a list of numexpr-compatible expressions for classification rules. + :param rules_df: DataFrame containing the classification rules + :param final_class_column: Name of the column containing the final class values + :return: List of expressions (one for each rule) + """ + expressions = [] + + for _, rules in rules_df.iterrows(): + conditions = [] + + for col in rules.index: + if col == final_class_column: + continue + subconditions = parse_rule(rules[col]) + if subconditions is None: # Skip rule if it's None + continue + for op_func, value in subconditions: + if op_func is operator.eq: + conditions.append(f"({col}=={value})") + elif op_func is operator.gt: + conditions.append(f"({col}>{value})") + elif op_func is operator.ge: + conditions.append(f"({col}>={value})") + elif op_func is operator.lt: + conditions.append(f"({col}<{value})") + elif op_func is operator.le: + conditions.append(f"({col}<={value})") + elif op_func is operator.ne: + conditions.append(f"({col}!={value})") + + if not conditions: + continue + + condition = "&".join(conditions) + + final_class = rules[final_class_column] + expressions.append(f"where({condition}, {final_class}, {previous})") + + expressions = list(set(expressions)) + expressions = sorted(expressions, key=len) + + return expressions diff --git a/odc/stats/plugins/l34_utils/__init__.py b/odc/stats/plugins/l34_utils/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/odc/stats/plugins/l34_utils/l4_bare_gradation.py b/odc/stats/plugins/l34_utils/l4_bare_gradation.py deleted file mode 100644 index 45e08804..00000000 --- a/odc/stats/plugins/l34_utils/l4_bare_gradation.py +++ /dev/null @@ -1,46 +0,0 @@ -import xarray as xr -from odc.stats._algebra import expr_eval - - -NODATA = 255 - - -def bare_gradation(xx: xr.Dataset, bare_threshold, veg_cover): - - # Address nodata - bs_pc_50 = expr_eval( - "where((a!=a), nodata, a)", - {"a": xx.bs_pc_50.data}, - name="mark_bare_gradation_nodata", - dtype="float32", - **{"nodata": NODATA}, - ) - - # 60% <= data --> 15 - bs_mask = expr_eval( - "where((a>=m)&(a!=nodata), 15, a)", - {"a": bs_pc_50}, - name="mark_bare", - dtype="uint8", - **{"m": bare_threshold[1], "nodata": NODATA}, - ) - - # 20% <= data < 60% --> 12 - bs_mask = expr_eval( - "where((a>=m)&(a 10 - bs_mask = expr_eval( - "where(a woody - # everything else -> herbaceous - - water_seasonality = expr_eval( - "where((a==a), a, nodata)", - { - "a": water_season, - }, - name="mark_water_season", - dtype="float32", - **{"nodata": NODATA}, - ) - - res = expr_eval( - "where((a==124), 56, a)", - { - "a": l4, - }, - name="mark_woody", - dtype="uint8", - ) - - res = expr_eval( - "where((a==125), 57, a)", - { - "a": res, - }, - name="mark_herbaceous", - dtype="uint8", - ) - - # res = expr_eval( - # "where((a!=124)|(a!=125), 255, a)", - # { - # "a": res, - # }, - # name="mark_nodata", - # dtype="uint8", - # ) - - # mark water season - # use some value not used in final class - res = expr_eval( - "where((a==56)&(b==1), 254, a)", - { - "a": res, - "b": water_seasonality, - }, - name="mark_water_season", - dtype="uint8", - ) - - res = expr_eval( - "where((a==56)&(b==2), 253, a)", - { - "a": res, - "b": water_seasonality, - }, - name="mark_water_season", - dtype="uint8", - ) - - res = expr_eval( - "where((a==57)&(b==1), 252, a)", - { - "a": res, - "b": water_seasonality, - }, - name="mark_water_season", - dtype="uint8", - ) - - res = expr_eval( - "where((a==57)&(b==2), 251, a)", - { - "a": res, - "b": water_seasonality, - }, - name="mark_water_season", - dtype="uint8", - ) - - # mark final - - res = expr_eval( - "where((a==254)&(b==10), 64, a)", - { - "a": res, - "b": veg_cover, - }, - name="mark_final", - dtype="uint8", - ) - - res = expr_eval( - "where((a==253)&(b==10), 65, a)", - { - "a": res, - "b": veg_cover, - }, - name="mark_final", - dtype="uint8", - ) - - res = expr_eval( - "where((a==252)&(b==10), 79, a)", - { - "a": res, - "b": veg_cover, - }, - name="mark_final", - dtype="uint8", - ) - - res = expr_eval( - "where((a==251)&(b==10), 80, a)", - { - "a": res, - "b": veg_cover, - }, - name="mark_final", - dtype="uint8", - ) - ######################################### - res = expr_eval( - "where((a==254)&(b==12), 67, a)", - { - "a": res, - "b": veg_cover, - }, - name="mark_final", - dtype="uint8", - ) - - res = expr_eval( - "where((a==253)&(b==12), 68, a)", - { - "a": res, - "b": veg_cover, - }, - name="mark_final", - dtype="uint8", - ) - - res = expr_eval( - "where((a==252)&(b==12), 82, a)", - { - "a": res, - "b": veg_cover, - }, - name="mark_final", - dtype="uint8", - ) - - res = expr_eval( - "where((a==251)&(b==12), 83, a)", - { - "a": res, - "b": veg_cover, - }, - name="mark_final", - dtype="uint8", - ) - ########################################## - res = expr_eval( - "where((a==254)&(b==13), 70, a)", - { - "a": res, - "b": veg_cover, - }, - name="mark_final", - dtype="uint8", - ) - - res = expr_eval( - "where((a==253)&(b==13), 71, a)", - { - "a": res, - "b": veg_cover, - }, - name="mark_final", - dtype="uint8", - ) - - res = expr_eval( - "where((a==252)&(b==13), 85, a)", - { - "a": res, - "b": veg_cover, - }, - name="mark_final", - dtype="uint8", - ) - - res = expr_eval( - "where((a==251)&(b==13), 86, a)", - { - "a": res, - "b": veg_cover, - }, - name="mark_final", - dtype="uint8", - ) - ######################################### - - res = expr_eval( - "where((a==254)&(b==15), 73, a)", - { - "a": res, - "b": veg_cover, - }, - name="mark_final", - dtype="uint8", - ) - - res = expr_eval( - "where((a==253)&(b==15), 74, a)", - { - "a": res, - "b": veg_cover, - }, - name="mark_final", - dtype="uint8", - ) - - res = expr_eval( - "where((a==252)&(b==15), 88, a)", - { - "a": res, - "b": veg_cover, - }, - name="mark_final", - dtype="uint8", - ) - - res = expr_eval( - "where((a==251)&(b==15), 89, a)", - { - "a": res, - "b": veg_cover, - }, - name="mark_final", - dtype="uint8", - ) - ########################################## - res = expr_eval( - "where((a==254)&(b==16), 76, a)", - { - "a": res, - "b": veg_cover, - }, - name="mark_final", - dtype="uint8", - ) - - res = expr_eval( - "where((a==253)&(b==16), 77, a)", - { - "a": res, - "b": veg_cover, - }, - name="mark_final", - dtype="uint8", - ) - - res = expr_eval( - "where((a==252)&(b==16), 91, a)", - { - "a": res, - "b": veg_cover, - }, - name="mark_final", - dtype="uint8", - ) - - res = expr_eval( - "where((a==251)&(b==16), 92, a)", - { - "a": res, - "b": veg_cover, - }, - name="mark_final", - dtype="uint8", - ) - - # There are cases where a tile falls over water. - # In these cases, the PC will have no data so we map back 251-254 to their corresponding classes - res = expr_eval( - "where((a>=251)&(a<=252), 57, a)", - { - "a": res, - }, - name="mark_herbaceous", - dtype="uint8", - ) - res = expr_eval( - "where((a>=253)&(a<=254), 56, a)", - { - "a": res, - }, - name="mark_woody", - dtype="uint8", - ) - - return res diff --git a/odc/stats/plugins/l34_utils/l4_natural_veg.py b/odc/stats/plugins/l34_utils/l4_natural_veg.py deleted file mode 100644 index 8a039994..00000000 --- a/odc/stats/plugins/l34_utils/l4_natural_veg.py +++ /dev/null @@ -1,133 +0,0 @@ -from odc.stats._algebra import expr_eval - -NODATA = 255 - - -def lc_l4_natural_veg(l4, l3, woody, veg_cover): - - woody = expr_eval( - "where((a!=a), nodata, a)", - {"a": woody.data}, - name="mask_woody_nodata", - dtype="float32", - **{"nodata": NODATA}, - ) - - l4 = expr_eval( - "where((b==nodata), nodata, a)", - {"a": l4, "b": l3}, - name="mark_cultivated", - dtype="uint8", - **{"nodata": NODATA}, - ) - - l4 = expr_eval( - "where((a==112)&(b==113), 20, d)", - {"a": l3, "b": woody, "d": l4}, - name="mark_cultivated", - dtype="uint8", - ) - - l4 = expr_eval( - "where((a==112)&(b==114), 21, d)", - {"a": l3, "b": woody, "d": l4}, - name="mark_cultivated", - dtype="uint8", - ) - - l4 = expr_eval( - "where((a==112)&(c==10), 22, d)", - {"a": l3, "c": veg_cover, "d": l4}, - name="mark_cultivated", - dtype="uint8", - ) - l4 = expr_eval( - "where((a==112)&(c==12), 23, d)", - {"a": l3, "c": veg_cover, "d": l4}, - name="mark_cultivated", - dtype="uint8", - ) - l4 = expr_eval( - "where((a==112)&(c==13), 24, d)", - {"a": l3, "c": veg_cover, "d": l4}, - name="mark_cultivated", - dtype="uint8", - ) - l4 = expr_eval( - "where((a==112)&(c==15), 25, d)", - {"a": l3, "c": veg_cover, "d": l4}, - name="mark_cultivated", - dtype="uint8", - ) - l4 = expr_eval( - "where((a==112)&(c==16), 26, d)", - {"a": l3, "c": veg_cover, "d": l4}, - name="mark_cultivated", - dtype="uint8", - ) - - l4 = expr_eval( - "where((a==112)&(c==10)&(b==113), 27, d)", - {"a": l3, "b": woody, "c": veg_cover, "d": l4}, - name="mark_cultivated", - dtype="uint8", - ) - l4 = expr_eval( - "where((a==112)&(c==12)&(b==113), 28, d)", - {"a": l3, "b": woody, "c": veg_cover, "d": l4}, - name="mark_cultivated", - dtype="uint8", - ) - l4 = expr_eval( - "where((a==112)&(c==13)&(b==113), 29, d)", - {"a": l3, "b": woody, "c": veg_cover, "d": l4}, - name="mark_cultivated", - dtype="uint8", - ) - l4 = expr_eval( - "where((a==112)&(c==15)&(b==113), 30, d)", - {"a": l3, "b": woody, "c": veg_cover, "d": l4}, - name="mark_cultivated", - dtype="uint8", - ) - - l4 = expr_eval( - "where((a==112)&(c==16)&(b==113), 31, d)", - {"a": l3, "b": woody, "c": veg_cover, "d": l4}, - name="mark_cultivated", - dtype="uint8", - ) - - l4 = expr_eval( - "where((a==112)&(c==10)&(b==114), 32, d)", - {"a": l3, "b": woody, "c": veg_cover, "d": l4}, - name="mark_cultivated", - dtype="uint8", - ) - l4 = expr_eval( - "where((a==112)&(c==12)&(b==114), 33, d)", - {"a": l3, "b": woody, "c": veg_cover, "d": l4}, - name="mark_cultivated", - dtype="uint8", - ) - l4 = expr_eval( - "where((a==112)&(c==13)&(b==114), 34, d)", - {"a": l3, "b": woody, "c": veg_cover, "d": l4}, - name="mark_cultivated", - dtype="uint8", - ) - l4 = expr_eval( - "where((a==112)&(c==15)&(b==114), 35, d)", - {"a": l3, "b": woody, "c": veg_cover, "d": l4}, - name="mark_cultivated", - dtype="uint8", - ) - - l4 = expr_eval( - "where((a==112)&(c==16)&(b==114), 36, d)", - {"a": l3, "b": woody, "c": veg_cover, "d": l4}, - name="mark_cultivated", - dtype="uint8", - ) - - return l4 diff --git a/odc/stats/plugins/l34_utils/l4_surface.py b/odc/stats/plugins/l34_utils/l4_surface.py deleted file mode 100644 index fcf640ba..00000000 --- a/odc/stats/plugins/l34_utils/l4_surface.py +++ /dev/null @@ -1,37 +0,0 @@ -from odc.stats._algebra import expr_eval - - -def lc_l4_surface(l4, level3, bare_gradation): - - l4 = expr_eval( - "where((c==215), 93, a)", - {"a": l4, "c": level3}, - name="mark_surface", - dtype="uint8", - ) - l4 = expr_eval( - "where((c==216), 94, a)", - {"a": l4, "c": level3}, - name="mark_surface", - dtype="uint8", - ) - l4 = expr_eval( - "where((b==10)&(c==216), 95, a)", - {"a": l4, "b": bare_gradation, "c": level3}, - name="mark_surface", - dtype="uint8", - ) - l4 = expr_eval( - "where((b==12)&(c==216), 96, a)", - {"a": l4, "b": bare_gradation, "c": level3}, - name="mark_surface", - dtype="uint8", - ) - l4 = expr_eval( - "where((b==15)&(c==216), 97, a)", - {"a": l4, "b": bare_gradation, "c": level3}, - name="mark_surface", - dtype="uint8", - ) - - return l4 diff --git a/odc/stats/plugins/l34_utils/l4_veg_cover.py b/odc/stats/plugins/l34_utils/l4_veg_cover.py deleted file mode 100644 index 60bc8201..00000000 --- a/odc/stats/plugins/l34_utils/l4_veg_cover.py +++ /dev/null @@ -1,90 +0,0 @@ -# from typing import Tuple, Optional, Dict, List -import xarray as xr -from odc.stats._algebra import expr_eval - -NODATA = 255 - - -def canopyco_veg_con(xx: xr.Dataset, veg_threshold): - - # Mask NODATA - pv_pc_50 = expr_eval( - "where(a==a, a, nodata)", - {"a": xx.pv_pc_50.data}, - name="mark_nodata", - dtype="float32", - **{"nodata": NODATA}, - ) - - # data < 1 ---> 0 - veg_mask = expr_eval( - "where(a 16 - veg_mask = expr_eval( - "where((a>=m)&(a 15 - veg_mask = expr_eval( - "where((a>=m)&(a 13 - veg_mask = expr_eval( - "where((a>=m)&(a 12 - veg_mask = expr_eval( - "where((a>=m)&(a 10 - veg_mask = expr_eval( - "where((a>=m)&(a<=n), 10, b)", - { - "a": pv_pc_50, - "b": veg_mask, - }, - name="mark_veg", - dtype="uint8", - **{"m": veg_threshold[4], "n": veg_threshold[5]}, - ) - - return veg_mask diff --git a/odc/stats/plugins/l34_utils/l4_water.py b/odc/stats/plugins/l34_utils/l4_water.py deleted file mode 100644 index c93ecce9..00000000 --- a/odc/stats/plugins/l34_utils/l4_water.py +++ /dev/null @@ -1,65 +0,0 @@ -from odc.stats._algebra import expr_eval - -NODATA = 255 - - -def water_classification(xx, water_persistence): - - # Replace nan with nodata - l4 = expr_eval( - "where((a==a), a, nodata)", - {"a": xx.level_3_4.data}, - name="mark_water", - dtype="uint8", - **{"nodata": NODATA}, - ) - - l4 = expr_eval( - "where((a==223)|(a==221), 98, a)", {"a": l4}, name="mark_water", dtype="uint8" - ) - - l4 = expr_eval( - "where((a==98)&(b!=_u), 99, a)", - {"a": l4, "b": xx.level_3_4.data}, - name="mark_water", - dtype="uint8", - **{"_u": 223}, - ) - - l4 = expr_eval( - "where((a==98)&(b==_u), 100, a)", - {"a": l4, "b": xx.level_3_4.data}, - name="mark_water", - dtype="uint8", - **{"_u": 223}, - ) - - l4 = expr_eval( - "where((a==99)&(b==1), 101, a)", - {"a": l4, "b": water_persistence}, - name="mark_water", - dtype="uint8", - ) - - l4 = expr_eval( - "where((a==99)&(b==7), 102, a)", - {"a": l4, "b": water_persistence}, - name="mark_water", - dtype="uint8", - ) - - l4 = expr_eval( - "where((a==99)&(b==8), 103, a)", - {"a": l4, "b": water_persistence}, - name="mark_water", - dtype="uint8", - ) - - l4 = expr_eval( - "where((a==99)&(b==9), 104, a)", - {"a": l4, "b": water_persistence}, - name="mark_water", - dtype="uint8", - ) - - return l4 diff --git a/odc/stats/plugins/l34_utils/l4_water_persistence.py b/odc/stats/plugins/l34_utils/l4_water_persistence.py deleted file mode 100644 index fdab934c..00000000 --- a/odc/stats/plugins/l34_utils/l4_water_persistence.py +++ /dev/null @@ -1,56 +0,0 @@ -import xarray as xr - -from odc.stats._algebra import expr_eval - -NODATA = 255 -WATER_FREQ_NODATA = -999 - - -def water_persistence(xx: xr.Dataset, watper_threshold): - - # Address nan - water_frequency = expr_eval( - "where((a!=a), nodata, a)", - {"a": xx.water_frequency.data}, - name="mark_water", - dtype="float32", - **{"nodata": NODATA}, - ) - - # 10 <= water_frequency < 1 --> 1 - water_mask = expr_eval( - "where((a>=m)&(a!=nodata), 1, a)", - {"a": water_frequency}, - name="mark_water", - dtype="uint8", - **{"m": watper_threshold[3], "nodata": NODATA}, - ) - - # 7 <= water_frequency < 10 --> 7 - water_mask = expr_eval( - "where((a>=m)&(a 8 - water_mask = expr_eval( - "where((a>=m)&(a 9 - water_mask = expr_eval( - "where((a>=m)&(a=nodata), b, a)", - {"a": xx.cultivated.data, "b": xx.level_3_4.data}, - name="mask_cultivated", - dtype="float32", - **{"nodata": xx.cultivated.attrs.get("nodata")}, - ) - - # Mask urban results with bare sfc (210) - - res = expr_eval( - "where((a==_u), b, a)", - { - "a": res, - "b": xx.artificial_surface.data, - }, - name="mark_urban", - dtype="float32", - **{"_u": 210}, - ) - - # Enforce non-urban mask area to be n/artificial (216) - - res = expr_eval( - "where((b<=0)&(a==_u), _nu, a)", - { - "a": res, - "b": urban_mask, - }, - name="mask_non_urban", - dtype="float32", - **{"_u": 215, "_nu": 216}, - ) - - # Mark nodata to 255 in case any nan - res = expr_eval( - "where(a==a, a, nodata)", - { - "a": res, - }, - name="mark_nodata", - dtype="uint8", - **{"nodata": NODATA}, - ) - # Add intertidal as water - res = expr_eval( - "where((a==223)|(a==221), 220, b)", - {"a": xx.level_3_4.data, "b": res}, - name="mark_urban", - dtype="uint8", - ) - - # Combine woody and herbaceous aquatic vegetation - res = expr_eval( - "where((a==124)|(a==125), 124, b)", - {"a": xx.level_3_4.data, "b": res}, - name="mark_aquatic_veg", - dtype="uint8", - ) - - return res diff --git a/odc/stats/plugins/lc_level34.py b/odc/stats/plugins/lc_level34.py index 00f4472d..edbdf985 100644 --- a/odc/stats/plugins/lc_level34.py +++ b/odc/stats/plugins/lc_level34.py @@ -2,28 +2,19 @@ Plugin of Module A3 in LandCover PipeLine """ -from typing import Optional, List +from typing import Optional, Dict -import numpy as np import xarray as xr +import s3fs +import os +import pandas as pd +import dask.array as da from ._registry import StatsPluginInterface, register -from ._utils import rasterize_vector_mask +from ._utils import rasterize_vector_mask, generate_numexpr_expressions +from odc.stats._algebra import expr_eval from osgeo import gdal -from .l34_utils import ( - l4_water_persistence, - l4_veg_cover, - lc_level3, - l4_cultivated, - l4_natural_veg, - l4_natural_aquatic, - l4_surface, - l4_bare_gradation, - l4_water, -) - - NODATA = 255 @@ -35,15 +26,23 @@ class StatsLccsLevel4(StatsPluginInterface): def __init__( self, + class_def_path: str = None, urban_mask: str = None, filter_expression: str = None, mask_threshold: Optional[float] = None, - veg_threshold: Optional[List] = None, - bare_threshold: Optional[List] = None, - watper_threshold: Optional[List] = None, + data_var_condition: Optional[Dict] = None, **kwargs, ): super().__init__(**kwargs) + if class_def_path is None: + raise ValueError("Missing level34 class definition csv") + + if class_def_path.startswith("s3://"): + if not s3fs.S3FileSystem().exists(class_def_path): + raise FileNotFoundError(f"{class_def_path} not found") + elif not os.path.exists(class_def_path): + raise FileNotFoundError(f"{class_def_path} not found") + if urban_mask is None: raise ValueError("Missing urban mask shapefile") @@ -54,33 +53,57 @@ def __init__( if filter_expression is None: raise ValueError("Missing urban mask filter") + self.class_def = pd.read_csv(class_def_path) + cols = list(self.class_def.columns[:6]) + list(self.class_def.columns[9:-6]) + self.class_def = self.class_def[cols].astype(str).fillna("nan") + self.urban_mask = urban_mask self.filter_expression = filter_expression self.mask_threshold = mask_threshold - - self.veg_threshold = ( - veg_threshold if veg_threshold is not None else [1, 4, 15, 40, 65, 100] - ) - self.bare_threshold = bare_threshold if bare_threshold is not None else [20, 60] - self.watper_threshold = ( - watper_threshold if watper_threshold is not None else [1, 4, 7, 10] + self.data_var_condition = ( + {} if data_var_condition is None else data_var_condition ) def fuser(self, xx): return xx - # pylint: disable=too-many-locals - def reduce(self, xx: xr.Dataset) -> xr.Dataset: - - # Water persistence - water_persistence = l4_water_persistence.water_persistence( - xx, self.watper_threshold + def classification(self, xx, class_def, con_cols, class_col): + expressions = generate_numexpr_expressions( + class_def[con_cols + [class_col]], class_col, "res" + ) + local_dict = { + key: xx[self.data_var_condition.get(key, key)].data for key in con_cols + } + res = da.full(xx.level_3_4.shape, 0, dtype="uint8") + + for expression in expressions: + local_dict.update({"res": res}) + res = expr_eval( + expression, + local_dict, + name="apply_rule", + dtype="uint8", + ) + + # This seems redundant while res can be init to NODATA above, + # but it's a point to sanity check no class is missed + res = expr_eval( + "where((a!=a)|(a>=_n), _n, b)", + {"a": xx.level_3_4.data, "b": res}, + name="mark_nodata", + dtype="uint8", + **{"_n": NODATA}, ) - # #TODO WATER (99-104) - l4 = l4_water.water_classification(xx, water_persistence) + return res + + def reduce(self, xx: xr.Dataset) -> xr.Dataset: + con_cols = ["level1", "artificial_surface", "cultivated"] + class_col = "level3" + level3 = self.classification(xx, self.class_def, con_cols, class_col) - # Generate Level3 classes + # apply urban mask + # 215 -> 216 if urban_mask == 0 urban_mask = rasterize_vector_mask( self.urban_mask, xx.geobox.transform, @@ -89,38 +112,39 @@ def reduce(self, xx: xr.Dataset) -> xr.Dataset: threshold=self.mask_threshold, ) - level3 = lc_level3.lc_level3(xx, urban_mask) - - # Vegetation cover - veg_cover = l4_veg_cover.canopyco_veg_con(xx, self.veg_threshold) - - # Apply cultivated Level-4 classes (1-18) - l4 = l4_cultivated.lc_l4_cultivated(l4, level3, xx.woody, veg_cover) - - # Apply terrestrial vegetation classes [19-36] - l4 = l4_natural_veg.lc_l4_natural_veg(l4, level3, xx.woody, veg_cover) - - # Bare gradation - bare_gradation = l4_bare_gradation.bare_gradation( - xx, self.bare_threshold, veg_cover + level3 = expr_eval( + "where((a==215)&(b<1), 216, a)", + {"a": level3, "b": urban_mask}, + name="mask_non_urban", + dtype="uint8", ) - l4 = l4_natural_aquatic.natural_auquatic_veg(l4, veg_cover, xx.water_season) - - level4 = l4_surface.lc_l4_surface(l4, level3, bare_gradation) - - level3 = level3.astype(np.uint8) - level4 = level4.astype(np.uint8) - attrs = xx.attrs.copy() attrs["nodata"] = NODATA dims = xx.level_3_4.dims[1:] + coords = dict((dim, xx.coords[dim]) for dim in dims) + xx["level3"] = xr.DataArray( + level3.squeeze(), dims=dims, attrs=attrs, coords=coords + ) + + con_cols = [ + "level1", + "level3", + "woody", + "water_season", + "water_frequency", + "pv_pc_50", + "bs_pc_50", + ] + class_col = "level4" + + level4 = self.classification(xx, self.class_def, con_cols, class_col) + data_vars = { k: xr.DataArray(v, dims=dims, attrs=attrs) for k, v in zip(self.measurements, [level3.squeeze(), level4.squeeze()]) } - coords = dict((dim, xx.coords[dim]) for dim in dims) leve34 = xr.Dataset(data_vars=data_vars, coords=coords, attrs=xx.attrs) return leve34 From 9b75a301ad39f8cd6c1ebaea380bdb40a360e946 Mon Sep 17 00:00:00 2001 From: Emma Ai Date: Wed, 27 Nov 2024 12:19:12 +0000 Subject: [PATCH 3/8] remove unneeded test --- tests/test_lc_l4_ctv.py | 372 -------------------------- tests/test_lc_l4_natural_surface.py | 212 --------------- tests/test_lc_l4_nav.py | 400 ---------------------------- tests/test_lc_l4_ntv.py | 372 -------------------------- tests/test_lc_l4_water.py | 270 ------------------- 5 files changed, 1626 deletions(-) delete mode 100644 tests/test_lc_l4_ctv.py delete mode 100644 tests/test_lc_l4_natural_surface.py delete mode 100644 tests/test_lc_l4_nav.py delete mode 100644 tests/test_lc_l4_ntv.py delete mode 100644 tests/test_lc_l4_water.py diff --git a/tests/test_lc_l4_ctv.py b/tests/test_lc_l4_ctv.py deleted file mode 100644 index 92cb6073..00000000 --- a/tests/test_lc_l4_ctv.py +++ /dev/null @@ -1,372 +0,0 @@ -import numpy as np -import xarray as xr -import dask.array as da - -from odc.stats.plugins.l34_utils import ( - l4_cultivated, - lc_level3, - l4_veg_cover, -) - -import pandas as pd - -NODATA = 255 - - -def image_groups(l34, urban, cultivated, woody, pv_pc_50): - - tuples = [ - (np.datetime64("2000-01-01T00"), np.datetime64("2000-01-01")), - ] - index = pd.MultiIndex.from_tuples(tuples, names=["time", "solar_day"]) - coords = { - "x": np.linspace(10, 20, l34.shape[2]), - "y": np.linspace(0, 5, l34.shape[1]), - } - - data_vars = { - "level_3_4": xr.DataArray( - da.from_array(l34, chunks=(1, -1, -1)), - dims=("spec", "y", "x"), - attrs={"nodata": 255}, - ), - "artificial_surface": xr.DataArray( - da.from_array(urban, chunks=(1, -1, -1)), - dims=("spec", "y", "x"), - attrs={"nodata": 255}, - ), - "cultivated": xr.DataArray( - da.from_array(cultivated, chunks=(1, -1, -1)), - dims=("spec", "y", "x"), - attrs={"nodata": 255}, - ), - "woody": xr.DataArray( - da.from_array(woody, chunks=(1, -1, -1)), - dims=("spec", "y", "x"), - attrs={"nodata": 255}, - ), - "pv_pc_50": xr.DataArray( - da.from_array(pv_pc_50, chunks=(1, -1, -1)), - dims=("spec", "y", "x"), - attrs={"nodata": 255}, - ), - } - xx = xr.Dataset(data_vars=data_vars, coords=coords) - xx = xx.assign_coords(xr.Coordinates.from_pandas_multiindex(index, "spec")) - return xx - - -def test_ctv_classes_woody(veg_threshold): - - expected_cultivated_classes = [ - [13, 10, 9], - [110, 10, 10], - [13, 11, 11], - [12, 13, 10], - ] - - l34 = np.array( - [ - [ - [110, 110, 110], - [110, 110, 110], - [110, 110, 110], - [110, 110, 110], - ] - ], - dtype="uint8", - ) - - urban = np.array( - [ - [ - [216, 216, 216], - [216, 216, 216], - [216, 216, 216], - [216, 216, 216], - ] - ], - dtype="uint8", - ) - # 111 --> cultivated - cultivated = np.array( - [ - [ - [111, 111, 111], - [255, 111, 111], - [111, 111, 111], - [111, 111, 111], - ] - ], - dtype="uint8", - ) - - woody = np.array( - [ - [ - [113, 113, 113], - [113, 113, 113], - [113, 113, 113], - [113, 113, 113], - ] - ], - dtype="uint8", - ) - - pv_pc_50 = np.array( - [ - [ - [1, 64, 65], - [66, 40, 41], - [3, 16, 15], - [4, 1, 42], - ] - ], - dtype="uint8", - ) - xx = image_groups(l34, urban, cultivated, woody, pv_pc_50) - mock_urban_mask = da.ones(xx.artificial_surface.shape) - - level3 = lc_level3.lc_level3(xx, mock_urban_mask) - - veg_cover = l4_veg_cover.canopyco_veg_con(xx, veg_threshold) - l4_ctv = l4_cultivated.lc_l4_cultivated(xx.level_3_4, level3, xx.woody, veg_cover) - - assert (l4_ctv.compute() == expected_cultivated_classes).all() - - -def test_ctv_classes_herbaceous(veg_threshold): - - expected_cultivated_classes = [ - [18, 15, 14], - [110, 15, 15], - [18, 16, 16], - [17, 18, 15], - ] - - l34 = np.array( - [ - [ - [110, 110, 110], - [110, 110, 110], - [110, 110, 110], - [110, 110, 110], - ] - ], - dtype="uint8", - ) - - urban = np.array( - [ - [ - [216, 216, 216], - [216, 216, 216], - [216, 216, 216], - [216, 216, 216], - ] - ], - dtype="uint8", - ) - - cultivated = np.array( - [ - [ - [111, 111, 111], - [255, 111, 111], - [111, 111, 111], - [111, 111, 111], - ] - ], - dtype="uint8", - ) - - woody = np.array( - [ - [ - [114, 114, 114], - [114, 114, 114], - [114, 114, 114], - [114, 114, 114], - ] - ], - dtype="uint8", - ) - - pv_pc_50 = np.array( - [ - [ - [1, 64, 65], - [66, 40, 41], - [3, 16, 15], - [4, 1, 42], - ] - ], - dtype="uint8", - ) - xx = image_groups(l34, urban, cultivated, woody, pv_pc_50) - mock_urban_mask = da.ones(xx.artificial_surface.shape) - - level3 = lc_level3.lc_level3(xx, mock_urban_mask) - veg_cover = l4_veg_cover.canopyco_veg_con(xx, veg_threshold) - - l4_ctv = l4_cultivated.lc_l4_cultivated(xx.level_3_4, level3, xx.woody, veg_cover) - assert (l4_ctv.compute() == expected_cultivated_classes).all() - - -def test_ctv_classes_woody_herbaceous(veg_threshold): - - expected_cultivated_classes = [ - [13, 10, 9], - [110, 15, 15], - [13, 11, 11], - [17, 18, 15], - ] - - l34 = np.array( - [ - [ - [110, 110, 110], - [110, 110, 110], - [110, 110, 110], - [110, 110, 110], - ] - ], - dtype="int", - ) - - urban = np.array( - [ - [ - [216, 216, 216], - [216, 216, 216], - [216, 216, 216], - [216, 216, 216], - ] - ], - dtype="int", - ) - - cultivated = np.array( - [ - [ - [111, 111, 111], - [255, 111, 111], - [111, 111, 111], - [111, 111, 111], - ] - ], - dtype="int", - ) - - woody = np.array( - [ - [ - [113, 113, 113], - [114, 114, 114], - [113, 113, 113], - [114, 114, 114], - ] - ], - dtype="int", - ) - - pv_pc_50 = np.array( - [ - [ - [1, 64, 65], - [66, 40, 41], - [3, 16, 15], - [4, 1, 42], - ] - ], - dtype="int", - ) - xx = image_groups(l34, urban, cultivated, woody, pv_pc_50) - mock_urban_mask = da.ones(xx.artificial_surface.shape) - - level3 = lc_level3.lc_level3(xx, mock_urban_mask) - - veg_cover = l4_veg_cover.canopyco_veg_con(xx, veg_threshold) - l4_ctv = l4_cultivated.lc_l4_cultivated(xx.level_3_4, level3, xx.woody, veg_cover) - - assert (l4_ctv.compute() == expected_cultivated_classes).all() - - -def test_ctv_classes_no_vegcover(veg_threshold): - - expected_cultivated_classes = [ - [2, 2, 2], - [110, 3, 3], - [2, 2, 2], - [3, 3, 3], - ] - - l34 = np.array( - [ - [ - [110, 110, 110], - [110, 110, 110], - [110, 110, 110], - [110, 110, 110], - ] - ], - dtype="int", - ) - - urban = np.array( - [ - [ - [216, 216, 216], - [216, 216, 216], - [216, 216, 216], - [216, 216, 216], - ] - ], - dtype="int", - ) - - cultivated = np.array( - [ - [ - [111, 111, 111], - [255, 111, 111], - [111, 111, 111], - [111, 111, 111], - ] - ], - dtype="int", - ) - - woody = np.array( - [ - [ - [113, 113, 113], - [114, 114, 114], - [113, 113, 113], - [114, 114, 114], - ] - ], - dtype="int", - ) - - pv_pc_50 = np.array( - [ - [ - [255, 255, 255], - [255, 255, 255], - [255, 255, 255], - [255, 255, 255], - ] - ], - dtype="int", - ) - xx = image_groups(l34, urban, cultivated, woody, pv_pc_50) - - mock_urban_mask = da.ones(xx.artificial_surface.shape) - - level3 = lc_level3.lc_level3(xx, mock_urban_mask) - - veg_cover = l4_veg_cover.canopyco_veg_con(xx, veg_threshold) - l4_ctv = l4_cultivated.lc_l4_cultivated(xx.level_3_4, level3, xx.woody, veg_cover) - - assert (l4_ctv.compute() == expected_cultivated_classes).all() diff --git a/tests/test_lc_l4_natural_surface.py b/tests/test_lc_l4_natural_surface.py deleted file mode 100644 index b77e6044..00000000 --- a/tests/test_lc_l4_natural_surface.py +++ /dev/null @@ -1,212 +0,0 @@ -""" - Unit tests for LandCover Natural Aquatic Vegetation classes -""" - -import numpy as np -import xarray as xr -import dask.array as da - -from odc.stats.plugins.l34_utils import ( - l4_cultivated, - lc_level3, - l4_veg_cover, - l4_natural_veg, - l4_natural_aquatic, - l4_surface, - l4_bare_gradation, -) - -import pandas as pd - -NODATA = 255 - - -def image_groups( - l34, urban, woody, bs_pc_50, pv_pc_50, cultivated, water_frequency, water_season -): - - tuples = [ - (np.datetime64("2000-01-01T00"), np.datetime64("2000-01-01")), - ] - index = pd.MultiIndex.from_tuples(tuples, names=["time", "solar_day"]) - coords = { - "x": np.linspace(10, 20, l34.shape[2]), - "y": np.linspace(0, 5, l34.shape[1]), - } - - data_vars = { - "level_3_4": xr.DataArray( - da.from_array(l34, chunks=(1, -1, -1)), - dims=("spec", "y", "x"), - attrs={"nodata": 255}, - ), - "artificial_surface": xr.DataArray( - da.from_array(urban, chunks=(1, -1, -1)), - dims=("spec", "y", "x"), - attrs={"nodata": 255}, - ), - "cultivated": xr.DataArray( - da.from_array(cultivated, chunks=(1, -1, -1)), - dims=("spec", "y", "x"), - attrs={"nodata": 255}, - ), - "woody": xr.DataArray( - da.from_array(woody, chunks=(1, -1, -1)), - dims=("spec", "y", "x"), - attrs={"nodata": 255}, - ), - "pv_pc_50": xr.DataArray( - da.from_array(pv_pc_50, chunks=(1, -1, -1)), - dims=("spec", "y", "x"), - attrs={"nodata": 255}, - ), - "bs_pc_50": xr.DataArray( - da.from_array(bs_pc_50, chunks=(1, -1, -1)), - dims=("spec", "y", "x"), - attrs={"nodata": 255}, - ), - "water_frequency": xr.DataArray( - da.from_array(water_frequency, chunks=(1, -1, -1)), - dims=("spec", "y", "x"), - attrs={"nodata": 255}, - ), - "water_season": xr.DataArray( - da.from_array(water_season, chunks=(1, -1, -1)), - dims=("spec", "y", "x"), - attrs={"nodata": 255}, - ), - } - xx = xr.Dataset(data_vars=data_vars, coords=coords) - xx = xx.assign_coords(xr.Coordinates.from_pandas_multiindex(index, "spec")) - return xx - - -def test_ns(veg_threshold): - expected_l4_srf_classes = [ - [95, 97, 93], - [97, 96, 96], - [95, 95, 95], - [94, 95, 96], - ] - - l34 = np.array( - [ - [ - [210, 210, 210], - [210, 210, 210], - [210, 210, 210], - [210, 210, 210], - ] - ], - dtype="uint8", - ) - - urban = np.array( - [ - [ - [216, 216, 215], - [216, 216, 216], - [216, 216, 216], - [216, 216, 216], - ] - ], - dtype="uint8", - ) - - woody = np.array( - [ - [ - [113, 113, 113], - [113, 113, 255], - [114, 114, 114], - [114, 114, 255], - ] - ], - dtype="uint8", - ) - - pv_pc_50 = np.array( - [ - [ - [1, 64, 65], - [66, 40, 41], - [3, 16, 15], - [4, 1, 42], - ] - ], - dtype="uint8", - ) - - bs_pc_50 = np.array( - [ - [ - [1, 64, NODATA], - [66, 40, 41], - [3, 16, 15], - [NODATA, 1, 42], - ] - ], - dtype="uint8", - ) - # 112 --> natural veg - cultivated = np.array( - [ - [ - [255, 255, 255], - [255, 255, 255], - [255, 255, 255], - [255, 255, 255], - ] - ], - dtype="uint8", - ) - - water_frequency = np.array( - [ - [ - [1, 3, 2], - [4, 5, 6], - [9, 2, 11], - [10, 11, 12], - ] - ], - dtype="uint8", - ) - water_season = np.array( - [ - [ - [1, 2, 1], - [2, 1, 2], - [1, 1, 2], - [2, 2, 1], - ] - ], - dtype="uint8", - ) - - xx = image_groups( - l34, urban, woody, bs_pc_50, pv_pc_50, cultivated, water_frequency, water_season - ) - - mock_urban_mask = da.ones(xx.artificial_surface.shape) - level3 = lc_level3.lc_level3(xx, mock_urban_mask) - - veg_cover = l4_veg_cover.canopyco_veg_con(xx, veg_threshold) - - # Apply cultivated to match the code in Level4 processing - l4_ctv = l4_cultivated.lc_l4_cultivated(xx.level_3_4, level3, xx.woody, veg_cover) - l4_ctv_ntv = l4_natural_veg.lc_l4_natural_veg(l4_ctv, level3, xx.woody, veg_cover) - - l4_ctv_ntv_nav = l4_natural_aquatic.natural_auquatic_veg( - l4_ctv_ntv, veg_cover, xx.water_season - ) - - # Bare gradation - bare_threshold = [20, 60] - bare_gradation = l4_bare_gradation.bare_gradation(xx, bare_threshold, veg_cover) - - l4_ctv_ntv_nav_surface = l4_surface.lc_l4_surface( - l4_ctv_ntv_nav, level3, bare_gradation - ) - - assert (l4_ctv_ntv_nav_surface.compute() == expected_l4_srf_classes).all() diff --git a/tests/test_lc_l4_nav.py b/tests/test_lc_l4_nav.py deleted file mode 100644 index e3e1849e..00000000 --- a/tests/test_lc_l4_nav.py +++ /dev/null @@ -1,400 +0,0 @@ -""" - Unit tests for LandCover Natural Aquatic Vegetation classes -""" - -import numpy as np -import xarray as xr -import dask.array as da - -from odc.stats.plugins.l34_utils import ( - l4_cultivated, - lc_level3, - l4_veg_cover, - l4_natural_veg, - l4_natural_aquatic, -) - -import pandas as pd - -NODATA = 255 - - -def image_groups( - l34, urban, cultivated, woody, pv_pc_50, water_frequency, water_season -): - - tuples = [ - (np.datetime64("2000-01-01T00"), np.datetime64("2000-01-01")), - ] - index = pd.MultiIndex.from_tuples(tuples, names=["time", "solar_day"]) - coords = { - "x": np.linspace(10, 20, l34.shape[2]), - "y": np.linspace(0, 5, l34.shape[1]), - } - - data_vars = { - "level_3_4": xr.DataArray( - da.from_array(l34, chunks=(1, -1, -1)), - dims=("spec", "y", "x"), - attrs={"nodata": 255}, - ), - "artificial_surface": xr.DataArray( - da.from_array(urban, chunks=(1, -1, -1)), - dims=("spec", "y", "x"), - attrs={"nodata": 255}, - ), - "cultivated": xr.DataArray( - da.from_array(cultivated, chunks=(1, -1, -1)), - dims=("spec", "y", "x"), - attrs={"nodata": 255}, - ), - "woody": xr.DataArray( - da.from_array(woody, chunks=(1, -1, -1)), - dims=("spec", "y", "x"), - attrs={"nodata": 255}, - ), - "pv_pc_50": xr.DataArray( - da.from_array(pv_pc_50, chunks=(1, -1, -1)), - dims=("spec", "y", "x"), - attrs={"nodata": 255}, - ), - "water_frequency": xr.DataArray( - da.from_array(water_frequency, chunks=(1, -1, -1)), - dims=("spec", "y", "x"), - attrs={"nodata": 255}, - ), - "water_season": xr.DataArray( - da.from_array(water_season, chunks=(1, -1, -1)), - dims=("spec", "y", "x"), - attrs={"nodata": 255}, - ), - } - xx = xr.Dataset(data_vars=data_vars, coords=coords) - xx = xx.assign_coords(xr.Coordinates.from_pandas_multiindex(index, "spec")) - return xx - - -def test_ntv_classes_woody_herbaceous(veg_threshold): - expected_l4_ntv_classes = [[56, 56, 56], [57, 57, 57], [56, 56, 56], [57, 57, 57]] - - l34 = np.array( - [ - [ - [124, 124, 124], - [125, 125, 125], - [124, 124, 124], - [125, 125, 125], - ] - ], - dtype="uint8", - ) - - urban = np.array( - [ - [ - [216, 216, 216], - [216, 216, 216], - [216, 216, 216], - [216, 216, 216], - ] - ], - dtype="uint8", - ) - # 112 --> natural veg - cultivated = np.array( - [ - [ - [255, 255, 255], - [255, 255, 255], - [255, 255, 255], - [255, 255, 255], - ] - ], - dtype="uint8", - ) - - woody = np.array( - [ - [ - [113, 113, 113], - [113, 113, 255], - [114, 114, 114], - [114, 114, 255], - ] - ], - dtype="uint8", - ) - - pv_pc_50 = np.array( - [ - [ - [255, 255, 255], - [255, 255, 255], - [255, 255, 255], - [255, 255, 255], - ] - ], - dtype="uint8", - ) - - water_frequency = np.array( - [ - [ - [255, 255, 255], - [255, 255, 255], - [255, 255, 255], - [255, 255, 255], - ] - ], - dtype="uint8", - ) - - water_season = np.array( - [ - [ - [0, 0, 0], - [0, 0, 0], - [0, 0, 0], - [0, 0, 0], - ] - ], - dtype="uint8", - ) - - xx = image_groups( - l34, urban, cultivated, woody, pv_pc_50, water_frequency, water_season - ) - mock_urban_mask = da.ones(xx.artificial_surface.shape) - - level3 = lc_level3.lc_level3(xx, mock_urban_mask) - veg_cover = l4_veg_cover.canopyco_veg_con(xx, veg_threshold) - - # Apply cultivated to match the code in Level4 processing - l4_ctv = l4_cultivated.lc_l4_cultivated(xx.level_3_4, level3, xx.woody, veg_cover) - l4_ctv_ntv = l4_natural_veg.lc_l4_natural_veg(l4_ctv, level3, xx.woody, veg_cover) - - l4_ctv_ntv_nav = l4_natural_aquatic.natural_auquatic_veg( - l4_ctv_ntv, veg_cover, xx.water_season - ) - - assert (l4_ctv_ntv_nav.compute() == expected_l4_ntv_classes).all() - - -def test_ntv_herbaceous_seasonal_water_veg_cover(veg_threshold): - expected_l4_ntv_classes = [ - [91, 83, 79], - [80, 82, 83], - [91, 85, 86], - [89, 92, 82], - ] - - l34 = np.array( - [ - [ - [125, 125, 125], - [125, 125, 125], - [125, 125, 125], - [125, 125, 125], - ] - ], - dtype="uint8", - ) - - urban = np.array( - [ - [ - [216, 216, 216], - [216, 216, 216], - [216, 216, 216], - [216, 216, 216], - ] - ], - dtype="uint8", - ) - - cultivated = np.array( - [ - [ - [255, 255, 255], - [255, 255, 255], - [255, 255, 255], - [255, 255, 255], - ] - ], - dtype="uint8", - ) - - woody = np.array( - [ - [ - [114, 114, 114], - [114, 114, 114], - [114, 114, 114], - [114, 114, 114], - ] - ], - dtype="uint8", - ) - - pv_pc_50 = np.array( - [ - [ - [1, 64, 65], - [66, 40, 41], - [3, 16, 15], - [4, 1, 42], - ] - ], - dtype="uint8", - ) - water_frequency = np.array( - [ - [ - [2, 2, 2], - [2, 2, 2], - [2, 2, 2], - [2, 2, 2], - ] - ], - dtype="uint8", - ) - - water_season = np.array( - [ - [ - [1, 2, 1], - [2, 1, 2], - [1, 1, 2], - [2, 2, 1], - ] - ], - dtype="uint8", - ) - - xx = image_groups( - l34, urban, cultivated, woody, pv_pc_50, water_frequency, water_season - ) - mock_urban_mask = da.ones(xx.artificial_surface.shape) - - level3 = lc_level3.lc_level3(xx, mock_urban_mask) - veg_cover = l4_veg_cover.canopyco_veg_con(xx, veg_threshold) - - # Apply cultivated to match the code in Level4 processing - l4_ctv = l4_cultivated.lc_l4_cultivated(xx.level_3_4, level3, xx.woody, veg_cover) - l4_ctv_ntv = l4_natural_veg.lc_l4_natural_veg(l4_ctv, level3, xx.woody, veg_cover) - - l4_ctv_ntv_nav = l4_natural_aquatic.natural_auquatic_veg( - l4_ctv_ntv, veg_cover, xx.water_season - ) - - assert (l4_ctv_ntv_nav.compute() == expected_l4_ntv_classes).all() - - -def test_ntv_woody_seasonal_water_veg_cover(veg_threshold): - expected_l4_ntv_classes = [ - [76, 68, 64], - [65, 67, 68], - [76, 70, 71], - [74, 77, 67], - ] - - l34 = np.array( - [ - [ - [124, 124, 124], - [124, 124, 124], - [124, 124, 124], - [124, 124, 124], - ] - ], - dtype="uint8", - ) - - urban = np.array( - [ - [ - [216, 216, 216], - [216, 216, 216], - [216, 216, 216], - [216, 216, 216], - ] - ], - dtype="uint8", - ) - - cultivated = np.array( - [ - [ - [255, 255, 255], - [255, 255, 255], - [255, 255, 255], - [255, 255, 255], - ] - ], - dtype="uint8", - ) - - woody = np.array( - [ - [ - [114, 114, 114], - [114, 114, 114], - [114, 114, 114], - [114, 114, 114], - ] - ], - dtype="uint8", - ) - - pv_pc_50 = np.array( - [ - [ - [1, 64, 65], - [66, 40, 41], - [3, 16, 15], - [4, 1, 42], - ] - ], - dtype="uint8", - ) - - water_frequency = np.array( - [ - [ - [1, 2, 3], - [1, 2, 3], - [1, 2, 3], - [1, 2, 3], - ] - ], - dtype="uint8", - ) - - water_season = np.array( - [ - [ - [1, 2, 1], - [2, 1, 2], - [1, 1, 2], - [2, 2, 1], - ] - ], - dtype="uint8", - ) - xx = image_groups( - l34, urban, cultivated, woody, pv_pc_50, water_frequency, water_season - ) - mock_urban_mask = da.ones(xx.artificial_surface.shape) - - level3 = lc_level3.lc_level3(xx, mock_urban_mask) - veg_cover = l4_veg_cover.canopyco_veg_con(xx, veg_threshold) - - # Apply cultivated to match the code in Level4 processing - l4_ctv = l4_cultivated.lc_l4_cultivated(xx.level_3_4, level3, xx.woody, veg_cover) - l4_ctv_ntv = l4_natural_veg.lc_l4_natural_veg(l4_ctv, level3, xx.woody, veg_cover) - - l4_ctv_ntv_nav = l4_natural_aquatic.natural_auquatic_veg( - l4_ctv_ntv, veg_cover, xx.water_season - ) - - assert (l4_ctv_ntv_nav.compute() == expected_l4_ntv_classes).all() diff --git a/tests/test_lc_l4_ntv.py b/tests/test_lc_l4_ntv.py deleted file mode 100644 index fb3bbe4f..00000000 --- a/tests/test_lc_l4_ntv.py +++ /dev/null @@ -1,372 +0,0 @@ -""" - Unit tests for LandCover Natural Terrestrial Vegetated classes -""" - -import numpy as np -import xarray as xr -import dask.array as da - -from odc.stats.plugins.l34_utils import ( - lc_level3, - l4_veg_cover, - l4_natural_veg, -) - -import pandas as pd - -NODATA = 255 - - -def image_groups(l34, urban, cultivated, woody, pv_pc_50): - - tuples = [ - (np.datetime64("2000-01-01T00"), np.datetime64("2000-01-01")), - ] - index = pd.MultiIndex.from_tuples(tuples, names=["time", "solar_day"]) - coords = { - "x": np.linspace(10, 20, l34.shape[2]), - "y": np.linspace(0, 5, l34.shape[1]), - } - - data_vars = { - "level_3_4": xr.DataArray( - da.from_array(l34, chunks=(1, -1, -1)), - dims=("spec", "y", "x"), - attrs={"nodata": 255}, - ), - "artificial_surface": xr.DataArray( - da.from_array(urban, chunks=(1, -1, -1)), - dims=("spec", "y", "x"), - attrs={"nodata": 255}, - ), - "cultivated": xr.DataArray( - da.from_array(cultivated, chunks=(1, -1, -1)), - dims=("spec", "y", "x"), - attrs={"nodata": 255}, - ), - "woody": xr.DataArray( - da.from_array(woody, chunks=(1, -1, -1)), - dims=("spec", "y", "x"), - attrs={"nodata": 255}, - ), - "pv_pc_50": xr.DataArray( - da.from_array(pv_pc_50, chunks=(1, -1, -1)), - dims=("spec", "y", "x"), - attrs={"nodata": 255}, - ), - } - xx = xr.Dataset(data_vars=data_vars, coords=coords) - xx = xx.assign_coords(xr.Coordinates.from_pandas_multiindex(index, "spec")) - return xx - - -def test_ntv_classes_herbaceous(veg_threshold): - - expected_natural_terrestrial_veg_classes = [ - [36, 33, 32], - [110, 33, 33], - [36, 34, 34], - [35, 36, 33], - ] - - l34 = np.array( - [ - [ - [110, 110, 110], - [110, 110, 110], - [110, 110, 110], - [110, 110, 110], - ] - ], - dtype="uint8", - ) - - urban = np.array( - [ - [ - [216, 216, 216], - [216, 216, 216], - [216, 216, 216], - [216, 216, 216], - ] - ], - dtype="uint8", - ) - # 112 --> natural veg - cultivated = np.array( - [ - [ - [112, 112, 112], - [255, 112, 112], - [112, 112, 112], - [112, 112, 112], - ] - ], - dtype="uint8", - ) - - woody = np.array( - [ - [ - [114, 114, 114], - [114, 114, 114], - [114, 114, 114], - [114, 114, 114], - ] - ], - dtype="uint8", - ) - - pv_pc_50 = np.array( - [ - [ - [1, 64, 65], - [66, 40, 41], - [3, 16, 15], - [4, 1, 42], - ] - ], - dtype="uint8", - ) - xx = image_groups(l34, urban, cultivated, woody, pv_pc_50) - mock_urban_mask = da.ones(xx.artificial_surface.shape) - - level3 = lc_level3.lc_level3(xx, mock_urban_mask) - - veg_cover = l4_veg_cover.canopyco_veg_con(xx, veg_threshold) - l4_ntv = l4_natural_veg.lc_l4_natural_veg(xx.level_3_4, level3, xx.woody, veg_cover) - assert (l4_ntv.compute() == expected_natural_terrestrial_veg_classes).all() - - -def test_ntv_classes_woody(veg_threshold): - - expected_natural_terrestrial_veg_classes = [ - [31, 28, 27], - [110, 28, 28], - [31, 29, 29], - [30, 26, 28], - ] - - l34 = np.array( - [ - [ - [110, 110, 110], - [110, 110, 110], - [110, 110, 110], - [110, 110, 110], - ] - ], - dtype="uint8", - ) - - urban = np.array( - [ - [ - [216, 216, 216], - [216, 216, 216], - [216, 216, 216], - [216, 216, 216], - ] - ], - dtype="uint8", - ) - # 112 --> natural veg - cultivated = np.array( - [ - [ - [112, 112, 112], - [255, 112, 112], - [112, 112, 112], - [112, 112, 112], - ] - ], - dtype="uint8", - ) - - woody = np.array( - [ - [ - [113, 113, 113], - [113, 113, 113], - [113, 113, 113], - [113, 255, 113], - ] - ], - dtype="uint8", - ) - - pv_pc_50 = np.array( - [ - [ - [1, 64, 65], - [66, 40, 41], - [3, 16, 15], - [4, 1, 42], - ] - ], - dtype="uint8", - ) - xx = image_groups(l34, urban, cultivated, woody, pv_pc_50) - mock_urban_mask = da.ones(xx.artificial_surface.shape) - - level3 = lc_level3.lc_level3(xx, mock_urban_mask) - veg_cover = l4_veg_cover.canopyco_veg_con(xx, veg_threshold) - - l4_ntv = l4_natural_veg.lc_l4_natural_veg(xx.level_3_4, level3, xx.woody, veg_cover) - assert (l4_ntv.compute() == expected_natural_terrestrial_veg_classes).all() - - -def test_ntv_classes_no_veg(veg_threshold): - - expected_natural_terrestrial_veg_classes = [ - [20, 20, 20], - [110, 21, 21], - [20, 20, 20], - [21, 21, 21], - ] - - l34 = np.array( - [ - [ - [110, 110, 110], - [110, 110, 110], - [110, 110, 110], - [110, 110, 110], - ] - ], - dtype="uint8", - ) - - urban = np.array( - [ - [ - [216, 216, 216], - [216, 216, 216], - [216, 216, 216], - [216, 216, 216], - ] - ], - dtype="uint8", - ) - # 112 --> natural veg - cultivated = np.array( - [ - [ - [112, 112, 112], - [255, 112, 112], - [112, 112, 112], - [112, 112, 112], - ] - ], - dtype="uint8", - ) - - woody = np.array( - [ - [ - [113, 113, 113], - [114, 114, 114], - [113, 113, 113], - [114, 114, 114], - ] - ], - dtype="uint8", - ) - - pv_pc_50 = np.array( - [ - [ - [255, 255, 255], - [255, 255, 255], - [255, 255, 255], - [255, 255, 255], - ] - ], - dtype="uint8", - ) - xx = image_groups(l34, urban, cultivated, woody, pv_pc_50) - mock_urban_mask = da.ones(xx.artificial_surface.shape) - - level3 = lc_level3.lc_level3(xx, mock_urban_mask) - - veg_cover = l4_veg_cover.canopyco_veg_con(xx, veg_threshold) - l4_ntv = l4_natural_veg.lc_l4_natural_veg(xx.level_3_4, level3, xx.woody, veg_cover) - assert (l4_ntv.compute() == expected_natural_terrestrial_veg_classes).all() - - -def test_ntv_classes_no_lifeform(veg_threshold): - - expected_natural_terrestrial_veg_classes = [ - [26, 23, 22], - [22, 23, 23], - [26, 24, 24], - [25, 26, 23], - ] - - l34 = np.array( - [ - [ - [110, 110, 110], - [110, 110, 110], - [110, 110, 110], - [110, 110, 110], - ] - ], - dtype="uint8", - ) - - urban = np.array( - [ - [ - [216, 216, 216], - [216, 216, 216], - [216, 216, 216], - [216, 216, 216], - ] - ], - dtype="uint8", - ) - # 112 --> natural veg - cultivated = np.array( - [ - [ - [112, 112, 112], - [112, 112, 112], - [112, 112, 112], - [112, 112, 112], - ] - ], - dtype="uint8", - ) - - woody = np.array( - [ - [ - [255, 255, 255], - [255, 255, 255], - [255, 255, 255], - [255, 255, 255], - ] - ], - dtype="uint8", - ) - - pv_pc_50 = np.array( - [ - [ - [1, 64, 65], - [66, 40, 41], - [3, 16, 15], - [4, 1, 42], - ] - ], - dtype="uint8", - ) - xx = image_groups(l34, urban, cultivated, woody, pv_pc_50) - mock_urban_mask = da.ones(xx.artificial_surface.shape) - - level3 = lc_level3.lc_level3(xx, mock_urban_mask) - - veg_cover = l4_veg_cover.canopyco_veg_con(xx, veg_threshold) - l4_ntv = l4_natural_veg.lc_l4_natural_veg(xx.level_3_4, level3, xx.woody, veg_cover) - assert (l4_ntv.compute() == expected_natural_terrestrial_veg_classes).all() diff --git a/tests/test_lc_l4_water.py b/tests/test_lc_l4_water.py deleted file mode 100644 index e0d021ee..00000000 --- a/tests/test_lc_l4_water.py +++ /dev/null @@ -1,270 +0,0 @@ -""" - Unit tests for LandCover water classes -""" - -import numpy as np -import xarray as xr -import dask.array as da - -from odc.stats.plugins.l34_utils import ( - l4_water_persistence, - l4_water, -) - -import pandas as pd - -NODATA = 255 - - -# @pytest.fixture(scope="module") -def image_groups(l34, urban, cultivated, woody, bs_pc_50, pv_pc_50, water_frequency): - - tuples = [ - (np.datetime64("2000-01-01T00"), np.datetime64("2000-01-01")), - ] - index = pd.MultiIndex.from_tuples(tuples, names=["time", "solar_day"]) - coords = { - "x": np.linspace(10, 20, l34.shape[2]), - "y": np.linspace(0, 5, l34.shape[1]), - } - - data_vars = { - "level_3_4": xr.DataArray( - da.from_array(l34, chunks=(1, -1, -1)), - dims=("spec", "y", "x"), - attrs={"nodata": 255}, - ), - "artificial_surface": xr.DataArray( - da.from_array(urban, chunks=(1, -1, -1)), - dims=("spec", "y", "x"), - attrs={"nodata": 255}, - ), - "cultivated_class": xr.DataArray( - da.from_array(cultivated, chunks=(1, -1, -1)), - dims=("spec", "y", "x"), - attrs={"nodata": 255}, - ), - "woody_cover": xr.DataArray( - da.from_array(woody, chunks=(1, -1, -1)), - dims=("spec", "y", "x"), - attrs={"nodata": 255}, - ), - "pv_pc_50": xr.DataArray( - da.from_array(pv_pc_50, chunks=(1, -1, -1)), - dims=("spec", "y", "x"), - attrs={"nodata": 255}, - ), - "bs_pc_50": xr.DataArray( - da.from_array(bs_pc_50, chunks=(1, -1, -1)), - dims=("spec", "y", "x"), - attrs={"nodata": 255}, - ), - "water_frequency": xr.DataArray( - da.from_array(water_frequency, chunks=(1, -1, -1)), - dims=("spec", "y", "x"), - attrs={"nodata": 255}, - ), - } - xx = xr.Dataset(data_vars=data_vars, coords=coords) - xx = xx.assign_coords(xr.Coordinates.from_pandas_multiindex(index, "spec")) - return xx - - -def test_water_classes(watper_threshold): - expected_water_classes = [ - [[104, 104, 104], [103, 103, 103], [102, 102, 101], [99, 101, 101]], - ] - - l34 = np.array( - [ - [ - [221, 221, 221], - [221, 221, 221], - [221, 221, 221], - [221, 221, 221], - ] - ], - dtype="uint8", - ) - - urban = np.array( - [ - [ - [216, 216, 216], - [216, 216, 216], - [216, 216, 216], - [216, 216, 216], - ] - ], - dtype="uint8", - ) - # 112 --> natural veg - cultivated = np.array( - [ - [ - [112, 112, 112], - [255, 112, 112], - [112, 112, 112], - [112, 112, 112], - ] - ], - dtype="uint8", - ) - - woody = np.array( - [ - [ - [114, 114, 114], - [114, 114, 114], - [114, 114, 114], - [114, 114, 114], - ] - ], - dtype="uint8", - ) - - pv_pc_50 = np.array( - [ - [ - [1, 64, 65], - [66, 40, 41], - [3, 16, 15], - [4, 1, 42], - ] - ], - dtype="uint8", - ) - bs_pc_50 = np.array( - [ - [ - [1, 64, NODATA], - [66, 40, 41], - [3, 16, 15], - [NODATA, 1, 42], - ] - ], - dtype="uint8", - ) - water_frequency = np.array( - [ - [ - [1, 3, 2], - [4, 5, 6], - [9, 7, 11], - [NODATA, 11, 12], - ] - ], - dtype="float", - ) - xx = image_groups( - l34, urban, cultivated, woody, bs_pc_50, pv_pc_50, water_frequency - ) - - # Water persistence - water_persistence = l4_water_persistence.water_persistence(xx, watper_threshold) - - l4_water_classes = l4_water.water_classification(xx, water_persistence) - - assert (l4_water_classes.compute() == expected_water_classes).all() - - -def test_water_intertidal(watper_threshold): - - expected_water_classes = [ - [100, 100, 100], - [100, 100, 100], - [102, 102, 101], - [101, 99, 100], - ] - - l34 = np.array( - [ - [ - [223, 223, 223], - [223, 223, 223], - [221, 221, 221], - [221, 221, 223], - ] - ], - dtype="uint8", - ) - - urban = np.array( - [ - [ - [216, 216, 216], - [216, 216, 216], - [216, 216, 216], - [216, 216, 216], - ] - ], - dtype="uint8", - ) - # 112 --> natural veg - cultivated = np.array( - [ - [ - [112, 112, 112], - [255, 112, 112], - [112, 112, 112], - [112, 112, 112], - ] - ], - dtype="uint8", - ) - - woody = np.array( - [ - [ - [114, 114, 114], - [114, 114, 114], - [114, 114, 114], - [114, 114, 114], - ] - ], - dtype="uint8", - ) - - pv_pc_50 = np.array( - [ - [ - [1, 64, 65], - [66, 40, 41], - [3, 16, 15], - [4, 1, 42], - ] - ], - dtype="uint8", - ) - bs_pc_50 = np.array( - [ - [ - [1, 64, NODATA], - [66, 40, 41], - [3, 16, 15], - [NODATA, 1, 42], - ] - ], - dtype="uint8", - ) - water_frequency = np.array( - [ - [ - [1, 3, 2], - [4, 5, 6], - [9, 7, 11], - [10, 255, 255], - ] - ], - dtype="uint8", - ) - xx = image_groups( - l34, urban, cultivated, woody, bs_pc_50, pv_pc_50, water_frequency - ) - - # Water persistence - water_persistence = l4_water_persistence.water_persistence(xx, watper_threshold) - - l4_water_classes = l4_water.water_classification(xx, water_persistence) - - assert (l4_water_classes.compute() == expected_water_classes).all() From daf6e199337e0b46b024ccb9f2ae8c73537390fe Mon Sep 17 00:00:00 2001 From: Emma Ai Date: Wed, 27 Nov 2024 12:25:16 +0000 Subject: [PATCH 4/8] add required package --- docker/env.yaml | 1 + setup.cfg | 1 + tests/requirements.txt | 1 + 3 files changed, 3 insertions(+) diff --git a/docker/env.yaml b/docker/env.yaml index bd263f70..9e0d7617 100644 --- a/docker/env.yaml +++ b/docker/env.yaml @@ -109,6 +109,7 @@ dependencies: - rsa - ruamel.yaml - ruamel.yaml.clib + - s3fs - s3transfer - scikit-image - scipy diff --git a/setup.cfg b/setup.cfg index f67afc64..ba01f18d 100644 --- a/setup.cfg +++ b/setup.cfg @@ -41,6 +41,7 @@ install_requires = fsspec>=2022.1.0 fiona rasterio>=1.3.2 + s3fs tflite-runtime tl2cgen diff --git a/tests/requirements.txt b/tests/requirements.txt index c7767686..8c4faafb 100644 --- a/tests/requirements.txt +++ b/tests/requirements.txt @@ -16,6 +16,7 @@ pytest pytest-cov pytest-httpserver pytest-timeout +s3fs tflite-runtime tl2cgen From 50ed73be038fef62064aa7fc90ed0a49d3118fe6 Mon Sep 17 00:00:00 2001 From: Emma Ai Date: Thu, 28 Nov 2024 01:28:30 +0000 Subject: [PATCH 5/8] move columns of dependencies into config --- odc/stats/plugins/lc_level34.py | 38 +++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/odc/stats/plugins/lc_level34.py b/odc/stats/plugins/lc_level34.py index edbdf985..238850d0 100644 --- a/odc/stats/plugins/lc_level34.py +++ b/odc/stats/plugins/lc_level34.py @@ -2,13 +2,14 @@ Plugin of Module A3 in LandCover PipeLine """ -from typing import Optional, Dict +from typing import Optional, Dict, List import xarray as xr import s3fs import os import pandas as pd import dask.array as da +import logging from ._registry import StatsPluginInterface, register from ._utils import rasterize_vector_mask, generate_numexpr_expressions @@ -16,6 +17,7 @@ from osgeo import gdal NODATA = 255 +_log = logging.getLogger(__name__) class StatsLccsLevel4(StatsPluginInterface): @@ -27,6 +29,7 @@ class StatsLccsLevel4(StatsPluginInterface): def __init__( self, class_def_path: str = None, + class_condition: Dict[str, List] = None, urban_mask: str = None, filter_expression: str = None, mask_threshold: Optional[float] = None, @@ -43,6 +46,9 @@ def __init__( elif not os.path.exists(class_def_path): raise FileNotFoundError(f"{class_def_path} not found") + if class_condition is None: + raise ValueError("Missing input to generate classification conditions") + if urban_mask is None: raise ValueError("Missing urban mask shapefile") @@ -54,8 +60,12 @@ def __init__( raise ValueError("Missing urban mask filter") self.class_def = pd.read_csv(class_def_path) - cols = list(self.class_def.columns[:6]) + list(self.class_def.columns[9:-6]) - self.class_def = self.class_def[cols].astype(str).fillna("nan") + self.class_condition = class_condition + cols = set() + for k, v in self.class_condition.items(): + cols |= {k} | set(v) + + self.class_def = self.class_def[list(cols)].astype(str).fillna("nan") self.urban_mask = urban_mask self.filter_expression = filter_expression @@ -77,6 +87,7 @@ def classification(self, xx, class_def, con_cols, class_col): res = da.full(xx.level_3_4.shape, 0, dtype="uint8") for expression in expressions: + _log.info(expression) local_dict.update({"res": res}) res = expr_eval( expression, @@ -98,9 +109,10 @@ def classification(self, xx, class_def, con_cols, class_col): return res def reduce(self, xx: xr.Dataset) -> xr.Dataset: - con_cols = ["level1", "artificial_surface", "cultivated"] class_col = "level3" - level3 = self.classification(xx, self.class_def, con_cols, class_col) + level3 = self.classification( + xx, self.class_def, self.class_condition[class_col], class_col + ) # apply urban mask # 215 -> 216 if urban_mask == 0 @@ -119,6 +131,8 @@ def reduce(self, xx: xr.Dataset) -> xr.Dataset: dtype="uint8", ) + # append level3 to the input dataset so it can be used + # to classify level4 attrs = xx.attrs.copy() attrs["nodata"] = NODATA dims = xx.level_3_4.dims[1:] @@ -127,18 +141,10 @@ def reduce(self, xx: xr.Dataset) -> xr.Dataset: level3.squeeze(), dims=dims, attrs=attrs, coords=coords ) - con_cols = [ - "level1", - "level3", - "woody", - "water_season", - "water_frequency", - "pv_pc_50", - "bs_pc_50", - ] class_col = "level4" - - level4 = self.classification(xx, self.class_def, con_cols, class_col) + level4 = self.classification( + xx, self.class_def, self.class_condition[class_col], class_col + ) data_vars = { k: xr.DataArray(v, dims=dims, attrs=attrs) From 2846b646985df73973e92eb0845e976e27797cb0 Mon Sep 17 00:00:00 2001 From: Emma Ai Date: Thu, 28 Nov 2024 02:26:10 +0000 Subject: [PATCH 6/8] add tests for level34 --- odc/stats/plugins/lc_level34.py | 2 +- tests/test_lc_level3.py | 107 ------------------- tests/{test_lc_l34.py => test_lc_level34.py} | 95 +++++++++++++++- 3 files changed, 95 insertions(+), 109 deletions(-) delete mode 100644 tests/test_lc_level3.py rename tests/{test_lc_l34.py => test_lc_level34.py} (58%) diff --git a/odc/stats/plugins/lc_level34.py b/odc/stats/plugins/lc_level34.py index 238850d0..ef48287d 100644 --- a/odc/stats/plugins/lc_level34.py +++ b/odc/stats/plugins/lc_level34.py @@ -87,7 +87,7 @@ def classification(self, xx, class_def, con_cols, class_col): res = da.full(xx.level_3_4.shape, 0, dtype="uint8") for expression in expressions: - _log.info(expression) + _log.debug(expression) local_dict.update({"res": res}) res = expr_eval( expression, diff --git a/tests/test_lc_level3.py b/tests/test_lc_level3.py deleted file mode 100644 index 90ecbd75..00000000 --- a/tests/test_lc_level3.py +++ /dev/null @@ -1,107 +0,0 @@ -import numpy as np -import pandas as pd -import xarray as xr -import dask.array as da - -from odc.stats.plugins.l34_utils import lc_level3 -from odc.stats.plugins._utils import rasterize_vector_mask -from datacube.utils.geometry import GeoBox -from affine import Affine - -import pytest - -NODATA = 255 - -expected_l3_classes = [ - [111, 112, 215], - [124, 112, 215], - [220, 215, 216], - [220, 255, 220], -] - - -@pytest.fixture(scope="module") -def image_groups(): - l34 = np.array( - [ - [ - [110, 110, 210], - [124, 110, 210], - [221, 210, 210], - [223, np.nan, 223], - ] - ], - dtype="float32", - ) - - urban = np.array( - [ - [ - [215, 215, 215], - [216, 216, 215], - [116, 215, 216], - [216, 216, 216], - ] - ], - dtype="uint8", - ) - - cultivated = np.array( - [ - [ - [111, 112, 255], - [255, 112, 255], - [255, 255, 255], - [255, np.nan, np.nan], - ] - ], - dtype="float32", - ) - - tuples = [ - (np.datetime64("2000-01-01T00"), np.datetime64("2000-01-01")), - ] - index = pd.MultiIndex.from_tuples(tuples, names=["time", "solar_day"]) - - affine = Affine.translation(10, 0) * Affine.scale( - (20 - 10) / l34.shape[2], (5 - 0) / l34.shape[1] - ) - geobox = GeoBox( - crs="epsg:3577", affine=affine, width=l34.shape[2], height=l34.shape[1] - ) - coords = geobox.xr_coords() - - data_vars = { - "level_3_4": xr.DataArray( - da.from_array(l34, chunks=(1, -1, -1)), - dims=("spec", "y", "x"), - attrs={"nodata": 255}, - ), - "artificial_surface": xr.DataArray( - da.from_array(urban, chunks=(1, -1, -1)), - dims=("spec", "y", "x"), - attrs={"nodata": 255}, - ), - "cultivated": xr.DataArray( - da.from_array(cultivated, chunks=(1, -1, -1)), - dims=("spec", "y", "x"), - attrs={"nodata": 255}, - ), - } - xx = xr.Dataset(data_vars=data_vars, coords=coords) - xx = xx.assign_coords(xr.Coordinates.from_pandas_multiindex(index, "spec")) - return xx - - -def test_l3_classes(image_groups, urban_shape): - filter_expression = "mock > 9" - urban_mask = rasterize_vector_mask( - urban_shape, - image_groups.geobox.transform, - image_groups.artificial_surface.shape, - filter_expression=filter_expression, - threshold=0.3, - ) - - level3_classes = lc_level3.lc_level3(image_groups, urban_mask) - assert (level3_classes == expected_l3_classes).all() diff --git a/tests/test_lc_l34.py b/tests/test_lc_level34.py similarity index 58% rename from tests/test_lc_l34.py rename to tests/test_lc_level34.py index 54706d05..5f1113a3 100644 --- a/tests/test_lc_l34.py +++ b/tests/test_lc_level34.py @@ -1,4 +1,7 @@ from odc.stats.plugins.lc_level34 import StatsLccsLevel4 +from odc.stats.plugins._utils import generate_numexpr_expressions + +import re import numpy as np import pandas as pd import xarray as xr @@ -6,7 +9,6 @@ from datacube.utils.geometry import GeoBox from affine import Affine - import pytest @@ -178,6 +180,21 @@ def test_l4_classes(image_groups, urban_shape): expected_l4 = [[95, 97, 93], [97, 96, 96], [100, 93, 93], [101, 101, 101]] stats_l4 = StatsLccsLevel4( measurements=["level3", "level4"], + class_def_path="s3://dea-public-data-dev/lccs_validation/c3/data_to_plot/" + "lccs_colour_scheme_golden_dark_au_c3.csv", + class_condition={ + "level3": ["level1", "artificial_surface", "cultivated"], + "level4": [ + "level1", + "level3", + "woody", + "water_season", + "water_frequency", + "pv_pc_50", + "bs_pc_50", + ], + }, + data_var_condition={"level1": "level_3_4"}, urban_mask=urban_shape, filter_expression="mock > 9", mask_threshold=0.3, @@ -186,3 +203,79 @@ def test_l4_classes(image_groups, urban_shape): assert (ds.level3.compute() == expected_l3).all() assert (ds.level4.compute() == expected_l4).all() + + +@pytest.mark.parametrize( + "rules_df, expected_expressions", + [ + # Test with range conditions + # when condition numbers are the same the order doesn't matter + ( + pd.DataFrame( + { + "condition_1": ["[5, 10)", "(1, 4]"], + "condition_2": ["==2", "!=2"], + "final_class": [1, 2], + } + ), + [ + "where((condition_1>1.0)&(condition_1<=4.0)&(condition_2!=2.0), 2, previous)", + "where((condition_1>=5.0)&(condition_1<10.0)&(condition_2==2.0), 1, previous)", + ], + ), + # Test with NaN + # when clause with smaller number of conditions always takes precedence + ( + pd.DataFrame( + { + "condition_1": ["[5, 10)", "nan"], + "condition_2": ["==2", "!=2"], + "final_class": [1, 2], + } + ), + [ + "where((condition_2!=2.0), 2, previous)", + "where((condition_1>=5.0)&(condition_1<10.0)&(condition_2==2.0), 1, previous)", + ], + ), + # Test with single value implying "==" and "255" + ( + pd.DataFrame( + { + "condition_1": ["3", "255"], + "condition_2": ["==2", "!=2"], + "final_class": [1, 2], + } + ), + [ + "where((condition_2!=2.0), 2, previous)", + "where((condition_1==3)&(condition_2==2.0), 1, previous)", + ], + ), + ], +) +def test_generate_numexpr_expressions(rules_df, expected_expressions): + con_cols = ["condition_1", "condition_2"] + class_col = "final_class" + + generated_expressions = generate_numexpr_expressions( + rules_df[con_cols + [class_col]], class_col, "previous" + ) + + def normalize_expression(expression): + match = re.match(r"where\((.*), (.*?), (.*?)\)", expression) + if match: + conditions, true_value, false_value = match.groups() + # Split conditions, sort them, and rejoin + sorted_conditions = "&".join(sorted(conditions.split("&"))) + return f"where({sorted_conditions}, {true_value}, {false_value})" + return expression + + normalized_expected = [normalize_expression(expr) for expr in expected_expressions] + normalized_generated = [ + normalize_expression(expr) for expr in generated_expressions + ] + + assert ( + normalized_generated == normalized_expected + ), f"Expected {expected_expressions}, but got {generated_expressions}" From 9524c6d6a9e66de5afbca9843b7f2c23b566560d Mon Sep 17 00:00:00 2001 From: Emma Ai Date: Thu, 28 Nov 2024 03:48:07 +0000 Subject: [PATCH 7/8] allow test access remote csv --- odc/stats/plugins/lc_level34.py | 2 +- tests/test_lc_level34.py | 51 ++++++++++++++++++++------------- 2 files changed, 32 insertions(+), 21 deletions(-) diff --git a/odc/stats/plugins/lc_level34.py b/odc/stats/plugins/lc_level34.py index ef48287d..4b17107d 100644 --- a/odc/stats/plugins/lc_level34.py +++ b/odc/stats/plugins/lc_level34.py @@ -41,7 +41,7 @@ def __init__( raise ValueError("Missing level34 class definition csv") if class_def_path.startswith("s3://"): - if not s3fs.S3FileSystem().exists(class_def_path): + if not s3fs.S3FileSystem(anon=True).exists(class_def_path): raise FileNotFoundError(f"{class_def_path} not found") elif not os.path.exists(class_def_path): raise FileNotFoundError(f"{class_def_path} not found") diff --git a/tests/test_lc_level34.py b/tests/test_lc_level34.py index 5f1113a3..fc27e7f5 100644 --- a/tests/test_lc_level34.py +++ b/tests/test_lc_level34.py @@ -2,12 +2,15 @@ from odc.stats.plugins._utils import generate_numexpr_expressions import re +import os import numpy as np import pandas as pd import xarray as xr import dask.array as da from datacube.utils.geometry import GeoBox from affine import Affine +from unittest.mock import patch + import pytest @@ -178,27 +181,35 @@ def test_l4_classes(image_groups, urban_shape): expected_l3 = [[216, 216, 215], [216, 216, 216], [220, 215, 215], [220, 220, 220]] expected_l4 = [[95, 97, 93], [97, 96, 96], [100, 93, 93], [101, 101, 101]] - stats_l4 = StatsLccsLevel4( - measurements=["level3", "level4"], - class_def_path="s3://dea-public-data-dev/lccs_validation/c3/data_to_plot/" - "lccs_colour_scheme_golden_dark_au_c3.csv", - class_condition={ - "level3": ["level1", "artificial_surface", "cultivated"], - "level4": [ - "level1", - "level3", - "woody", - "water_season", - "water_frequency", - "pv_pc_50", - "bs_pc_50", - ], + with patch.dict( + os.environ, + { + "AWS_ACCESS_KEY_ID": "fake-access-key", + "AWS_SECRET_ACCESS_KEY": "fake-secret-key", + "AWS_SESSION_TOKEN": "fake-session-token", # Optional }, - data_var_condition={"level1": "level_3_4"}, - urban_mask=urban_shape, - filter_expression="mock > 9", - mask_threshold=0.3, - ) + ): + stats_l4 = StatsLccsLevel4( + measurements=["level3", "level4"], + class_def_path="s3://dea-public-data-dev/lccs_validation/c3/data_to_plot/" + "lccs_colour_scheme_golden_dark_au_c3.csv", + class_condition={ + "level3": ["level1", "artificial_surface", "cultivated"], + "level4": [ + "level1", + "level3", + "woody", + "water_season", + "water_frequency", + "pv_pc_50", + "bs_pc_50", + ], + }, + data_var_condition={"level1": "level_3_4"}, + urban_mask=urban_shape, + filter_expression="mock > 9", + mask_threshold=0.3, + ) ds = stats_l4.reduce(image_groups) assert (ds.level3.compute() == expected_l3).all() From d58b0c06708a2003f5bcec9e2e9e656678919c71 Mon Sep 17 00:00:00 2001 From: Toktam Ebadi Date: Thu, 28 Nov 2024 04:18:19 +0000 Subject: [PATCH 8/8] Add a unit test to cover all the classes at level4 --- tests/test_lc_level34.py | 207 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 207 insertions(+) diff --git a/tests/test_lc_level34.py b/tests/test_lc_level34.py index fc27e7f5..0dc66203 100644 --- a/tests/test_lc_level34.py +++ b/tests/test_lc_level34.py @@ -290,3 +290,210 @@ def normalize_expression(expression): assert ( normalized_generated == normalized_expected ), f"Expected {expected_expressions}, but got {generated_expressions}" + + +def test_level4(urban_shape): + + level_3_4 = np.array( + [ + [ + [110, 110, 110, 110, 110, 110], + [110, 110, 110, 110, 110, 110], + [110, 110, 110, 110, 110, 110], + [110, 110, 110, 110, 110, 110], + ] + ], + dtype="uint8", + ) + + cultivated = np.array( + [ + [ + [111, 111, 111, 111, 111, 111], + [111, 111, 111, 111, 111, 111], + [112, 112, 112, 112, 112, 112], + [112, 112, 112, 112, 112, 112], + ] + ], + dtype="uint8", + ) + + urban = np.array( + [ + [ + [255, 255, 255, 255, 255, 255], + [255, 255, 255, 255, 255, 255], + [255, 255, 255, 255, 255, 255], + [255, 255, 255, 255, 255, 255], + ] + ], + dtype="uint8", + ) + + woody = np.array( + [ + [ + [113, 114, 113, 113, 113, 113], + [113, 114, 114, 114, 114, 114], + [113, 114, 113, 113, 113, 113], + [113, 114, 114, 114, 114, 114], + ] + ], + dtype="uint8", + ) + + pv_pc_50 = np.array( + [ + [ + [np.nan, np.nan, 66, 41, 21, 5], + [3, 66, 41, 21, 5, 3], + [np.nan, np.nan, 66, 41, 21, 5], + [3, 66, 41, 21, 5, 3], + ] + ], + dtype="float32", + ) + + water_season = np.array( + [ + [ + [255, 255, 255, 255, 255, 255], + [255, 255, 255, 255, 255, 255], + [255, 255, 255, 255, 255, 255], + [255, 255, 255, 255, 255, 255], + ] + ], + dtype="uint8", + ) + + water_frequency = np.array( + [ + [ + [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan], + [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan], + [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan], + [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan], + ] + ], + dtype="float32", + ) + + bs_pc_50 = np.array( + [ + [ + [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan], + [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan], + [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan], + [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan], + ] + ], + dtype="float32", + ) + + tuples = [ + (np.datetime64("2000-01-01T00"), np.datetime64("2000-01-01")), + ] + index = pd.MultiIndex.from_tuples(tuples, names=["time", "solar_day"]) + + affine = Affine.translation(10, 0) * Affine.scale( + (20 - 10) / level_3_4.shape[2], (5 - 0) / level_3_4.shape[1] + ) + geobox = GeoBox( + crs="epsg:3577", + affine=affine, + width=level_3_4.shape[2], + height=level_3_4.shape[1], + ) + coords = geobox.xr_coords() + + data_vars = { + "level_3_4": xr.DataArray( + da.from_array(level_3_4, chunks=(1, -1, -1)), + dims=("spec", "y", "x"), + attrs={"nodata": 255}, + ), + "artificial_surface": xr.DataArray( + da.from_array(urban, chunks=(1, -1, -1)), + dims=("spec", "y", "x"), + attrs={"nodata": 255}, + ), + "cultivated": xr.DataArray( + da.from_array(cultivated, chunks=(1, -1, -1)), + dims=("spec", "y", "x"), + attrs={"nodata": 255}, + ), + "woody": xr.DataArray( + da.from_array(woody, chunks=(1, -1, -1)), + dims=("spec", "y", "x"), + attrs={"nodata": 255}, + ), + "pv_pc_50": xr.DataArray( + da.from_array(pv_pc_50, chunks=(1, -1, -1)), + dims=("spec", "y", "x"), + attrs={"nodata": 255}, + ), + "bs_pc_50": xr.DataArray( + da.from_array(bs_pc_50, chunks=(1, -1, -1)), + dims=("spec", "y", "x"), + attrs={"nodata": 255}, + ), + "water_frequency": xr.DataArray( + da.from_array(water_frequency, chunks=(1, -1, -1)), + dims=("spec", "y", "x"), + attrs={"nodata": 255}, + ), + "water_season": xr.DataArray( + da.from_array(water_season, chunks=(1, -1, -1)), + dims=("spec", "y", "x"), + attrs={"nodata": 255}, + ), + } + + xx = xr.Dataset(data_vars=data_vars, coords=coords) + xx = xx.assign_coords(xr.Coordinates.from_pandas_multiindex(index, "spec")) + + expected_l4 = np.array( + [ + [2, 3, 9, 10, 11, 12], + [13, 14, 15, 16, 17, 18], + [20, 21, 27, 28, 29, 30], + [31, 32, 33, 34, 35, 36], + ], + dtype="uint8", + ) + + expected_l3 = np.array( + [ + [111, 111, 111, 111, 111, 111], + [111, 111, 111, 111, 111, 111], + [112, 112, 112, 112, 112, 112], + [112, 112, 112, 112, 112, 112], + ], + dtype="uint8", + ) + + stats_l4 = StatsLccsLevel4( + measurements=["level3", "level4"], + class_def_path="s3://dea-public-data-dev/lccs_validation/c3/data_to_plot/" + "lccs_colour_scheme_golden_dark_au_c3.csv", + class_condition={ + "level3": ["level1", "artificial_surface", "cultivated"], + "level4": [ + "level1", + "level3", + "woody", + "water_season", + "water_frequency", + "pv_pc_50", + "bs_pc_50", + ], + }, + data_var_condition={"level1": "level_3_4"}, + urban_mask=urban_shape, + filter_expression="mock > 9", + mask_threshold=0.3, + ) + ds = stats_l4.reduce(xx) + + assert (ds.level3.compute() == expected_l3).all() + assert (ds.level4.compute() == expected_l4).all()