diff --git a/CHANGELOG.md b/CHANGELOG.md index 77db3a9c..18e8a101 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,8 +21,10 @@ and start a new "In Progress" section above it. ## In progress -## 0.107.8 +# 0.108.0 +- Added support for `apply_vectorcube` UDF signature in `run_udf_code` ([Open-EO/openeo-geopyspark-driver#881]https://github.com/Open-EO/openeo-geopyspark-driver/issues/881) +## 0.107.8 - add `check_config_definition` helper to check definition of `OpenEoBackendConfig` based configs ## 0.107.7 diff --git a/openeo_driver/_version.py b/openeo_driver/_version.py index e60bf092..033f92ca 100644 --- a/openeo_driver/_version.py +++ b/openeo_driver/_version.py @@ -1 +1 @@ -__version__ = "0.107.8a1" +__version__ = "0.108.0a1" diff --git a/openeo_driver/datacube.py b/openeo_driver/datacube.py index b705ec9e..10414d6d 100644 --- a/openeo_driver/datacube.py +++ b/openeo_driver/datacube.py @@ -3,6 +3,7 @@ import inspect import io import logging +import re import zipfile from pathlib import Path from typing import Any, Dict, List, Optional, Sequence, Tuple, Union @@ -770,9 +771,44 @@ def apply_dimension( or (dimension in {self.DIM_BANDS, self.DIM_PROPERTIES}.intersection(self.get_dimension_names())) and target_dimension is None ): + + def check_udf_result(result: openeo.udf.UdfData, require_datacube=False): + if not isinstance(result, openeo.udf.UdfData): + raise ValueError(f"UDF should return UdfData, but got {type(result)}") + feature_list = result.get_feature_collection_list() + if not (feature_list and len(feature_list) == 1): + raise ValueError( + f"UDF should return single feature collection but got {feature_list and len(feature_list)}" + ) + if require_datacube: + datacube_list = result.get_datacube_list() + if not (datacube_list and len(datacube_list) == 1): + raise ValueError( + f"UDF should return single datacube but got {datacube_list and len(datacube_list)}" + ) + return feature_list[0].data, datacube_list[0].array + return feature_list[0].data, None + log.warning( f"Using experimental feature: DriverVectorCube.apply_dimension along dim {dimension} and empty cube" ) + if re.search(r"^def\s+apply_vectorcube\s*\(", single_run_udf.udf, re.MULTILINE): + datacube = openeo.udf.XarrayDataCube(array=self._cube) + feature_collection = openeo.udf.FeatureCollection(id="_", data=self._geometries) + udf_data = openeo.udf.UdfData( + proj={"EPSG": self._geometries.crs.to_epsg()} if self._geometries.crs else None, + datacube_list=[datacube], + feature_collection_list=[feature_collection], + user_context=context, + ) + log.info( + f"[run_udf] Running apply_vectorcube UDF {str_truncate(single_run_udf.udf, width = 256)!r} on {udf_data!r}" + ) + result_data = env.backend_implementation.processing.run_udf(udf=single_run_udf.udf, data=udf_data) + log.info(f"[run_udf] UDF resulted in {result_data!r}") + result_features, result_datacube = check_udf_result(result_data, require_datacube=True) + return DriverVectorCube(geometries=result_features, cube=result_datacube) + # TODO: data chunking (e.g. large feature collections) gdf = self._as_geopandas_df() feature_collection = openeo.udf.FeatureCollection(id="_", data=gdf) @@ -785,15 +821,8 @@ def apply_dimension( log.info(f"[run_udf] Running UDF {str_truncate(single_run_udf.udf, width=256)!r} on {udf_data!r}") result_data = env.backend_implementation.processing.run_udf(udf=single_run_udf.udf, data=udf_data) log.info(f"[run_udf] UDF resulted in {result_data!r}") - - if not isinstance(result_data, openeo.udf.UdfData): - raise ValueError(f"UDF should return UdfData, but got {type(result_data)}") - result_features = result_data.get_feature_collection_list() - if not (result_features and len(result_features) == 1): - raise ValueError( - f"UDF should return single feature collection but got {result_features and len(result_features)}" - ) - return DriverVectorCube.from_geodataframe(result_features[0].data) + result_features, _ = check_udf_result(result_data) + return DriverVectorCube.from_geodataframe(result_features) raise FeatureUnsupportedException( message=f"DriverVectorCube.apply_dimension with {dimension=} and {bool(single_run_udf)=}" diff --git a/setup.py b/setup.py index f293cbb4..7aafb612 100644 --- a/setup.py +++ b/setup.py @@ -53,7 +53,7 @@ "flask>=2.0.0", "werkzeug>=3.0.3", # https://github.com/Open-EO/openeo-python-driver/issues/243 "requests>=2.28.0", - "openeo>=0.25.0", + "openeo>=0.32.0.a2.dev", "openeo_processes==0.0.4", # 0.0.4 is special build/release, also see https://github.com/Open-EO/openeo-python-driver/issues/152 "gunicorn>=20.0.1", "numpy>=1.22.0",