Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add support for apply_vectorcube udf Open-EO/openeo-geopyspark-driver… #314

Conversation

JeroenVerstraelen
Copy link
Contributor

…#881

openeo_driver/datacube.py Outdated Show resolved Hide resolved
@soxofaan
Copy link
Member

also consider adding tests,

e.g. see

@pytest.mark.parametrize("dimension", ["bands", "properties"])
def test_apply_dimension_run_udf_change_geometry(self, gdf, backend_implementation, dimension):
    """
    Run a UDF through `DriverVectorCube.apply_dimension` that buffers every geometry
    and check the bounding boxes and properties of the resulting GeoJSON.
    """
    vc = DriverVectorCube.from_geodataframe(gdf, dimension_name=dimension)
    # The UDF source is dedented before use, so it can be indented along with the test body.
    udf = textwrap.dedent(
        """
        from openeo.udf import UdfData, FeatureCollection
        def process_vector_cube(udf_data: UdfData) -> UdfData:
            [feature_collection] = udf_data.get_feature_collection_list()
            gdf = feature_collection.data
            gdf["geometry"] = gdf["geometry"].buffer(distance=1, resolution=2)
            udf_data.set_feature_collection_list([
                FeatureCollection(id="_", data=gdf),
            ])
        """
    )
    callback = {
        "runudf1": {
            "process_id": "run_udf",
            "arguments": {"data": {"from_parameter": "data"}, "udf": udf, "runtime": "Python"},
            "result": True,
        }
    }
    env = EvalEnv({"backend_implementation": backend_implementation})
    result = vc.apply_dimension(process=callback, dimension=dimension, env=env)
    assert isinstance(result, DriverVectorCube)
    feature_collection = result.to_geojson()
    # Only the listed keys are compared (`DictSubSet`); bounding boxes are matched approximately.
    assert feature_collection == DictSubSet(
        {
            "type": "FeatureCollection",
            "bbox": pytest.approx((0, 0, 6, 5), abs=0.1),
            "features": [
                {
                    "type": "Feature",
                    "bbox": pytest.approx((0, 0, 4, 4), abs=0.1),
                    "geometry": DictSubSet({"type": "Polygon"}),
                    "id": "0",
                    "properties": {"id": "first", "pop": 1234},
                },
                {
                    "type": "Feature",
                    "bbox": pytest.approx((2, 1, 6, 5), abs=0.1),
                    "geometry": DictSubSet({"type": "Polygon"}),
                    "id": "1",
                    "properties": {"id": "second", "pop": 5678},
                },
            ],
        }
    )
@pytest.mark.parametrize("dimension", ["bands", "properties"])
def test_apply_dimension_run_udf_add_properties(self, gdf, backend_implementation, dimension):
    """
    Run a UDF through `DriverVectorCube.apply_dimension` that adds two columns
    derived from "pop" ("popone" and "poppop") and check the internal JSON
    representation (geometries and cube) of the result.
    """
    vc = DriverVectorCube.from_geodataframe(gdf, dimension_name=dimension)
    # The UDF source is dedented before use, so it can be indented along with the test body.
    udf = textwrap.dedent(
        """
        from openeo.udf import UdfData, FeatureCollection
        def process_vector_cube(udf_data: UdfData) -> UdfData:
            [feature_collection] = udf_data.get_feature_collection_list()
            gdf = feature_collection.data
            gdf["popone"] = gdf["pop"] + 1
            gdf["poppop"] = gdf["pop"] ** 2
            udf_data.set_feature_collection_list([
                FeatureCollection(id="_", data=gdf),
            ])
        """
    )
    callback = {
        "runudf1": {
            "process_id": "run_udf",
            "arguments": {"data": {"from_parameter": "data"}, "udf": udf, "runtime": "Python"},
            "result": True,
        }
    }
    env = EvalEnv({"backend_implementation": backend_implementation})
    result = vc.apply_dimension(process=callback, dimension=dimension, env=env)
    assert isinstance(result, DriverVectorCube)
    assert result.to_internal_json() == {
        "geometries": {
            "type": "FeatureCollection",
            "features": [
                {
                    "type": "Feature",
                    "geometry": ApproxGeoJSONByBounds(1, 1, 3, 3, types=["Polygon"], abs=0.01),
                    "id": "0",
                    "properties": {"id": "first", "pop": 1234, "popone": 1235, "poppop": 1522756},
                    "bbox": pytest.approx((1, 1, 3, 3), abs=0.01),
                },
                {
                    "type": "Feature",
                    "geometry": ApproxGeoJSONByBounds(3, 2, 5, 4, types=["Polygon"], abs=0.01),
                    "id": "1",
                    "properties": {"id": "second", "pop": 5678, "popone": 5679, "poppop": 32239684},
                    "bbox": pytest.approx((3, 2, 5, 4), abs=0.01),
                },
            ],
            "bbox": pytest.approx((1, 1, 5, 4), abs=0.01),
        },
        "cube": {
            "name": None,
            "dims": ("geometry", "properties"),
            "coords": {
                "geometry": {"attrs": {}, "data": [0, 1], "dims": ("geometry",)},
                "properties": {"attrs": {}, "data": ["pop", "popone", "poppop"], "dims": ("properties",)},
            },
            "data": [[1234, 1235, 1522756], [5678, 5679, 32239684]],
            "attrs": {},
        },
    }

and
class TestVectorCubeRunUDF:
    """
    Tests about running UDF based manipulations on vector cubes

    References:
    - https://github.com/Open-EO/openeo-python-driver/issues/197
    - https://github.com/Open-EO/openeo-python-driver/pull/200
    - https://github.com/Open-EO/openeo-geopyspark-driver/issues/437
    """

    def _build_run_udf_callback(self, udf_code: str) -> dict:
        """
        Wrap UDF source code in a single-node `run_udf` process graph.

        :param udf_code: Python UDF source; it is dedented first, so callers
            can pass an indented triple-quoted string.
        :return: dict with a "process_graph" entry holding one `run_udf` result node
            that takes its data from the "data" parameter.
        """
        udf_code = textwrap.dedent(udf_code)
        return {
            "process_graph": {
                "runudf1": {
                    "process_id": "run_udf",
                    "arguments": {
                        "data": {"from_parameter": "data"},
                        "udf": udf_code,
                        "runtime": "Python",
                    },
                    "result": True,
                }
            },
        }

    @pytest.mark.parametrize(
        "dimension",
        [
            "properties",
            "geometry",
        ],
    )
    def test_apply_dimension_run_udf_change_geometry(self, api, dimension):
        """VectorCube + apply_dimension + UDF (changing geometry)"""
        process_graph = {
            "load": {
                "process_id": "load_geojson",
                "arguments": {
                    "data": load_json("geojson/FeatureCollection02.json"),
                    "properties": ["pop"],
                },
            },
            "apply_dimension": {
                "process_id": "apply_dimension",
                "arguments": {
                    "data": {"from_node": "load"},
                    "dimension": dimension,
                    "process": self._build_run_udf_callback(
                        """
                        from openeo.udf import UdfData, FeatureCollection
                        def process_vector_cube(udf_data: UdfData) -> UdfData:
                            [feature_collection] = udf_data.get_feature_collection_list()
                            gdf = feature_collection.data
                            gdf["geometry"] = gdf["geometry"].buffer(distance=1, resolution=2)
                            udf_data.set_feature_collection_list([
                                FeatureCollection(id="_", data=gdf),
                            ])
                        """
                    ),
                },
                "result": True,
            },
        }
        resp = api.check_result(process_graph)
        assert resp.json == DictSubSet(
            {
                "type": "FeatureCollection",
                "features": [
                    {
                        "type": "Feature",
                        "geometry": ApproxGeoJSONByBounds(0, 0, 4, 4, types=["Polygon"], abs=0.1),
                        "properties": {"id": "first", "pop": 1234},
                    },
                    {
                        "type": "Feature",
                        "geometry": ApproxGeoJSONByBounds(2, 1, 6, 5, types=["Polygon"], abs=0.1),
                        "properties": {"id": "second", "pop": 5678},
                    },
                ],
            }
        )

    @pytest.mark.parametrize(
        "dimension",
        [
            # TODO: this `dimension="properties"` use case does not strictly follow the openEO API spec
            #       `apply_dimension` only allows changing the cardinality of the provided dimension ("properties"),
            #       not any other dimension ("geometries" here).
            "properties",
            "geometry",
        ],
    )
    def test_apply_dimension_run_udf_filter_on_geometries(self, api, dimension):
        """
        Test to use `apply_dimension(dimension="...", process=UDF)` to filter out certain
        entries from geometries dimension based on geometry (e.g. intersection with another geometry)
        """
        process_graph = {
            "load": {
                "process_id": "load_geojson",
                "arguments": {
                    "data": load_json("geojson/FeatureCollection10.json"),
                    "properties": ["pop"],
                },
            },
            "apply_dimension": {
                "process_id": "apply_dimension",
                "arguments": {
                    "data": {"from_node": "load"},
                    "dimension": dimension,
                    "process": self._build_run_udf_callback(
                        """
                        from openeo.udf import UdfData, FeatureCollection
                        import shapely.geometry
                        def process_vector_cube(udf_data: UdfData) -> UdfData:
                            [feature_collection] = udf_data.get_feature_collection_list()
                            gdf = feature_collection.data
                            to_intersect = shapely.geometry.box(4, 3, 8, 4)
                            gdf = gdf[gdf["geometry"].intersects(to_intersect)]
                            udf_data.set_feature_collection_list([
                                FeatureCollection(id="_", data=gdf),
                            ])
                        """
                    ),
                },
                "result": True,
            },
        }
        resp = api.check_result(process_graph)
        assert resp.json == DictSubSet(
            {
                "type": "FeatureCollection",
                "features": [
                    {
                        "type": "Feature",
                        "geometry": ApproxGeoJSONByBounds(3, 2, 5, 4, types=["Polygon"], abs=0.1),
                        "properties": {"id": "second", "pop": 456},
                    },
                    {
                        "type": "Feature",
                        "geometry": ApproxGeoJSONByBounds(6, 2, 12, 6, types=["Polygon"], abs=0.1),
                        "properties": {"id": "third", "pop": 789},
                    },
                ],
            }
        )

    @pytest.mark.parametrize(
        "dimension",
        [
            # TODO: this `dimension="properties"` use case does not strictly follow the openEO API spec
            #       `apply_dimension` only allows changing the cardinality of the provided dimension ("properties"),
            #       not any other dimension ("geometries" here).
            "properties",
            "geometry",
        ],
    )
    def test_apply_dimension_run_udf_filter_on_properties(self, api, dimension):
        """
        Test to use `apply_dimension(dimension="...", process=UDF)` to filter out certain
        entries from geometries dimension, based on feature properties

        Note in case of dimension="properties":
            strictly speaking, this approach draws outside the lines of the openEO API spec
            as apply_dimension only allows changing the cardinality of the provided dimension ("properties" in this case),
            not any other dimension (like "geometries" in this case).
        """
        process_graph = {
            "load": {
                "process_id": "load_geojson",
                "arguments": {
                    "data": load_json("geojson/FeatureCollection10.json"),
                    "properties": ["pop"],
                },
            },
            "apply_dimension": {
                "process_id": "apply_dimension",
                "arguments": {
                    "data": {"from_node": "load"},
                    "dimension": dimension,
                    "process": self._build_run_udf_callback(
                        """
                        from openeo.udf import UdfData, FeatureCollection
                        def process_vector_cube(udf_data: UdfData) -> UdfData:
                            [feature_collection] = udf_data.get_feature_collection_list()
                            gdf = feature_collection.data
                            gdf = gdf[gdf["pop"] > 500]
                            udf_data.set_feature_collection_list([
                                FeatureCollection(id="_", data=gdf),
                            ])
                        """
                    ),
                },
                "result": True,
            },
        }
        resp = api.check_result(process_graph)
        assert resp.json == DictSubSet(
            {
                "type": "FeatureCollection",
                "features": [
                    {
                        "type": "Feature",
                        "geometry": ApproxGeoJSONByBounds(6.0, 2.0, 12.0, 6.0, types=["Polygon"], abs=0.1),
                        "properties": {"id": "third", "pop": 789},
                    },
                    {
                        "type": "Feature",
                        "geometry": ApproxGeoJSONByBounds(-2.0, 7.0, 5.0, 14.0, types=["Polygon"], abs=0.1),
                        "properties": {"id": "fourth", "pop": 101112},
                    },
                ],
            }
        )

    @pytest.mark.parametrize(
        "dimension",
        [
            "properties",
            # TODO: this `dimension="geometry"` use case does not strictly follow the openEO API spec
            #       `apply_dimension` only allows changing the cardinality of the provided dimension ("geometry"),
            #       not any other dimension ("properties" here).
            "geometry",
        ],
    )
    def test_apply_dimension_run_udf_add_properties(self, api, dimension):
        """
        Test to use `apply_dimension(dimension="...", process=UDF)` to add properties
        """
        process_graph = {
            "load": {
                "process_id": "load_geojson",
                "arguments": {
                    "data": load_json("geojson/FeatureCollection02.json"),
                    "properties": ["pop"],
                },
            },
            "apply_dimension": {
                "process_id": "apply_dimension",
                "arguments": {
                    "data": {"from_node": "load"},
                    "dimension": dimension,
                    "process": self._build_run_udf_callback(
                        """
                        from openeo.udf import UdfData, FeatureCollection
                        def process_vector_cube(udf_data: UdfData) -> UdfData:
                            [feature_collection] = udf_data.get_feature_collection_list()
                            gdf = feature_collection.data
                            gdf["poppop"] = gdf["pop"] ** 2
                            udf_data.set_feature_collection_list([
                                FeatureCollection(id="_", data=gdf),
                            ])
                        """
                    ),
                },
                "result": True,
            },
        }
        resp = api.check_result(process_graph)
        assert resp.json == DictSubSet(
            {
                "type": "FeatureCollection",
                "features": [
                    {
                        "type": "Feature",
                        "geometry": ApproxGeoJSONByBounds(1.0, 1.0, 3.0, 3.0, types=["Polygon"], abs=0.1),
                        "properties": {"id": "first", "pop": 1234, "poppop": 1234 * 1234},
                    },
                    {
                        "type": "Feature",
                        "geometry": ApproxGeoJSONByBounds(3.0, 2.0, 5.0, 4.0, types=["Polygon"], abs=0.1),
                        "properties": {"id": "second", "pop": 5678, "poppop": 5678 * 5678},
                    },
                ],
            }
        )

openeo_driver/datacube.py Outdated Show resolved Hide resolved
@soxofaan
Copy link
Member

and the client dependency in setup.py

"openeo>=0.25.0",

should be bumped to properly signal the dependency on Open-EO/openeo-python-client#631.
I think it should become "openeo>=0.32.0.a2.dev".

@JeroenVerstraelen JeroenVerstraelen merged commit 590c341 into master Sep 26, 2024
@JeroenVerstraelen JeroenVerstraelen deleted the 881-apply_vectorcubegeometries-cube-udf-for-vectorcubes branch September 26, 2024 17:35
@soxofaan
Copy link
Member

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

Successfully merging this pull request may close these issues.

apply_vectorcube(geometries, cube) udf for VectorCubes
2 participants