diff --git a/ecoscope/io/earthranger.py b/ecoscope/io/earthranger.py index f15fafce..e282ccdb 100644 --- a/ecoscope/io/earthranger.py +++ b/ecoscope/io/earthranger.py @@ -17,6 +17,7 @@ clean_kwargs, clean_time_cols, dataframe_to_dict, + filter_bad_geojson, format_iso_time, to_gdf, to_hex, @@ -512,6 +513,9 @@ def get_subjectgroup_observations( else: subjects = self.get_subjects(subject_group_name=subject_group_name, include_inactive=include_inactive) + if subjects.empty: + return subjects + return self.get_subject_observations(subjects, **kwargs) def get_event_types(self, include_inactive=False, **addl_kwargs): @@ -629,9 +633,8 @@ def get_events( if not gdf.empty: gdf = clean_time_cols(gdf) if gdf.loc[0, "location"] is not None: - gdf.loc[~gdf["geojson"].isna(), "geometry"] = gpd.GeoDataFrame.from_features( - gdf.loc[~gdf["geojson"].isna(), "geojson"] - )["geometry"] + gdf = filter_bad_geojson(gdf) + gdf["geometry"] = gpd.GeoDataFrame.from_features(gdf["geojson"])["geometry"] gdf.set_geometry("geometry", inplace=True) gdf.set_crs(4326, inplace=True) gdf.sort_values("time", inplace=True) @@ -641,7 +644,9 @@ def get_events( def get_patrol_types(self): df = pd.DataFrame(self._get("activity/patrols/types")) - return df.set_index("id") + if not df.empty: + df = df.set_index("id") + return df def get_patrols(self, since=None, until=None, patrol_type=None, patrol_type_value=None, status=None, **addl_kwargs): """ @@ -738,19 +743,20 @@ def get_patrol_events( events = [] for _, row in patrol_df.iterrows(): - if row["patrol_segments"]: - for segment in row["patrol_segments"]: - for event in segment.get("events", []): - event["patrol_id"] = row.get("id") - event["patrol_segment_id"] = segment.get("id") - event["patrol_start_time"] = (segment.get("time_range") or {}).get("start_time") - events.append(event) + for segment in row.get("patrol_segments", []): + for event in segment.get("events", []): + event["patrol_id"] = row.get("id") + event["patrol_segment_id"] = 
segment.get("id") + event["patrol_start_time"] = (segment.get("time_range") or {}).get("start_time") + events.append(event) events_df = pd.DataFrame(events) if events_df.empty: return events_df + events_df = filter_bad_geojson(events_df) events_df["geometry"] = events_df["geojson"].apply(lambda x: shape(x.get("geometry"))) - events_df["time"] = events_df["geojson"].apply(lambda x: x.get("properties").get("datetime")) + events_df["time"] = events_df["geojson"].apply(lambda x: x.get("properties", {}).get("datetime")) + events_df = events_df.loc[events_df["time"].notnull()] events_df = clean_time_cols(events_df) return gpd.GeoDataFrame(events_df, geometry="geometry", crs=4326) @@ -871,31 +877,33 @@ def get_patrol_observations(self, patrols_df, include_patrol_details=False, **kw until=patrol_end_time, **kwargs, ) - if include_patrol_details: - observation["patrol_id"] = patrol["id"] - observation["patrol_title"] = patrol["title"] - observation["patrol_serial_number"] = patrol["serial_number"] - observation["patrol_start_time"] = patrol_start_time - observation["patrol_end_time"] = patrol_end_time - observation["patrol_type"] = patrol_type - observation = ( - observation.reset_index() - .merge( - pd.DataFrame(df_pt).add_prefix("patrol_type__"), - left_on="patrol_type", - right_on="id", - ) - .drop( - columns=[ - "patrol_type__ordernum", - "patrol_type__icon_id", - "patrol_type__default_priority", - "patrol_type__is_active", - ] - ) - ) if len(observation) > 0: observation["groupby_col"] = patrol["id"] + + if include_patrol_details: + observation["patrol_id"] = patrol["id"] + observation["patrol_title"] = patrol["title"] + observation["patrol_serial_number"] = patrol["serial_number"] + observation["patrol_start_time"] = patrol_start_time + observation["patrol_end_time"] = patrol_end_time + observation["patrol_type"] = patrol_type + observation = ( + observation.reset_index() + .merge( + pd.DataFrame(df_pt).add_prefix("patrol_type__"), + left_on="patrol_type", + 
right_on="id", + ) + .drop( + columns=[ + "patrol_type__ordernum", + "patrol_type__icon_id", + "patrol_type__default_priority", + "patrol_type__is_active", + ] + ) + ) + observations.append(observation) except Exception as e: print( @@ -903,6 +911,9 @@ def get_patrol_observations(self, patrols_df, include_patrol_details=False, **kw f"end_time={patrol_end_time} failed for: {e}" ) + if not observations: + return pd.DataFrame() + df = pd.concat(observations) df = clean_time_cols(df) df = ecoscope.base.Relocations(df) diff --git a/ecoscope/io/earthranger_utils.py b/ecoscope/io/earthranger_utils.py index f622a316..cf04190d 100644 --- a/ecoscope/io/earthranger_utils.py +++ b/ecoscope/io/earthranger_utils.py @@ -70,3 +70,7 @@ def pack_columns(dataframe: pd.DataFrame, columns: typing.List): dataframe.drop(metadata_cols, inplace=True, axis=1) dataframe.rename(columns={"metadata": "additional"}, inplace=True) return dataframe + + +def filter_bad_geojson(dataframe: pd.DataFrame): + return dataframe[dataframe["geojson"].apply(lambda x: True if isinstance(x, dict) and x.get("geometry") else False)] diff --git a/tests/sample_data/io/get_events_bad_geojson.feather b/tests/sample_data/io/get_events_bad_geojson.feather new file mode 100644 index 00000000..073ee1ba Binary files /dev/null and b/tests/sample_data/io/get_events_bad_geojson.feather differ diff --git a/tests/sample_data/io/get_patrol_events_bad_geojson.json b/tests/sample_data/io/get_patrol_events_bad_geojson.json new file mode 100644 index 00000000..5060a778 --- /dev/null +++ b/tests/sample_data/io/get_patrol_events_bad_geojson.json @@ -0,0 +1,312 @@ +{ + "id": { + "0": "c9f6018b-b51d-4093-8516-d82141d93982" + }, + "priority": { + "0": 0 + }, + "state": { + "0": "done" + }, + "objective": { + "0": "Demo Patrol" + }, + "serial_number": { + "0": 23474 + }, + "title": { + "0": "Ecoscope Demo Patrol" + }, + "files": { + "0": [] + }, + "notes": { + "0": [] + }, + "patrol_segments": { + "0": [ + { + "id": 
"0e91ec92-bf8b-4e86-b579-47b814419ceb", + "patrol_type": "ecoscope_patrol", + "leader": { + "content_type": "observations.subject", + "id": "ef343e5d-f6c7-4867-ab33-2b2933b3bce5", + "name": "eco_1", + "subject_type": "person", + "subject_subtype": "ranger", + "common_name": null, + "additional": { + "rgb": "", + "sex": "", + "region": "", + "country": "", + "external_id": "", + "tm_animal_id": "", + "external_name": "" + }, + "created_at": "2024-09-09T13:29:59.189245+03:00", + "updated_at": "2024-10-23T21:07:01.505307+03:00", + "is_active": true, + "user": null, + "region": "", + "country": "", + "sex": "", + "tracks_available": false, + "image_url": "\/static\/ranger-black.svg" + }, + "scheduled_start": null, + "scheduled_end": null, + "time_range": { + "start_time": "2017-02-04T13:30:00+03:00", + "end_time": "2017-02-05T07:30:00+03:00" + }, + "start_location": { + "latitude": -2.8935350263, + "longitude": 39.2917871115 + }, + "end_location": { + "latitude": -2.9053535333, + "longitude": 39.334974731 + }, + "events": [ + { + "id": "a8c1ff24-83fc-48ff-a09c-c66d57518c34", + "serial_number": 443915, + "event_type": "poacher_camp_rep", + "priority": 0, + "title": "poacher_camp_rep", + "state": "new", + "contains": [], + "updated_at": "2024-09-09T21:01:10.460147+03:00", + "created_at": "2024-09-09T21:01:10.460077+03:00", + "geojson": { + "type": "Feature", + "properties": { + "message": "", + "datetime": "2017-02-05T00:10:00+00:00", + "image": "https://test-data.pamdas.org/static/poacher_camp-gray.svg", + "icon": { + "iconUrl": "https://test-data.pamdas.org/static/poacher_camp-gray.svg", + "iconSize": [ + 25, + 25 + ], + "iconAncor": [ + 12, + 12 + ], + "popupAncor": [ + 0, + -13 + ], + "className": "dot" + } + } + }, + "is_collection": false + }, + { + "id": "c62e681d-d955-4f29-a855-1fe95669387a", + "serial_number": 443916, + "event_type": "injured_animal_rep", + "priority": 0, + "title": "injured_animal_rep", + "state": "new", + "contains": [], + "updated_at": 
"2024-09-09T21:01:10.537946+03:00", + "created_at": "2024-09-09T21:01:10.537915+03:00", + "geojson": { + "type": "Feature", + "geometry": { + "type": "Point", + "coordinates": [ + 39.335383647060006, + -2.894132395575768 + ] + } + }, + "is_collection": false + }, + { + "id": "da236714-fed2-4c72-8f4f-d4b1ec8dffe5", + "serial_number": 443917, + "event_type": "hwc_rep", + "priority": 0, + "title": "hwc_rep", + "state": "new", + "contains": [], + "updated_at": "2024-09-09T21:01:10.603993+03:00", + "created_at": "2024-09-09T21:01:10.603959+03:00", + "geojson": { + "type": "Feature", + "geometry": { + "type": "Point", + "coordinates": [ + 39.33850583956563, + -2.890615124552055 + ] + }, + "properties": { + "message": "", + "image": "https://test-data.pamdas.org/static/hwc_rep-gray.svg", + "icon": { + "iconUrl": "https://test-data.pamdas.org/static/hwc_rep-gray.svg", + "iconSize": [ + 25, + 25 + ], + "iconAncor": [ + 12, + 12 + ], + "popupAncor": [ + 0, + -13 + ], + "className": "dot" + } + } + }, + "is_collection": false + }, + { + "id": "ebf812f5-e616-40e4-8fcf-ebb3ef6a6364", + "serial_number": 443918, + "event_type": "hwc_rep", + "priority": 0, + "title": "hwc_rep", + "state": "new", + "contains": [], + "updated_at": "2024-09-09T21:01:10.669634+03:00", + "created_at": "2024-09-09T21:01:10.669604+03:00", + "geojson": { + "type": "Feature", + "geometry": { + "type": "Point", + "coordinates": [ + 39.337466773044824, + -2.888976612685578 + ] + }, + "properties": { + "message": "", + "datetime": "2017-02-05T02:30:00+00:00", + "image": "https://test-data.pamdas.org/static/hwc_rep-gray.svg", + "icon": { + "iconUrl": "https://test-data.pamdas.org/static/hwc_rep-gray.svg", + "iconSize": [ + 25, + 25 + ], + "iconAncor": [ + 12, + 12 + ], + "popupAncor": [ + 0, + -13 + ], + "className": "dot" + } + } + }, + "is_collection": false + }, + { + "id": "42a76ef1-02f8-43de-8304-753c7749e213", + "serial_number": 444080, + "event_type": "injured_animal_rep", + "priority": 0, + "title": 
"injured_animal_rep", + "state": "new", + "contains": [], + "updated_at": "2024-09-10T00:53:42.143250+03:00", + "created_at": "2024-09-10T00:53:42.143217+03:00", + "geojson": {}, + "is_collection": false + }, + { + "id": "42a76ef1-02f8-43de-8304-753c7749e214", + "serial_number": 444081, + "event_type": "hwc_rep", + "priority": 0, + "title": "hwc_rep", + "state": "new", + "contains": [], + "updated_at": "2024-09-10T00:53:42.143250+03:00", + "created_at": "2024-09-10T00:53:42.143217+03:00", + "is_collection": false + } + ], + "image_url": "https:\/\/test-data.pamdas.org\/static\/sprite-src\/plane-patrol-icon.svg", + "icon_id": "plane-patrol-icon", + "updates": [ + { + "message": "Report Added", + "time": "2024-09-09T18:01:10.677469+00:00", + "user": { + "username": "user1", + "first_name": "Elebot", + "last_name": "Elebot", + "id": "abc123", + "content_type": "accounts.user" + }, + "type": "add_event" + }, + { + "message": "Report Added", + "time": "2024-09-09T18:01:10.613940+00:00", + "user": { + "username": "user1", + "first_name": "Elebot", + "last_name": "Elebot", + "id": "abc123", + "content_type": "accounts.user" + }, + "type": "add_event" + }, + { + "message": "Report Added", + "time": "2024-09-09T18:01:10.544995+00:00", + "user": { + "username": "user1", + "first_name": "Elebot", + "last_name": "Elebot", + "id": "abc123", + "content_type": "accounts.user" + }, + "type": "add_event" + }, + { + "message": "Report Added", + "time": "2024-09-09T18:01:10.470386+00:00", + "user": { + "username": "user1", + "first_name": "Elebot", + "last_name": "Elebot", + "id": "abc123", + "content_type": "accounts.user" + }, + "type": "add_event" + } + ] + } + ] + }, + "updates": { + "0": [ + { + "message": "Patrol Added", + "time": "2024-09-09T18:01:10.177573+00:00", + "user": { + "username": "user1", + "first_name": "Elebot", + "last_name": "Elebot", + "id": "abc123", + "content_type": "accounts.user" + }, + "type": "add_patrol" + } + ] + } +} \ No newline at end of file diff 
--git a/tests/test_earthranger_io.py b/tests/test_earthranger_io.py index 6f6d7dfc..cc519d4c 100644 --- a/tests/test_earthranger_io.py +++ b/tests/test_earthranger_io.py @@ -11,11 +11,37 @@ import ecoscope from erclient import ERClientException +from ecoscope.io.earthranger import EarthRangerIO from ecoscope.io.earthranger_utils import TIME_COLS pytestmark = pytest.mark.io +@pytest.fixture +def sample_bad_events_geojson(): + """ + A mock get_events response with intentionally bad geojson: + There are 6 events in this mock + event 0: 'geometry' is None + event 5: 'geometry' and 'properties' are None + """ + return pd.read_feather("tests/sample_data/io/get_events_bad_geojson.feather") + + +@pytest.fixture +def sample_bad_patrol_events_geojson(): + """ + A mock get_patrol_events response with intentionally bad geojson: + There's a single patrol in this mock with events that have the following problems in their json + event 0: 'geometry' key is not present + event 1: 'properties' key is not present + event 2: 'datetime' key is not present within 'properties' + event 3: is untouched + event 4: 'geojson' is an empty dict + """ + return pd.read_json("tests/sample_data/io/get_patrol_events_bad_geojson.json") + + def check_time_is_parsed(df): for col in TIME_COLS: if col in df.columns: @@ -374,3 +400,48 @@ def test_get_patrol_observations_with_patrol_filter(er_io): assert "patrol_title" in observations.columns assert "patrol_start_time" in observations.columns pd.testing.assert_series_equal(observations["patrol_id"], observations["groupby_col"], check_names=False) + + +@patch("erclient.client.ERClient.get_objects_multithreaded") +def test_get_events_bad_geojson(get_objects_mock, sample_bad_events_geojson, er_io): + get_objects_mock.return_value = sample_bad_events_geojson + + events = er_io.get_events(event_type=["e00ce1f6-f9f1-48af-93c9-fb89ec493b8a"]) + assert not events.empty + # of the 6 ids in the mock we expect these 4 to be returned + assert events.index.to_list() == 
[ + "bcda9c6a-628c-4825-947d-72f66115fc09", + "d464672a-3cc2-4d9a-bb3f-a69c34efb09c", + "4a599a57-7a89-4eb3-bb11-d2a36d1627e2", + "bcb01505-c635-48eb-b176-2b1390a0a5bf", + ] + + +@patch("erclient.client.ERClient.get_objects_multithreaded") +def test_get_patrol_events_bad_geojson(get_objects_mock, sample_bad_patrol_events_geojson, er_io): + get_objects_mock.return_value = sample_bad_patrol_events_geojson + + patrol_events = er_io.get_patrol_events( + since=pd.Timestamp("2017-01-01").isoformat(), + until=pd.Timestamp("2017-04-01").isoformat(), + ) + assert not patrol_events.empty + # We're rejecting any geojson that's missing geometry or a timestamp + assert patrol_events.id.to_list() == ["ebf812f5-e616-40e4-8fcf-ebb3ef6a6364"] + + +@pytest.mark.parametrize( + "er_callable, er_kwargs", + [ + (EarthRangerIO.get_patrols, {}), + (EarthRangerIO.get_subjectgroup_observations, {"subject_group_id": "12345"}), + (EarthRangerIO.get_patrol_observations_with_patrol_filter, {}), + (EarthRangerIO.get_patrol_events, {}), + (EarthRangerIO.get_events, {}), + ], +) +@patch("erclient.client.ERClient._get") +def test_empty_responses(_get_mock, er_io, er_callable, er_kwargs): + _get_mock.return_value = {} + df = er_callable(er_io, **er_kwargs) + assert df.empty