From 775ff38fa3176d3d5256db5d067137354633bdfa Mon Sep 17 00:00:00 2001 From: Geoff Boeing Date: Mon, 11 Nov 2024 11:37:34 -0800 Subject: [PATCH 1/3] improve graph_from_gdfs input checks --- osmnx/convert.py | 47 +++++++++++++++++++++++++++++++---------------- 1 file changed, 31 insertions(+), 16 deletions(-) diff --git a/osmnx/convert.py b/osmnx/convert.py index 742ba399a..27cc65abd 100644 --- a/osmnx/convert.py +++ b/osmnx/convert.py @@ -258,31 +258,46 @@ def graph_from_gdfs( ------- G """ + # ensure gdf_nodes contains x and y columns representing node geometries if not ("x" in gdf_nodes.columns and "y" in gdf_nodes.columns): # pragma: no cover msg = "`gdf_nodes` must contain 'x' and 'y' columns." raise ValueError(msg) - if not hasattr(gdf_nodes, "geometry"): - msg = "`gdf_nodes` must have a 'geometry' attribute." + # ensure gdf_nodes and gdf_edges are uniquely indexed + if not (gdf_nodes.index.is_unique and gdf_edges.index.is_unique): # pragma: no cover + msg = "`gdf_nodes` and `gdf_edges` must each be uniquely indexed." raise ValueError(msg) - # drop geometry column from gdf_nodes (as we use x and y for geometry + # ensure 1) gdf_edges are multi-indexed with 3 levels and 2) that its u + # and v values (first two index levels) all appear among gdf_nodes index + edges_index_levels = 3 + check1 = gdf_edges.index.nlevels == edges_index_levels + uv = set(gdf_edges.index.get_level_values(0)) | set(gdf_edges.index.get_level_values(1)) + check2 = uv.issubset(set(gdf_nodes.index)) + if not (check1 and check2): # pragma: no cover + msg = "`gdf_edges` must be multi-indexed by `(u, v, key)`." + raise ValueError(msg) + + # drop geometry column from gdf_nodes (since we use x and y for geometry # information), but warn the user if the geometry values differ from the # coordinates in the x and y columns. this results in a df instead of gdf. - msg = ( - "Discarding the `gdf_nodes` 'geometry' column, though its values " - "differ from the coordinates in the 'x' and 'y' columns." - ) - try: - all_x_match = (gdf_nodes.geometry.x == gdf_nodes["x"]).all() - all_y_match = (gdf_nodes.geometry.y == gdf_nodes["y"]).all() - if not (all_x_match and all_y_match): - # warn if x/y coords don't match geometry column + if gdf_nodes.active_geometry_name is None: # pragma: no cover + df_nodes = pd.DataFrame(gdf_nodes) + else: + msg = ( + "Discarding the `gdf_nodes` 'geometry' column, though its values " + "differ from the coordinates in the 'x' and 'y' columns." + ) + try: + all_x_match = (gdf_nodes.geometry.x == gdf_nodes["x"]).all() + all_y_match = (gdf_nodes.geometry.y == gdf_nodes["y"]).all() + if not (all_x_match and all_y_match): + # warn if x/y coords don't match geometry column + warn(msg, category=UserWarning, stacklevel=2) + except ValueError: # pragma: no cover + # warn if geometry column contains non-point geometry types warn(msg, category=UserWarning, stacklevel=2) - except ValueError: # pragma: no cover - # warn if geometry column contains non-point geometry types - warn(msg, category=UserWarning, stacklevel=2) - df_nodes = gdf_nodes.drop(columns=gdf_nodes.geometry.name) + df_nodes = gdf_nodes.drop(columns=gdf_nodes.geometry.name) # create graph and add graph-level attribute dict if graph_attrs is None: From 9faa450679bb24c449ab50cd0feb71ad9264cb54 Mon Sep 17 00:00:00 2001 From: Geoff Boeing Date: Mon, 11 Nov 2024 11:38:35 -0800 Subject: [PATCH 2/3] update pre-commit --- .pre-commit-config.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 5c23c5645..50c5e2c73 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -35,14 +35,14 @@ repos: args: [--disable=MD013] - repo: https://github.com/astral-sh/ruff-pre-commit - rev: "v0.7.0" + rev: "v0.7.3" hooks: - id: ruff args: [--fix] - id: ruff-format - repo: https://github.com/pre-commit/mirrors-mypy - rev: "v1.12.1" + rev: "v1.13.0" hooks: - id: mypy additional_dependencies: From f918c8073d1d86dc654e62eec197cb8645a648c2 Mon Sep 17 00:00:00 2001 From: Geoff Boeing Date: Mon, 11 Nov 2024 13:50:52 -0800 Subject: [PATCH 3/3] refactor gdf validation into own function --- osmnx/convert.py | 82 ++++++++++++++++++++++++++++++++---------------- 1 file changed, 55 insertions(+), 27 deletions(-) diff --git a/osmnx/convert.py b/osmnx/convert.py index 27cc65abd..2909bc352 100644 --- a/osmnx/convert.py +++ b/osmnx/convert.py @@ -222,26 +222,14 @@ def _make_edge_geometry( raise ValueError(msg) -def graph_from_gdfs( +def _validate_node_edge_gdfs( gdf_nodes: gpd.GeoDataFrame, gdf_edges: gpd.GeoDataFrame, - *, - graph_attrs: dict[str, Any] | None = None, -) -> nx.MultiDiGraph: +) -> None: """ - Convert node and edge GeoDataFrames to a MultiDiGraph. - - This function is the inverse of `graph_to_gdfs` and is designed to work in - conjunction with it. However, you can convert arbitrary node and edge - GeoDataFrames as long as 1) `gdf_nodes` is uniquely indexed by `osmid`, 2) - `gdf_nodes` contains `x` and `y` coordinate columns representing node - geometries, 3) `gdf_edges` is uniquely multi-indexed by `(u, v, key)` - (following normal MultiDiGraph structure). This allows you to load any - node/edge Shapefiles or GeoPackage layers as GeoDataFrames then convert - them to a MultiDiGraph for network analysis. + Validate that node/edge GeoDataFrames can be converted to a MultiDiGraph. - Note that any `geometry` attribute on `gdf_nodes` is discarded, since `x` - and `y` provide the necessary node geometry information instead. + Raises a `ValueError` if validation fails. Parameters ---------- @@ -250,13 +238,10 @@ def graph_from_gdfs( gdf_edges GeoDataFrame of graph edges uniquely multi-indexed by `(u, v, key)`. graph_attrs - The new `G.graph` attribute dictionary. If None, use `gdf_edges`'s CRS - as the only graph-level attribute (`gdf_edges` must have its `crs` - attribute set). Returns ------- - G + None """ # ensure gdf_nodes contains x and y columns representing node geometries if not ("x" in gdf_nodes.columns and "y" in gdf_nodes.columns): # pragma: no cover @@ -278,12 +263,9 @@ def graph_from_gdfs( msg = "`gdf_edges` must be multi-indexed by `(u, v, key)`." raise ValueError(msg) - # drop geometry column from gdf_nodes (since we use x and y for geometry - # information), but warn the user if the geometry values differ from the - # coordinates in the x and y columns. this results in a df instead of gdf. - if gdf_nodes.active_geometry_name is None: # pragma: no cover - df_nodes = pd.DataFrame(gdf_nodes) - else: + # warn user if geometry values differ from coordinates in x/y columns, + # because we discard the geometry column + if gdf_nodes.active_geometry_name is not None: # pragma: no cover msg = ( "Discarding the `gdf_nodes` 'geometry' column, though its values " "differ from the coordinates in the 'x' and 'y' columns." @@ -297,7 +279,53 @@ def graph_from_gdfs( except ValueError: # pragma: no cover # warn if geometry column contains non-point geometry types warn(msg, category=UserWarning, stacklevel=2) - df_nodes = gdf_nodes.drop(columns=gdf_nodes.geometry.name) + + +def graph_from_gdfs( + gdf_nodes: gpd.GeoDataFrame, + gdf_edges: gpd.GeoDataFrame, + *, + graph_attrs: dict[str, Any] | None = None, +) -> nx.MultiDiGraph: + """ + Convert node and edge GeoDataFrames to a MultiDiGraph. + + This function is the inverse of `graph_to_gdfs` and is designed to work in + conjunction with it. However, you can convert arbitrary node and edge + GeoDataFrames as long as 1) `gdf_nodes` is uniquely indexed by `osmid`, 2) + `gdf_nodes` contains `x` and `y` coordinate columns representing node + geometries, 3) `gdf_edges` is uniquely multi-indexed by `(u, v, key)` + (following normal MultiDiGraph structure). This allows you to load any + node/edge Shapefiles or GeoPackage layers as GeoDataFrames then convert + them to a MultiDiGraph for network analysis. + + Note that any `geometry` attribute on `gdf_nodes` is discarded, since `x` + and `y` provide the necessary node geometry information instead. + + Parameters + ---------- + gdf_nodes + GeoDataFrame of graph nodes uniquely indexed by `osmid`. + gdf_edges + GeoDataFrame of graph edges uniquely multi-indexed by `(u, v, key)`. + graph_attrs + The new `G.graph` attribute dictionary. If None, use `gdf_edges`'s CRS + as the only graph-level attribute (`gdf_edges` must have its `crs` + attribute set). + + Returns + ------- + G + """ + _validate_node_edge_gdfs(gdf_nodes, gdf_edges) + + # drop geometry column from gdf_nodes (since we use x and y for geometry + # information), but warn the user if the geometry values differ from the + # coordinates in the x and y columns. this results in a df instead of gdf. + if gdf_nodes.active_geometry_name is None: # pragma: no cover + df_nodes = pd.DataFrame(gdf_nodes) + else: + df_nodes = gdf_nodes.drop(columns=gdf_nodes.active_geometry_name) # create graph and add graph-level attribute dict if graph_attrs is None: