Skip to content

Commit

Permalink
refactor gdf validation into own function
Browse files: browse the repository at this point in the history
  • Loading branch information
gboeing committed Nov 11, 2024
1 parent 9faa450 commit f918c80
Showing 1 changed file with 55 additions and 27 deletions.
82 changes: 55 additions & 27 deletions osmnx/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,26 +222,14 @@ def _make_edge_geometry(
raise ValueError(msg)


def graph_from_gdfs(
def _validate_node_edge_gdfs(
gdf_nodes: gpd.GeoDataFrame,
gdf_edges: gpd.GeoDataFrame,
*,
graph_attrs: dict[str, Any] | None = None,
) -> nx.MultiDiGraph:
) -> None:
"""
Convert node and edge GeoDataFrames to a MultiDiGraph.
This function is the inverse of `graph_to_gdfs` and is designed to work in
conjunction with it. However, you can convert arbitrary node and edge
GeoDataFrames as long as 1) `gdf_nodes` is uniquely indexed by `osmid`, 2)
`gdf_nodes` contains `x` and `y` coordinate columns representing node
geometries, 3) `gdf_edges` is uniquely multi-indexed by `(u, v, key)`
(following normal MultiDiGraph structure). This allows you to load any
node/edge Shapefiles or GeoPackage layers as GeoDataFrames then convert
them to a MultiDiGraph for network analysis.
Validate that node/edge GeoDataFrames can be converted to a MultiDiGraph.
Note that any `geometry` attribute on `gdf_nodes` is discarded, since `x`
and `y` provide the necessary node geometry information instead.
Raises a `ValueError` if validation fails.
Parameters
----------
Expand All @@ -250,13 +238,10 @@ def graph_from_gdfs(
gdf_edges
GeoDataFrame of graph edges uniquely multi-indexed by `(u, v, key)`.
graph_attrs
The new `G.graph` attribute dictionary. If None, use `gdf_edges`'s CRS
as the only graph-level attribute (`gdf_edges` must have its `crs`
attribute set).
Returns
-------
G
None
"""
# ensure gdf_nodes contains x and y columns representing node geometries
if not ("x" in gdf_nodes.columns and "y" in gdf_nodes.columns): # pragma: no cover
Expand All @@ -278,12 +263,9 @@ def graph_from_gdfs(
msg = "`gdf_edges` must be multi-indexed by `(u, v, key)`."
raise ValueError(msg)

# drop geometry column from gdf_nodes (since we use x and y for geometry
# information), but warn the user if the geometry values differ from the
# coordinates in the x and y columns. this results in a df instead of gdf.
if gdf_nodes.active_geometry_name is None: # pragma: no cover
df_nodes = pd.DataFrame(gdf_nodes)
else:
# warn user if geometry values differ from coordinates in x/y columns,
# because we discard the geometry column
if gdf_nodes.active_geometry_name is not None: # pragma: no cover
msg = (
"Discarding the `gdf_nodes` 'geometry' column, though its values "
"differ from the coordinates in the 'x' and 'y' columns."
Expand All @@ -297,7 +279,53 @@ def graph_from_gdfs(
except ValueError: # pragma: no cover
# warn if geometry column contains non-point geometry types
warn(msg, category=UserWarning, stacklevel=2)
df_nodes = gdf_nodes.drop(columns=gdf_nodes.geometry.name)


def graph_from_gdfs(
gdf_nodes: gpd.GeoDataFrame,
gdf_edges: gpd.GeoDataFrame,
*,
graph_attrs: dict[str, Any] | None = None,
) -> nx.MultiDiGraph:
"""
Convert node and edge GeoDataFrames to a MultiDiGraph.
This function is the inverse of `graph_to_gdfs` and is designed to work in
conjunction with it. However, you can convert arbitrary node and edge
GeoDataFrames as long as 1) `gdf_nodes` is uniquely indexed by `osmid`, 2)
`gdf_nodes` contains `x` and `y` coordinate columns representing node
geometries, 3) `gdf_edges` is uniquely multi-indexed by `(u, v, key)`
(following normal MultiDiGraph structure). This allows you to load any
node/edge Shapefiles or GeoPackage layers as GeoDataFrames then convert
them to a MultiDiGraph for network analysis.
Note that any `geometry` attribute on `gdf_nodes` is discarded, since `x`
and `y` provide the necessary node geometry information instead.
Parameters
----------
gdf_nodes
GeoDataFrame of graph nodes uniquely indexed by `osmid`.
gdf_edges
GeoDataFrame of graph edges uniquely multi-indexed by `(u, v, key)`.
graph_attrs
The new `G.graph` attribute dictionary. If None, use `gdf_edges`'s CRS
as the only graph-level attribute (`gdf_edges` must have its `crs`
attribute set).
Returns
-------
G
"""
_validate_node_edge_gdfs(gdf_nodes, gdf_edges)

# drop geometry column from gdf_nodes (since we use x and y for geometry
# information), but warn the user if the geometry values differ from the
# coordinates in the x and y columns. this results in a df instead of gdf.
if gdf_nodes.active_geometry_name is None: # pragma: no cover
df_nodes = pd.DataFrame(gdf_nodes)
else:
df_nodes = gdf_nodes.drop(columns=gdf_nodes.active_geometry_name)

# create graph and add graph-level attribute dict
if graph_attrs is None:
Expand Down

0 comments on commit f918c80

Please sign in to comment.