Skip to content

Commit

Permalink
Merge pull request #1234 from gboeing/convert
Browse files Browse the repository at this point in the history
improve graph_from_gdfs input checks
  • Loading branch information
gboeing authored Nov 11, 2024
2 parents aab45c4 + f918c80 commit a5704ea
Show file tree
Hide file tree
Showing 2 changed files with 67 additions and 24 deletions.
4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,14 +35,14 @@ repos:
args: [--disable=MD013]

- repo: https://github.com/astral-sh/ruff-pre-commit
rev: "v0.7.0"
rev: "v0.7.3"
hooks:
- id: ruff
args: [--fix]
- id: ruff-format

- repo: https://github.com/pre-commit/mirrors-mypy
rev: "v1.12.1"
rev: "v1.13.0"
hooks:
- id: mypy
additional_dependencies:
Expand Down
87 changes: 65 additions & 22 deletions osmnx/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,65 @@ def _make_edge_geometry(
raise ValueError(msg)


def _validate_node_edge_gdfs(
    gdf_nodes: gpd.GeoDataFrame,
    gdf_edges: gpd.GeoDataFrame,
) -> None:
    """
    Validate that node/edge GeoDataFrames can be converted to a MultiDiGraph.

    Raises a `ValueError` if validation fails. Issues a `UserWarning` if
    `gdf_nodes` has an active geometry column whose coordinates differ from
    the 'x' and 'y' columns (or that contains non-point geometries).

    Parameters
    ----------
    gdf_nodes
        GeoDataFrame of graph nodes uniquely indexed by `osmid`.
    gdf_edges
        GeoDataFrame of graph edges uniquely multi-indexed by `(u, v, key)`.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If `gdf_nodes` lacks 'x' or 'y' columns, if either index is not
        unique, if the `gdf_edges` index does not have 3 levels, or if any
        `u` or `v` value in the `gdf_edges` index is absent from the
        `gdf_nodes` index.
    """
    # ensure gdf_nodes contains x and y columns representing node geometries
    if not ("x" in gdf_nodes.columns and "y" in gdf_nodes.columns):  # pragma: no cover
        msg = "`gdf_nodes` must contain 'x' and 'y' columns."
        raise ValueError(msg)

    # ensure gdf_nodes and gdf_edges are uniquely indexed
    if not (gdf_nodes.index.is_unique and gdf_edges.index.is_unique):  # pragma: no cover
        msg = "`gdf_nodes` and `gdf_edges` must each be uniquely indexed."
        raise ValueError(msg)

    # ensure gdf_edges are multi-indexed with 3 levels. this must be checked
    # before reading index level 1 below: on a single-level index that lookup
    # raises an IndexError rather than the ValueError this function promises
    edges_index_levels = 3
    if gdf_edges.index.nlevels != edges_index_levels:  # pragma: no cover
        msg = "`gdf_edges` must be multi-indexed by `(u, v, key)`."
        raise ValueError(msg)

    # ensure the u and v values (first two index levels) of gdf_edges all
    # appear among the gdf_nodes index
    uv = set(gdf_edges.index.get_level_values(0)) | set(gdf_edges.index.get_level_values(1))
    if not uv.issubset(set(gdf_nodes.index)):  # pragma: no cover
        msg = "All `u` and `v` values in the `gdf_edges` index must appear in the `gdf_nodes` index."
        raise ValueError(msg)

    # warn user if geometry values differ from coordinates in x/y columns,
    # because we discard the geometry column
    if gdf_nodes.active_geometry_name is not None:  # pragma: no cover
        msg = (
            "Discarding the `gdf_nodes` 'geometry' column, though its values "
            "differ from the coordinates in the 'x' and 'y' columns."
        )
        # note: stacklevel=2 attributes the warning to this function's caller
        try:
            all_x_match = (gdf_nodes.geometry.x == gdf_nodes["x"]).all()
            all_y_match = (gdf_nodes.geometry.y == gdf_nodes["y"]).all()
            if not (all_x_match and all_y_match):
                # warn if x/y coords don't match geometry column
                warn(msg, category=UserWarning, stacklevel=2)
        except ValueError:  # pragma: no cover
            # warn if geometry column contains non-point geometry types
            warn(msg, category=UserWarning, stacklevel=2)


def graph_from_gdfs(
gdf_nodes: gpd.GeoDataFrame,
gdf_edges: gpd.GeoDataFrame,
Expand Down Expand Up @@ -258,31 +317,15 @@ def graph_from_gdfs(
-------
G
"""
if not ("x" in gdf_nodes.columns and "y" in gdf_nodes.columns): # pragma: no cover
msg = "`gdf_nodes` must contain 'x' and 'y' columns."
raise ValueError(msg)

if not hasattr(gdf_nodes, "geometry"):
msg = "`gdf_nodes` must have a 'geometry' attribute."
raise ValueError(msg)
_validate_node_edge_gdfs(gdf_nodes, gdf_edges)

# drop geometry column from gdf_nodes (as we use x and y for geometry
# drop geometry column from gdf_nodes (since we use x and y for geometry
# information), but warn the user if the geometry values differ from the
# coordinates in the x and y columns. this results in a df instead of gdf.
msg = (
"Discarding the `gdf_nodes` 'geometry' column, though its values "
"differ from the coordinates in the 'x' and 'y' columns."
)
try:
all_x_match = (gdf_nodes.geometry.x == gdf_nodes["x"]).all()
all_y_match = (gdf_nodes.geometry.y == gdf_nodes["y"]).all()
if not (all_x_match and all_y_match):
# warn if x/y coords don't match geometry column
warn(msg, category=UserWarning, stacklevel=2)
except ValueError: # pragma: no cover
# warn if geometry column contains non-point geometry types
warn(msg, category=UserWarning, stacklevel=2)
df_nodes = gdf_nodes.drop(columns=gdf_nodes.geometry.name)
if gdf_nodes.active_geometry_name is None: # pragma: no cover
df_nodes = pd.DataFrame(gdf_nodes)
else:
df_nodes = gdf_nodes.drop(columns=gdf_nodes.active_geometry_name)

# create graph and add graph-level attribute dict
if graph_attrs is None:
Expand Down

0 comments on commit a5704ea

Please sign in to comment.