Skip to content

Commit

Permalink
Merge pull request #36 from PatrickOHara/londonaq
Browse files Browse the repository at this point in the history
Load a londonaq dataset from CSV
  • Loading branch information
PatrickOHara authored Oct 15, 2021
2 parents f822a29 + 7559c13 commit fcb6d3f
Show file tree
Hide file tree
Showing 13 changed files with 993 additions and 38 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ jobs:

strategy:
matrix:
python-version: [3.6, 3.7, 3.8, 3.9]
python-version: [3.7, 3.8, 3.9]

env:
OPLIB_ROOT: ../OPLib
Expand Down
2 changes: 1 addition & 1 deletion .pylintrc
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# A comma-separated list of package or module names from where C extensions may
# be loaded. Extensions are loaded into the active Python interpreter and may
# run arbitrary code.
extension-pkg-whitelist=
extension-pkg-whitelist=pydantic

# Specify a score threshold to be exceeded before program exits with error.
fail-under=10.0
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
black>=20.8b1
black>=21.9b0
markdown-include>=0.6.0
mkdocs>=1.1.2
mkdocstrings>=0.13.6
Expand Down
119 changes: 119 additions & 0 deletions scripts/from_urbanair.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
"""Script for generating a tsplib style txt file from londonaq CSV"""

import json
from pathlib import Path

import networkx as nx
import pandas as pd
import typer

from tspwplib import split_graph_from_properties
from tspwplib.problem import BaseTSP
from tspwplib.types import (
EdgeWeightFormat,
LondonaqGraphName,
LondonaqLocationShort,
LondonaqTimestamp,
)
from tspwplib.utils import londonaq_comment, londonaq_graph_name

# Default file names of the JSON lookups that generate_londonaq_dataset writes
# into the dataset directory: one relating the relabelled split edges to their
# "old_edge" attribute, one relating the relabelled nodes to their old labels.
OLD_EDGE_LOOKUP_JSON = "old_edge_lookup.json"
OLD_NODE_LOOKUP_JSON = "old_node_lookup.json"


def generate_londonaq_dataset(
    dataset_dir: Path,
    name: LondonaqGraphName,
    comment: str,
    edges_csv_filename: str = "edges.csv",
    nodes_csv_filename: str = "nodes.csv",
    old_edge_lookup: str = OLD_EDGE_LOOKUP_JSON,
    old_node_lookup: str = OLD_NODE_LOOKUP_JSON,
) -> BaseTSP:
    """Generate a londonaq dataset from node and edge CSV files.

    The edges are split with ``split_graph_from_properties``, the vertices of
    the split graph are relabelled to consecutive integers starting at zero,
    and the result is converted to a ``BaseTSP`` and saved in tsplib95 format.
    Two JSON lookups are written next to it so the relabelling can be undone.

    Args:
        dataset_dir: Directory containing the CSV files. It is created if it
            is missing (its parent must exist) and all outputs are written
            into it.
        name: Graph name handed to ``BaseTSP.from_networkx``; also used to
            build the name of the txt file.
        comment: Comment handed to ``BaseTSP.from_networkx``.
        edges_csv_filename: Edge CSV inside ``dataset_dir``. Its "source",
            "target" and "key" columns become the edge index; the attribute
            names "cost" and "length" are passed to the splitting helper.
        nodes_csv_filename: Node CSV inside ``dataset_dir``. It is indexed by
            its "node" column and its "is_depot" column is used as a mask.
        old_edge_lookup: File name of the JSON edge lookup. The file holds a
            list of ``[[u, v], old_edge]`` pairs, where ``u`` and ``v`` are
            relabelled vertices and ``old_edge`` is the "old_edge" attribute
            of that split edge converted to a list.
        old_node_lookup: File name of the JSON node lookup, an object mapping
            each new vertex label to the label it had before relabelling.

    Returns:
        The ``BaseTSP`` built from the relabelled split graph.

    Raises:
        FileNotFoundError: If either CSV file does not exist.
    """
    # get the CSV files for edges and nodes
    dataset_dir.mkdir(parents=False, exist_ok=True)
    edges_filepath = dataset_dir / edges_csv_filename
    nodes_filepath = dataset_dir / nodes_csv_filename
    for csv_filepath in (edges_filepath, nodes_filepath):
        if not csv_filepath.exists():
            raise FileNotFoundError(csv_filepath)
    nodes_df = pd.read_csv(nodes_filepath).set_index("node")
    edges_df = pd.read_csv(edges_filepath)

    # split edges then relabel the nodes
    edge_attrs = edges_df.set_index(["source", "target", "key"]).to_dict("index")
    split_graph = split_graph_from_properties(
        edge_attrs,
        edge_attr_to_split="cost",
        edge_attr_to_vertex="length",
        new_vertex_attr="demand",
        old_edge_attr="old_edge",
    )
    normalize_map = {node: i for i, node in enumerate(split_graph.nodes())}
    normalized_graph = nx.relabel_nodes(split_graph, normalize_map, copy=True)

    # save the node and edge mappings to a json file
    old_edges = {
        (normalize_map[u], normalize_map[v]): data["old_edge"]
        for u, v, data in split_graph.edges(data=True)
    }
    old_vertices = {new: old for old, new in normalize_map.items()}

    # A list cannot be a dict key (unhashable) and a JSON object cannot be
    # keyed by an array, so the edge lookup is stored as a list of
    # [new_edge, old_edge] pairs with every tuple converted to a list.
    json_old_edges = [[list(key), list(value)] for key, value in old_edges.items()]
    with open(dataset_dir / old_edge_lookup, "w", encoding="UTF-8") as json_file:
        json.dump(json_old_edges, json_file)
    # integer keys are written as strings by the json module
    with open(dataset_dir / old_node_lookup, "w", encoding="UTF-8") as json_file:
        json.dump(old_vertices, json_file)

    # get depots: every vertex defaults to False, then the depots are flagged
    depots = list(nodes_df.loc[nodes_df.is_depot].index.map(normalize_map))
    nx.set_node_attributes(normalized_graph, False, "is_depot")
    for v in depots:
        normalized_graph.nodes[v]["is_depot"] = True

    # NOTE (not implemented yet) get node co-ordinates

    # get TSP representation
    tsp = BaseTSP.from_networkx(
        name,
        comment,
        "PCTSP",
        normalized_graph,
        edge_weight_format=EdgeWeightFormat.LOWER_DIAG_ROW,
        weight_attr_name="cost",
    )

    # save to txt file
    # NOTE(review): the file stem depends on how ``name`` formats as a string,
    # whereas main() builds its paths from ``name.value`` - confirm they agree.
    problem = tsp.to_tsplib95()
    txt_filepath = dataset_dir / f"{name}.txt"
    problem.save(txt_filepath)
    return tsp


def to_pandas_nodelist(G: nx.Graph) -> pd.DataFrame:
    """Build a dataframe with one row per node of the graph.

    Each row holds the node ID under the 'node' column followed by that
    node's attributes as further columns.
    """
    records = []
    for vertex, attributes in G.nodes(data=True):
        row = {"node": vertex}
        row.update(attributes)
        records.append(row)
    return pd.DataFrame(records)


def main(location: LondonaqLocationShort, dataset_dir: Path):
    """Command line entrypoint: build the londonaq dataset for one location.

    The graph name and comment are derived from the location and timestamp
    ``LondonaqTimestamp.A``. The dataset lives in a sub-directory of
    ``dataset_dir`` named after the graph, and the CSV files inside it are
    expected to be prefixed with the same name.
    """
    timestamp_id: LondonaqTimestamp = LondonaqTimestamp.A
    graph_name = londonaq_graph_name(location, timestamp_id)
    graph_comment = londonaq_comment(location, timestamp_id)
    prefix = graph_name.value
    generate_londonaq_dataset(
        dataset_dir / prefix,
        graph_name,
        graph_comment,
        edges_csv_filename=f"{prefix}_edges.csv",
        nodes_csv_filename=f"{prefix}_nodes.csv",
    )


if __name__ == "__main__":
    # typer builds the command line interface from the signature of main
    typer.run(main)
6 changes: 4 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,19 +8,21 @@
url="https://github.com/PatrickOHara/tspwplib",
description="Library of instances for TSP with Profits",
install_requires=[
"networkx>=2.6.0",
"pandas>=1.0.0",
"pydantic>=1.8.2",
"tsplib95>=0.7.1",
],
name="tspwplib",
packages=["tspwplib"],
python_requires=">=3.6",
python_requires=">=3.7",
license="MIT",
classifiers=[
"Programming Language :: Python",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.6",
"Programming Language :: Python :: 3.7",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3 :: Only",
"Operating System :: OS Independent",
],
Expand Down
18 changes: 18 additions & 0 deletions tests/test_base_tsp.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
"""Tests for the pydantic representation of a TSP"""

import pytest
from tsplib95.models import StandardProblem
from tspwplib import BaseTSP, GraphName, build_path_to_tsplib_instance


@pytest.mark.parametrize("gname", list(GraphName))
def test_from_tsplib95(tsplib_root, gname):
    """Check that a tsplib95 problem loads into BaseTSP with every edge kept"""
    # the vertex count is read from the digits in the instance name;
    # instances with 1000 or more vertices are not loaded
    digits = "".join(char for char in gname.value if char.isdigit())
    if int(digits) >= 1000:
        return

    tsp_path = build_path_to_tsplib_instance(tsplib_root, gname)
    assert tsp_path.exists()

    problem = StandardProblem.load(tsp_path)
    tsp = BaseTSP.from_tsplib95(problem)
    expected_edge_count = len(list(problem.get_edges()))
    assert len(tsp.edge_data) == expected_edge_count
40 changes: 40 additions & 0 deletions tests/test_converter/test_split_converter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
"""Tests for splitting edges"""

from tspwplib.converter import (
split_edges,
split_graph_from_properties,
lookup_from_split,
lookup_to_split,
)


def test_split_edges():
    """Check the split edges and the lookups to and from the original edges"""
    edge_list = [(0, 1), (1, 2), (0, 2)]
    splits = split_edges(edge_list)

    # every original edge is replaced by two split edges
    assert len(splits) == len(edge_list) * 2
    for split_edge in [(0, -1), (0, -3)]:
        assert split_edge in splits

    # test lookups
    from_split = lookup_from_split(edge_list, splits)
    expected_from_split = [
        ((0, -1), (0, 1)),
        ((-1, 1), (0, 1)),
        ((0, -3), (0, 2)),
    ]
    for split_edge, original_edge in expected_from_split:
        assert from_split[split_edge] == original_edge

    to_split = lookup_to_split(edge_list, splits)
    expected_to_split = [
        ((0, 1), ((0, -1), (-1, 1))),
        ((1, 2), ((1, -2), (-2, 2))),
    ]
    for original_edge, split_pair in expected_to_split:
        assert to_split[original_edge] == split_pair


def test_split_graph_from_properties():
    """Check each split edge carries half the cost of the edge it came from"""
    properties = {
        (0, 1): {"weight": 5, "cost": 3},
        (1, 2): {"weight": 1, "cost": 10},
        (0, 2): {"weight": 2, "cost": 5},
    }
    half_cost = {
        edge: float(attributes["cost"]) / 2.0
        for edge, attributes in properties.items()
    }
    G = split_graph_from_properties(properties)
    for _u, _v, attributes in G.edges(data=True):
        assert attributes["cost"] == half_cost[attributes["old_edge"]]
15 changes: 14 additions & 1 deletion tspwplib/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,11 @@
is_split_vertex_pair,
is_vertex_split_head,
is_vertex_split_tail,
split_graph_from_properties,
split_head,
split_tail,
tail_prize,
to_simple_undirected,
to_vertex_dataframe,
)
from .complete import is_complete, is_complete_with_self_loops
Expand All @@ -22,7 +24,7 @@
NotSimpleCycleException,
NotSimplePathException,
)
from .problem import ProfitsProblem, is_pctsp_yes_instance
from .problem import BaseTSP, ProfitsProblem, is_pctsp_yes_instance
from .utils import build_path_to_oplib_instance, build_path_to_tsplib_instance
from .types import (
Alpha,
Expand All @@ -33,6 +35,10 @@
EdgeList,
Generation,
GraphName,
LondonaqGraphName,
LondonaqLocation,
LondonaqLocationShort,
LondonaqTimestamp,
OptimalSolutionTSP,
Vertex,
VertexFunction,
Expand All @@ -57,6 +63,7 @@

__all__ = [
"Alpha",
"BaseTSP",
"DisjointPaths",
"Edge",
"EdgeFunction",
Expand All @@ -65,6 +72,10 @@
"EdgesNotAdjacentException",
"Generation",
"GraphName",
"LondonaqGraphName",
"LondonaqLocation",
"LondonaqLocationShort",
"LondonaqTimestamp",
"NotSimpleException",
"NotSimpleCycleException",
"NotSimplePathException",
Expand Down Expand Up @@ -99,9 +110,11 @@
"problem",
"remove_self_loops_from_edge_list",
"reorder_edge_list_from_root",
"split_graph_from_properties",
"split_head",
"split_tail",
"tail_prize",
"to_simple_undirected",
"to_vertex_dataframe",
"total_cost",
"total_cost_networkx",
Expand Down
Loading

0 comments on commit fcb6d3f

Please sign in to comment.