Merge pull request #36 from PatrickOHara/londonaq

Load a londonaq dataset from CSV
PatrickOHara · Oct 15, 2021 · fcb6d3f · fcb6d3f
2 parents f822a29 + 7559c13
commit fcb6d3f
Show file tree

Hide file tree

Showing 13 changed files with 993 additions and 38 deletions.
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
@@ -8,7 +8,7 @@ jobs:
 
     strategy:
       matrix:
-        python-version: [3.6, 3.7, 3.8, 3.9]
+        python-version: [3.7, 3.8, 3.9]
 
     env:
       OPLIB_ROOT: ../OPLib

diff --git a/.pylintrc b/.pylintrc
@@ -3,7 +3,7 @@
 # A comma-separated list of package or module names from where C extensions may
 # be loaded. Extensions are loading into the active Python interpreter and may
 # run arbitrary code.
-extension-pkg-whitelist=
+extension-pkg-whitelist=pydantic
 
 # Specify a score threshold to be exceeded before program exits with error.
 fail-under=10.0

diff --git a/requirements.txt b/requirements.txt
@@ -1,4 +1,4 @@
-black>=20.8b1
+black>=21.9b0
 markdown-include>=0.6.0
 mkdocs>=1.1.2
 mkdocstrings>=0.13.6

diff --git a/scripts/from_urbanair.py b/scripts/from_urbanair.py
@@ -0,0 +1,119 @@
+"""Script for generating a tsplib style txt file from londonaq CSV"""
+
+import json
+from pathlib import Path
+
+import networkx as nx
+import pandas as pd
+import typer
+
+from tspwplib import split_graph_from_properties
+from tspwplib.problem import BaseTSP
+from tspwplib.types import (
+    EdgeWeightFormat,
+    LondonaqGraphName,
+    LondonaqLocationShort,
+    LondonaqTimestamp,
+)
+from tspwplib.utils import londonaq_comment, londonaq_graph_name
+
+OLD_EDGE_LOOKUP_JSON = "old_edge_lookup.json"  # default file name of the edge lookup
+OLD_NODE_LOOKUP_JSON = "old_node_lookup.json"  # default file name of the node lookup
+
+
+def generate_londonaq_dataset(
+    dataset_dir: Path,
+    name: LondonaqGraphName,
+    comment: str,
+    edges_csv_filename: str = "edges.csv",
+    nodes_csv_filename: str = "nodes.csv",
+    old_edge_lookup: str = OLD_EDGE_LOOKUP_JSON,
+    old_node_lookup: str = OLD_NODE_LOOKUP_JSON,
+) -> BaseTSP:
+    """Generate a londonaq dataset from CSV files of edges and nodes.
+
+    The edges are passed to split_graph_from_properties, the vertices of the
+    resulting graph are relabelled 0, 1, 2, ... in iteration order, and the
+    graph is saved as a tsplib style txt file. Two JSON lookups that map the
+    new edges and vertices back to the old ones are written alongside it.
+
+    Args:
+        dataset_dir: Directory that contains the CSV files. The JSON lookups
+            and the txt file are written to the same directory.
+        name: Name of the graph, also used for the name of the txt file.
+        comment: Comment stored with the TSP representation.
+        edges_csv_filename: CSV of edges. The columns "source", "target" and
+            "key" identify an edge; "cost" and "length" are read as attributes.
+        nodes_csv_filename: CSV of nodes with a "node" column and a boolean
+            "is_depot" column.
+        old_edge_lookup: File name of the JSON lookup from new edge to old edge.
+            Each new edge (u, v) is stored under the string key "u,v".
+        old_node_lookup: File name of the JSON lookup from new node to old node.
+
+    Returns:
+        TSP representation of the split and relabelled graph.
+
+    Raises:
+        FileNotFoundError: If the edges CSV or the nodes CSV does not exist.
+    """
+
+    # get the CSV files for edges and nodes
+    dataset_dir.mkdir(parents=False, exist_ok=True)
+    edges_filepath = dataset_dir / edges_csv_filename
+    nodes_filepath = dataset_dir / nodes_csv_filename
+    if not edges_filepath.exists():
+        raise FileNotFoundError(edges_filepath)
+    if not nodes_filepath.exists():
+        raise FileNotFoundError(nodes_filepath)
+    nodes_df = pd.read_csv(nodes_filepath)
+    nodes_df = nodes_df.set_index("node")
+    edges_df = pd.read_csv(edges_filepath)
+
+    # split edges then relabel the nodes
+    edges_df = edges_df.set_index(["source", "target", "key"])
+    edge_attrs = edges_df.to_dict("index")
+    split_graph = split_graph_from_properties(
+        edge_attrs,
+        edge_attr_to_split="cost",
+        edge_attr_to_vertex="length",
+        new_vertex_attr="demand",
+        old_edge_attr="old_edge",
+    )
+    normalize_map = {node: i for i, node in enumerate(split_graph.nodes())}
+    normalized_graph = nx.relabel_nodes(split_graph, normalize_map, copy=True)
+
+    # save the node and edge mappings to a json file
+    old_edges = {
+        (normalize_map[u], normalize_map[v]): data["old_edge"]
+        for u, v, data in split_graph.edges(data=True)
+    }
+    old_vertices = {new: old for old, new in normalize_map.items()}
+
+    # JSON object keys must be strings (and a list cannot be a dict key), so
+    # each new edge (u, v) is keyed as "u,v"; the old edge is dumped as a list
+    json_old_edges = {f"{u},{v}": list(old) for (u, v), old in old_edges.items()}
+    with open(dataset_dir / old_edge_lookup, "w", encoding="UTF-8") as json_file:
+        json.dump(json_old_edges, json_file)
+    with open(dataset_dir / old_node_lookup, "w", encoding="UTF-8") as json_file:
+        json.dump(old_vertices, json_file)
+
+    # get depots
+    depots = list(nodes_df.loc[nodes_df.is_depot].index.map(normalize_map))
+    nx.set_node_attributes(normalized_graph, False, "is_depot")
+    for v in depots:
+        normalized_graph.nodes[v]["is_depot"] = True
+
+    # NOTE (not implemented yet) get node co-ordinates
+
+    # get TSP representation
+    tsp = BaseTSP.from_networkx(
+        name,
+        comment,
+        "PCTSP",
+        normalized_graph,
+        edge_weight_format=EdgeWeightFormat.LOWER_DIAG_ROW,
+        weight_attr_name="cost",
+    )
+
+    # save to txt file
+    problem = tsp.to_tsplib95()
+    txt_filepath = dataset_dir / f"{name}.txt"
+    problem.save(txt_filepath)
+    return tsp
+
+
+def to_pandas_nodelist(G: nx.Graph) -> pd.DataFrame:
+    """Build a pandas dataframe with one row for every node of G.
+
+    The node ID goes in the 'node' column; every node attribute gets a column.
+    """
+    records = []
+    for node_id, attrs in G.nodes(data=True):
+        records.append({"node": node_id, **attrs})
+    return pd.DataFrame(records)
+
+
+def main(location: LondonaqLocationShort, dataset_dir: Path):
+    """Entrypoint for generating londonaq dataset"""
+    # the timestamp is fixed to A
+    timestamp_id: LondonaqTimestamp = LondonaqTimestamp.A
+    graph_name = londonaq_graph_name(location, timestamp_id)
+    graph_comment = londonaq_comment(location, timestamp_id)
+
+    # the graph name is the sub-directory and the prefix of both CSV files
+    prefix = graph_name.value
+    generate_londonaq_dataset(
+        dataset_dir / prefix,
+        graph_name,
+        graph_comment,
+        edges_csv_filename=prefix + "_edges.csv",
+        nodes_csv_filename=prefix + "_nodes.csv",
+    )
+
+
+if __name__ == "__main__":  # only when executed as a script, not on import
+    typer.run(main)  # hand main to typer as the command line entrypoint
diff --git a/setup.py b/setup.py
@@ -8,19 +8,21 @@
     url="https://github.com/PatrickOHara/tspwplib",
     description="Library of instances for TSP with Profits",
     install_requires=[
+        "networkx>=2.6.0",
         "pandas>=1.0.0",
+        "pydantic>=1.8.2",
         "tsplib95>=0.7.1",
     ],
     name="tspwplib",
     packages=["tspwplib"],
-    python_requires=">=3.6",
+    python_requires=">=3.7",
     license="MIT",
     classifiers=[
         "Programming Language :: Python",
         "Programming Language :: Python :: 3",
-        "Programming Language :: Python :: 3.6",
         "Programming Language :: Python :: 3.7",
         "Programming Language :: Python :: 3.8",
+        "Programming Language :: Python :: 3.9",
         "Programming Language :: Python :: 3 :: Only",
         "Operating System :: OS Independent",
     ],

diff --git a/tests/test_base_tsp.py b/tests/test_base_tsp.py
@@ -0,0 +1,18 @@
+"""Tests for the pydantic representation of a TSP"""
+
+import pytest
+from tsplib95.models import StandardProblem
+from tspwplib import BaseTSP, GraphName, build_path_to_tsplib_instance
+
+
+@pytest.mark.parametrize("gname", list(GraphName))
+def test_from_tsplib95(tsplib_root, gname):
+    """Test tsplib95 problems can be read into BaseTSP.
+
+    Problems with 1000 or more vertices are reported as skipped instead of
+    passing without any assertion being run.
+    """
+    # the number of vertices is taken from the digits in the graph name
+    n_nodes = int("".join(filter(str.isdigit, gname.value)))
+    if n_nodes >= 1000:
+        pytest.skip("only load problems with less than 1000 vertices")
+    tsp_path = build_path_to_tsplib_instance(tsplib_root, gname)
+    assert tsp_path.exists()
+    problem = StandardProblem.load(tsp_path)
+    tsp = BaseTSP.from_tsplib95(problem)
+    assert len(tsp.edge_data) == len(list(problem.get_edges()))
diff --git a/tests/test_converter/test_split_converter.py b/tests/test_converter/test_split_converter.py
@@ -0,0 +1,40 @@
+"""Tests for splitting edges"""
+
+from tspwplib.converter import (
+    split_edges,
+    split_graph_from_properties,
+    lookup_from_split,
+    lookup_to_split,
+)
+
+
+def test_split_edges():
+    """Test the splits of a triangle and the lookups in both directions"""
+    triangle = [(0, 1), (1, 2), (0, 2)]
+    split_list = split_edges(triangle)
+    assert len(split_list) == len(triangle) * 2
+    for expected_split in [(0, -1), (0, -3)]:
+        assert expected_split in split_list
+
+    # lookup from a split edge to the edge it came from
+    original_of = lookup_from_split(triangle, split_list)
+    expected_originals = {(0, -1): (0, 1), (-1, 1): (0, 1), (0, -3): (0, 2)}
+    for split_edge, original in expected_originals.items():
+        assert original_of[split_edge] == original
+
+    # lookup from an edge to the pair of split edges that replace it
+    splits_of = lookup_to_split(triangle, split_list)
+    expected_pairs = {
+        (0, 1): ((0, -1), (-1, 1)),
+        (1, 2): ((1, -2), (-2, 2)),
+    }
+    for original, pair in expected_pairs.items():
+        assert splits_of[original] == pair
+
+
+def test_split_graph_from_properties():
+    """Test every edge of the split graph has half the cost of its old edge"""
+    properties = {
+        (0, 1): {"weight": 5, "cost": 3},
+        (1, 2): {"weight": 1, "cost": 10},
+        (0, 2): {"weight": 2, "cost": 5},
+    }
+    split_graph = split_graph_from_properties(properties)
+    for *_, attrs in split_graph.edges(data=True):
+        old_cost = properties[attrs["old_edge"]]["cost"]
+        expected_cost = float(old_cost) / 2.0
+        assert attrs["cost"] == expected_cost
diff --git a/tspwplib/__init__.py b/tspwplib/__init__.py
@@ -10,9 +10,11 @@
     is_split_vertex_pair,
     is_vertex_split_head,
     is_vertex_split_tail,
+    split_graph_from_properties,
     split_head,
     split_tail,
     tail_prize,
+    to_simple_undirected,
     to_vertex_dataframe,
 )
 from .complete import is_complete, is_complete_with_self_loops
@@ -22,7 +24,7 @@
     NotSimpleCycleException,
     NotSimplePathException,
 )
-from .problem import ProfitsProblem, is_pctsp_yes_instance
+from .problem import BaseTSP, ProfitsProblem, is_pctsp_yes_instance
 from .utils import build_path_to_oplib_instance, build_path_to_tsplib_instance
 from .types import (
     Alpha,
@@ -33,6 +35,10 @@
     EdgeList,
     Generation,
     GraphName,
+    LondonaqGraphName,
+    LondonaqLocation,
+    LondonaqLocationShort,
+    LondonaqTimestamp,
     OptimalSolutionTSP,
     Vertex,
     VertexFunction,
@@ -57,6 +63,7 @@
 
 __all__ = [
     "Alpha",
+    "BaseTSP",
     "DisjointPaths",
     "Edge",
     "EdgeFunction",
@@ -65,6 +72,10 @@
     "EdgesNotAdjacentException",
     "Generation",
     "GraphName",
+    "LondonaqGraphName",
+    "LondonaqLocation",
+    "LondonaqLocationShort",
+    "LondonaqTimestamp",
     "NotSimpleException",
     "NotSimpleCycleException",
     "NotSimplePathException",
@@ -99,9 +110,11 @@
     "problem",
     "remove_self_loops_from_edge_list",
     "reorder_edge_list_from_root",
+    "split_graph_from_properties",
     "split_head",
     "split_tail",
     "tail_prize",
+    "to_simple_undirected",
     "to_vertex_dataframe",
     "total_cost",
     "total_cost_networkx",