From b8b15fe86d09341ce7156438bf4909838212c5c6 Mon Sep 17 00:00:00 2001 From: Patrick O'Hara Date: Tue, 5 Oct 2021 13:22:48 +0100 Subject: [PATCH 01/12] =?UTF-8?q?=E2=9C=A8=20Split=20graph?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/test_converter/test_split_converter.py | 45 ++++++ tspwplib/converter.py | 145 ++++++++++++++++++- tspwplib/problem.py | 7 +- tspwplib/types.py | 11 +- 4 files changed, 201 insertions(+), 7 deletions(-) create mode 100644 tests/test_converter/test_split_converter.py diff --git a/tests/test_converter/test_split_converter.py b/tests/test_converter/test_split_converter.py new file mode 100644 index 0000000..64cfab6 --- /dev/null +++ b/tests/test_converter/test_split_converter.py @@ -0,0 +1,45 @@ +"""Tests for splitting edges""" + +from tspwplib.converter import ( + split_edges, + split_graph_from_properties, + lookup_from_split, + lookup_to_split, +) + + +def test_split_edges(): + """Test split edges""" + edge_list = [(0, 1), (1, 2), (0, 2)] + splits = split_edges(edge_list) + assert len(splits) == len(edge_list) * 2 + assert (0, -1) in splits + assert (0, -3) in splits + + # test lookups + from_split = lookup_from_split(edge_list, splits) + assert from_split[(0, -1)] == (0, 1) + assert from_split[(-1, 1)] == (0, 1) + assert from_split[(0, -3)] == (0, 2) + + to_split = lookup_to_split(edge_list, splits) + assert to_split[(0, 1)] == ((0, -1), (-1, 1)) + assert to_split[(1, 2)] == ((1, -2), (-2, 2)) + + +def test_split_graph_from_properties(): + """Test split graph""" + properties = { + (0, 1): {"weight": 5, "cost": 3}, + (1, 2): {"weight": 1, "cost": 10}, + (0, 2): {"weight": 2, "cost": 5}, + } + G = split_graph_from_properties(properties) + for v in G: + if v < 0: + assert G.nodes[v]["prize"] > 0 + else: + assert G.nodes[v]["prize"] == 0 + for _, _, data in G.edges(data=True): + old_edge = data["old_edge"] + assert data["cost"] == float(properties[old_edge]["cost"]) / 2.0 diff --git a/tspwplib/converter.py b/tspwplib/converter.py index 2e5d470..0145f6f 100644 --- a/tspwplib/converter.py +++ b/tspwplib/converter.py @@ -3,11 +3,22 @@ from copy import deepcopy import math -from typing import Dict, List, Tuple +from typing import Dict, List, Tuple, Union import networkx as nx import pandas as pd from .exception import UnexpectedSelfLoopException -from .types import EdgeFunctionName, Vertex, VertexFunctionName, VertexList +from .types import ( + Edge, + EdgeFunction, + EdgeFunctionName, + EdgeList, + EdgeProperties, + MultiEdge, + Vertex, + VertexFunction, + VertexFunctionName, + VertexList, +) def to_vertex_dataframe(graph: nx.Graph) -> pd.DataFrame: @@ -321,3 +332,133 @@ def to_simple_undirected(G: nx.MultiGraph) -> nx.Graph: raise ValueError("Negative key for edge.") return simple_graph + + +def split_edges(edge_list: EdgeList) -> EdgeList: + """Split each edge (u,v) by adding a new vertex w and two new edges (u,w), (w,v). + + Args: + edge_list: List of edges or multi-edges + + Returns: + List of edges (size 2 tuple). + Size of returned edge list is twice the size of the input edges. + """ + new_vertex = -1 + splits: EdgeList = [] + for edge in edge_list: + splits.append((edge[0], new_vertex)) + splits.append((new_vertex, edge[1])) + new_vertex -= 1 + return splits + + +LookupFromSplit = Dict[Edge, Union[Edge, MultiEdge]] + + +def lookup_from_split(edge_list: EdgeList, splits: EdgeList) -> LookupFromSplit: + """Get lookup from a split edge to an original edge. + + Args: + edge_list: Edge in original graph. + splits: List of edges created by [split_edges][tspwplib.converter.split_edges]. + + Returns: + Dictionary lookup from split edges to the original edges. + """ + lookup = {} + for i, edge in enumerate(edge_list): + lookup[splits[2 * i]] = edge + lookup[splits[2 * i + 1]] = edge + return lookup + + +LookupToSplit = Dict[Union[Edge, MultiEdge], Tuple[Edge, Edge]] + + +def lookup_to_split(edge_list: EdgeList, splits: EdgeList) -> LookupToSplit: + """Get lookup from an original edge to the two split edges. + + Args: + edge_list: Edge in original graph. + splits: List of edges created by [split_edges][tspwplib.converter.split_edges]. + + Returns: + Dictionary lookup from the original edges to a pair of split edges. + """ + lookup = {} + for i, edge in enumerate(edge_list): + lookup[edge] = (splits[2 * i], splits[2 * i + 1]) + return lookup + + +def prize_from_weighted_edges( + edge_weights: EdgeFunction, to_split: LookupToSplit +) -> VertexFunction: + """Get a prize function on the vertices from a weight function on the edges. + + Args: + edge_weights: Lookup from edges to weights. + to_split: Lookup from original edges to pairs of split edges + (see [lookup_to_split][tspwplib.converter.lookup_to_split]). + + Returns: + Lookup from fake vertices to weight of original edge that the fake vertex represents. + """ + prizes = {} + for edge, weight in edge_weights.items(): + first_split, second_split = to_split[edge] + if first_split[1] != second_split[0]: + message = "Second vertex of first edge and first vertex of second edge " + message += "must match in to_split_lookup" + raise LookupError(message) + vertex = first_split[1] + prizes[vertex] = weight + return prizes + + +def split_edge_cost(edge_cost: EdgeFunction, to_split: LookupToSplit) -> EdgeFunction: + """Assign half the cost of the original edge to each of the split edges. + + Args: + edge_cost: Lookup from edges to cost. + to_split: Lookup from original edges to pairs of split edges + (see [lookup_to_split][tspwplib.converter.lookup_to_split]). + + Returns: + Lookup from split edges to cost. + + Notes: + The cost is cast to a float. + """ + split_cost = {} + for edge, cost in edge_cost.items(): + first_split, second_split = to_split[edge] + half_cost = float(cost) / 2.0 + split_cost[first_split] = half_cost + split_cost[second_split] = half_cost + return split_cost + + +def split_graph_from_properties(edge_properties: EdgeProperties) -> nx.Graph: + """Split edges with properties and create undirected simple graph""" + edge_list = list(edge_properties.keys()) + splits = split_edges(edge_list) + to_split = lookup_to_split(edge_list, splits) + from_split = lookup_from_split(edge_list, splits) + prize = prize_from_weighted_edges( + {edge: item["weight"] for edge, item in edge_properties.items()}, to_split + ) + cost = split_edge_cost( + {edge: item["cost"] for edge, item in edge_properties.items()}, to_split + ) + + # create graph and assign prizes and costs + G = nx.Graph() + G.add_edges_from(splits) + nx.set_node_attributes(G, 0, name="prize") + nx.set_node_attributes(G, prize, name="prize") + nx.set_edge_attributes(G, 0, name="cost") + nx.set_edge_attributes(G, cost, name="cost") + nx.set_edge_attributes(G, from_split, name="old_edge") + return G diff --git a/tspwplib/problem.py b/tspwplib/problem.py index 4121c8f..c191b57 100644 --- a/tspwplib/problem.py +++ b/tspwplib/problem.py @@ -3,6 +3,7 @@ import random from typing import List import networkx as nx +import pandas as pd import tsplib95 from .types import EdgeList, Vertex, VertexFunctionName, VertexLookup from .walk import is_simple_cycle, walk_from_edge_list, total_prize @@ -32,7 +33,11 @@ def __init__( def edge_removal_probability(self) -> float: """Probability of removing an edge from the graph. - It is strongly recommended to only set this value in the constructor. + Returns: + Edge removal probability. + + Notes: + It is strongly recommended to only set this value in the constructor. """ return self._edge_removal_probability diff --git a/tspwplib/types.py b/tspwplib/types.py index 628167b..b204d30 100644 --- a/tspwplib/types.py +++ b/tspwplib/types.py @@ -1,18 +1,21 @@ """Type hinting and names""" from enum import Enum, IntEnum -from typing import Dict, List, Tuple +from typing import Any, Dict, List, Tuple, Union # vertex data structures Vertex = int -VertexFunction = Dict[Vertex, int] +VertexFunction = Dict[Vertex, Union[int, float]] VertexList = List[Vertex] VertexLookup = Dict[Vertex, Vertex] +VertexProperties = Dict[Vertex, Dict[str, Any]] # edge data structures Edge = Tuple[Vertex, Vertex] -EdgeList = List[Edge] -EdgeFunction = Dict[Vertex, VertexFunction] +MultiEdge = Tuple[Vertex, Vertex, int] +EdgeList = List[Union[Edge, MultiEdge]] +EdgeFunction = Dict[Union[Edge, MultiEdge], Union[int, float]] +EdgeProperties = Dict[Union[Edge, MultiEdge], Dict[str, Any]] # path data structures DisjointPaths = Tuple[VertexList, VertexList] From 2c07dacac35b7381b1428a0e347a5c890536079f Mon Sep 17 00:00:00 2001 From: Patrick O'Hara Date: Thu, 7 Oct 2021 15:09:29 +0100 Subject: [PATCH 02/12] =?UTF-8?q?=F0=9F=92=A9=20This=20is=20broken=20:(?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/test_profits_problem.py | 9 +++ tspwplib/__init__.py | 2 + tspwplib/converter.py | 17 ++++- tspwplib/problem.py | 124 ++++++++++++++++++++++++++++++++-- 4 files changed, 144 insertions(+), 8 deletions(-) diff --git a/tests/test_profits_problem.py b/tests/test_profits_problem.py index c594fd0..21af861 100644 --- a/tests/test_profits_problem.py +++ b/tests/test_profits_problem.py @@ -124,3 +124,12 @@ def test_is_pctsp_yes_instance( for i in range(num_nodes): edge_list.append((i, (i + 1) % num_nodes)) assert is_pctsp_yes_instance(graph, quota, root, edge_list) + +from pathlib import Path +from tspwplib.problem import parse_edge_list_weights, render_edge_list_weights + +def test_parse_problem(): + dataset_dir = Path("/", "Users", "patrick", "Datasets", "pctsp", "londonaq") + prob_filename = "londonaq_tiny.txt" + filepath = dataset_dir / prob_filename + ProfitsProblem.load(filepath) diff --git a/tspwplib/__init__.py b/tspwplib/__init__.py index a301247..b92e07c 100644 --- a/tspwplib/__init__.py +++ b/tspwplib/__init__.py @@ -10,6 +10,7 @@ is_split_vertex_pair, is_vertex_split_head, is_vertex_split_tail, + split_graph_from_properties, split_head, split_tail, tail_prize, @@ -99,6 +100,7 @@ "problem", "remove_self_loops_from_edge_list", "reorder_edge_list_from_root", + "split_graph_from_properties", "split_head", "split_tail", "tail_prize", diff --git a/tspwplib/converter.py b/tspwplib/converter.py index 0145f6f..ecfc5da 100644 --- a/tspwplib/converter.py +++ b/tspwplib/converter.py @@ -441,7 +441,22 @@ def split_edge_cost(edge_cost: EdgeFunction, to_split: LookupToSplit) -> EdgeFun def split_graph_from_properties(edge_properties: EdgeProperties) -> nx.Graph: - """Split edges with properties and create undirected simple graph""" + """Split edges with properties and create undirected simple graph. + + Args: + edge_properties: keys are edges, values are dicts of edge attributes + + Returns: + Undirected simple graph with edge attributes for cost, prize and old_edge + + Notes: + To get the original_edge that a split edge represents, access the 'old_edge' attribute + """ + for edge, data in edge_properties.items(): + if not "cost" in data: + raise KeyError(f"Edge property for edge {edge} has no cost") + if not "weight" in data: + raise KeyError(f"Edge property for edge {edge} has no weight") edge_list = list(edge_properties.keys()) splits = split_edges(edge_list) to_split = lookup_to_split(edge_list, splits) diff --git a/tspwplib/problem.py b/tspwplib/problem.py index c191b57..0233ab3 100644 --- a/tspwplib/problem.py +++ b/tspwplib/problem.py @@ -1,13 +1,26 @@ """Functions and classes for datasets""" import random +import re from typing import List import networkx as nx -import pandas as pd import tsplib95 -from .types import EdgeList, Vertex, VertexFunctionName, VertexLookup +from .types import EdgeFunction, EdgeList, Vertex, VertexFunctionName, VertexLookup from .walk import is_simple_cycle, walk_from_edge_list, total_prize +from tsplib95 import transformers +from tsplib95.fields import TransformerField + +class PrizesField(TransformerField): + """Field for demands.""" + + default = dict + + @classmethod + def build_transformer(cls): + node = transformers.FuncT(func=int) + demand = transformers.FuncT(func=float) + return transformers.MapT(key=node, value=demand, sep='\n') class ProfitsProblem(tsplib95.models.StandardProblem): """TSP with Profits Problem @@ -18,7 +31,7 @@ class ProfitsProblem(tsplib95.models.StandardProblem): # Maximum distance of the total route in a OP. cost_limit = tsplib95.fields.IntegerField("COST_LIMIT") # The scores of the nodes of a OP are given in the form (per line) - node_score = tsplib95.fields.DemandsField("NODE_SCORE_SECTION") + node_score = PrizesField("NODE_SCORE_SECTION") # The optimal solution to the TSP tspsol = tsplib95.fields.IntegerField("TSPSOL") @@ -65,11 +78,8 @@ def __set_node_attributes(self, graph: nx.Graph, names: VertexLookup) -> None: for vertex in list(self.get_nodes()): # pylint: disable=unsupported-membership-test,no-member is_depot = vertex in self.depots - coord: List[int] = self.node_coords.get(vertex) graph.add_node( names[vertex], - x=coord[0], - y=coord[1], prize=node_score[vertex], is_depot=is_depot, ) @@ -79,6 +89,10 @@ def __set_node_attributes(self, graph: nx.Graph, names: VertexLookup) -> None: graph[vertex]["demand"] = demand if not display is None: graph[vertex]["display"] = display + if self.node_coords: + coord = self.node_coords.get(vertex) + graph.nodes[names[vertex]]["x"] = coord[0] + graph.nodes[names[vertex]]["y"] = coord[1] def get_graph(self, normalize: bool = False) -> nx.Graph: """Return a networkx graph instance representing the problem. @@ -185,15 +199,20 @@ def get_root_vertex(self, normalize: bool = False) -> Vertex: except KeyError as key_error: raise ValueError("The list of depots is empty") from key_error - def get_edges(self) -> EdgeList: + def get_edges(self, normalize: bool = False) -> EdgeList: # pylint: disable=arguments-differ """Get a list of edges in the graph If the `edge_removal_probability` is set in the constructor, then edges will be randomly removed + Args: + normalize: If true use the normalized vertex ids + Returns: List of edges in the graph """ + if normalize: + raise NotImplementedError("Normalizing edges not yet implemented") edges: EdgeList = list(super().get_edges()) edges_copy = edges.copy() random.seed(self._seed) @@ -213,6 +232,79 @@ def get_edges(self) -> EdgeList: edges_copy.remove(edge) return edges_copy + def _create_wfunc(self, special=None): + """Overwrite create weight function""" + if self.is_explicit() and self.edge_weight_format == "EDGE_LIST_WEIGHTS": + return lambda i, j: self.edge_weights[(i, j)] + return super()._create_wfunc(special=special) + + def render(self): + # render each value by keyword + rendered = self.as_name_dict() + for name in list(rendered): + value = rendered.pop(name) + field = self.__class__.fields_by_name[name] + if self.is_explicit() and self.edge_weight_format == "EDGE_LIST_WEIGHTS" and name == "edge_weights": + rendered["EDGE_WEIGHT_SECTION"] = render_edge_list_weights(self.edge_weights) + + elif name in self.__dict__ or value != field.get_default_value(): + rendered[field.keyword] = field.render(value) + + # build keyword-value pairs with the separator + kvpairs = [] + for keyword, value in rendered.items(): + sep = ':\n' if '\n' in value else ': ' + kvpairs.append(f'{keyword}{sep}{value}') + kvpairs.append('EOF') + + # join and return the result + return '\n'.join(kvpairs) + + @classmethod + def parse(cls, text: str, **options): + """Parse text into a problem instance. + + Any keyword options are passed to the class constructor. If a keyword + argument has the same name as a field then they will collide and cause + an error. + + Args: + text: problem text + options: any keyword arguments to pass to the constructor + + Returns: + problem instance + """ + # prepare the regex for all known keys + keywords = '|'.join(cls.fields_by_keyword) + sep = r'''\s*:\s*|\s*\n''' + pattern = f'({keywords}|EOF)(?:{sep})' + + # split the whole text by known keys + regex = re.compile(pattern, re.M) + __, *results = regex.split(text) + + # pair keys and values + field_keywords = results[::2] + field_values = results[1::2] + + # parse into a dictionary + is_edge_list_weights = False + data = {} + for keyword, value in zip(field_keywords, field_values): + if keyword != 'EOF': + field = cls.fields_by_keyword[keyword] + name = cls.names_by_keyword[keyword] + field_value = field.parse(value.strip()) + if name == "EDGE_WEIGHT_TYPE" and field_value == "EDGE_LIST_WEIGHTS": + is_edge_list_weights = True + if name == "EDGE_WEIGHT_SECTION" and is_edge_list_weights: + field_value = parse_edge_list_weights(value.strip()) + data[name] = field_value + + # return as a model, letting options and field data potentially collide + return cls(**data, **options) + def is_pctsp_yes_instance( graph: nx.Graph, quota: int, root_vertex: Vertex, edge_list: EdgeList @@ -243,3 +335,21 @@ def is_pctsp_yes_instance( and root_vertex == walk[0] and root_vertex == walk[len(walk) - 1] ) + +def parse_edge_list_weights(text: str) -> EdgeFunction: + print(text) + return {} + +def render_edge_list_weights(edge_weights: EdgeFunction) -> str: + """Render edge weight dictionary to a string + + Args: + edge_weights: Keys are edge tuples. Values are the weight of the edge. + + Returns: + String representation of edge weights, including new lines. + """ + render = "" + for (u, v), weight in edge_weights.items(): + render += f"{u} {v} {weight}\n" + return render From 9b6e28d922e7eaa1fe766714921397032bf97f94 Mon Sep 17 00:00:00 2001 From: Patrick O'Hara Date: Tue, 12 Oct 2021 15:38:48 +0100 Subject: [PATCH 03/12] =?UTF-8?q?=E2=9C=A8=20Pydantic=20model=20for=20tspl?= =?UTF-8?q?ib?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- scripts/from_urbanair.py | 119 ++++++++++ setup.py | 2 + tests/test_profits_problem.py | 3 +- tspwplib/converter.py | 56 +++-- tspwplib/problem.py | 436 +++++++++++++++++++++++++++------- tspwplib/types.py | 115 ++++++++- tspwplib/utils.py | 27 +++ 7 files changed, 648 insertions(+), 110 deletions(-) create mode 100644 scripts/from_urbanair.py diff --git a/scripts/from_urbanair.py b/scripts/from_urbanair.py new file mode 100644 index 0000000..caa7406 --- /dev/null +++ b/scripts/from_urbanair.py @@ -0,0 +1,119 @@ +import json +from pathlib import Path + +import networkx as nx +import pandas as pd +import tsplib95 +import typer + +from tspwplib import split_graph_from_properties +from tspwplib.problem import BaseTSP +from tspwplib.types import EdgeWeightFormat, LondonaqLocation, LondonaqTimestamp + + +def choose_root(G): + # choose the root vertex + root = None + root_found = False + + for vertex in G.nodes(): + if not root_found and G.degree(vertex) > 2: + root_found = True + root = vertex + else: + root + return root + + +OLD_EDGE_LOOKUP_JSON = "old_edge_lookup.json" +OLD_NODE_LOOKUP_JSON = "old_node_lookup.json" + + +def generate_londonaq_dataset( + dataset_dir: Path, + location_id: LondonaqLocation, + timestamp_id: LondonaqTimestamp, + edges_csv_filename: str = "edges.csv", + nodes_csv_filename: str = "nodes.csv", + old_edge_lookup: str = OLD_EDGE_LOOKUP_JSON, + old_node_lookup: str = OLD_NODE_LOOKUP_JSON, +) -> BaseTSP: + """Generate a londonaq dataset""" + # get the CSV files for edges and nodes + dataset_dir.mkdir(parents=False, exist_ok=True) + edges_filepath = dataset_dir / edges_csv_filename + nodes_filepath = dataset_dir / nodes_csv_filename + if not edges_filepath.exists(): + raise FileNotFoundError(edges_filepath) + if not nodes_filepath.exists(): + raise FileNotFoundError(nodes_filepath) + nodes_df = pd.read_csv(nodes_filepath) + edges_df = pd.read_csv(edges_filepath) + + # split edges then relabel the nodes + edges_df = edges_df.set_index(["source", "target", "key"]) + edge_attrs = edges_df.to_dict("index") + split_graph = split_graph_from_properties(edge_attrs) + normalize_map = {node: i for i, node in enumerate(split_graph.nodes())} + normalized_graph = nx.relabel_nodes(split_graph, normalize_map, copy=True) + + # save the node and edge mappings to a json file + old_edge_lookup = { + (normalize_map[u], normalize_map[v]): data["old_edge"] + for u, v, data in split_graph.edges(data=True) + } + old_vertex_lookup = {new: old for old, new in normalize_map.items()} + with open(dataset_dir / old_edge_lookup, "w", encoding="UTF-8") as f: + json.dump(old_edge_lookup, f) + with open(dataset_dir / old_node_lookup, "w", encoding="UTF-8") as f: + json.dump(old_vertex_lookup, f) + + # TODO get root vertex + root = 0 + + # TODO get node co-ordinates + + # get TSP representation + + # save to txt file + + +def to_pandas_nodelist(G: nx.Graph) -> pd.DataFrame: + """Move node attributes to a pandas dataframe. Node ID is stored in 'node' column.""" + return pd.DataFrame([{"node": node, **data} for node, data in G.nodes(data=True)]) + + +def main(): + dataset_dir = Path("/", "Users", "patrick", "Datasets", "pctsp", "londonaq") + + root_vertex = choose_root(normalized_graph) + nx.set_node_attributes(normalized_graph, False, "is_depot") + normalized_graph.nodes[root_vertex]["is_depot"] = True + + ndf = to_pandas_nodelist(normalized_graph) + ndf = ndf.rename(columns={"prize": "demand"}) + ndf["demand"] = ndf["demand"].apply(lambda x: int(round(x))) + edf = nx.to_pandas_edgelist(normalized_graph) + edf = edf.rename(columns={"cost": "weight"}) + edf["weight"] = edf["weight"].apply(lambda x: int(round(x))) + + name = "londonaq_tiny" + comment = "Prize-collecting TSP on air quality dataset in London." + problem_type = "PCTSP" + problem = BaseTSP.from_dataframes( + name, + comment, + problem_type, + edf, + ndf, + edge_weight_format=EdgeWeightFormat.LOWER_DIAG_ROW, + ) + graph = problem.get_graph() + + tsplib = problem.to_tsplib95() + tsplib.save("test.txt") + loaded_tsplib = tsplib95.models.StandardProblem.load("test.txt") + + +if __name__ == "__main__": + typer.run(main) diff --git a/setup.py b/setup.py index 4259e35..124dbbe 100644 --- a/setup.py +++ b/setup.py @@ -8,7 +8,9 @@ url="https://github.com/PatrickOHara/tspwplib", description="Library of instances for TSP with Profits", install_requires=[ + "networkx>=2.6.0", "pandas>=1.0.0", + "pydantic>=1.8.2", "tsplib95>=0.7.1", ], name="tspwplib", diff --git a/tests/test_profits_problem.py b/tests/test_profits_problem.py index 21af861..69306cc 100644 --- a/tests/test_profits_problem.py +++ b/tests/test_profits_problem.py @@ -125,8 +125,9 @@ def test_is_pctsp_yes_instance( edge_list.append((i, (i + 1) % num_nodes)) assert is_pctsp_yes_instance(graph, quota, root, edge_list) + from pathlib import Path -from tspwplib.problem import parse_edge_list_weights, render_edge_list_weights + def test_parse_problem(): dataset_dir = Path("/", "Users", "patrick", "Datasets", "pctsp", "londonaq") diff --git a/tspwplib/converter.py b/tspwplib/converter.py index ecfc5da..53fff2b 100644 --- a/tspwplib/converter.py +++ b/tspwplib/converter.py @@ -440,11 +440,21 @@ def split_edge_cost(edge_cost: EdgeFunction, to_split: LookupToSplit) -> EdgeFun return split_cost -def split_graph_from_properties(edge_properties: EdgeProperties) -> nx.Graph: +def split_graph_from_properties( + edge_properties: EdgeProperties, + edge_attr_to_split: str = "cost", + edge_attr_to_vertex: str = "length", + new_vertex_attr: str = "prize", + old_edge_attr: str = "old_edge", +) -> nx.Graph: """Split edges with properties and create undirected simple graph. Args: - edge_properties: keys are edges, values are dicts of edge attributes + edge_properties: Keys are edges. Values are dicts of edge attributes. + edge_attr_to_split: Name of edge attribute. Assign half the value to each split edge. + edge_attr_to_vertex: Name of edge attribute. Assign edge value to a new vertex attribute. + new_vertex_attr: Name of the newly created vertex attribute. + old_edge_attr: Name of the newly created attribute for the old edge ID. Returns: Undirected simple graph with edge attributes for cost, prize and old_edge @@ -452,28 +462,38 @@ def split_graph_from_properties(edge_properties: EdgeProperties) -> nx.Graph: Notes: To get the original_edge that a split edge represents, access the 'old_edge' attribute """ - for edge, data in edge_properties.items(): - if not "cost" in data: - raise KeyError(f"Edge property for edge {edge} has no cost") - if not "weight" in data: - raise KeyError(f"Edge property for edge {edge} has no weight") + # check that every edge has an attribute to split and an attr to move to vertex + is_edge_attr_to_split = True + is_edge_attr_to_vertex = True + for data in edge_properties.values(): + if not edge_attr_to_split in data: + is_edge_attr_to_split = False + if not edge_attr_to_vertex in data: + is_edge_attr_to_vertex = False + + # split edges and create lookups edge_list = list(edge_properties.keys()) splits = split_edges(edge_list) to_split = lookup_to_split(edge_list, splits) from_split = lookup_from_split(edge_list, splits) - prize = prize_from_weighted_edges( - {edge: item["weight"] for edge, item in edge_properties.items()}, to_split - ) - cost = split_edge_cost( - {edge: item["cost"] for edge, item in edge_properties.items()}, to_split - ) # create graph and assign prizes and costs G = nx.Graph() G.add_edges_from(splits) - nx.set_node_attributes(G, 0, name="prize") - nx.set_node_attributes(G, prize, name="prize") - nx.set_edge_attributes(G, 0, name="cost") - nx.set_edge_attributes(G, cost, name="cost") - nx.set_edge_attributes(G, from_split, name="old_edge") + if is_edge_attr_to_vertex: + prize = prize_from_weighted_edges( + {edge: item[edge_attr_to_vertex] for edge, item in edge_properties.items()}, + to_split, + ) + nx.set_node_attributes(G, 0.0, name=new_vertex_attr) + nx.set_node_attributes(G, prize, name=new_vertex_attr) + + if is_edge_attr_to_split: + cost = split_edge_cost( + {edge: item[edge_attr_to_split] for edge, item in edge_properties.items()}, + to_split, + ) + nx.set_edge_attributes(G, 0.0, name=edge_attr_to_split) + nx.set_edge_attributes(G, cost, name=edge_attr_to_split) + nx.set_edge_attributes(G, from_split, name=old_edge_attr) return G diff --git a/tspwplib/problem.py b/tspwplib/problem.py index 0233ab3..d07e255 100644 --- a/tspwplib/problem.py +++ b/tspwplib/problem.py @@ -1,26 +1,357 @@ """Functions and classes for datasets""" import random -import re -from typing import List +from typing import List, Optional, Tuple, Union + import networkx as nx +import pandas as pd +import pydantic import tsplib95 -from .types import EdgeFunction, EdgeList, Vertex, VertexFunctionName, VertexLookup + +from .types import ( + DisplayDataType, + EdgeDataFormat, + EdgeFunction, + EdgeList, + EdgeWeightFormat, + EdgeWeightType, + NodeCoords, + NodeCoordType, + Vertex, + VertexFunction, + VertexFunctionName, + VertexList, + VertexLookup, +) +from .utils import edge_attribute_names, node_attribute_names from .walk import is_simple_cycle, walk_from_edge_list, total_prize -from tsplib95 import transformers -from tsplib95.fields import TransformerField -class PrizesField(TransformerField): - """Field for demands.""" +class BaseTSP(pydantic.BaseModel): + """A pydantic model for tsplib95. + + Each field is validated with type hinting. + """ + + capacity: Optional[Union[int, float]] + comment: str + demands: Optional[VertexFunction] + depots: VertexList + dimension: int + display_data: Optional[List[Tuple[int, float, float]]] + display_data_type: DisplayDataType + edge_data: EdgeList + edge_data_format: EdgeDataFormat + edge_weights: Optional[EdgeFunction] + edge_weight_format: EdgeWeightFormat + edge_weight_type: EdgeWeightType + fixed_edges: EdgeList + name: str + node_coords: NodeCoords + node_coord_type: NodeCoordType + problem_type: str + tours: Optional[List[VertexList]] + + class Config: + """Pydantic configuration""" + + arbitrary_types_allowed = True - default = dict + @classmethod + def from_networkx( + cls, + name: str, + comment: str, + problem_type: str, + G: nx.Graph, + capacity: Optional[Union[int, float]] = None, + display_data: Optional[List[Tuple[Vertex, float, float]]] = None, + display_data_type: DisplayDataType = DisplayDataType.NO_DISPLAY, + edge_weight_format: EdgeWeightFormat = EdgeWeightFormat.FULL_MATRIX, + ): + """Get a base TSP model from a networkx graph""" + edge_attr_names = edge_attribute_names(G) + node_attr_names = node_attribute_names(G) + if "weight" not in edge_attr_names: + message = "'weight' is required to be an edge attribute, but was not found in graph. " + message += "This function only supports an explicit weight function. " + raise NotImplementedError(message) + is_2d = "x" in node_attr_names and "y" in node_attr_names + is_3d = is_2d and "z" in node_attr_names + if is_3d: + node_coord_type = NodeCoordType.THREED_COORDS + node_coords = { + node: (data["x"], data["y"], data["z"]) + for node, data in G.nodes(data=True) + } + elif is_2d: + node_coord_type = NodeCoordType.TWOD_COORDS + node_coords = { + node: (data["x"], data["y"]) for node, data in G.nodes(data=True) + } + else: + node_coord_type = NodeCoordType.NO_COORDS + node_coords = {} + + demands = None + if "demand" in node_attr_names: + demands = nx.get_node_attributes(G, "demand") + if display_data_type == DisplayDataType.COORD_DISPLAY: + display_data = node_coords + + fixed_edges = [] + if "is_fixed" in edge_attr_names: + fixed_edges = [ + edge for edge, data in G.edges(data=True) if data["is_fixed"] + ] + + depots = [] + if "is_depot" in node_attr_names: + depots = [node for node, data in G.nodes(data=True) if data["is_depot"]] + edge_data = list(G.edges()) + edge_weights = nx.get_edge_attributes(G, "weight") + return cls( + capacity=capacity, + comment=comment, + demands=demands, + depots=depots, + dimension=G.number_of_nodes(), + display_data=display_data, + display_data_type=display_data_type, + edge_data=edge_data, + edge_data_format=EdgeDataFormat.EDGE_LIST, + edge_weights=edge_weights, + edge_weight_format=edge_weight_format, + edge_weight_type=EdgeWeightType.EXPLICIT, + fixed_edges=fixed_edges, + name=name, + node_coords=node_coords, + node_coord_type=node_coord_type, + problem_type=problem_type, + tours=None, + ) + + @classmethod + def from_dataframes( + cls, + name: str, + comment: str, + problem_type: str, + edges_df: pd.DataFrame, + nodes_df: pd.DataFrame, + capacity: Optional[Union[int, float]] = None, + display_data: Optional[List[Tuple[Vertex, float, float]]] = None, + display_data_type: DisplayDataType = DisplayDataType.NO_DISPLAY, + edge_weight_format: EdgeWeightFormat = EdgeWeightFormat.FULL_MATRIX, + ): + """Get a TSP base model from edge and node dataframes + + Notes: + Essential edge columns: [source, target, weight]. + Optional edge columns: [is_fixed]. + Essential node columns: [node, is_depot]. + Optional node columns: [x, y, z, demand]. + The edge weight function is explicitly given by the 'weight' column. + """ + if "weight" not in edges_df: + message = "'weight' is not a column in edges_df. " + message += "This function only supports an explicit weight function. " + message += "If you have a column that can be used as the weight function, " + message += "please rename the column to 'weight'." + raise NotImplementedError(message) + is_2d = "x" in nodes_df.columns and "y" in nodes_df.columns + is_3d = is_2d and "z" in nodes_df.columns + if is_3d: + node_coord_type = NodeCoordType.THREED_COORDS + node_coords = dict( + zip(nodes_df["node"], zip(nodes_df["x"], nodes_df["y"], nodes_df["z"])) + ) + elif is_2d: + node_coord_type = NodeCoordType.TWOD_COORDS + node_coords = dict(zip(nodes_df["node"], zip(nodes_df["x"], nodes_df["y"]))) + else: + node_coord_type = NodeCoordType.NO_COORDS + node_coords = {} + + demands = None + if "demand" in nodes_df.columns: + demands = dict(zip(nodes_df["node"], nodes_df["demand"])) + + if display_data_type == DisplayDataType.COORD_DISPLAY: + display_data = node_coords + + fixed_edges = [] + if "is_fixed" in edges_df.columns: + fixed_edges_df = edges_df.loc[edges_df["is_fixed"]] + fixed_edges = list(zip(fixed_edges_df["source"], fixed_edges_df["target"])) + + depots = nodes_df.loc[nodes_df["is_depot"]]["node"].to_list() + edge_data = list(zip(edges_df["source"], edges_df["target"])) + edge_weights = dict(zip(edge_data, edges_df["weight"])) + return cls( + capacity=capacity, + comment=comment, + demands=demands, + depots=depots, + dimension=len(nodes_df["node"]), + display_data=display_data, + display_data_type=display_data_type, + edge_data=edge_data, + edge_data_format=EdgeDataFormat.EDGE_LIST, + edge_weights=edge_weights, + edge_weight_format=edge_weight_format, + edge_weight_type=EdgeWeightType.EXPLICIT, + fixed_edges=fixed_edges, + name=name, + node_coords=node_coords, + node_coord_type=node_coord_type, + problem_type=problem_type, + tours=None, + ) @classmethod - def build_transformer(cls): - node = transformers.FuncT(func=int) - demand = transformers.FuncT(func=float) - return transformers.MapT(key=node, value=demand, sep='\n') + def from_tsplib95(cls, problem: tsplib95.models.StandardProblem): + """Get a TSP base model from a StandardProblem object""" + return cls( + capacity=problem.capacity, + comment=problem.comment, + demands=problem.demands, + depots=problem.depots, + dimension=problem.dimension, + display_data=problem.display_data, + display_data_type=problem.display_data_type, + edge_data=problem.get_edges(), + edge_data_format=problem.edge_data_format, + edge_weights={ + (i, j): problem.get_weight(i, j) for i, j in problem.get_edges() + }, + edge_weight_format=problem.edge_weight_format, + edge_weight_type=problem.edge_weight_type, + fixed_edges=problem.fixed_edges, + name=problem.name, + node_coords=[problem.node_coords.get(i) for i in problem.get_nodes()], + node_coord_type=problem.node_coord_type, + problem_type=problem.type, + tours=problem.tours, + ) + + def to_tsplib95(self) -> tsplib95.models.StandardProblem: + """Convert to a tsplib95 standard model""" + weights = self.edge_weights + if self.edge_weight_type == EdgeWeightType.EXPLICIT: + # create a graph + G = nx.Graph(incoming_graph_data=self.edge_data) + nx.set_edge_attributes(G, self.edge_weights, name="weight") + # then get the weighted adjacency matrix + weights = nx.to_numpy_array( + G, nodelist=list(G.nodes()).sort(), weight="weight", dtype=int + ) + + return tsplib95.models.StandardProblem( + # capacity=self.capacity, + comment=self.comment, + demands=self.demands, + depots=self.depots, + dimension=self.dimension, + # display_data=self.display_data, + display_data_type=self.display_data_type, + edge_data=self.edge_data, + edge_data_format=self.edge_data_format, + edge_weights=weights, + edge_weight_format=self.edge_weight_format, + edge_weight_type=self.edge_weight_type, + # fixed_edges=self.fixed_edges, + name=self.name, + node_coords=self.node_coords, + node_coord_type=self.node_coord_type, + type=self.problem_type, + # tours=self.tours, + ) + + def __set_graph_attributes(self, graph: nx.Graph) -> None: + """Set graph attributes such as 'name' and 'comment'""" + graph.graph["name"] = self.name + graph.graph["comment"] = self.comment + graph.graph["problem_type"] = self.problem_type + graph.graph["dimension"] = self.dimension + if not self.capacity is None: + graph.graph["capacity"] = self.capacity + + def __set_node_attributes(self, graph: nx.Graph) -> None: + """Set node attributes""" + for vertex in graph.nodes(): + graph.nodes[vertex]["is_depot"] = vertex in self.depots + if self.demands: + graph.nodes[vertex]["demand"] = self.demands[vertex] + if self.display_data: + graph.nodes[vertex]["display"] = self.display_data[vertex] + if self.node_coords: + coords = self.node_coords[vertex] + graph.nodes[vertex]["x"] = coords["x"] + graph.nodes[vertex]["y"] = coords["y"] + if self.node_coord_type == NodeCoordType.THREED_COORDS: + graph.nodes[vertex]["z"] = coords["z"] + + def __add_edges(self, graph: nx.Graph) -> None: + """Add edges from edge data + + Args: + graph: Input graph + """ + for (u, v) in self.edge_data: + graph.add_edge(u, v) + + def __set_edge_attributes(self, graph: nx.Graph) -> None: + """Set edge attributes for 'weight' and 'is_fixed' + + Args: + graph: Input graph + """ + nx.set_edge_attributes(graph, self.edge_weights, name="weight") + fixed = {(u, v): (u, v) in self.fixed_edges for u, v in graph.edges()} + nx.set_edge_attributes(graph, fixed, name="is_fixed") + + def get_graph(self) -> nx.Graph: + """Get a networkx graph + + Returns: + Undirected networkx graph with node attributes such as 'is_depot' + and edge attributes such as 'weight' and 'is_fixed'. + """ + G = nx.Graph() + self.__set_graph_attributes(G) + self.__add_edges(G) + self.__set_edge_attributes(G) + self.__set_node_attributes(G) + return G + + +class PrizeCollectingTSP(BaseTSP): + """Prize-collecting TSP pydantic model""" + + def get_root_vertex(self) -> Vertex: + """Get the root vertex from the 'depots' attribute + + Returns: + Root vertex + + Raises: + ValueError: If the number of depots to choose from is zero or greater than 1 + """ + if len(self.depots) > 1: + raise ValueError( + "More than 1 depot to choose from: which depot should I choose?" + ) + try: + # pylint: disable=unsubscriptable-object + return self.depots[0] + except KeyError as key_error: + raise ValueError("The list of depots is empty") from key_error + + def get_total_prize(self) -> Union[int, float]: + """"Get the total prize (demand) of all vertices""" + return sum(self.demands.values()) + class ProfitsProblem(tsplib95.models.StandardProblem): """TSP with Profits Problem @@ -31,7 +362,7 @@ class ProfitsProblem(tsplib95.models.StandardProblem): # Maximum distance of the total route in a OP. cost_limit = tsplib95.fields.IntegerField("COST_LIMIT") # The scores of the nodes of a OP are given in the form (per line) - node_score = PrizesField("NODE_SCORE_SECTION") + node_score = tsplib95.fields.DemandsField("NODE_SCORE_SECTION") # The optimal solution to the TSP tspsol = tsplib95.fields.IntegerField("TSPSOL") @@ -199,7 +530,9 @@ def get_root_vertex(self, normalize: bool = False) -> Vertex: except KeyError as key_error: raise ValueError("The list of depots is empty") from key_error - def get_edges(self, normalize: bool = False) -> EdgeList: # pylint: disable=arguments-differ + def get_edges( + self, normalize: bool = False + ) -> EdgeList: # pylint: disable=arguments-differ """Get a list of edges in the graph If the `edge_removal_probability` is set in the constructor, @@ -232,79 +565,6 @@ def get_edges(self, normalize: bool = False) -> EdgeList: # pylint: disable=ar edges_copy.remove(edge) return edges_copy - def _create_wfunc(self, special=None): - """Overwrite create weight function""" - if self.is_explicit() and self.edge_weight_format == "EDGE_LIST_WEIGHTS": - return lambda i, j: self.edge_weights[(i, j)] - return super()._create_wfunc(special=special) - - def render(self): - # render each value by keyword - rendered = self.as_name_dict() - for name in list(rendered): - value = rendered.pop(name) - field = self.__class__.fields_by_name[name] - if self.is_explicit() and self.edge_weight_format == "EDGE_LIST_WEIGHTS" and name == "edge_weights": - rendered["EDGE_WEIGHT_SECTION"] = render_edge_list_weights(self.edge_weights) - - elif name in self.__dict__ or value != field.get_default_value(): - rendered[field.keyword] = field.render(value) - - # build keyword-value pairs with the separator - kvpairs = [] - for keyword, value in rendered.items(): - sep = ':\n' if '\n' in value else ': ' - kvpairs.append(f'{keyword}{sep}{value}') - kvpairs.append('EOF') - - # join and return the result - return '\n'.join(kvpairs) - - @classmethod - def parse(cls, text: str, **options): - """Parse text into a problem instance. - - Any keyword options are passed to the class constructor. If a keyword - argument has the same name as a field then they will collide and cause - an error. - - Args: - text: problem text - options: any keyword arguments to pass to the constructor - - Returns: - problem instance - """ - # prepare the regex for all known keys - keywords = '|'.join(cls.fields_by_keyword) - sep = r'''\s*:\s*|\s*\n''' - pattern = f'({keywords}|EOF)(?:{sep})' - - # split the whole text by known keys - regex = re.compile(pattern, re.M) - __, *results = regex.split(text) - - # pair keys and values - field_keywords = results[::2] - field_values = results[1::2] - - # parse into a dictionary - is_edge_list_weights = False - data = {} - for keyword, value in zip(field_keywords, field_values): - if keyword != 'EOF': - field = cls.fields_by_keyword[keyword] - name = cls.names_by_keyword[keyword] - field_value = field.parse(value.strip()) - if name == "EDGE_WEIGHT_TYPE" and field_value == "EDGE_LIST_WEIGHTS": - is_edge_list_weights = True - if name == "EDGE_WEIGHT_SECTION" and is_edge_list_weights: - field_value = parse_edge_list_weights(value.strip()) - data[name] = field_value - - # return as a model, letting options and field data potentially collide - return cls(**data, **options) - def is_pctsp_yes_instance( graph: nx.Graph, quota: int, root_vertex: Vertex, edge_list: EdgeList @@ -336,10 +596,12 @@ def is_pctsp_yes_instance( and root_vertex == walk[len(walk) - 1] ) + def parse_edge_list_weights(text: str) -> EdgeFunction: print(text) return {} + def render_edge_list_weights(edge_weights: EdgeFunction) -> str: """Render edge weight dictionary to a string diff --git a/tspwplib/types.py b/tspwplib/types.py index b204d30..b5d27c6 100644 --- a/tspwplib/types.py +++ b/tspwplib/types.py @@ -1,5 +1,6 @@ """Type hinting and names""" +from datetime import datetime from enum import Enum, IntEnum from typing import Any, Dict, List, Tuple, Union @@ -23,21 +24,106 @@ # pylint: disable=invalid-name -class VertexFunctionName(str, Enum): +class StrEnumMixin: + """When the `str(...)` method is called on this mixin, return the value of the Enum.""" + + def __str__(self): + try: + return self.value() + except TypeError: + return self + + +class EdgeWeightType(StrEnumMixin, str, Enum): + """Specifies how the edge weights (or distances) are given""" + + EXPLICIT = "EXPLICIT" # Weights are listed explicitly in the corresponding section + EUC2D = "EUC2D" # Weights are Euclidean distances in 2-D + EUC3D = "EUC3D" # Weights are Euclidean distances in 3-D + MAX2D = "MAX2D" # Weights are maximum distances in 2-D + MAX3D = "MAX3D" # Weights are maximum distances in 3-D + MAN2D = "MAN2D" # Weights are Manhattan distances in 2-D + MAN3D = "MAN3D" # Weights are Manhattan distances in 3-D + CEIL2D = "CEIL2D" # Weights are Euclidean distances in 2-D rounded up + GEO = "GEO" # Weights are geographical distances + ATT = "ATT" # Special distance function for problems att48 and att532 + XRAY1 = ( + "XRAY1" # Special distance function for crystallography problems (Version 1) + ) + XRAY2 = ( + "XRAY2" # Special distance function for crystallography problems (Version 2) + ) + SPECIAL = "SPECIAL" # There is a special distance function documented elsewhere + + +class EdgeWeightFormat(StrEnumMixin, str, Enum): + """Describes the format of the edge weights if they are given explicitly""" + + FUNCTION = "FUNCTION" # Weights are given by a function (see above) + FULL_MATRIX = "FULL_MATRIX" # Weights are given by a full matrix + UPPER_ROW = ( + "UPPER_ROW" # Upper triangular matrix (row-wise without diagonal entries) + ) + LOWER_ROW = ( + "LOWER_ROW" # Lower triangular matrix (row-wise without diagonal entries) + ) + UPPER_DIAG_ROW = "UPPER_DIAG_ROW" # Upper triangular matrix + LOWER_DIAG_ROW = "LOWER_DIAG_ROW" # Lower triangular matrix + UPPER_COL = ( + "UPPER_COL" # Upper triangular matrix (column-wise without diagonal entries) + ) + LOWER_COL = ( + "LOWER_COL" # Lower triangular matrix (column-wise without diagonal entries) + ) + UPPER_DIAG_COL = "UPPER_DIAG_COL" # Upper triangular matrix + LOWER_DIAG_COL = "LOWER_DIAG_COL" # Lower triangular matrix + + +class EdgeDataFormat(StrEnumMixin, str, Enum): + """How the edges are listed. + + Notes: + This does not include edge attributes. It is only the edge IDs. + """ + + EDGE_LIST = "EDGE_LIST" + ADJ_LIST = "ADJ_LIST" + + +class NodeCoordType(StrEnumMixin, str, Enum): + """How node co-ordinates are represented""" + + TWOD_COORDS = "TWOD_COORDS" # Nodes are specified by coordinates in 2-D + THREED_COORDS = "THREED_COORDS" # Nodes are specified by coordinates in 3-D + NO_COORDS = "NO_COORDS" # The nodes do not have associated coordinates + + +class DisplayDataType(StrEnumMixin, str, Enum): + """How visualisation should be done.""" + + COORD_DISPLAY = "COORD_DISPLAY" # Display is generated from the node coordinates + TWOD_DISPLAY = "TWOD_DISPLAY" # Explicit coordinates in 2-D are given + NO_DISPLAY = "NO_DISPLAY" # No graphical display is possible + + +NodeCoords = Dict[Vertex, Union[Tuple[float, float], Tuple[float, float, float]]] + + +class VertexFunctionName(StrEnumMixin, str, Enum): """Valid names of functions on vertices""" demand = "demand" prize = "prize" -class EdgeFunctionName(str, Enum): +class EdgeFunctionName(StrEnumMixin, str, Enum): """Valid names of functions on edges""" cost = "cost" weight = "weight" -class GraphName(str, Enum): +class GraphName(StrEnumMixin, str, Enum): """Names of TSPlib instances""" a280 = "a280" @@ -153,7 +239,28 @@ class GraphName(str, Enum): vm1748 = "vm1748" -class Generation(str, Enum): +class LondonaqGraphName(StrEnumMixin, str, Enum): + """Names of graphs with London air quality forecasts""" + + laqkxA = "laqkxA" + + +class LondonaqTimestamp(Enum): + """Timestamps of the forecasts for London air quality forecasts""" + + A = datetime(2021, 10, 12, 8, 0, 0, tzinfo=datetime.timezone.utc) # 9am BST + + +class LondonaqLocation(StrEnumMixin, str, Enum): + """Names of locations that the London air quality graph is centered upon""" + + bb = "Big Ben" + kx = "King's Cross" + ro = "Royal Observatory Greenwich" + ws = "Wembley Stadium" + + +class Generation(StrEnumMixin, str, Enum): """Generations of TSPwP problem instances""" gen1 = "gen1" diff --git a/tspwplib/utils.py b/tspwplib/utils.py index b427e5e..a806c2d 100644 --- a/tspwplib/utils.py +++ b/tspwplib/utils.py @@ -1,6 +1,9 @@ """Useful functions for parsing""" +from itertools import chain from pathlib import Path +from typing import List +import networkx as nx from .types import Alpha, Generation, GraphName @@ -40,3 +43,27 @@ def build_path_to_tsplib_instance(tsplib_root: Path, name: GraphName) -> Path: """ filename = name.value + ".tsp" return tsplib_root / filename + + +def edge_attribute_names(G: nx.Graph) -> List[str]: + """Get the names of all edge attributes + + Args: + G: Graph + + Returns: + List of attribute names + """ + return list(set(chain.from_iterable(d.keys() for *_, d in G.edges(data=True)))) + + +def node_attribute_names(G: nx.Graph) -> List[str]: + """Get the names of all node attributes + + Args: + G: Graph + + Returns: + List of node attribute names + """ + return list(set(chain.from_iterable(d.keys() for _, d in G.nodes(data=True)))) From cfd99caf593d92a96bbdf8a06b564302d713061b Mon Sep 17 00:00:00 2001 From: Patrick O'Hara Date: Wed, 13 Oct 2021 17:48:34 +0100 Subject: [PATCH 04/12] =?UTF-8?q?=F0=9F=90=9B=20Loading=20from=20CSV?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- scripts/from_urbanair.py | 88 +++++++++++++++++++++------------------- tspwplib/problem.py | 9 ++-- tspwplib/types.py | 14 ++++++- tspwplib/utils.py | 25 +++++++++++- 4 files changed, 89 insertions(+), 47 deletions(-) diff --git a/scripts/from_urbanair.py b/scripts/from_urbanair.py index caa7406..a0f96b7 100644 --- a/scripts/from_urbanair.py +++ b/scripts/from_urbanair.py @@ -8,7 +8,8 @@ from tspwplib import split_graph_from_properties from tspwplib.problem import BaseTSP -from tspwplib.types import EdgeWeightFormat, LondonaqLocation, LondonaqTimestamp +from tspwplib.types import Edge, EdgeWeightFormat, LondonaqGraphName, LondonaqLocation, LondonaqLocationShort, LondonaqTimestamp +from tspwplib.utils import londonaq_comment, londonaq_graph_name def choose_root(G): @@ -20,8 +21,6 @@ def choose_root(G): if not root_found and G.degree(vertex) > 2: root_found = True root = vertex - else: - root return root @@ -31,14 +30,16 @@ def choose_root(G): def generate_londonaq_dataset( dataset_dir: Path, - location_id: LondonaqLocation, - timestamp_id: LondonaqTimestamp, + name: LondonaqGraphName, + comment: str, edges_csv_filename: str = "edges.csv", nodes_csv_filename: str = "nodes.csv", old_edge_lookup: str = OLD_EDGE_LOOKUP_JSON, old_node_lookup: str = OLD_NODE_LOOKUP_JSON, ) -> BaseTSP: """Generate a londonaq dataset""" + + # get the CSV files for edges and nodes dataset_dir.mkdir(parents=False, exist_ok=True) edges_filepath = dataset_dir / edges_csv_filename @@ -53,29 +54,51 @@ def generate_londonaq_dataset( # split edges then relabel the nodes edges_df = edges_df.set_index(["source", "target", "key"]) edge_attrs = edges_df.to_dict("index") - split_graph = split_graph_from_properties(edge_attrs) + split_graph = split_graph_from_properties( + edge_attrs, + edge_attr_to_split="cost", + edge_attr_to_vertex="length", + new_vertex_attr="demand", + old_edge_attr="old_edge", + ) normalize_map = {node: i for i, node in enumerate(split_graph.nodes())} normalized_graph = nx.relabel_nodes(split_graph, normalize_map, copy=True) # save the node and edge mappings to a json file - old_edge_lookup = { + old_edges = { (normalize_map[u], normalize_map[v]): data["old_edge"] for u, v, data in split_graph.edges(data=True) } - old_vertex_lookup = {new: old for old, new in normalize_map.items()} - with open(dataset_dir / old_edge_lookup, "w", encoding="UTF-8") as f: - json.dump(old_edge_lookup, f) + old_vertices= {new: old for old, new in normalize_map.items()} + + # TODO convert tuples to lists when dumping + # with open(dataset_dir / old_edge_lookup, "w", encoding="UTF-8") as f: + # json.dump(old_edges, f) with open(dataset_dir / old_node_lookup, "w", encoding="UTF-8") as f: - json.dump(old_vertex_lookup, f) + json.dump(old_vertices, f) # TODO get root vertex - root = 0 + root_vertex = 0 + nx.set_node_attributes(normalized_graph, False, "is_depot") + normalized_graph.nodes[root_vertex]["is_depot"] = True # TODO get node co-ordinates # get TSP representation + tsp = BaseTSP.from_networkx( + name, + comment, + "PCTSP", + normalized_graph, + edge_weight_format=EdgeWeightFormat.LOWER_DIAG_ROW, + weight_attr_name="cost", + ) # save to txt file + problem = tsp.to_tsplib95() + txt_filepath = dataset_dir / f"{name}.txt" + problem.save(txt_filepath) + return tsp def to_pandas_nodelist(G: nx.Graph) -> pd.DataFrame: @@ -83,36 +106,19 @@ def to_pandas_nodelist(G: nx.Graph) -> pd.DataFrame: return pd.DataFrame([{"node": node, **data} for node, data in G.nodes(data=True)]) -def main(): - dataset_dir = Path("/", "Users", "patrick", "Datasets", "pctsp", "londonaq") - - root_vertex = choose_root(normalized_graph) - nx.set_node_attributes(normalized_graph, False, "is_depot") - normalized_graph.nodes[root_vertex]["is_depot"] = True - - ndf = to_pandas_nodelist(normalized_graph) - ndf = ndf.rename(columns={"prize": "demand"}) - ndf["demand"] = ndf["demand"].apply(lambda x: int(round(x))) - edf = nx.to_pandas_edgelist(normalized_graph) - edf = edf.rename(columns={"cost": "weight"}) - edf["weight"] = edf["weight"].apply(lambda x: int(round(x))) - - name = "londonaq_tiny" - comment = "Prize-collecting TSP on air quality dataset in London." - problem_type = "PCTSP" - problem = BaseTSP.from_dataframes( - name, - comment, - problem_type, - edf, - ndf, - edge_weight_format=EdgeWeightFormat.LOWER_DIAG_ROW, +def main( + location: LondonaqLocationShort, + dataset_dir: Path = Path("/", "Users", "patrick", "Datasets", "pctsp", "londonaq") +): + timestamp_id: LondonaqTimestamp = LondonaqTimestamp.A + location_id = LondonaqLocation[location.value] + name = londonaq_graph_name(location, timestamp_id) + comment = londonaq_comment(location, timestamp_id) + generate_londonaq_dataset( + dataset_dir / name.value, name, comment, + edges_csv_filename=name.value+"_edges.csv", + nodes_csv_filename=name.value+"_nodes.csv" ) - graph = problem.get_graph() - - tsplib = problem.to_tsplib95() - tsplib.save("test.txt") - loaded_tsplib = tsplib95.models.StandardProblem.load("test.txt") if __name__ == "__main__": diff --git a/tspwplib/problem.py b/tspwplib/problem.py index d07e255..553a3ca 100644 --- a/tspwplib/problem.py +++ b/tspwplib/problem.py @@ -33,6 +33,8 @@ class BaseTSP(pydantic.BaseModel): Each field is validated with type hinting. """ + # pylint: disable=too-many-arguments + capacity: Optional[Union[int, float]] comment: str demands: Optional[VertexFunction] @@ -68,12 +70,13 @@ def from_networkx( display_data: Optional[List[Tuple[Vertex, float, float]]] = None, display_data_type: DisplayDataType = DisplayDataType.NO_DISPLAY, edge_weight_format: EdgeWeightFormat = EdgeWeightFormat.FULL_MATRIX, + weight_attr_name: str = "weight", ): """Get a base TSP model from a networkx graph""" edge_attr_names = edge_attribute_names(G) node_attr_names = node_attribute_names(G) - if "weight" not in edge_attr_names: - message = "'weight' is required to be an edge attribute, but was not found in graph. " + if weight_attr_name not in edge_attr_names: + message = f"{weight_attr_name} is required to be an edge attribute, but was not found in graph. " message += "This function only supports an explicit weight function. " raise NotImplementedError(message) is_2d = "x" in node_attr_names and "y" in node_attr_names @@ -109,7 +112,7 @@ def from_networkx( if "is_depot" in node_attr_names: depots = [node for node, data in G.nodes(data=True) if data["is_depot"]] edge_data = list(G.edges()) - edge_weights = nx.get_edge_attributes(G, "weight") + edge_weights = nx.get_edge_attributes(G, weight_attr_name) return cls( capacity=capacity, comment=comment, diff --git a/tspwplib/types.py b/tspwplib/types.py index b5d27c6..8625dbd 100644 --- a/tspwplib/types.py +++ b/tspwplib/types.py @@ -1,6 +1,6 @@ """Type hinting and names""" -from datetime import datetime +from datetime import datetime, timezone from enum import Enum, IntEnum from typing import Any, Dict, List, Tuple, Union @@ -243,12 +243,13 @@ class LondonaqGraphName(StrEnumMixin, str, Enum): """Names of graphs with London air quality forecasts""" laqkxA = "laqkxA" + laqtinyA = "laqtinyA" class LondonaqTimestamp(Enum): """Timestamps of the forecasts for London air quality forecasts""" - A = datetime(2021, 10, 12, 8, 0, 0, tzinfo=datetime.timezone.utc) # 9am BST + A = datetime(2021, 10, 13, 8, 0, 0, tzinfo=timezone.utc) # 9am BST class LondonaqLocation(StrEnumMixin, str, Enum): @@ -256,9 +257,18 @@ class LondonaqLocation(StrEnumMixin, str, Enum): bb = "Big Ben" kx = "King's Cross" + tiny = "King's Cross" ro = "Royal Observatory Greenwich" ws = "Wembley Stadium" +class LondonaqLocationShort(StrEnumMixin, str, Enum): + """Short codes for londonaq locations""" + + bb = "bb" + kx = "kx" + tiny = "tiny" + ro = "ro" + ws = "ws" class Generation(StrEnumMixin, str, Enum): """Generations of TSPwP problem instances""" diff --git a/tspwplib/utils.py b/tspwplib/utils.py index a806c2d..9486f5f 100644 --- a/tspwplib/utils.py +++ b/tspwplib/utils.py @@ -4,7 +4,14 @@ from pathlib import Path from typing import List import networkx as nx -from .types import Alpha, Generation, GraphName +from .types import ( + Alpha, + Generation, + GraphName, + LondonaqGraphName, + LondonaqLocation, + LondonaqTimestamp, +) def build_path_to_oplib_instance( @@ -67,3 +74,19 @@ def node_attribute_names(G: nx.Graph) -> List[str]: List of node attribute names """ return list(set(chain.from_iterable(d.keys() for _, d in G.nodes(data=True)))) + + +def londonaq_graph_name( + location_id: LondonaqLocation, timestamp_id: LondonaqTimestamp +) -> LondonaqGraphName: + """Get a londonaq graph name""" + return LondonaqGraphName["laq" + location_id.name + timestamp_id.name] + + +def londonaq_comment( + location_id: LondonaqLocation, timestamp_id: LondonaqTimestamp +) -> str: + """Get a comment for a londonaq dataset""" + comment = f"A London air quality dataset starting at {location_id.value}. " + comment += f"The UTC timestamp for the air quality forecast is {timestamp_id.value.isoformat()}" + return comment From 7e5f91a6cee35939e00f818704b0b2d351bbcecf Mon Sep 17 00:00:00 2001 From: Patrick O'Hara Date: Thu, 14 Oct 2021 16:47:15 +0100 Subject: [PATCH 05/12] =?UTF-8?q?=F0=9F=90=9B=20Old=20edges=20to=20json?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- scripts/from_urbanair.py | 41 +++++++++++++++++++++++----------------- tspwplib/types.py | 2 ++ 2 files changed, 26 insertions(+), 17 deletions(-) diff --git a/scripts/from_urbanair.py b/scripts/from_urbanair.py index a0f96b7..b10afc6 100644 --- a/scripts/from_urbanair.py +++ b/scripts/from_urbanair.py @@ -3,12 +3,16 @@ import networkx as nx import pandas as pd -import tsplib95 import typer from tspwplib import split_graph_from_properties from tspwplib.problem import BaseTSP -from tspwplib.types import Edge, EdgeWeightFormat, LondonaqGraphName, LondonaqLocation, LondonaqLocationShort, LondonaqTimestamp +from tspwplib.types import ( + EdgeWeightFormat, + LondonaqGraphName, + LondonaqLocationShort, + LondonaqTimestamp, +) from tspwplib.utils import londonaq_comment, londonaq_graph_name @@ -39,7 +43,6 @@ def generate_londonaq_dataset( ) -> BaseTSP: """Generate a londonaq dataset""" - # get the CSV files for edges and nodes dataset_dir.mkdir(parents=False, exist_ok=True) edges_filepath = dataset_dir / edges_csv_filename @@ -49,6 +52,7 @@ def generate_londonaq_dataset( if not nodes_filepath.exists(): raise FileNotFoundError(nodes_filepath) nodes_df = pd.read_csv(nodes_filepath) + nodes_df = nodes_df.set_index("node") edges_df = pd.read_csv(edges_filepath) # split edges then relabel the nodes @@ -69,20 +73,22 @@ def generate_londonaq_dataset( (normalize_map[u], normalize_map[v]): data["old_edge"] for u, v, data in split_graph.edges(data=True) } - old_vertices= {new: old for old, new in normalize_map.items()} + old_vertices = {new: old for old, new in normalize_map.items()} - # TODO convert tuples to lists when dumping - # with open(dataset_dir / old_edge_lookup, "w", encoding="UTF-8") as f: - # json.dump(old_edges, f) + # convert tuples to lists when dumping + json_old_edges = {list(key): list(value) for key, value in old_edges.items()} + with open(dataset_dir / old_edge_lookup, "w", encoding="UTF-8") as f: + json.dump(json_old_edges, f) with open(dataset_dir / old_node_lookup, "w", encoding="UTF-8") as f: json.dump(old_vertices, f) - # TODO get root vertex - root_vertex = 0 + # get depots + depots = list(nodes_df.loc[nodes_df.is_depot].index.map(normalize_map)) nx.set_node_attributes(normalized_graph, False, "is_depot") - normalized_graph.nodes[root_vertex]["is_depot"] = True + for v in depots: + normalized_graph.nodes[v]["is_depot"] = True - # TODO get node co-ordinates + # NOTE (not implemented yet) get node co-ordinates # get TSP representation tsp = BaseTSP.from_networkx( @@ -107,17 +113,18 @@ def to_pandas_nodelist(G: nx.Graph) -> pd.DataFrame: def main( - location: LondonaqLocationShort, - dataset_dir: Path = Path("/", "Users", "patrick", "Datasets", "pctsp", "londonaq") + location: LondonaqLocationShort, + dataset_dir: Path = Path("/", "Users", "patrick", "Datasets", "pctsp", "londonaq"), ): timestamp_id: LondonaqTimestamp = LondonaqTimestamp.A - location_id = LondonaqLocation[location.value] name = londonaq_graph_name(location, timestamp_id) comment = londonaq_comment(location, timestamp_id) generate_londonaq_dataset( - dataset_dir / name.value, name, comment, - edges_csv_filename=name.value+"_edges.csv", - nodes_csv_filename=name.value+"_nodes.csv" + dataset_dir / name.value, + name, + comment, + edges_csv_filename=name.value + "_edges.csv", + nodes_csv_filename=name.value + "_nodes.csv", ) diff --git a/tspwplib/types.py b/tspwplib/types.py index 8625dbd..ba80453 100644 --- a/tspwplib/types.py +++ b/tspwplib/types.py @@ -261,6 +261,7 @@ class LondonaqLocation(StrEnumMixin, str, Enum): ro = "Royal Observatory Greenwich" ws = "Wembley Stadium" + class LondonaqLocationShort(StrEnumMixin, str, Enum): """Short codes for londonaq locations""" @@ -270,6 +271,7 @@ class LondonaqLocationShort(StrEnumMixin, str, Enum): ro = "ro" ws = "ws" + class Generation(StrEnumMixin, str, Enum): """Generations of TSPwP problem instances""" From f0f496705295e48e13f48fea7cc1102301257255 Mon Sep 17 00:00:00 2001 From: Patrick O'Hara Date: Thu, 14 Oct 2021 18:58:34 +0100 Subject: [PATCH 06/12] =?UTF-8?q?=F0=9F=9A=A8=20Linting?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/tests.yml | 2 +- .pylintrc | 2 +- scripts/from_urbanair.py | 24 ++---- tests/test_converter/test_split_converter.py | 5 -- tests/test_profits_problem.py | 10 --- tspwplib/converter.py | 12 +-- tspwplib/problem.py | 80 ++++++++------------ tspwplib/types.py | 9 ++- tspwplib/walk.py | 32 +++++--- 9 files changed, 72 insertions(+), 104 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 30756fb..df862e2 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -35,6 +35,6 @@ jobs: - name: Formatting run: black --check */ - name: Lint - run: pylint --rcfile .pylintrc setup.py tspwplib tests/* + run: pylint --rcfile .pylintrc setup.py tspwplib tests/* scripts/* - name: Type hinting run: mypy --config-file .mypy.ini tspwplib \ No newline at end of file diff --git a/.pylintrc b/.pylintrc index a081c58..4c443ab 100644 --- a/.pylintrc +++ b/.pylintrc @@ -3,7 +3,7 @@ # A comma-separated list of package or module names from where C extensions may # be loaded. Extensions are loading into the active Python interpreter and may # run arbitrary code. -extension-pkg-whitelist= +extension-pkg-whitelist=pydantic # Specify a score threshold to be exceeded before program exits with error. fail-under=10.0 diff --git a/scripts/from_urbanair.py b/scripts/from_urbanair.py index b10afc6..eb1a3d6 100644 --- a/scripts/from_urbanair.py +++ b/scripts/from_urbanair.py @@ -1,3 +1,5 @@ +"""Script for generating a tsplib style txt file from londonaq CSV""" + import json from pathlib import Path @@ -15,19 +17,6 @@ ) from tspwplib.utils import londonaq_comment, londonaq_graph_name - -def choose_root(G): - # choose the root vertex - root = None - root_found = False - - for vertex in G.nodes(): - if not root_found and G.degree(vertex) > 2: - root_found = True - root = vertex - return root - - OLD_EDGE_LOOKUP_JSON = "old_edge_lookup.json" OLD_NODE_LOOKUP_JSON = "old_node_lookup.json" @@ -77,10 +66,10 @@ def generate_londonaq_dataset( # convert tuples to lists when dumping json_old_edges = {list(key): list(value) for key, value in old_edges.items()} - with open(dataset_dir / old_edge_lookup, "w", encoding="UTF-8") as f: - json.dump(json_old_edges, f) - with open(dataset_dir / old_node_lookup, "w", encoding="UTF-8") as f: - json.dump(old_vertices, f) + with open(dataset_dir / old_edge_lookup, "w", encoding="UTF-8") as json_file: + json.dump(json_old_edges, json_file) + with open(dataset_dir / old_node_lookup, "w", encoding="UTF-8") as json_file: + json.dump(old_vertices, json_file) # get depots depots = list(nodes_df.loc[nodes_df.is_depot].index.map(normalize_map)) @@ -116,6 +105,7 @@ def main( location: LondonaqLocationShort, dataset_dir: Path = Path("/", "Users", "patrick", "Datasets", "pctsp", "londonaq"), ): + """Entrypoint for generating londonaq dataset""" timestamp_id: LondonaqTimestamp = LondonaqTimestamp.A name = londonaq_graph_name(location, timestamp_id) comment = londonaq_comment(location, timestamp_id) diff --git a/tests/test_converter/test_split_converter.py b/tests/test_converter/test_split_converter.py index 64cfab6..2716864 100644 --- a/tests/test_converter/test_split_converter.py +++ b/tests/test_converter/test_split_converter.py @@ -35,11 +35,6 @@ def test_split_graph_from_properties(): (0, 2): {"weight": 2, "cost": 5}, } G = split_graph_from_properties(properties) - for v in G: - if v < 0: - assert G.nodes[v]["prize"] > 0 - else: - assert G.nodes[v]["prize"] == 0 for _, _, data in G.edges(data=True): old_edge = data["old_edge"] assert data["cost"] == float(properties[old_edge]["cost"]) / 2.0 diff --git a/tests/test_profits_problem.py b/tests/test_profits_problem.py index 69306cc..c594fd0 100644 --- a/tests/test_profits_problem.py +++ b/tests/test_profits_problem.py @@ -124,13 +124,3 @@ def test_is_pctsp_yes_instance( for i in range(num_nodes): edge_list.append((i, (i + 1) % num_nodes)) assert is_pctsp_yes_instance(graph, quota, root, edge_list) - - -from pathlib import Path - - -def test_parse_problem(): - dataset_dir = Path("/", "Users", "patrick", "Datasets", "pctsp", "londonaq") - prob_filename = "londonaq_tiny.txt" - filepath = dataset_dir / prob_filename - ProfitsProblem.load(filepath) diff --git a/tspwplib/converter.py b/tspwplib/converter.py index 53fff2b..4bd94a5 100644 --- a/tspwplib/converter.py +++ b/tspwplib/converter.py @@ -334,7 +334,7 @@ def to_simple_undirected(G: nx.MultiGraph) -> nx.Graph: return simple_graph -def split_edges(edge_list: EdgeList) -> EdgeList: +def split_edges(edge_list: EdgeList) -> List[Edge]: """Split each edge (u,v) by adding a new vertex w and two new edges (u,w), (w,v). Args: @@ -345,7 +345,7 @@ def split_edges(edge_list: EdgeList) -> EdgeList: Size of returned edge list is twice the size of the input edges. """ new_vertex = -1 - splits: EdgeList = [] + splits: List[Edge] = [] for edge in edge_list: splits.append((edge[0], new_vertex)) splits.append((new_vertex, edge[1])) @@ -356,7 +356,7 @@ def split_edges(edge_list: EdgeList) -> EdgeList: LookupFromSplit = Dict[Edge, Union[Edge, MultiEdge]] -def lookup_from_split(edge_list: EdgeList, splits: EdgeList) -> LookupFromSplit: +def lookup_from_split(edge_list: EdgeList, splits: List[Edge]) -> LookupFromSplit: """Get lookup from a split edge to an original edge. Args: @@ -376,7 +376,7 @@ def lookup_from_split(edge_list: EdgeList, splits: EdgeList) -> LookupFromSplit: LookupToSplit = Dict[Union[Edge, MultiEdge], Tuple[Edge, Edge]] -def lookup_to_split(edge_list: EdgeList, splits: EdgeList) -> LookupToSplit: +def lookup_to_split(edge_list: EdgeList, splits: List[Edge]) -> LookupToSplit: """Get lookup from an original edge to the two split edges. Args: @@ -417,7 +417,9 @@ def prize_from_weighted_edges( return prizes -def split_edge_cost(edge_cost: EdgeFunction, to_split: LookupToSplit) -> EdgeFunction: +def split_edge_cost( + edge_cost: EdgeFunction, to_split: LookupToSplit +) -> Dict[Edge, float]: """Assign half the cost of the original edge to each of the split edges. Args: diff --git a/tspwplib/problem.py b/tspwplib/problem.py index 553a3ca..aac2e02 100644 --- a/tspwplib/problem.py +++ b/tspwplib/problem.py @@ -1,7 +1,7 @@ """Functions and classes for datasets""" import random -from typing import List, Optional, Tuple, Union +from typing import List, Optional, Union import networkx as nx import pandas as pd @@ -26,6 +26,7 @@ from .utils import edge_attribute_names, node_attribute_names from .walk import is_simple_cycle, walk_from_edge_list, total_prize +# pylint: disable=too-few-public-methods class BaseTSP(pydantic.BaseModel): """A pydantic model for tsplib95. @@ -40,7 +41,7 @@ class BaseTSP(pydantic.BaseModel): demands: Optional[VertexFunction] depots: VertexList dimension: int - display_data: Optional[List[Tuple[int, float, float]]] + display_data: Optional[NodeCoords] display_data_type: DisplayDataType edge_data: EdgeList edge_data_format: EdgeDataFormat @@ -67,7 +68,7 @@ def from_networkx( problem_type: str, G: nx.Graph, capacity: Optional[Union[int, float]] = None, - display_data: Optional[List[Tuple[Vertex, float, float]]] = None, + display_data: Optional[NodeCoords] = None, display_data_type: DisplayDataType = DisplayDataType.NO_DISPLAY, edge_weight_format: EdgeWeightFormat = EdgeWeightFormat.FULL_MATRIX, weight_attr_name: str = "weight", @@ -76,21 +77,24 @@ def from_networkx( edge_attr_names = edge_attribute_names(G) node_attr_names = node_attribute_names(G) if weight_attr_name not in edge_attr_names: - message = f"{weight_attr_name} is required to be an edge attribute, but was not found in graph. " + message = f"{weight_attr_name} is required to be an edge attribute, " + message += "but was not found in graph. " message += "This function only supports an explicit weight function. " raise NotImplementedError(message) is_2d = "x" in node_attr_names and "y" in node_attr_names is_3d = is_2d and "z" in node_attr_names if is_3d: - node_coord_type = NodeCoordType.THREED_COORDS - node_coords = { - node: (data["x"], data["y"], data["z"]) - for node, data in G.nodes(data=True) - } - elif is_2d: + raise NotImplementedError("3D coords are not supported") + # node_coord_type = NodeCoordType.THREED_COORDS + # node_coords = { + # node: (float(data["x"]), float(data["y"]), float(data["z"])) + # for node, data in G.nodes(data=True) + # } + if is_2d: node_coord_type = NodeCoordType.TWOD_COORDS node_coords = { - node: (data["x"], data["y"]) for node, data in G.nodes(data=True) + node: (float(data["x"]), float(data["y"])) + for node, data in G.nodes(data=True) } else: node_coord_type = NodeCoordType.NO_COORDS @@ -143,7 +147,7 @@ def from_dataframes( edges_df: pd.DataFrame, nodes_df: pd.DataFrame, capacity: Optional[Union[int, float]] = None, - display_data: Optional[List[Tuple[Vertex, float, float]]] = None, + display_data: Optional[NodeCoords] = None, display_data_type: DisplayDataType = DisplayDataType.NO_DISPLAY, edge_weight_format: EdgeWeightFormat = EdgeWeightFormat.FULL_MATRIX, ): @@ -165,11 +169,8 @@ def from_dataframes( is_2d = "x" in nodes_df.columns and "y" in nodes_df.columns is_3d = is_2d and "z" in nodes_df.columns if is_3d: - node_coord_type = NodeCoordType.THREED_COORDS - node_coords = dict( - zip(nodes_df["node"], zip(nodes_df["x"], nodes_df["y"], nodes_df["z"])) - ) - elif is_2d: + raise NotImplementedError("3D coords not supported") + if is_2d: node_coord_type = NodeCoordType.TWOD_COORDS node_coords = dict(zip(nodes_df["node"], zip(nodes_df["x"], nodes_df["y"]))) else: @@ -290,10 +291,8 @@ def __set_node_attributes(self, graph: nx.Graph) -> None: graph.nodes[vertex]["display"] = self.display_data[vertex] if self.node_coords: coords = self.node_coords[vertex] - graph.nodes[vertex]["x"] = coords["x"] - graph.nodes[vertex]["y"] = coords["y"] - if self.node_coord_type == NodeCoordType.THREED_COORDS: - graph.nodes[vertex]["z"] = coords["z"] + graph.nodes[vertex]["x"] = coords[0] + graph.nodes[vertex]["y"] = coords[1] def __add_edges(self, graph: nx.Graph) -> None: """Add edges from edge data @@ -301,8 +300,8 @@ def __add_edges(self, graph: nx.Graph) -> None: Args: graph: Input graph """ - for (u, v) in self.edge_data: - graph.add_edge(u, v) + for edge in self.edge_data: + graph.add_edge(edge[0], edge[1]) def __set_edge_attributes(self, graph: nx.Graph) -> None: """Set edge attributes for 'weight' and 'is_fixed' @@ -353,7 +352,9 @@ def get_root_vertex(self) -> Vertex: def get_total_prize(self) -> Union[int, float]: """"Get the total prize (demand) of all vertices""" - return sum(self.demands.values()) + if self.demands: + return sum(self.demands.values()) + return 0 class ProfitsProblem(tsplib95.models.StandardProblem): @@ -391,11 +392,11 @@ def edge_removal_probability(self) -> float: def __set_edge_attributes(self, graph: nx.Graph, names: VertexLookup) -> None: """Set edge attributes""" # add every edge with some associated metadata - for u, v in self.get_edges(): - cost: int = self.get_weight(u, v) + for edge in self.get_edges(): + cost: int = self.get_weight(edge[0], edge[1]) # pylint: disable=unsupported-membership-test # is_fixed: bool = (u, v) in self.fixed_edges - graph.add_edge(names[u], names[v], cost=cost) + graph.add_edge(names[edge[0]], names[edge[1]], cost=cost) def __set_graph_attributes(self, graph: nx.Graph) -> None: """Set attributes of the graph such as the name""" @@ -533,9 +534,8 @@ def get_root_vertex(self, normalize: bool = False) -> Vertex: except KeyError as key_error: raise ValueError("The list of depots is empty") from key_error - def get_edges( - self, normalize: bool = False - ) -> EdgeList: # pylint: disable=arguments-differ + # pylint: disable=arguments-differ + def get_edges(self, normalize: bool = False) -> EdgeList: """Get a list of edges in the graph If the `edge_removal_probability` is set in the constructor, @@ -598,23 +598,3 @@ def is_pctsp_yes_instance( and root_vertex == walk[0] and root_vertex == walk[len(walk) - 1] ) - - -def parse_edge_list_weights(text: str) -> EdgeFunction: - print(text) - return {} - - -def render_edge_list_weights(edge_weights: EdgeFunction) -> str: - """Render edge weight dictionary to a string - - Args: - edge_weights: Keys are edge tuples. Values are the weight of the edge. - - Returns: - String representation of edge weights, including new lines. - """ - render = "" - for (u, v), weight in edge_weights.items(): - render += f"{u} {v} {weight}\n" - return render diff --git a/tspwplib/types.py b/tspwplib/types.py index ba80453..0c0409c 100644 --- a/tspwplib/types.py +++ b/tspwplib/types.py @@ -6,7 +6,7 @@ # vertex data structures Vertex = int -VertexFunction = Dict[Vertex, Union[int, float]] +VertexFunction = Dict[Vertex, int] VertexList = List[Vertex] VertexLookup = Dict[Vertex, Vertex] VertexProperties = Dict[Vertex, Dict[str, Any]] @@ -15,13 +15,13 @@ Edge = Tuple[Vertex, Vertex] MultiEdge = Tuple[Vertex, Vertex, int] EdgeList = List[Union[Edge, MultiEdge]] -EdgeFunction = Dict[Union[Edge, MultiEdge], Union[int, float]] +EdgeFunction = Dict[Union[Edge, MultiEdge], int] EdgeProperties = Dict[Union[Edge, MultiEdge], Dict[str, Any]] # path data structures DisjointPaths = Tuple[VertexList, VertexList] -# pylint: disable=invalid-name +# pylint: disable=invalid-name,too-few-public-methods class StrEnumMixin: @@ -106,7 +106,8 @@ class DisplayDataType(StrEnumMixin, str, Enum): NO_DISPLAY = "NO_DISPLAY" # No graphical display is possible -NodeCoords = Dict[Vertex, Union[Tuple[float, float], Tuple[float, float, float]]] +# NodeCoords = Dict[Vertex, Union[Tuple[float, float], Tuple[float, float, float]]] +NodeCoords = Dict[Vertex, Tuple[float, float]] class VertexFunctionName(StrEnumMixin, str, Enum): diff --git a/tspwplib/walk.py b/tspwplib/walk.py index ce19ad8..b99be33 100644 --- a/tspwplib/walk.py +++ b/tspwplib/walk.py @@ -5,7 +5,14 @@ import networkx as nx from .exception import EdgesNotAdjacentException, NotSimpleException -from .types import Edge, EdgeFunctionName, EdgeList, Vertex, VertexList, VertexLookup +from .types import ( + EdgeFunction, + EdgeFunctionName, + EdgeList, + Vertex, + VertexList, + VertexLookup, +) def edge_list_from_walk(walk: VertexList) -> EdgeList: @@ -74,9 +81,9 @@ def order_edge_list(unordered_edges: EdgeList) -> EdgeList: # create a lookup table of the first and second occurence of each vertex in the edge list first_occurence: VertexLookup = {} second_occurence: VertexLookup = {} - for i, (u, v) in enumerate(unordered_edges): - __add_vertex_to_occurence(first_occurence, second_occurence, u, i) - __add_vertex_to_occurence(first_occurence, second_occurence, v, i) + for i, edge in enumerate(unordered_edges): + __add_vertex_to_occurence(first_occurence, second_occurence, edge[0], i) + __add_vertex_to_occurence(first_occurence, second_occurence, edge[1], i) # use the lookup tables to place the edges in the correct order in the edge list ordered_edges = [] @@ -84,8 +91,9 @@ def order_edge_list(unordered_edges: EdgeList) -> EdgeList: target_index = -1 found_source = False first_vertex = 0 - for i, (u, v) in enumerate(unordered_edges): - u, v = unordered_edges[i] + for i, edge in enumerate(unordered_edges): + u = edge[0] + v = edge[1] if not found_source and u not in second_occurence: j = i found_source = True @@ -108,7 +116,8 @@ def order_edge_list(unordered_edges: EdgeList) -> EdgeList: if visited[j]: raise NotSimpleException() visited[j] = True - u, v = edge + u = edge[0] + v = edge[1] ordered_edges.append(edge) if j == target_index: break @@ -221,7 +230,7 @@ def is_walk(G: nx.Graph, walk: VertexList) -> bool: True if all vertices are adjacent in the graph """ edge_list = edge_list_from_walk(walk) - return all(G.has_edge(u, v) for u, v in edge_list) + return all(G.has_edge(edge[0], edge[1]) for edge in edge_list) def is_simple_cycle(G: nx.Graph, cycle: VertexList) -> bool: @@ -273,7 +282,7 @@ def total_prize(prizes: Mapping[Vertex, int], vertices: Iterable[Vertex]) -> int return sum_prize -def total_cost(costs: Mapping[Edge, int], edges: EdgeList) -> int: +def total_cost(costs: EdgeFunction, edges: EdgeList) -> int: """Total cost of edges Args: @@ -283,13 +292,14 @@ def total_cost(costs: Mapping[Edge, int], edges: EdgeList) -> int: Returns: Total cost of edges """ - sum_cost: int = 0 + sum_cost = 0 for edge in edges: try: sum_cost += costs[edge] except KeyError: try: - u, v = edge + u = edge[0] + v = edge[1] sum_cost += costs[(v, u)] except KeyError as second_key_error: raise KeyError( From 250937b78cc4e9e7e541906367aeb2f9062e7c4a Mon Sep 17 00:00:00 2001 From: Patrick O'Hara Date: Thu, 14 Oct 2021 19:06:50 +0100 Subject: [PATCH 07/12] =?UTF-8?q?=E2=AC=86=EF=B8=8F=20networkx=20doesn't?= =?UTF-8?q?=20support=20py36=20anymore?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/tests.yml | 2 +- setup.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index df862e2..210ebf0 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -8,7 +8,7 @@ jobs: strategy: matrix: - python-version: [3.6, 3.7, 3.8, 3.9] + python-version: [3.7, 3.8, 3.9] env: OPLIB_ROOT: ../OPLib diff --git a/setup.py b/setup.py index 124dbbe..2ae139a 100644 --- a/setup.py +++ b/setup.py @@ -15,14 +15,14 @@ ], name="tspwplib", packages=["tspwplib"], - python_requires=">=3.6", + python_requires=">=3.7", license="MIT", classifiers=[ "Programming Language :: Python", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3 :: Only", "Operating System :: OS Independent", ], From 221d7b51fb2737998b2fe2400f852f1dc55f1818 Mon Sep 17 00:00:00 2001 From: Patrick O'Hara Date: Thu, 14 Oct 2021 19:09:35 +0100 Subject: [PATCH 08/12] Black formatting --- tspwplib/problem.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tspwplib/problem.py b/tspwplib/problem.py index aac2e02..fa925bb 100644 --- a/tspwplib/problem.py +++ b/tspwplib/problem.py @@ -28,6 +28,7 @@ # pylint: disable=too-few-public-methods + class BaseTSP(pydantic.BaseModel): """A pydantic model for tsplib95. From 55098f18db8509d79b761d1496b6d90ef7d5a76a Mon Sep 17 00:00:00 2001 From: Patrick O'Hara Date: Thu, 14 Oct 2021 19:15:00 +0100 Subject: [PATCH 09/12] formatting again --- requirements.txt | 2 +- tspwplib/problem.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 1052325..7b9a412 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -black>=20.8b1 +black>=21.9b0 markdown-include>=0.6.0 mkdocs>=1.1.2 mkdocstrings>=0.13.6 diff --git a/tspwplib/problem.py b/tspwplib/problem.py index fa925bb..3f5eaf1 100644 --- a/tspwplib/problem.py +++ b/tspwplib/problem.py @@ -352,7 +352,7 @@ def get_root_vertex(self) -> Vertex: raise ValueError("The list of depots is empty") from key_error def get_total_prize(self) -> Union[int, float]: - """"Get the total prize (demand) of all vertices""" + """ "Get the total prize (demand) of all vertices""" if self.demands: return sum(self.demands.values()) return 0 From 8245ebb16a6954ea15c5da256b97c6d5adc7a67b Mon Sep 17 00:00:00 2001 From: Patrick O'Hara Date: Thu, 14 Oct 2021 19:21:48 +0100 Subject: [PATCH 10/12] Remove scripts from linting --- .github/workflows/tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 210ebf0..dae1c1e 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -35,6 +35,6 @@ jobs: - name: Formatting run: black --check */ - name: Lint - run: pylint --rcfile .pylintrc setup.py tspwplib tests/* scripts/* + run: pylint --rcfile .pylintrc setup.py tspwplib tests/* - name: Type hinting run: mypy --config-file .mypy.ini tspwplib \ No newline at end of file From 10301046a7b10d6a474641c6cc101f3cc08e864b Mon Sep 17 00:00:00 2001 From: Patrick O'Hara Date: Fri, 15 Oct 2021 13:22:07 +0100 Subject: [PATCH 11/12] =?UTF-8?q?=E2=9C=85=20Test=20loading=20from=20tspli?= =?UTF-8?q?b95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/test_base_tsp.py | 18 +++++++++++++ tspwplib/__init__.py | 13 +++++++++- tspwplib/problem.py | 59 +++++++++++++++++++++++++++++++++++------- tspwplib/types.py | 14 +++++----- 4 files changed, 87 insertions(+), 17 deletions(-) create mode 100644 tests/test_base_tsp.py diff --git a/tests/test_base_tsp.py b/tests/test_base_tsp.py new file mode 100644 index 0000000..0933649 --- /dev/null +++ b/tests/test_base_tsp.py @@ -0,0 +1,18 @@ +"""Tests for the pydantic representation of a TSP""" + +import pytest +from tsplib95.models import StandardProblem +from tspwplib import BaseTSP, GraphName, build_path_to_tsplib_instance + + +@pytest.mark.parametrize("gname", list(GraphName)) +def test_from_tsplib95(tsplib_root, gname): + """Test tsplib95 problems can be read into BaseTSP""" + # only load problems with less than 1000 vertices + n_nodes = int("".join(filter(str.isdigit, gname.value))) + if n_nodes < 1000: + tsp_path = build_path_to_tsplib_instance(tsplib_root, gname) + assert tsp_path.exists() + problem = StandardProblem.load(tsp_path) + tsp = BaseTSP.from_tsplib95(problem) + assert len(tsp.edge_data) == len(list(problem.get_edges())) diff --git a/tspwplib/__init__.py b/tspwplib/__init__.py index b92e07c..1deb36a 100644 --- a/tspwplib/__init__.py +++ b/tspwplib/__init__.py @@ -14,6 +14,7 @@ split_head, split_tail, tail_prize, + to_simple_undirected, to_vertex_dataframe, ) from .complete import is_complete, is_complete_with_self_loops @@ -23,7 +24,7 @@ NotSimpleCycleException, NotSimplePathException, ) -from .problem import ProfitsProblem, is_pctsp_yes_instance +from .problem import BaseTSP, ProfitsProblem, is_pctsp_yes_instance from .utils import build_path_to_oplib_instance, build_path_to_tsplib_instance from .types import ( Alpha, @@ -34,6 +35,10 @@ EdgeList, Generation, GraphName, + LondonaqGraphName, + LondonaqLocation, + LondonaqLocationShort, + LondonaqTimestamp, OptimalSolutionTSP, Vertex, VertexFunction, @@ -58,6 +63,7 @@ __all__ = [ "Alpha", + "BaseTSP", "DisjointPaths", "Edge", "EdgeFunction", @@ -66,6 +72,10 @@ "EdgesNotAdjacentException", "Generation", "GraphName", + "LondonaqGraphName", + "LondonaqLocation", + "LondonaqLocationShort", + "LondonaqTimestamp", "NotSimpleException", "NotSimpleCycleException", "NotSimplePathException", @@ -104,6 +114,7 @@ "split_head", "split_tail", "tail_prize", + "to_simple_undirected", "to_vertex_dataframe", "total_cost", "total_cost_networkx", diff --git a/tspwplib/problem.py b/tspwplib/problem.py index 3f5eaf1..c7a1dd1 100644 --- a/tspwplib/problem.py +++ b/tspwplib/problem.py @@ -51,7 +51,7 @@ class BaseTSP(pydantic.BaseModel): edge_weight_type: EdgeWeightType fixed_edges: EdgeList name: str - node_coords: NodeCoords + node_coords: Optional[NodeCoords] node_coord_type: NodeCoordType problem_type: str tours: Optional[List[VertexList]] @@ -217,25 +217,66 @@ def from_dataframes( @classmethod def from_tsplib95(cls, problem: tsplib95.models.StandardProblem): """Get a TSP base model from a StandardProblem object""" + + display_data_type = ( + problem.display_data_type + if problem.display_data_type + else DisplayDataType.NO_DISPLAY + ) + edge_data_format = ( + problem.edge_data_format + if problem.edge_data_format + else EdgeDataFormat.EDGE_LIST + ) + edge_weight_type = problem.edge_weight_type + + # edge weight format + edge_weight_format = problem.edge_weight_format + if ( + not edge_weight_format + and edge_weight_type in EdgeWeightType.__members__ + and edge_weight_type != EdgeWeightType.EXPLICIT + ): + edge_weight_format = EdgeWeightFormat.FUNCTION + elif not edge_weight_format and edge_weight_type == EdgeWeightType.EXPLICIT: + raise ValueError( + "Edge weight type is set to EXPLICIT but no edge weight format is given" + ) + elif not edge_weight_format: + raise ValueError( + "Edge weight format in StandardProblem is not set - cannot assign edge weights." + ) + + node_coord_type = ( + problem.node_coord_type + if problem.node_coord_type + else NodeCoordType.NO_COORDS + ) + node_coords = None + if node_coord_type == NodeCoordType.TWOD_COORDS: + node_coords = {i: problem.node_coords.get(i) for i in problem.get_nodes()} + elif node_coord_type == NodeCoordType.THREED_COORDS: + raise NotImplementedError("3D coords not yet supported") + return cls( capacity=problem.capacity, - comment=problem.comment, + comment=problem.comment if problem.comment else "", demands=problem.demands, depots=problem.depots, dimension=problem.dimension, display_data=problem.display_data, - display_data_type=problem.display_data_type, - edge_data=problem.get_edges(), - edge_data_format=problem.edge_data_format, + display_data_type=display_data_type, + edge_data=list(problem.get_edges()), + edge_data_format=edge_data_format, edge_weights={ (i, j): problem.get_weight(i, j) for i, j in problem.get_edges() }, - edge_weight_format=problem.edge_weight_format, - edge_weight_type=problem.edge_weight_type, + edge_weight_format=edge_weight_format, + edge_weight_type=edge_weight_type, fixed_edges=problem.fixed_edges, name=problem.name, - node_coords=[problem.node_coords.get(i) for i in problem.get_nodes()], - node_coord_type=problem.node_coord_type, + node_coords=node_coords, + node_coord_type=node_coord_type, problem_type=problem.type, tours=problem.tours, ) diff --git a/tspwplib/types.py b/tspwplib/types.py index 0c0409c..3738412 100644 --- a/tspwplib/types.py +++ b/tspwplib/types.py @@ -38,13 +38,13 @@ class EdgeWeightType(StrEnumMixin, str, Enum): """Specifies how the edge weights (or distances) are given""" EXPLICIT = "EXPLICIT" # Weights are listed explicitly in the corresponding section - EUC2D = "EUC2D" # Weights are Euclidean distances in 2-D - EUC3D = "EUC3D" # Weights are Euclidean distances in 3-D - MAX2D = "MAX2D" # Weights are maximum distances in 2-D - MAX3D = "MAX3D" # Weights are maximum distances in 3-D - MAN2D = "MAN2D" # Weights are Manhattan distances in 2-D - MAN3D = "MAN3D" # Weights are Manhattan distances in 3-D - CEIL2D = "CEIL2D" # Weights are Euclidean distances in 2-D rounded up + EUC_2D = "EUC_2D" # Weights are Euclidean distances in 2-D + EUC_3D = "EUC_3D" # Weights are Euclidean distances in 3-D + MAX_2D = "MAX_2D" # Weights are maximum distances in 2-D + MAX_3D = "MAX_3D" # Weights are maximum distances in 3-D + MAN_2D = "MAN_2D" # Weights are Manhattan distances in 2-D + MAN_3D = "MAN_3D" # Weights are Manhattan distances in 3-D + CEIL_2D = "CEIL_2D" # Weights are Euclidean distances in 2-D rounded up GEO = "GEO" # Weights are geographical distances ATT = "ATT" # Special distance function for problems att48 and att532 XRAY1 = ( From 7559c13eab215b9f5b5b5c3e1d62977ca7761eed Mon Sep 17 00:00:00 2001 From: Patrick O'Hara Date: Fri, 15 Oct 2021 14:32:02 +0100 Subject: [PATCH 12/12] Change dataset dir --- scripts/from_urbanair.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/scripts/from_urbanair.py b/scripts/from_urbanair.py index eb1a3d6..c264707 100644 --- a/scripts/from_urbanair.py +++ b/scripts/from_urbanair.py @@ -101,10 +101,7 @@ def to_pandas_nodelist(G: nx.Graph) -> pd.DataFrame: return pd.DataFrame([{"node": node, **data} for node, data in G.nodes(data=True)]) -def main( - location: LondonaqLocationShort, - dataset_dir: Path = Path("/", "Users", "patrick", "Datasets", "pctsp", "londonaq"), -): +def main(location: LondonaqLocationShort, dataset_dir: Path): """Entrypoint for generating londonaq dataset""" timestamp_id: LondonaqTimestamp = LondonaqTimestamp.A name = londonaq_graph_name(location, timestamp_id)