diff --git a/CHANGELOG.md b/CHANGELOG.md index 80f97d7f..3180894c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,20 +1,27 @@ +v0.6 +==== + +2020/11/20 + +* Adds vectorized, multi-threaded calculation of many shortest path routes at once +* Restores usability of network.plot() by eliminating usage of Matplotlib's deprecated Basemap toolkit + v0.5.1 ====== 2020/08/05 -* Fixes a performance bug in network.get_node_ids() +* Fixes a performance regression in network.get_node_ids() v0.5 ==== 2020/07/28 -* Adds support for calculating shortest path lengths between arbitrary origins and destinations, with vectorization and multi-threading +* Adds support for calculating shortest path distances between arbitrary origins and destinations, with vectorization and multi-threading * Restores alternate names for aggregation types, which were inadvertently removed in v0.4 * Fixes a bug with matplotlib backends * Improves compilation in MacOS 10.15 Catalina -* Eliminates the scikit-learn dependency * Makes matplotlib and osmnet dependencies optional * Revises the documentation and demo notebook diff --git a/README.md b/README.md index e65f86ad..19be6f79 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ Pandana is a Python library for network analysis that uses [contraction hierarchies](https://en.wikipedia.org/wiki/Contraction_hierarchies) to calculate super-fast travel accessibility metrics and shortest paths. The numerical code is in C++. -v0.5 adds vectorized calculation of shortest path lengths: [network.shortest_path_lengths()](http://udst.github.io/pandana/network.html#pandana.network.Network.shortest_path_lengths). +New in v0.5 and v0.6 is vectorized, multi-threaded calculation of shortest path routes and distances: [network.shortest_paths()](http://udst.github.io/pandana/network.html#pandana.network.Network.shortest_paths), [network.shortest_path_lengths()](http://udst.github.io/pandana/network.html#pandana.network.Network.shortest_path_lengths). 
Documentation: http://udst.github.io/pandana diff --git a/docs/source/changelog.rst b/docs/source/changelog.rst index 6e51e952..826e550e 100644 --- a/docs/source/changelog.rst +++ b/docs/source/changelog.rst @@ -1,23 +1,30 @@ Change log ========== +v0.6 +---- + +2020/11/20 + +* Adds vectorized, multi-threaded `calculation of many shortest path routes <http://udst.github.io/pandana/network.html#pandana.network.Network.shortest_paths>`_ at once +* Restores usability of `network.plot() <http://udst.github.io/pandana/network.html#pandana.network.Network.plot>`_ by eliminating usage of Matplotlib's deprecated Basemap toolkit + v0.5.1 ------ 2020/08/05 -* Fixes a performance bug in network.get_node_ids() +* Fixes a performance regression in `network.get_node_ids() <http://udst.github.io/pandana/network.html#pandana.network.Network.get_node_ids>`_ v0.5 ---- 2020/07/28 -* Adds support for `calculating shortest path lengths <http://udst.github.io/pandana/network.html#pandana.network.Network.shortest_path_lengths>`_ between arbitrary origins and destinations, with vectorization and multi-threading +* Adds support for `calculating shortest path distances <http://udst.github.io/pandana/network.html#pandana.network.Network.shortest_path_lengths>`_ between arbitrary origins and destinations, with vectorization and multi-threading * Restores alternate names for aggregation types, which were inadvertently removed in v0.4 * Fixes a bug with matplotlib backends * Improves compilation in MacOS 10.15 Catalina -* Eliminates the scikit-learn dependency * Makes matplotlib and osmnet dependencies optional * Revises the documentation and demo notebook diff --git a/docs/source/conf.py b/docs/source/conf.py index da49931d..e915973f 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -57,9 +57,9 @@ # built documents. # # The short X.Y version. -version = '0.5.1' +version = '0.6' # The full version, including alpha/beta/rc tags. -release = '0.5.1' +release = '0.6' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. 
diff --git a/docs/source/index.rst b/docs/source/index.rst index 90249f6b..4b893394 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -8,7 +8,7 @@ Pandana Pandana is a Python library for network analysis that uses `contraction hierarchies <https://en.wikipedia.org/wiki/Contraction_hierarchies>`_ to calculate super-fast travel accessibility metrics and shortest paths. The numerical code is in C++. -v0.5.1, released August 5, 2020 +v0.6, released November 11, 2020 Acknowledgments diff --git a/examples/shortest_path_example.py b/examples/shortest_path_example.py index 25705434..1c63a693 100644 --- a/examples/shortest_path_example.py +++ b/examples/shortest_path_example.py @@ -71,16 +71,26 @@ print(net.shortest_path_length(nodes_a[1],nodes_b[1])) print('Repeat with vectorized calculations:') +print(net.shortest_paths(nodes_a[0:2],nodes_b[0:2])) print(net.shortest_path_lengths(nodes_a[0:2],nodes_b[0:2])) # Performance comparison print('Performance comparison for 10k distance calculations:') +t0 = time.time() +for i in range(n): + _ = net.shortest_path(nodes_a[i], nodes_b[i]) +print('Route loop time = {} sec'.format(time.time() - t0)) + +t0 = time.time() +_ = net.shortest_paths(nodes_a, nodes_b) +print('Route vectorized time = {} sec'.format(time.time() - t0)) + t0 = time.time() for i in range(n): _ = net.shortest_path_length(nodes_a[i], nodes_b[i]) -print('Loop time = {} sec'.format(time.time() - t0)) +print('Distance loop time = {} sec'.format(time.time() - t0)) t0 = time.time() _ = net.shortest_path_lengths(nodes_a, nodes_b) -print('Vectorized time = {} sec'.format(time.time() - t0)) +print('Distance vectorized time = {} sec'.format(time.time() - t0)) diff --git a/pandana/__init__.py b/pandana/__init__.py index fbc74d52..f50d2441 100644 --- a/pandana/__init__.py +++ b/pandana/__init__.py @@ -1,3 +1,3 @@ from .network import Network -version = __version__ = '0.5.1' +version = __version__ = '0.6' diff --git a/pandana/loaders/tests/test_osm.py b/pandana/loaders/tests/test_osm.py index a2859e74..14cd62b2 
100644 --- a/pandana/loaders/tests/test_osm.py +++ b/pandana/loaders/tests/test_osm.py @@ -92,10 +92,10 @@ def test_node_query(bbox2): tags = '"amenity"="restaurant"' cafes = osm.node_query(*bbox2, tags=tags) - assert len(cafes) == 4 + assert len(cafes) == 2 assert 'lat' in cafes.columns assert 'lon' in cafes.columns - assert cafes['name'][2965338413] == 'Koja Kitchen' + assert cafes['name'][1419597327] == 'Cream' def test_node_query_raises(): diff --git a/pandana/network.py b/pandana/network.py index f3d96535..6dbe6c98 100644 --- a/pandana/network.py +++ b/pandana/network.py @@ -199,6 +199,45 @@ def shortest_path(self, node_a, node_b, imp_name=None): # map back to external node ids return self.node_ids.values[path] + def shortest_paths(self, nodes_a, nodes_b, imp_name=None): + """ + Vectorized calculation of shortest paths. Accepts a list of origins + and list of destinations and returns a corresponding list of + shortest path routes. Must provide an impedance name if more than + one is available. + + Added in Pandana v0.6. 
+ + Parameters + ---------- + nodes_a : list-like of ints + Source node ids + nodes_b : list-like of ints + Corresponding destination node ids + imp_name : string + The impedance name to use for the shortest path + + Returns + ------- + paths : list of np.ndarray + Nodes traversed in each shortest path + + """ + if len(nodes_a) != len(nodes_b): + raise ValueError("Origin and destination counts don't match: {}, {}" + .format(len(nodes_a), len(nodes_b))) + + # map to internal node indexes + nodes_a_idx = self._node_indexes(pd.Series(nodes_a)).values + nodes_b_idx = self._node_indexes(pd.Series(nodes_b)).values + + imp_num = self._imp_name_to_num(imp_name) + + paths = self.net.shortest_paths(nodes_a_idx, nodes_b_idx, imp_num) + + # map back to external node ids + return [self.node_ids.values[p] for p in paths] + def shortest_path_length(self, node_a, node_b, imp_name=None): """ Return the length of the shortest path between two node ids in the @@ -208,6 +247,8 @@ def shortest_path_length(self, node_a, node_b, imp_name=None): If you have a large number of paths to calculate, don't use this function! Use the vectorized one instead. + Added in Pandana v0.5. + Parameters ---------- node_a : int @@ -240,6 +281,8 @@ def shortest_path_lengths(self, nodes_a, nodes_b, imp_name=None): of shortest path lengths. Must provide an impedance name if more than one is available. + Added in Pandana v0.5. + Parameters ---------- nodes_a : list-like of ints @@ -436,7 +479,7 @@ def aggregate(self, distance, type="sum", decay="linear", imp_name=None, def get_node_ids(self, x_col, y_col, mapping_distance=None): """ - Assign node_ids to data specified by x_col and y_col + Assign node_ids to data specified by x_col and y_col. 
Parameters ---------- @@ -481,15 +524,16 @@ def get_node_ids(self, x_col, y_col, mapping_distance=None): return df.node_id - def plot( - self, data, bbox=None, plot_type='scatter', - fig_kwargs=None, bmap_kwargs=None, plot_kwargs=None, - cbar_kwargs=None): + def plot(self, data, bbox=None, plot_type='scatter', fig_kwargs=None, + plot_kwargs=None, cbar_kwargs=None): """ - Plot an array of data on a map using matplotlib and Basemap, - automatically matching the data to the Pandana network node positions. + Plot an array of data on a map using Matplotlib, automatically matching + the data to the Pandana network node positions. Keyword arguments are + passed to the plotting routine. - Keyword arguments are passed to the plotting routine. + Modified in Pandana v0.6 to eliminate usage of Matplotlib's deprecated + Basemap toolkit. No longer accepts bmap_kwargs and no longer returns + a Basemap object. Parameters ---------- @@ -500,22 +544,17 @@ def plot( (lat_min, lng_min, lat_max, lng_max) plot_type : {'hexbin', 'scatter'}, optional fig_kwargs : dict, optional - Keyword arguments that will be passed to - matplotlib.pyplot.subplots. Use this to specify things like - figure size or background color. - bmap_kwargs : dict, optional - Keyword arguments that will be passed to the Basemap constructor. - This can be used to specify a projection or coastline resolution. + Keyword arguments that will be passed to matplotlib.pyplot.subplots. + Use this to specify things like figure size or background color. plot_kwargs : dict, optional Keyword arguments that will be passed to the matplotlib plotting - command used. Use this to control plot styles and color maps used. + command. Use this to control plot styles and color maps. cbar_kwargs : dict, optional - Keyword arguments passed to the Basemap.colorbar method. + Keyword arguments that will be passed to matplotlib.pyplot.colorbar. Use this to control color bar location and label. 
Returns ------- - bmap : Basemap fig : matplotlib.Figure ax : matplotlib.Axes @@ -528,14 +567,11 @@ def plot( try: import matplotlib import matplotlib.pyplot as plt - from mpl_toolkits.basemap import Basemap except (ModuleNotFoundError, RuntimeError): - raise ModuleNotFoundError("Pandana's network.plot() requires Matplotlib and " - "the Matplotlib Basemap Toolkit") + raise ModuleNotFoundError("Pandana's network.plot() requires Matplotlib") - fig_kwargs = fig_kwargs or {} - bmap_kwargs = bmap_kwargs or {} - plot_kwargs = plot_kwargs or {} + fig_kwargs = fig_kwargs or {'figsize': (10, 8)} + plot_kwargs = plot_kwargs or {'cmap': 'hot_r', 's': 1} cbar_kwargs = cbar_kwargs or {} if not bbox: @@ -547,23 +583,20 @@ def plot( fig, ax = plt.subplots(**fig_kwargs) - bmap = Basemap( - bbox[1], bbox[0], bbox[3], bbox[2], ax=ax, **bmap_kwargs) - bmap.drawcoastlines() - bmap.drawmapboundary() - - x, y = bmap(self.nodes_df.x.values, self.nodes_df.y.values) + x, y = (self.nodes_df.x.values, self.nodes_df.y.values) if plot_type == 'scatter': - plot = bmap.scatter( + plot = plt.scatter( x, y, c=data.values, **plot_kwargs) elif plot_type == 'hexbin': - plot = bmap.hexbin( + plot = plt.hexbin( x, y, C=data.values, **plot_kwargs) - bmap.colorbar(plot, **cbar_kwargs) + colorbar = plt.colorbar(plot, **cbar_kwargs) + + plt.show() - return bmap, fig, ax + return fig, ax def init_pois(self, num_categories, max_dist, max_pois): """ diff --git a/pandana/tests/test_pandana.py b/pandana/tests/test_pandana.py index 3d73a61b..68e277bd 100644 --- a/pandana/tests/test_pandana.py +++ b/pandana/tests/test_pandana.py @@ -268,6 +268,23 @@ def test_shortest_path(sample_osm): assert ids[1] == path[-1] +def test_shortest_paths(sample_osm): + + nodes = random_connected_nodes(sample_osm, 100) + vec_paths = sample_osm.shortest_paths(nodes[0:50], nodes[50:100]) + + for i in range(50): + path = sample_osm.shortest_path(nodes[i], nodes[i+50]) + assert(np.array_equal(vec_paths[i], path)) + + # check mismatched 
OD lists + try: + vec_paths = sample_osm.shortest_paths(nodes[0:51], nodes[50:100]) + assert 0 + except ValueError as e: + pass + + def test_shortest_path_length(sample_osm): for i in range(10): diff --git a/setup.py b/setup.py index 300aca2a..23c4f06a 100644 --- a/setup.py +++ b/setup.py @@ -131,7 +131,7 @@ def run(self): ## Standard setup ############################################### -version = '0.5.1' +version = '0.6' packages = find_packages(exclude=["*.tests", "*.tests.*", "tests.*", "tests"]) diff --git a/src/accessibility.cpp b/src/accessibility.cpp index 2e542e00..cedb588a 100644 --- a/src/accessibility.cpp +++ b/src/accessibility.cpp @@ -96,24 +96,41 @@ Accessibility::precomputeRangeQueries(float radius) { } -std::vector +vector Accessibility::Route(int src, int tgt, int graphno) { vector ret = this->ga[graphno]->Route(src, tgt); return vector (ret.begin(), ret.end()); } +vector> +Accessibility::Routes(vector sources, vector targets, int graphno) { + + int n = std::min(sources.size(), targets.size()); // in case lists don't match + vector> routes(n); + + #pragma omp parallel + #pragma omp for schedule(guided) + for (int i = 0 ; i < n ; i++) { + vector ret = this->ga[graphno]->Route(sources[i], targets[i], + omp_get_thread_num()); + routes[i] = vector (ret.begin(), ret.end()); + } + return routes; +} + + double Accessibility::Distance(int src, int tgt, int graphno) { return this->ga[graphno]->Distance(src, tgt); } -std::vector +vector Accessibility::Distances(vector sources, vector targets, int graphno) { int n = std::min(sources.size(), targets.size()); // in case lists don't match - vector distances (n); + vector distances(n); #pragma omp parallel #pragma omp for schedule(guided) diff --git a/src/accessibility.h b/src/accessibility.h index 98aa3e12..de76aab5 100644 --- a/src/accessibility.h +++ b/src/accessibility.h @@ -51,6 +51,10 @@ class Accessibility { // shortest path between two points vector Route(int src, int tgt, int graphno = 0); + // shortest 
path between list of origins and destinations + vector> Routes(vector sources, vector targets, + int graphno = 0); + // shortest path distance between two points double Distance(int src, int tgt, int graphno = 0); diff --git a/src/cyaccess.pyx b/src/cyaccess.pyx index 26a21df5..a4f6c617 100644 --- a/src/cyaccess.pyx +++ b/src/cyaccess.pyx @@ -24,6 +24,7 @@ cdef extern from "accessibility.h" namespace "MTC::accessibility": vector[double] getAllAggregateAccessibilityVariables( float, string, string, string, int) vector[int] Route(int, int, int) + vector[vector[int]] Routes(vector[long], vector[long], int) double Distance(int, int, int) vector[double] Distances(vector[long], vector[long], int) void precomputeRangeQueries(double) @@ -165,6 +166,15 @@ cdef class cyaccess: """ return self.access.Route(srcnode, destnode, impno) + def shortest_paths(self, np.ndarray[long] srcnodes, + np.ndarray[long] destnodes, int impno=0): + """ + srcnodes - node ids of origins + destnodes - node ids of destinations + impno - impedance id + """ + return self.access.Routes(srcnodes, destnodes, impno) + def shortest_path_distance(self, int srcnode, int destnode, int impno=0): """ srcnode - node id origin