diff --git a/measuring/morphometrics.ipynb b/measuring/morphometrics.ipynb
index d51be2e..0d719c0 100644
--- a/measuring/morphometrics.ipynb
+++ b/measuring/morphometrics.ipynb
@@ -914,7 +914,115 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "At this point, all primary morphometric characters are measured and stored in a chunked parquet."
+    "At this point, all primary morphometric characters are measured and stored in a chunked parquet.\n",
+    "\n",
+    "## Convolution\n",
+    "\n",
+    "Morphometric variables are the input of the cluster analysis that should delineate spatial signatures. However, primary morphometric characters cannot be used directly; each value has to be understood in the context of its surroundings. For that reason, we introduce a convolution step: each of the characters above is expressed as its first, second (median) and third quartile measured within three topological steps on the enclosed tessellation. The resulting convolved data will then serve as the input of the cluster analysis."
    ]
-  }
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def convolute(chunk_id):\n",
+    "    s = time()\n",
+    "    cells = geopandas.read_parquet(f\"../../urbangrammar_samba/spatial_signatures/morphometrics/cells/cells_{chunk_id}.pq\")\n",
+    "    cells['keep'] = True\n",
+    "\n",
+    "    # add neighbouring cells from other chunks so that cells on chunk edges have a complete vicinity\n",
+    "    cross_chunk_cells = []\n",
+    "    for chunk, inds in cross_chunk.loc[chunk_id].indices.items():\n",
+    "        add_cells = geopandas.read_parquet(f\"../../urbangrammar_samba/spatial_signatures/morphometrics/cells/cells_{chunk}.pq\").iloc[inds]\n",
+    "        add_cells['keep'] = False\n",
+    "        cross_chunk_cells.append(add_cells)\n",
+    "\n",
+    "    df = pd.concat([cells, pd.concat(cross_chunk_cells, ignore_index=True)], ignore_index=True)\n",
+    "\n",
+    "    # read the order-3 spatial weights matrix\n",
+    "    w = libpysal.weights.WSP(scipy.sparse.load_npz(f\"../../urbangrammar_samba/spatial_signatures/weights/w3_{chunk_id}.npz\")).to_W()\n",
+    "\n",
+    "    # list characters (all morphometric characters use six-character column names)\n",
+    "    characters = [x for x in df.columns if len(x) == 6]\n",
+    "\n",
+    "    # prepare a dictionary to store results\n",
+    "    convolutions = {c: [] for c in characters}\n",
+    "\n",
+    "    # measure convolutions as quartiles over each cell and its neighbours\n",
+    "    for i in range(len(df)):\n",
+    "        neighbours = [i]\n",
+    "        neighbours += w.neighbors[i]\n",
+    "\n",
+    "        vicinity = df.iloc[neighbours]\n",
+    "\n",
+    "        for c in characters:\n",
+    "            convolutions[c].append(np.nanpercentile(vicinity[c], [25, 50, 75], method='midpoint'))\n",
+    "\n",
+    "    # save convolutions to a parquet file, keeping only the cells that belong to this chunk\n",
+    "    conv = pd.DataFrame(convolutions)\n",
+    "    exploded = pd.concat([pd.DataFrame(conv[c].to_list(), columns=[c + '_q1', c + '_q2', c + '_q3']) for c in characters], axis=1)\n",
+    "    exploded[df.keep].to_parquet(f\"../../urbangrammar_samba/spatial_signatures/morphometrics/convolutions/conv_{chunk_id}.pq\")\n",
+    "\n",
+    "    return f\"Chunk {chunk_id} processed successfully in {time() - s} seconds.\""
+   ]
+  },
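+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The function above assumes that the order-3 weights matrices (`w3_{chunk_id}.npz`) already exist on disk; they are generated earlier in the pipeline. The next cell is only a minimal, hypothetical sketch of one way such a matrix could be built with `libpysal`, assuming Queen contiguity between tessellation cells as the base graph and `higher_order` with `lower_order=True` to capture all neighbours within three topological steps. The `tessellation` variable and the output path are illustrative, not part of the pipeline."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# illustrative sketch only; the production weights are built elsewhere in the pipeline\n",
+    "# 'tessellation' is assumed to be a GeoDataFrame of enclosed tessellation cells\n",
+    "w1 = libpysal.weights.Queen.from_dataframe(tessellation)\n",
+    "\n",
+    "# include every neighbour within three topological steps (orders 1, 2 and 3)\n",
+    "w3 = libpysal.weights.higher_order(w1, k=3, lower_order=True)\n",
+    "\n",
+    "# store as a sparse matrix so convolute() can load it with scipy.sparse.load_npz\n",
+    "scipy.sparse.save_npz('w3_example.npz', w3.to_WSP().sparse)"
+   ]
+  },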
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# start a local Dask cluster with one single-threaded worker per process\n",
+    "workers = 8\n",
+    "client = Client(LocalCluster(n_workers=workers, threads_per_worker=1))\n",
+    "client"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%time\n",
+    "# iterate over all 103 chunks, keeping one task per worker in flight\n",
+    "inputs = iter(range(103))\n",
+    "futures = [client.submit(convolute, next(inputs)) for i in range(workers)]\n",
+    "ac = as_completed(futures)\n",
+    "for finished_future in ac:\n",
+    "    # as soon as a future finishes, submit a new one to keep all workers busy\n",
+    "    try:\n",
+    "        new_future = client.submit(convolute, next(inputs))\n",
+    "        ac.add(new_future)\n",
+    "    except StopIteration:\n",
+    "        pass\n",
+    "    print(finished_future.result())"
+   ]
+  }
 ],