From cfd6cd7a1246b74176abc23b47a1d7bcc8024512 Mon Sep 17 00:00:00 2001 From: Martin Fleischmann Date: Tue, 20 Oct 2020 14:05:51 +0100 Subject: [PATCH] Convolution (#10) * tessellation * visuals * convolutions --- measuring/morphometrics.ipynb | 87 ++++++++++++++++++++++++++++++++++- 1 file changed, 86 insertions(+), 1 deletion(-) diff --git a/measuring/morphometrics.ipynb b/measuring/morphometrics.ipynb index d51be2e..0d719c0 100644 --- a/measuring/morphometrics.ipynb +++ b/measuring/morphometrics.ipynb @@ -914,7 +914,92 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "At this point, all primary morphometric characters are measured and stored in a chunked parquet." + "At this point, all primary morphometric characters are measured and stored in a chunked parquet.\n", + "\n", + "## Convolution\n", + "\n", + "Morphometric variables are an input to cluster analysis, which should result in the delineation of spatial signatures. However, primary morphometric characters can't be used directly. We have to understand them in context. For that reason, we introduce a convolution step. Each of the characters above will be expressed as the first, second (median) and third quartile within 3 topological steps on enclosed tessellation. The resulting convolutional data will then be used as an input to cluster analysis." 
def convolute(chunk_id):
    """Measure convolutions of all primary morphometric characters for one chunk.

    For every enclosed tessellation cell in the chunk, each character is
    expressed as its first, second (median) and third quartile measured over
    the cell itself plus its neighbours within 3 topological steps. Results
    are written to ``convolutions/conv_{chunk_id}.pq``.

    Parameters
    ----------
    chunk_id : int
        Identifier of the spatial chunk to process.

    Returns
    -------
    str
        Status message including the processing time in seconds.
    """
    s = time()
    cells = geopandas.read_parquet(f"../../urbangrammar_samba/spatial_signatures/morphometrics/cells/cells_{chunk_id}.pq")
    cells['keep'] = True

    # add neighbouring cells from other chunks, so that convolutions measured
    # on the chunk edge cover the complete cross-chunk neighbourhood; these
    # extra rows are marked keep=False and dropped before saving
    cross_chunk_cells = []
    # NOTE: Series.iteritems() was removed in pandas 2.0; .items() is equivalent
    for chunk, inds in cross_chunk.loc[chunk_id].indices.items():
        add_cells = geopandas.read_parquet(f"../../urbangrammar_samba/spatial_signatures/morphometrics/cells/cells_{chunk}.pq").iloc[inds]
        add_cells['keep'] = False
        cross_chunk_cells.append(add_cells)

    # DataFrame.append() was removed in pandas 2.0; pd.concat is the equivalent.
    # Guard against a chunk that has no cross-chunk neighbours at all — an
    # empty list would make pd.concat raise ValueError.
    if cross_chunk_cells:
        df = pd.concat([cells, pd.concat(cross_chunk_cells, ignore_index=True)], ignore_index=True)
    else:
        df = cells

    # read W — pre-computed 3-step contiguity stored as a sparse matrix;
    # assumes its ordering matches the row order of df — TODO confirm
    w = libpysal.weights.WSP(scipy.sparse.load_npz(f"../../urbangrammar_samba/spatial_signatures/weights/w3_{chunk_id}.npz")).to_W()

    # list characters — assumes primary morphometric characters are stored
    # under 6-letter column codes while all other columns differ in length
    characters = [x for x in df.columns if len(x) == 6]

    # prepare a dictionary to store per-character lists of [q1, q2, q3]
    convolutions = {c: [] for c in characters}

    # measure convolutions cell by cell
    for i in range(len(df)):
        neighbours = [i]
        neighbours += w.neighbors[i]

        vicinity = df.iloc[neighbours]

        for c in characters:
            # the 'interpolation' keyword was renamed to 'method' in numpy 1.22
            convolutions[c].append(np.nanpercentile(vicinity[c], [25, 50, 75], method='midpoint'))

    # explode the [q1, q2, q3] triplets into flat *_q1/_q2/_q3 columns and
    # save only the rows belonging to this chunk (keep == True)
    conv = pd.DataFrame(convolutions)
    exploded = pd.concat([pd.DataFrame(conv[c].to_list(), columns=[c + '_q1', c + '_q2', c + '_q3']) for c in characters], axis=1)
    exploded[df.keep].to_parquet(f"../../urbangrammar_samba/spatial_signatures/morphometrics/convolutions/conv_{chunk_id}.pq")

    return f"Chunk {chunk_id} processed successfully in {time() - s} seconds."
# --- cell: start a local dask cluster, one single-threaded worker per slot ---
workers = 8
client = Client(LocalCluster(n_workers=workers, threads_per_worker=1))
client

# --- cell (%%time): process all 103 chunks, keeping the cluster saturated ---
# Prime the cluster with one future per worker, then top it up each time a
# future finishes, until the iterator of chunk ids is exhausted.
chunk_ids = iter(range(103))
initial = [client.submit(convolute, next(chunk_ids)) for _ in range(workers)]
pending = as_completed(initial)
for done in pending:
    # submit a replacement future, if any chunk ids remain
    try:
        pending.add(client.submit(convolute, next(chunk_ids)))
    except StopIteration:
        pass
    print(done.result())