Skip to content

Commit

Permalink
Convolution (#10)
Browse files Browse the repository at this point in the history
* tessellation

* visuals

* convolutions
  • Loading branch information
martinfleis authored Oct 20, 2020
1 parent 07f59d0 commit cfd6cd7
Showing 1 changed file with 86 additions and 1 deletion.
87 changes: 86 additions & 1 deletion measuring/morphometrics.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -914,7 +914,92 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"At this point, all primary morphometric characters are measured and stored in a chunked parquet."
"At this point, all primary morphometric characters are measured and stored in a chunked parquet.\n",
"\n",
"## Convolution\n",
"\n",
"Morphometric variables are the input of the cluster analysis, which should result in the delineation of spatial signatures. However, primary morphometric characters can't be used directly; we have to understand them in context. For that reason, we introduce a convolution step. Each of the characters above will be expressed as the first, second (median) and third quartile of its values within 3 topological steps on the enclosed tessellation. The resulting convolutional data will then be used as the input of the cluster analysis."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def convolute(chunk_id):\n",
"    \"\"\"Measure contextual (convolved) versions of all characters in one chunk.\n",
"\n",
"    Every character is summarised as the first, second (median) and third\n",
"    quartile of its values over a cell and the neighbours listed for it in\n",
"    the stored `w3_{chunk_id}` weights. Cells borrowed from other chunks\n",
"    are used as context only and are not part of the saved output, which\n",
"    is written to `conv_{chunk_id}.pq`.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    chunk_id : int\n",
"        Identifier of the chunk to process; used to build all file paths and\n",
"        to look up the cross-chunk neighbours in the global `cross_chunk`.\n",
"\n",
"    Returns\n",
"    -------\n",
"    str\n",
"        Message reporting the chunk and the elapsed time in seconds.\n",
"    \"\"\"\n",
"    s = time()\n",
"    cells = geopandas.read_parquet(f\"../../urbangrammar_samba/spatial_signatures/morphometrics/cells/cells_{chunk_id}.pq\")\n",
"    cells['keep'] = True\n",
"\n",
"    # add neighbouring cells from other chunks; `keep=False` marks them as\n",
"    # context-only so they are dropped again before saving\n",
"    frames = [cells]\n",
"    for chunk, inds in cross_chunk.loc[chunk_id].indices.items():\n",
"        add_cells = geopandas.read_parquet(f\"../../urbangrammar_samba/spatial_signatures/morphometrics/cells/cells_{chunk}.pq\").iloc[inds]\n",
"        # assign() returns a new frame, so the flag is never written into a slice\n",
"        frames.append(add_cells.assign(keep=False))\n",
"\n",
"    # a single concat keeps the own cells first and also works when the\n",
"    # chunk has no cross-chunk neighbours at all\n",
"    df = pd.concat(frames, ignore_index=True)\n",
"\n",
"    # read W\n",
"    w = libpysal.weights.WSP(scipy.sparse.load_npz(f\"../../urbangrammar_samba/spatial_signatures/weights/w3_{chunk_id}.npz\")).to_W()\n",
"\n",
"    # list characters (selected as the columns with six-character names)\n",
"    characters = [x for x in df.columns if len(x) == 6]\n",
"\n",
"    # pull every character out as an array once, so the loop below indexes\n",
"    # plain arrays instead of slicing the whole data frame for each cell\n",
"    values = {c: df[c].to_numpy() for c in characters}\n",
"\n",
"    # prepare dictionary to store results\n",
"    convolutions = {c: [] for c in characters}\n",
"\n",
"    # measure convolutions\n",
"    for i in range(len(df)):\n",
"        # the cell itself is part of its own vicinity\n",
"        neighbours = [i]\n",
"        neighbours += w.neighbors[i]\n",
"\n",
"        for c in characters:\n",
"            # NOTE(review): newer NumPy releases rename `interpolation` to\n",
"            # `method`; switch the keyword once the environment is upgraded\n",
"            convolutions[c].append(np.nanpercentile(values[c][neighbours], [25, 50, 75], interpolation='midpoint'))\n",
"\n",
"    # save convolutions to parquet file, one column per character and quartile\n",
"    exploded = pd.concat([pd.DataFrame(convolutions[c], columns=[c + '_q1', c + '_q2', c + '_q3']) for c in characters], axis=1)\n",
"    exploded[df.keep].to_parquet(f\"../../urbangrammar_samba/spatial_signatures/morphometrics/convolutions/conv_{chunk_id}.pq\")\n",
"\n",
"    return f\"Chunk {chunk_id} processed successfully in {time() - s} seconds.\""
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# one single-threaded worker process per task slot\n",
"workers = 8\n",
"cluster = LocalCluster(n_workers=workers, threads_per_worker=1)\n",
"client = Client(cluster)\n",
"client"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%%time\n",
"inputs = iter(range(103))\n",
"# start with one task per worker\n",
"futures = [client.submit(convolute, next(inputs)) for _ in range(workers)]\n",
"ac = as_completed(futures)\n",
"for finished_future in ac:\n",
"    # whenever a task finishes, queue the next chunk (if any is left) so\n",
"    # that the number of tasks in flight stays equal to the number of workers\n",
"    chunk_id = next(inputs, None)\n",
"    if chunk_id is not None:\n",
"        ac.add(client.submit(convolute, chunk_id))\n",
"    print(finished_future.result())"
]
}
],
Expand Down

0 comments on commit cfd6cd7

Please sign in to comment.