From 0051f7a4ab5a3b8ae173ff2ddd79e5c9ef60dc0c Mon Sep 17 00:00:00 2001
From: lgaliana
Date: Fri, 20 Dec 2024 17:53:14 +0000
Subject: [PATCH] =?UTF-8?q?Pr=C3=A9pare=20les=20donn=C3=A9es?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 applications/data.ipynb | 159 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 159 insertions(+)
 create mode 100644 applications/data.ipynb

diff --git a/applications/data.ipynb b/applications/data.ipynb
new file mode 100644
index 0000000..b839c09
--- /dev/null
+++ b/applications/data.ipynb
@@ -0,0 +1,159 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Conversion des bases en geoparquet\n",
+    "\n",
+    "## 1. La base dvf"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import requests\n",
+    "import os\n",
+    "\n",
+    "url = \"https://files.data.gouv.fr/geo-dvf/latest/csv/2022/full.csv.gz\"\n",
+    "file_name = \"dvf.csv.gz\"\n",
+    "\n",
+    "# Check if the file already exists\n",
+    "if not os.path.exists(file_name):\n",
+    "    # A timeout keeps the cell from hanging forever on a stalled connection\n",
+    "    response = requests.get(url, timeout=60)\n",
+    "\n",
+    "    if response.status_code == 200:\n",
+    "        with open(file_name, \"wb\") as f:\n",
+    "            f.write(response.content)\n",
+    "        print(\"Téléchargement réussi.\")\n",
+    "    else:\n",
+    "        print(f\"Échec du téléchargement. Code d'état : {response.status_code}\")\n",
+    "else:\n",
+    "    print(f\"Le fichier '{file_name}' existe déjà. Aucun téléchargement nécessaire.\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "import geopandas as gpd\n",
+    "# Read the commune and departement codes as strings rather than letting pandas infer a dtype\n",
+    "dvf = pd.read_csv(\"dvf.csv.gz\", dtype={'code_commune': \"str\", \"code_departement\": \"str\"})\n",
+    "gdf = gpd.GeoDataFrame(\n",
+    "    dvf,\n",
+    "    geometry=gpd.points_from_xy(x=dvf.longitude, y=dvf.latitude)\n",
+    ")\n",
+    "# set_crs returns a new GeoDataFrame by default, so the result must be assigned back\n",
+    "gdf = gdf.set_crs(epsg=4326)\n",
+    "object_cols = gdf.select_dtypes(['object']).columns\n",
+    "gdf[object_cols] = gdf[object_cols].astype('string')\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Write the covering bbox column: the bounding-box query below filters on bbox.xmin / bbox.ymin\n",
+    "gdf.to_parquet(\"dvf.parquet\", write_covering_bbox=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import duckdb\n",
+    "duckdb.execute(\"INSTALL spatial;\")\n",
+    "duckdb.execute(\"LOAD spatial;\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "duckdb.sql(\"SELECT * FROM read_parquet('dvf.parquet')\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import math\n",
+    "\n",
+    "reference_lon = 2.3602  # Replace with your reference longitude\n",
+    "reference_lat = 48.9245  # Replace with your reference latitude\n",
+    "\n",
+    "# Approximate 1 km in degrees of latitude\n",
+    "buffer_distance_deg = 0.009\n",
+    "# A degree of longitude shrinks with cos(latitude), so widen the longitude window to match\n",
+    "buffer_lon_deg = buffer_distance_deg / math.cos(math.radians(reference_lat))\n",
+    "\n",
+    "# Approximate filtering using the bounding box method\n",
+    "toto = duckdb.sql(\n",
+    "    f\"\"\"\n",
+    "    SELECT *\n",
+    "    FROM read_parquet('dvf.parquet')\n",
+    "    WHERE\n",
+    "        bbox.xmin BETWEEN {reference_lon - buffer_lon_deg} AND {reference_lon + buffer_lon_deg}\n",
+    "        AND bbox.ymin BETWEEN {reference_lat - buffer_distance_deg} AND {reference_lat + buffer_distance_deg}\n",
+    "    \"\"\"\n",
+    ").to_df()\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Expose the parquet file as a view named dvf\n",
+    "duckdb.sql(\"CREATE OR REPLACE VIEW dvf AS SELECT * FROM read_parquet('dvf.parquet')\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "duckdb.sql(\"SELECT * FROM dvf LIMIT 5\").df().head(2)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "base",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.7"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}