-
Notifications
You must be signed in to change notification settings - Fork 7
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Add path_within_buckety * Utilise MasterScrapper pour duplicate_vectorfile_ign * Ajoute une fonction pour écrire avec S3 le md5 * Add support for s3fs access * possibility to load a .env file with python-dotenv (keys = token, key, secret) * black formatting * fix logger call * fix undefined name 'logger' * add black formatting to utils.dict_update.py * Samll refacto Dataset * reset update_json_md5 as a Dataset method; * add fs argument for instanciation of Dataset; * fixed Dataset docstring; * fix bug on Dataset if downloaded prevented because of md5 match * temporary fix in s3/s3.py of multiple s3fs creation; * fix duplicate_vectorfile_ign when file already uptodate on s3 * Move constants creation to package init * Update download.py * Update s3.py * Update __init__.py * Update misc.write_s3 * Update docstrings + notes * Notes/TODO sur s3 * Fix exception on missing file in json * Update write_s3.py * add logging configuration * Update write_s3.py * reset os.chdir('cartiflette') just in case * Move utils from s3 refactorize functions to get path (both from web access or from inside s3) * Move public functions into ad hoc subpackage * Fix typo * Start refacto of s3 * Update dev.py Black formatting * Fix typo * Default year in download.dev * Update download.py Fix default year in download.download.py * Set current year as default everywhere * Cleanup corrupt files after download * Unfinished refactorization * Remove geometry sanitations * Remove unecessary functions in s3 * Fix mockups _get_last_md5 * Add magic file detection and CachedSession * Update .gitignore * Add CSV support (COG Insee) * Update sources.yaml * Update download.py * Spec custom filetype for output * Create csv_magic.py utility for unknown csv reading * RecRefacto download Use requests-cache Refacto yaml Rename "field" argument in yaml to "territory" Handle zip Handle nested zip/7zip Handle CSV/DBF pattern (not only shapefiles) Refacto tests with CachedSession patching Split download on multiple 
files (download, scraper, dataset) * Add poetry and pytest to CI * Set os-specific dependency * Fix check test * Add incomplete s3 refacto for building purpose * Add feedback to test * Fix proxy error on github tests * Jobs' names differentiation * Cleanup unused files since poetry's usage * Fix copy/paste duplicates * Merge / upgrade standard patchs on bucket Set a config file which centralize all constants which relates to s3fs * Move create_path_bucket test to separate test * Full download pipeline * Fix bug on pipeline with year as int * add configuration option for tqdm * Update config.py * Recreate base gedataframes directly from s3 * Remove dev * Refactorize s3 (for a start...) * Fix _download_sources import in tests --------- Co-authored-by: linogaliana <[email protected]> Co-authored-by: thomas.grandjean <[email protected]>
- Loading branch information
1 parent
80b8a5a
commit 1c90dd3
Showing
34 changed files
with
3,614 additions
and
2,468 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -131,3 +131,6 @@ dmypy.json | |
# Setuptools vs. poetry | ||
*.lock | ||
.toml | ||
|
||
*.sqlite | ||
*.sqlite* |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,12 @@ | ||
from .utils import * | ||
from .download import * | ||
from .s3 import * | ||
from cartiflette.config import ( | ||
BUCKET, | ||
PATH_WITHIN_BUCKET, | ||
ENDPOINT_URL, | ||
FS, | ||
THREADS_DOWNLOAD, | ||
LEAVE_TQDM, | ||
) | ||
from cartiflette.constants import REFERENCES, DOWNLOAD_PIPELINE_ARGS | ||
from cartiflette.utils import * | ||
from cartiflette.download import * | ||
from cartiflette.s3 import * |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
# -*- coding: utf-8 -*- | ||
from dotenv import load_dotenv | ||
import os | ||
import s3fs | ||
|
||
# Pull variables from a local .env file (when there is one) into the process
# environment, so that S3 credentials do not have to be hard-coded here.
load_dotenv()

# Where the data lives: bucket name, prefix inside the bucket, and endpoint.
BUCKET = "projet-cartiflette"
PATH_WITHIN_BUCKET = "diffusion/shapefiles-test4"
ENDPOINT_URL = "https://minio.lab.sspcloud.fr"

# Forward to s3fs only the credentials actually present in the environment;
# absent ones are simply not passed.
kwargs = {}
for key in ["token", "secret", "key"]:
    if key in os.environ:
        kwargs[key] = os.environ[key]

# Single filesystem object, created once at import time.
FS = s3fs.S3FileSystem(client_kwargs={"endpoint_url": ENDPOINT_URL}, **kwargs)

THREADS_DOWNLOAD = 5
# Note: each thread may also spawn the same number of child threads;
# set to 1 for debugging purposes (will deactivate multithreading)

# NOTE(review): presumably forwarded to tqdm's `leave` option by the
# download code -- confirm against the callers.
LEAVE_TQDM = False
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
# -*- coding: utf-8 -*- | ||
|
||
import geopandas as gpd | ||
import logging | ||
from shapely.geometry import box | ||
|
||
|
||
logger = logging.getLogger(__name__)

# Bounding box of each territory, given as the four positional arguments
# handed to shapely's `box` (minx, miny, maxx, maxy).
# use : https://boundingbox.klokantech.com/
_BOUNDING_BOXES = {
    "metropole": (-5.45, 41.26, 9.83, 51.31),
    "guyane": (-54.6, 2.11, -51.5, 5.98),
    "martinique": (-61.4355, 14.2217, -60.6023, 15.0795),
    "guadeloupe": (-62.018, 15.6444, -60.792, 16.714),
    "reunion": (55.0033, -21.5904, 56.0508, -20.6728),
    "mayotte": (44.7437, -13.2733, 45.507, -12.379),
    "saint_pierre_et_miquelon": (-56.6975, 46.5488, -55.9066, 47.3416),
}

# One row per territory ("location" + "geometry"), in the order listed above,
# with the coordinates declared as EPSG:4326.
REFERENCES = gpd.GeoDataFrame(
    [
        {"location": name, "geometry": box(*bounds)}
        for name, bounds in _BOUNDING_BOXES.items()
    ],
    crs=4326,
)
|
||
# Territories requested for the ADMIN-EXPRESS entry.
_ADMIN_EXPRESS_TERRITORIES = (
    "guadeloupe",
    "martinique",
    "guyane",
    "reunion",
    "mayotte",
    "metropole",
)

# Files requested for the COG entry.
_COG_FILES = (
    "COMMUNE",
    "CANTON",
    "ARRONDISSEMENT",
    "DEPARTEMENT",
    "REGION",
    "COLLECTIVITE",
    "PAYS",
)

# Each value is a 4-item list. Judging by the values alone, the items look
# like [provider, dataset family, source file(s), territory(ies)], where the
# last two may be a single value, a list, or None -- TODO confirm against the
# download pipeline that consumes this mapping.
DOWNLOAD_PIPELINE_ARGS = {
    "ADMIN-EXPRESS": [
        "IGN",
        "ADMINEXPRESS",
        "EXPRESS-COG-TERRITOIRE",
        list(_ADMIN_EXPRESS_TERRITORIES),
    ],
    "BDTOPO": ["IGN", "BDTOPO", "ROOT", "france_entiere"],
    "IRIS": ["IGN", "CONTOUR-IRIS", "ROOT", None],
    "COG": ["Insee", "COG", list(_COG_FILES), "france_entiere"],
    "BV 2022": ["Insee", "BV", "FondsDeCarte_BV_2022", "france_entiere"],
    "BV 2012": ["Insee", "BV", "FondsDeCarte_BV_2012", "france_entiere"],
}
|
||
# EXPRESS-COG ? | ||
# EXPRESS-COG-CARTO-TERRITOIRE ? | ||
# EXPRESS-COG-CARTO ? |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,34 +1,14 @@ | ||
from .dev import ( | ||
# create_url_adminexpress, | ||
get_vectorfile_ign, | ||
# get_administrative_level_available_ign, | ||
store_vectorfile_ign, | ||
get_vectorfile_communes_arrondissement, | ||
# get_BV, | ||
get_cog_year, | ||
) | ||
# from cartiflette.download.dev import ( | ||
# get_vectorfile_communes_arrondissement, | ||
# # get_BV, | ||
# ) | ||
|
||
|
||
from .download import ( | ||
Dataset, | ||
BaseScraper, | ||
HttpScraper, | ||
FtpScraper, | ||
MasterScraper, | ||
download_sources, | ||
from cartiflette.download.pipeline import ( | ||
download_all, | ||
) | ||
|
||
|
||
__all__ = [ | ||
# "create_url_adminexpress", | ||
"get_vectorfile_ign", | ||
# "get_administrative_level_available_ign", | ||
"store_vectorfile_ign", | ||
"get_vectorfile_communes_arrondissement", | ||
# "get_BV", | ||
"get_cog_year", | ||
"Dataset", | ||
"BaseScraper", | ||
"HttpScraper", | ||
"FtpScraper", | ||
"MasterScraper", | ||
"download_sources", | ||
"download_all", | ||
] |
Oops, something went wrong.