Skip to content

Commit

Permalink
feat(FT): add horaires in open street map format
Browse files Browse the repository at this point in the history
  • Loading branch information
hlecuyer committed Jul 23, 2024
1 parent 6295209 commit fb6e29e
Show file tree
Hide file tree
Showing 4 changed files with 101 additions and 16 deletions.
1 change: 1 addition & 0 deletions pipeline/dags/dag_utils/sources/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,7 @@
# the "request token" is the client_id:client_secret string.
"token": Variable.get("FT_API_TOKEN", None),
"extractor": france_travail.extract,
"reader": france_travail.read,
},
"services": {
"filename": "services.json",
Expand Down
83 changes: 83 additions & 0 deletions pipeline/dags/dag_utils/sources/france_travail.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
import json
from pathlib import Path
from typing import Any, Dict, List, Optional
from urllib.parse import urljoin


Expand Down Expand Up @@ -32,3 +35,83 @@ def extract(url, token, id=None):
response.raise_for_status()

return response.content


def same_values_every_day(formated_horaires: Dict[str, str]) -> bool:
    """Tell whether every day in the mapping has the same opening hours.

    Args:
        formated_horaires: Mapping of a day abbreviation to its formatted
            opening-hours string.

    Returns:
        True when all values are equal. An empty mapping is vacuously True
        instead of raising ``StopIteration``.
    """
    values = iter(formated_horaires.values())
    first_value = next(values, None)
    return all(value == first_value for value in values)


def format_date_ft_to_open_street_map(
    horaires: List[Dict[str, Any]],
) -> Optional[str]:
    """Convert France Travail opening hours to an OSM ``opening_hours`` string.

    Each item of ``horaires`` describes one day through the keys ``jour``,
    ``horaireFerme``, ``horaireEnContinu`` and the ``ouverture*`` /
    ``fermeture*`` time values.

    Args:
        horaires: Per-day opening hours of one agency.

    Returns:
        A string such as ``"Mo-Fr 08:30-16:30; PH off"`` or
        ``"Mo 08:30-12:30,13:30-16:30; Tu 09:00-12:00; PH off"``, or ``None``
        when no open day is found.

    Raises:
        KeyError: If an entry lacks an expected key, or if ``jour`` is not in
            the 1-5 range handled by the day mapping.
    """
    # NOTE(review): only days 1-5 are mapped; an open day numbered 6 or 7
    # raises KeyError. Confirm whether the API can return week-end openings.
    maping_days = {
        1: "Mo",
        2: "Tu",
        3: "We",
        4: "Th",
        5: "Fr",
    }

    formated_horaires = {}

    for horaire in sorted(horaires, key=lambda x: x["jour"]):
        # "O" presumably stands for "Oui" (closed that day) -- TODO confirm.
        if horaire["horaireFerme"] == "O":
            continue
        day = maping_days[horaire["jour"]]
        if horaire["horaireEnContinu"] == "O":
            # Open without a break: from morning opening to afternoon closing.
            hours = f"{horaire['ouvertureMatin']}-{horaire['fermetureApresMidi']}"
        elif "ouvertureApresMidi" not in horaire:
            hours = f"{horaire['ouvertureMatin']}-{horaire['fermetureMatin']}"
        elif "ouvertureMatin" not in horaire:
            hours = (
                f"{horaire['ouvertureApresMidi']}-{horaire['fermetureApresMidi']}"
            )
        else:
            # Several time ranges within one day are comma-separated in the
            # OSM syntax (a colon here would yield an invalid value).
            hours = (
                f"{horaire['ouvertureMatin']}-{horaire['fermetureMatin']},"
                f"{horaire['ouvertureApresMidi']}-{horaire['fermetureApresMidi']}"
            )
        formated_horaires[day] = hours

    if not formated_horaires:
        return None

    # Simplify format for 70% of the dataset.
    # "Mo-Fr" is only correct when all five weekdays are open with the same
    # hours; otherwise closed days would be advertised as open.
    every_weekday_open = list(formated_horaires) == list(maping_days.values())
    if every_weekday_open and same_values_every_day(formated_horaires):
        return f"Mo-Fr {next(iter(formated_horaires.values()))}; PH off"

    rules = [f"{day} {hours}" for day, hours in formated_horaires.items()]
    rules.append("PH off")
    # Rules are separated by "; " (semicolon followed by a space).
    return "; ".join(rules)


def read(path: Path):
    """Load the France Travail agencies file and add OSM-formatted hours.

    The JSON file at ``path`` is parsed, every agency gets a
    ``horaires_open_street_map`` entry computed from its ``horaires`` field
    (``None`` when the conversion fails on a missing key), and the records are
    turned into a DataFrame passed through ``utils.df_clear_nan``.

    Args:
        path: Location of the extracted JSON file.

    Returns:
        The value returned by ``utils.df_clear_nan`` for the built DataFrame.
    """
    import logging

    import pandas as pd

    from . import utils

    logger = logging.getLogger(__name__)

    # utils.read_json would be enough on its own, but the "horaires" field
    # must also be converted to the OpenStreetMap opening_hours format.
    # cf: https://francetravail.io/data/api/referentiel-agences?tabgroup-api=documentation&doc-section=api-doc-section-caracteristiques
    # cf: https://wiki.openstreetmap.org/wiki/Key:opening_hours

    # JSON is UTF-8 encoded; do not depend on the locale's default encoding.
    with path.open(encoding="utf-8") as file:
        data = json.load(file)

    for agence in data:
        try:
            agence["horaires_open_street_map"] = format_date_ft_to_open_street_map(
                agence["horaires"]
            )
        except KeyError as e:
            # Best effort: an agency with unexpected or missing hours data is
            # kept, just without opening hours.
            logger.warning(
                "Could not format horaires for agence %s: missing key %s",
                agence.get("code"),
                e,
            )
            agence["horaires_open_street_map"] = None

    df = pd.DataFrame.from_records(data)
    return utils.df_clear_nan(df)
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ final AS (
FALSE AS "antenne",
NULL::TEXT [] AS "labels_autres",
NULL::TEXT [] AS "thematiques",
NULL AS "horaires_ouverture",
horaires_open_street_map AS "horaires_ouverture",
NULL AS "lien_source",
NULL AS "presentation_detail",
NULL AS "presentation_resume",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,21 +4,22 @@ WITH source AS (

final AS (
SELECT
_di_source_id AS "_di_source_id",
CURRENT_DATE AS "date_maj",
CASE WHEN data ->> 'dispositifADEDA' = 'true' THEN 'https://www.francetravail.fr/actualites/a-laffiche/2022/adeda-un-dispositif-pour-mieux-a.html' END AS "accessibilite",
CAST(data #>> '{adressePrincipale,gpsLat}' AS FLOAT) AS "latitude",
CAST(data #>> '{adressePrincipale,gpsLon}' AS FLOAT) AS "longitude",
data #>> '{adressePrincipale,ligne4}' AS "adresse",
data #>> '{adressePrincipale,ligne3}' AS "complement_adresse",
data #>> '{adressePrincipale,communeImplantation}' AS "code_insee",
data #>> '{adressePrincipale,bureauDistributeur}' AS "code_postal",
data #>> '{contact,email}' AS "courriel",
data #>> '{contact,telephonePublic}' AS "telephone",
data ->> 'code' AS "id",
data ->> 'libelleEtendu' AS "nom",
data ->> 'siret' AS "siret",
data ->> 'type' AS "typologie"
_di_source_id AS "_di_source_id",
CURRENT_DATE AS "date_maj",
CASE WHEN data ->> 'dispositifADEDA' = 'true' THEN 'https://www.pole-emploi.fr/actualites/a-laffiche/2022/adeda-un-dispositif-pour-mieux-a.html' END AS "accessibilite",
CAST(data #>> '{adressePrincipale,gpsLat}' AS FLOAT) AS "latitude",
CAST(data #>> '{adressePrincipale,gpsLon}' AS FLOAT) AS "longitude",
data #>> '{adressePrincipale,ligne4}' AS "adresse",
data #>> '{adressePrincipale,ligne3}' AS "complement_adresse",
data #>> '{adressePrincipale,communeImplantation}' AS "code_insee",
data #>> '{adressePrincipale,bureauDistributeur}' AS "code_postal",
data #>> '{contact,email}' AS "courriel",
data ->> 'horaires_open_street_map' AS "horaires_open_street_map",
data #>> '{contact,telephonePublic}' AS "telephone",
data ->> 'code' AS "id",
data ->> 'libelleEtendu' AS "nom",
data ->> 'siret' AS "siret",
data ->> 'type' AS "typologie"
FROM source
)

Expand Down

0 comments on commit fb6e29e

Please sign in to comment.