From e3a1ce6738b8e9f798b0e862ebf47d7548a38680 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9r=C3=A9nice=20Batut?= Date: Mon, 30 Sep 2024 15:31:49 +0200 Subject: [PATCH 1/2] Add sleep after 400 requests to Plausible --- sources/bin/extract_gtn_tutorials.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sources/bin/extract_gtn_tutorials.py b/sources/bin/extract_gtn_tutorials.py index 5ddee89f..89308ad7 100644 --- a/sources/bin/extract_gtn_tutorials.py +++ b/sources/bin/extract_gtn_tutorials.py @@ -1,6 +1,7 @@ #!/usr/bin/env python import argparse +import time from datetime import date from typing import ( Dict, @@ -12,6 +13,8 @@ import yt_dlp from owlready2 import get_ontology +PLAUSIBLE_REQUEST_NB = 0 + def add_supported_servers(tuto: dict) -> None: """ @@ -80,8 +83,13 @@ def get_visit_results(url: str, tuto: dict, plausible_api: str) -> None: """ Extract visit results from Plausible URL """ + global PLAUSIBLE_REQUEST_NB headers = {"Authorization": f"Bearer {plausible_api}"} + if PLAUSIBLE_REQUEST_NB > 400: + time.sleep(3600) + PLAUSIBLE_REQUEST_NB = 0 results = shared.get_request_json(url, headers) + PLAUSIBLE_REQUEST_NB += 1 if "results" in results: for metric in ["visitors", "pageviews", "visit_duration"]: tuto[metric] += results["results"][metric]["value"] From c07588c45ddc8d89e482d4be99dbfdac8d515ea7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9r=C3=A9nice=20Batut?= Date: Mon, 30 Sep 2024 15:32:14 +0200 Subject: [PATCH 2/2] Add more options for YouTube option --- sources/bin/extract_gtn_tutorials.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/sources/bin/extract_gtn_tutorials.py b/sources/bin/extract_gtn_tutorials.py index 89308ad7..2f43904f 100644 --- a/sources/bin/extract_gtn_tutorials.py +++ b/sources/bin/extract_gtn_tutorials.py @@ -117,7 +117,11 @@ def get_youtube_stats(tuto: dict) -> None: """ tuto["video_versions"] = 0 tuto["video_view"] = 0 - ydl_opts = {"ignoreerrors": True, "quiet": True} + ydl_opts = { + "ignoreerrors": True, + "quiet": True, + "skip_download": True, + } recordings = [] if "recordings" in tuto and tuto["recordings"]: recordings = tuto["recordings"]