From 66109f0e4b831b3b1da2ab49f77b2012970b2619 Mon Sep 17 00:00:00 2001 From: CaptainOfHacks <39195263+CaptainOfHacks@users.noreply.github.com> Date: Thu, 29 Feb 2024 21:50:18 +0200 Subject: [PATCH] WIP --- dags/pipelines/notice_fetcher_pipelines.py | 1 - ted_sws/notice_fetcher/adapters/ted_api.py | 5 +++-- .../notice_fetcher/services/notice_fetcher.py | 20 +++---------------- 3 files changed, 6 insertions(+), 20 deletions(-) diff --git a/dags/pipelines/notice_fetcher_pipelines.py b/dags/pipelines/notice_fetcher_pipelines.py index 69dd7d24..880b35d8 100644 --- a/dags/pipelines/notice_fetcher_pipelines.py +++ b/dags/pipelines/notice_fetcher_pipelines.py @@ -38,7 +38,6 @@ def notice_fetcher_by_query_pipeline(query: str = None) -> List[str]: notice_ids = None try: ted_api_query = {"query": query} - print("query is:", query) mongodb_client = MongoClient(config.MONGO_DB_AUTH_URL) notice_ids = NoticeFetcher(notice_repository=NoticeRepository(mongodb_client=mongodb_client), ted_api_adapter=TedAPIAdapter( diff --git a/ted_sws/notice_fetcher/adapters/ted_api.py b/ted_sws/notice_fetcher/adapters/ted_api.py index 1a6a00a6..6a455e9d 100644 --- a/ted_sws/notice_fetcher/adapters/ted_api.py +++ b/ted_sws/notice_fetcher/adapters/ted_api.py @@ -43,11 +43,12 @@ def __call__(self, api_url: str, api_query: dict) -> dict: response = requests.post(api_url, json=api_query) try_again_request_count = 0 + sleep_time = 0.5 while response.status_code == HTTPStatus.TOO_MANY_REQUESTS: log_warning(f"Request return error: {response.status_code}") try_again_request_count += 1 - log_warning(f"Sleep for : {try_again_request_count * 0.1} seconds") - time.sleep(try_again_request_count * 0.1) + log_warning(f"Sleep for : {try_again_request_count * sleep_time} seconds") + time.sleep(try_again_request_count * sleep_time) response = requests.post(api_url, json=api_query) if try_again_request_count > 5: break diff --git a/ted_sws/notice_fetcher/services/notice_fetcher.py b/ted_sws/notice_fetcher/services/notice_fetcher.py index a0799ff4..10310e88 100644 --- a/ted_sws/notice_fetcher/services/notice_fetcher.py +++ b/ted_sws/notice_fetcher/services/notice_fetcher.py @@ -108,25 +108,11 @@ def fetch_notices_by_query(self, query: dict) -> List[str]: :param query: :return: """ - #documents = self.ted_api_adapter.get_by_query(query=query) documents = self.ted_api_adapter.get_generator_by_query(query=query) notice_ids = set() - while True: - try: - document = next(documents, None) - - if document is None: - break - notice_ids.add(document["ND"]) - print(f"downloaded Notice: ", document["ND"]) - self.notice_repository.add(notice=self._create_notice(notice_data=document)) - except Exception as e: - print(e) - # for document in documents: - # notice_ids.add(document["ND"]) - # print(f"downloaded Notice: ", document["ND"]) - # self.notice_repository.add(notice=self._create_notice(notice_data=document)) - #return self._store_to_notice_repository(documents=documents) + for document in documents: + notice_ids.add(document["ND"]) + self.notice_repository.add(notice=self._create_notice(notice_data=document)) return list(notice_ids) def fetch_notices_by_date_range(self, start_date: date, end_date: date) -> List[str]: