diff --git a/ted_sws/notice_fetcher/adapters/ted_api.py b/ted_sws/notice_fetcher/adapters/ted_api.py index 87413dbc..933a0a01 100644 --- a/ted_sws/notice_fetcher/adapters/ted_api.py +++ b/ted_sws/notice_fetcher/adapters/ted_api.py @@ -128,11 +128,13 @@ def _retrieve_document_content(self, document_content: dict) -> str: else: raise Exception(f"The notice content can't be loaded!: {response}, {response.content}") - def get_generator_by_query(self, query: dict, result_fields: dict = None) -> Generator[dict, None, None]: + def get_generator_by_query(self, query: dict, result_fields: dict = None, load_content: bool = True) -> Generator[ + dict, None, None]: """ Method to get a documents content by passing a query to the API (json) :param query: :param result_fields: + :param load_content: :return:Generator[dict] """ query.update(DEFAULT_TED_API_QUERY_RESULT_SIZE) @@ -146,30 +148,28 @@ def get_generator_by_query(self, query: dict, result_fields: dict = None) -> Gen query[RESULT_PAGE_NUMBER] = page_number response_body = self.request_api(api_url=self.ted_api_url, api_query=query) documents_content += response_body[RESPONSE_RESULTS] - for document_content in documents_content: + + for document_content in documents_content: + if load_content: document_content[DOCUMENT_CONTENT] = self._retrieve_document_content(document_content) - # document_id = document_content[DOCUMENT_NOTICE_ID_KEY] - # document_id = "0" * (11 - len(document_id)) + document_id - # document_content[DOCUMENT_NOTICE_ID_KEY] = document_id del document_content[LINKS_TO_CONTENT_KEY] - yield document_content + yield document_content else: for document_content in documents_content: - document_content[DOCUMENT_CONTENT] = self._retrieve_document_content(document_content) - # document_id = document_content[DOCUMENT_NOTICE_ID_KEY] - # document_id = "0" * (11 - len(document_id)) + document_id - # document_content[DOCUMENT_NOTICE_ID_KEY] = document_id - del document_content[LINKS_TO_CONTENT_KEY] + if load_content: + document_content[DOCUMENT_CONTENT] = self._retrieve_document_content(document_content) + del document_content[LINKS_TO_CONTENT_KEY] yield document_content - def get_by_query(self, query: dict, result_fields: dict = None) -> List[dict]: + def get_by_query(self, query: dict, result_fields: dict = None, load_content: bool = True) -> List[dict]: """ Method to get a documents content by passing a query to the API (json) :param query: :param result_fields: + :param load_content: :return:List[dict] """ - return list(self.get_generator_by_query(query=query, result_fields=result_fields)) + return list(self.get_generator_by_query(query=query, result_fields=result_fields, load_content=load_content)) def get_by_id(self, document_id: str) -> dict: """ diff --git a/ted_sws/supra_notice_manager/services/supra_notice_validator.py b/ted_sws/supra_notice_manager/services/supra_notice_validator.py index 8a9e4564..06b91b48 100644 --- a/ted_sws/supra_notice_manager/services/supra_notice_validator.py +++ b/ted_sws/supra_notice_manager/services/supra_notice_validator.py @@ -36,7 +36,7 @@ def validate_and_update_daily_supra_notice(ted_publication_date: day_type, mongo ted_api_adapter: TedAPIAdapter = TedAPIAdapter(request_api=request_api) query = {"query": f"PD={ted_publication_date.strftime('%Y%m%d*')}"} - documents = ted_api_adapter.get_by_query(query=query, result_fields={"fields": ["ND"]}) + documents = ted_api_adapter.get_by_query(query=query, result_fields={"fields": ["ND"]}, load_content=False) api_notice_ids_list = [document["ND"] for document in documents] if documents and len(documents) else [] api_notice_ids = set(api_notice_ids_list)