From e9b580456d60d2ac3ba186f6d1a01613bf6fa31e Mon Sep 17 00:00:00 2001 From: Dumitru Date: Fri, 29 Sep 2023 19:07:30 +0300 Subject: [PATCH 1/2] make cellar url configurable from env and airflow --- ted_sws/__init__.py | 5 ++++- .../check_availability_of_notice_in_cellar.py | 11 ++++++++--- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/ted_sws/__init__.py b/ted_sws/__init__.py index 641d29b9..52fb68e7 100644 --- a/ted_sws/__init__.py +++ b/ted_sws/__init__.py @@ -165,6 +165,10 @@ class TedAPIConfig: def TED_API_URL(self, config_value: str) -> str: return config_value + @env_property(config_resolver_class=AirflowAndEnvConfigResolver) + def TED_WEBAPI_SPARQL_URL(self, config_value: str) -> str: + return config_value + class FusekiConfig: @env_property() @@ -258,7 +262,6 @@ def S3_PUBLISH_NOTICE_RDF_BUCKET(self, config_value: str) -> str: return config_value - class AirflowConfig: @env_property(config_resolver_class=AirflowAndEnvConfigResolver, default_value="4") diff --git a/ted_sws/notice_validator/services/check_availability_of_notice_in_cellar.py b/ted_sws/notice_validator/services/check_availability_of_notice_in_cellar.py index b9417069..11299949 100644 --- a/ted_sws/notice_validator/services/check_availability_of_notice_in_cellar.py +++ b/ted_sws/notice_validator/services/check_availability_of_notice_in_cellar.py @@ -1,6 +1,8 @@ import time from typing import List, Set from pymongo import MongoClient + +from ted_sws import config from ted_sws.core.model.notice import Notice, NoticeStatus from ted_sws.core.service.batch_processing import chunks from ted_sws.data_manager.adapters.notice_repository import NoticeRepository @@ -9,33 +11,36 @@ from ted_sws.notice_validator.resources import NOTICE_AVAILABILITY_SPARQL_QUERY_TEMPLATE_PATH, \ NOTICES_AVAILABILITY_SPARQL_QUERY_TEMPLATE_PATH, GET_NOTICE_URI_SPARQL_QUERY_TEMPLATE_PATH -WEBAPI_SPARQL_URL = "https://publications.europa.eu/webapi/rdf/sparql" WEBAPI_SPARQL_RUN_FORMAT = "application/sparql-results+json" INVALID_NOTICE_URI = 'https://www.w3.org/1999/02/22-rdf-syntax-ns#type-invalid' DEFAULT_NOTICES_BATCH_SIZE = 5000 DEFAULT_CELLAR_REQUEST_DELAY = 3 -def check_availability_of_notice_in_cellar(notice_uri: str, endpoint_url: str = WEBAPI_SPARQL_URL) -> bool: +def check_availability_of_notice_in_cellar(notice_uri: str, endpoint_url: str = None) -> bool: """ This service checks the notice availability in Cellar :param notice_uri: :param endpoint_url: :return: """ + if not endpoint_url: + endpoint_url = config.TED_WEBAPI_SPARQL_URL query_template = NOTICE_AVAILABILITY_SPARQL_QUERY_TEMPLATE_PATH.read_text(encoding="utf-8") query = query_template.format(notice_uri=notice_uri) result = SPARQLTripleStoreEndpoint(endpoint_url=endpoint_url).with_query(sparql_query=query).fetch_tree() return result['boolean'] -def check_availability_of_notices_in_cellar(notice_uries: List[str], endpoint_url: str = WEBAPI_SPARQL_URL) -> Set[str]: +def check_availability_of_notices_in_cellar(notice_uries: List[str], endpoint_url: str = None) -> Set[str]: """ This service check the notices availability in Cellar, and return available set of notice uries. :param notice_uries: :param endpoint_url: :return: """ + if not endpoint_url: + endpoint_url = config.TED_WEBAPI_SPARQL_URL query_template = NOTICES_AVAILABILITY_SPARQL_QUERY_TEMPLATE_PATH.read_text(encoding="utf-8") notice_uries = " ".join([f"<{notice_uri}>" for notice_uri in notice_uries]) query = query_template.format(notice_uries=notice_uries) From 7b5d632ac66b12961f9ce3d9e03069cf771248e8 Mon Sep 17 00:00:00 2001 From: Dumitru Date: Fri, 29 Sep 2023 19:31:31 +0300 Subject: [PATCH 2/2] rename to CELLAR_WEBAPI_SPARQL_URL --- ted_sws/__init__.py | 2 +- .../services/check_availability_of_notice_in_cellar.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ted_sws/__init__.py b/ted_sws/__init__.py index 52fb68e7..976d2a08 100644 --- a/ted_sws/__init__.py +++ b/ted_sws/__init__.py @@ -166,7 +166,7 @@ def TED_API_URL(self, config_value: str) -> str: return config_value @env_property(config_resolver_class=AirflowAndEnvConfigResolver) - def TED_WEBAPI_SPARQL_URL(self, config_value: str) -> str: + def CELLAR_WEBAPI_SPARQL_URL(self, config_value: str) -> str: return config_value diff --git a/ted_sws/notice_validator/services/check_availability_of_notice_in_cellar.py b/ted_sws/notice_validator/services/check_availability_of_notice_in_cellar.py index 11299949..c76e775a 100644 --- a/ted_sws/notice_validator/services/check_availability_of_notice_in_cellar.py +++ b/ted_sws/notice_validator/services/check_availability_of_notice_in_cellar.py @@ -25,7 +25,7 @@ def check_availability_of_notice_in_cellar(notice_uri: str, endpoint_url: str = :return: """ if not endpoint_url: - endpoint_url = config.TED_WEBAPI_SPARQL_URL + endpoint_url = config.CELLAR_WEBAPI_SPARQL_URL query_template = NOTICE_AVAILABILITY_SPARQL_QUERY_TEMPLATE_PATH.read_text(encoding="utf-8") query = query_template.format(notice_uri=notice_uri) result = SPARQLTripleStoreEndpoint(endpoint_url=endpoint_url).with_query(sparql_query=query).fetch_tree() @@ -40,7 +40,7 @@ def check_availability_of_notices_in_cellar(notice_uries: List[str], endpoint_ur :return: """ if not endpoint_url: - endpoint_url = config.TED_WEBAPI_SPARQL_URL + endpoint_url = config.CELLAR_WEBAPI_SPARQL_URL query_template = NOTICES_AVAILABILITY_SPARQL_QUERY_TEMPLATE_PATH.read_text(encoding="utf-8") notice_uries = " ".join([f"<{notice_uri}>" for notice_uri in notice_uries]) query = query_template.format(notice_uries=notice_uries)