Skip to content

Commit

Permalink
Merge branch 'feature/TDA-284' of https://github.com/meaningfy-ws/ted-data-eu into feature/TDA-284
Browse files Browse the repository at this point in the history
  • Loading branch information
CaptainOfHacks committed Oct 25, 2023
2 parents 172c3aa + 12e5dc4 commit f0d1751
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 2 deletions.
4 changes: 3 additions & 1 deletion ted_data_eu/services/etl_pipelines/ted_data_etl_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
END_DATE_METADATA_FIELD = "end_date"
TRIPLE_STORE_ENDPOINT = "notices"
TED_NOTICES_LINK = 'https://ted.europa.eu/udl?uri=TED:NOTICE:{notice_id}:TEXT:EN:HTML'
TRIPLE_STORE_ENDPOINT_FIELD = "triple_store_endpoint"

PROCEDURE_TYPE_COLUMN_NAME = "procedure_type"
WINNER_NUTS_COLUMN_NAME = "winner_nuts"
Expand Down Expand Up @@ -204,6 +205,7 @@ def extract(self) -> Dict:
"""
etl_metadata = self.get_metadata()
etl_metadata_fields = etl_metadata.keys()
triple_store_endpoint = etl_metadata[TRIPLE_STORE_ENDPOINT_FIELD] if TRIPLE_STORE_ENDPOINT_FIELD in etl_metadata_fields else TRIPLE_STORE_ENDPOINT
if START_DATE_METADATA_FIELD in etl_metadata_fields and END_DATE_METADATA_FIELD in etl_metadata_fields:
if START_DATE_METADATA_FIELD == END_DATE_METADATA_FIELD:
date_range = datetime.strptime(START_DATE_METADATA_FIELD, "\"%Y%m%d\"")
Expand All @@ -218,7 +220,7 @@ def extract(self) -> Dict:

sparql_query_template = Template(config.BQ_PATHS[SPARQL_QUERY_NAME].read_text(encoding='utf-8'))
sparql_query_str = sparql_query_template.substitute(date_range=date_range)
triple_store_endpoint = GraphDBAdapter().get_sparql_triple_store_endpoint(repository_name=TRIPLE_STORE_ENDPOINT)
triple_store_endpoint = GraphDBAdapter().get_sparql_triple_store_endpoint(repository_name=triple_store_endpoint)
result_table = triple_store_endpoint.with_query(sparql_query_str).fetch_tabular()
return {"data": result_table}

Expand Down
3 changes: 2 additions & 1 deletion tests/e2e/test_ted_etl.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
CONTRACT_VALUE_AVAILABLE_INDICATOR, PROCEDURE_TYPE_INDICATOR, PRODUCT_CODES_AVAILABLE_INDICATOR, LOT_NUTS_0, \
LOT_NUTS_1, LOT_NUTS_2, LOT_NUTS_3, get_country_name_by_code, BUYER_NUTS_COLUMN_NAME, PROCEDURE_ID_COLUMN_NAME, \
PROCEDURE_DESCRIPTION_COLUMN_NAME, PROCEDURE_COLUMN_NAME, TDA_FREE_INDEX_NAME, TDA_STARTER_INDEX_NAME, CPV_RANK_4, \
CPV_RANK_2, CPV_RANK_1, CPV_RANK_3, LOT_COUNTRY
CPV_RANK_2, CPV_RANK_1, CPV_RANK_3, LOT_COUNTRY, TRIPLE_STORE_ENDPOINT_FIELD


def test_get_country_name_by_code(real_country_code_alpha_2, fake_country_code_alpha_2, real_country_code_alpha_3,
Expand All @@ -20,6 +20,7 @@ def test_get_country_name_by_code(real_country_code_alpha_2, fake_country_code_a

def test_etl_pipeline(ted_data_etl_pipelines, etl_pipeline_config, graphdb_triple_store, example_notices,
tmp_repository_name):
etl_pipeline_config[TRIPLE_STORE_ENDPOINT_FIELD] = tmp_repository_name
for ted_data_etl_pipeline in ted_data_etl_pipelines:
graphdb_repositories = graphdb_triple_store.list_repositories()
if tmp_repository_name in graphdb_repositories:
Expand Down

0 comments on commit f0d1751

Please sign in to comment.