Update reprocess_unnormalised_notices_from_backlog.py #124

Merged: 15 commits, Oct 25, 2023
4 changes: 1 addition & 3 deletions .github/workflows/unit-tests.yml
@@ -26,9 +26,7 @@ jobs:
       - name: Install dependencies
         run: |
           sudo apt-get install -y libssl-dev libcurl4-openssl-dev
-          python -m pip install --upgrade setuptools pip wheel
-          make install
-          make install-dev
+          python -m pip install --upgrade setuptools pip wheel tox~=4.11.3
       - name: Make envfile
         uses: SpicyPizza/create-envfile@v1
         with:
3 changes: 2 additions & 1 deletion dags/reprocess_unnormalised_notices_from_backlog.py
@@ -31,7 +31,8 @@ def reprocess_unnormalised_notices_from_backlog():
     def select_all_raw_notices():
         start_date = get_dag_param(key=START_DATE_DAG_PARAM)
         end_date = get_dag_param(key=END_DATE_DAG_PARAM)
-        notice_ids = notice_ids_selector_by_status(notice_statuses=[NoticeStatus.RAW], start_date=start_date,
+        notice_ids = notice_ids_selector_by_status(notice_statuses=[NoticeStatus.RAW, NoticeStatus.INDEXED],
+                                                   start_date=start_date,
                                                    end_date=end_date)
         push_dag_downstream(key=NOTICE_IDS_KEY, value=notice_ids)

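With this change the backlog reprocessing task selects notices in both the RAW and the INDEXED status, not only RAW. The real selector presumably queries the notice store; the sketch below is only an illustrative in-memory version of that filter, with hypothetical field names (`notice_id`, `status`, `publication_date`) chosen for the example.

```python
# Hypothetical sketch, not the project's implementation: an in-memory equivalent of a
# status-and-date selector like notice_ids_selector_by_status. Field names are assumed.
from datetime import datetime
from typing import List, Optional


def select_notice_ids(notices: List[dict],
                      notice_statuses: List[str],
                      start_date: Optional[str] = None,
                      end_date: Optional[str] = None) -> List[str]:
    """Return ids of notices whose status is in notice_statuses and whose date is in range."""
    fmt = "%Y%m%d"  # assumed format of the DAG date params
    start = datetime.strptime(start_date, fmt) if start_date else datetime.min
    end = datetime.strptime(end_date, fmt) if end_date else datetime.max
    return [n["notice_id"] for n in notices
            if n["status"] in notice_statuses and start <= n["publication_date"] <= end]


# After the change, both RAW and INDEXED notices in the date window are picked up:
backlog = [
    {"notice_id": "1", "status": "RAW", "publication_date": datetime(2023, 10, 1)},
    {"notice_id": "2", "status": "INDEXED", "publication_date": datetime(2023, 10, 2)},
    {"notice_id": "3", "status": "NORMALISED", "publication_date": datetime(2023, 10, 3)},
]
print(select_notice_ids(backlog, ["RAW", "INDEXED"], "20231001", "20231031"))  # ['1', '2']
```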
14 changes: 7 additions & 7 deletions requirements.dev.txt
@@ -1,8 +1,8 @@
-coverage~=6.3.1
-pytest~=7.0.0
-pytest-bdd~=5.0.0
-pytest-cov~=3.0.0
-pytest-subtests~=0.6.0
-tox~=3.24.5
+coverage~=7.3.2
+pytest~=7.4.3
+pytest-bdd~=7.0.0
+pytest-cov~=4.1.0
+pytest-subtests~=0.11.0
+tox~=4.11.3
 tox-pytest-summary~=0.1.2
-mongomock==4.0.0
+mongomock~=4.1.2
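The dev dependencies move to current releases, and mongomock switches from an exact pin (==4.0.0) to a compatible-release pin (~=4.1.2). As a reminder of what `~=` allows, the snippet below checks the specifier with the `packaging` library, which implements the same semantics pip applies.

```python
# Compatible-release (~=) semantics of the new pins, verified with the packaging library.
from packaging.specifiers import SpecifierSet

spec = SpecifierSet("~=4.1.2")   # equivalent to: >=4.1.2, ==4.1.*
print("4.1.2" in spec)           # True
print("4.1.9" in spec)           # True  - patch upgrades are allowed
print("4.2.0" in spec)           # False - minor bumps are excluded
```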
2 changes: 1 addition & 1 deletion requirements.txt
@@ -1,4 +1,4 @@
-git+https://github.com/OP-TED/ted-rdf-conversion-pipeline.git@main
+git+https://github.com/OP-TED/ted-rdf-conversion-pipeline.git@1.2.0-rc.2
 elasticsearch~=8.6.2
 currencyconverter~=0.17.6
 pycountry~=22.3.5
1 change: 1 addition & 0 deletions ted_data_eu/adapters/nuts_processor.py
@@ -114,6 +114,7 @@ def __init__(self, nuts_csv: io.StringIO):
             }
         )
         self.dataframe[self.NUTS_PARENT_COLUMN_NAME] = self.dataframe[self.NUTS_PARENT_COLUMN_NAME].str.split("/").str[-1]
+        self.dataframe[self.NUTS_LABEL_COLUMN_NAME] = self.dataframe[self.NUTS_LABEL_COLUMN_NAME].str.partition(" ")[2]

     def nuts_exists(self, nuts_code: str) -> bool:
         """
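The new line strips the leading NUTS code from the label column, keeping only the name. Judging from the added unit test, raw labels look like 'BE32 Prov. Hainaut'; below is a minimal pandas illustration of the `str.partition(" ")[2]` idiom, with sample values made up for the example.

```python
# Minimal illustration of the label clean-up: str.partition(" ") splits each value on the
# first space into three columns (before, separator, after); column [2] keeps the remainder.
# Sample labels are assumptions based on the format implied by the new unit test.
import pandas as pd

labels = pd.Series(["BE32 Prov. Hainaut", "FR10 Ile-de-France"])
print(labels.str.partition(" ")[2].tolist())  # ['Prov. Hainaut', 'Ile-de-France']
```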
4 changes: 3 additions & 1 deletion ted_data_eu/services/etl_pipelines/ted_data_etl_pipeline.py
@@ -34,6 +34,7 @@
 END_DATE_METADATA_FIELD = "end_date"
 TRIPLE_STORE_ENDPOINT = "notices"
 TED_NOTICES_LINK = 'https://ted.europa.eu/udl?uri=TED:NOTICE:{notice_id}:TEXT:EN:HTML'
+TRIPLE_STORE_ENDPOINT_FIELD = "triple_store_endpoint"

 PROCEDURE_TYPE_COLUMN_NAME = "procedure_type"
 WINNER_NUTS_COLUMN_NAME = "winner_nuts"
@@ -204,6 +205,7 @@ def extract(self) -> Dict:
"""
etl_metadata = self.get_metadata()
etl_metadata_fields = etl_metadata.keys()
triple_store_endpoint = etl_metadata[TRIPLE_STORE_ENDPOINT_FIELD] if TRIPLE_STORE_ENDPOINT_FIELD in etl_metadata_fields else TRIPLE_STORE_ENDPOINT
if START_DATE_METADATA_FIELD in etl_metadata_fields and END_DATE_METADATA_FIELD in etl_metadata_fields:
if START_DATE_METADATA_FIELD == END_DATE_METADATA_FIELD:
date_range = datetime.strptime(START_DATE_METADATA_FIELD, "\"%Y%m%d\"")
@@ -218,7 +220,7 @@ def extract(self) -> Dict:

         sparql_query_template = Template(config.BQ_PATHS[SPARQL_QUERY_NAME].read_text(encoding='utf-8'))
         sparql_query_str = sparql_query_template.substitute(date_range=date_range)
-        triple_store_endpoint = GraphDBAdapter().get_sparql_triple_store_endpoint(repository_name=TRIPLE_STORE_ENDPOINT)
+        triple_store_endpoint = GraphDBAdapter().get_sparql_triple_store_endpoint(repository_name=triple_store_endpoint)
         result_table = triple_store_endpoint.with_query(sparql_query_str).fetch_tabular()
         return {"data": result_table}

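extract() can now be pointed at a specific GraphDB repository through the ETL metadata: when the "triple_store_endpoint" field is present it overrides the default "notices" repository. The conditional expression added above behaves like a plain dict lookup with a default, as in this small sketch (the helper name is made up for illustration).

```python
# Sketch of the new fallback: prefer the repository named in the ETL metadata,
# otherwise use the default. Equivalent to the conditional expression in the diff.
TRIPLE_STORE_ENDPOINT = "notices"                      # default repository name
TRIPLE_STORE_ENDPOINT_FIELD = "triple_store_endpoint"  # optional metadata key


def resolve_repository(etl_metadata: dict) -> str:    # hypothetical helper for illustration
    return etl_metadata.get(TRIPLE_STORE_ENDPOINT_FIELD, TRIPLE_STORE_ENDPOINT)


print(resolve_repository({}))                                      # notices
print(resolve_repository({"triple_store_endpoint": "tmp_repo"}))   # tmp_repo
```

This is the same override the e2e test below relies on when it sets `etl_pipeline_config[TRIPLE_STORE_ENDPOINT_FIELD] = tmp_repository_name`.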
3 changes: 2 additions & 1 deletion tests/e2e/test_ted_etl.py
@@ -7,7 +7,7 @@
     CONTRACT_VALUE_AVAILABLE_INDICATOR, PROCEDURE_TYPE_INDICATOR, PRODUCT_CODES_AVAILABLE_INDICATOR, LOT_NUTS_0, \
     LOT_NUTS_1, LOT_NUTS_2, LOT_NUTS_3, get_country_name_by_code, BUYER_NUTS_COLUMN_NAME, PROCEDURE_ID_COLUMN_NAME, \
     PROCEDURE_DESCRIPTION_COLUMN_NAME, PROCEDURE_COLUMN_NAME, TDA_FREE_INDEX_NAME, TDA_STARTER_INDEX_NAME, CPV_RANK_4, \
-    CPV_RANK_2, CPV_RANK_1, CPV_RANK_3, LOT_COUNTRY
+    CPV_RANK_2, CPV_RANK_1, CPV_RANK_3, LOT_COUNTRY, TRIPLE_STORE_ENDPOINT_FIELD


 def test_get_country_name_by_code(real_country_code_alpha_2, fake_country_code_alpha_2, real_country_code_alpha_3,
@@ -20,6 +20,7 @@ def test_get_country_name_by_code(real_country_code_alpha_2, fake_country_code_a

 def test_etl_pipeline(ted_data_etl_pipelines, etl_pipeline_config, graphdb_triple_store, example_notices,
                       tmp_repository_name):
+    etl_pipeline_config[TRIPLE_STORE_ENDPOINT_FIELD] = tmp_repository_name
     for ted_data_etl_pipeline in ted_data_etl_pipelines:
         graphdb_repositories = graphdb_triple_store.list_repositories()
         if tmp_repository_name in graphdb_repositories:
4 changes: 3 additions & 1 deletion tests/unit/test_nuts_generation.py
@@ -53,4 +53,6 @@ def test_cellar_nuts_processor(real_nuts, fake_nuts, cellar_nuts_processor):
     assert cellar_nuts_processor.get_nuts_level_by_code(nuts_code='FRK') == 1
     assert cellar_nuts_processor.get_nuts_level_by_code(nuts_code='FR') == 0
     assert cellar_nuts_processor.get_nuts_level_by_code(nuts_code='F') is None
-    assert cellar_nuts_processor.get_nuts_level_by_code(nuts_code=None) is None
+    assert cellar_nuts_processor.get_nuts_level_by_code(nuts_code=None) is None
+
+    assert cellar_nuts_processor.get_nuts_label_by_code(nuts_code='BE32') == 'Prov. Hainaut'