From 353152d044aabcf46a02b8053bb919af711f7ab7 Mon Sep 17 00:00:00 2001 From: dpavam Date: Fri, 4 Oct 2024 17:22:36 +0100 Subject: [PATCH 01/18] build: includes pydantic for validation --- impc_api_helper/pyproject.toml | 3 ++- impc_api_helper/setup.py | 1 + requirements.txt | 1 + 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/impc_api_helper/pyproject.toml b/impc_api_helper/pyproject.toml index 59bd62f..7449db5 100644 --- a/impc_api_helper/pyproject.toml +++ b/impc_api_helper/pyproject.toml @@ -14,7 +14,8 @@ authors = [ dependencies = [ "pandas>=2.2.0", "requests>=2.31.0", - "tqdm>=4.66.4" + "tqdm>=4.66.4", + "pydantic>=2.9" ] readme = "README.md" diff --git a/impc_api_helper/setup.py b/impc_api_helper/setup.py index d4d03f0..452d1f1 100644 --- a/impc_api_helper/setup.py +++ b/impc_api_helper/setup.py @@ -11,6 +11,7 @@ 'pandas>=2.2.0', 'requests>=2.31.0', 'tqdm>=4.66.4', + 'pydantic>=2.9' ], extras_require={ diff --git a/requirements.txt b/requirements.txt index c12e5ab..9157acd 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ pandas>=2.2.0 requests>=2.31.0 tqdm>=4.66.4 +pydantic>=2.9 From e088e14f933c6d577f4ff25024cd494652ba7ae8 Mon Sep 17 00:00:00 2001 From: dpavam Date: Fri, 4 Oct 2024 17:50:14 +0100 Subject: [PATCH 02/18] feat: handles core or param[fl] errors more gracefully for users. NOTE: Issue with PATH in utils.py depending on IDE used. --- .../impc_api_helper/solr_request.py | 19 ++++++- .../impc_api_helper/utils/__init__.py | 0 .../impc_api_helper/utils/core_fields.json | 15 +++++ .../impc_api_helper/utils/utils.py | 57 +++++++++++++++++++ 4 files changed, 89 insertions(+), 2 deletions(-) create mode 100644 impc_api_helper/impc_api_helper/utils/__init__.py create mode 100644 impc_api_helper/impc_api_helper/utils/core_fields.json create mode 100644 impc_api_helper/impc_api_helper/utils/utils.py diff --git a/impc_api_helper/impc_api_helper/solr_request.py b/impc_api_helper/impc_api_helper/solr_request.py index eea3ee4..cf93135 100644 --- a/impc_api_helper/impc_api_helper/solr_request.py +++ b/impc_api_helper/impc_api_helper/solr_request.py @@ -1,9 +1,11 @@ from IPython.display import display +from pydantic import ValidationError from tqdm import tqdm import pandas as pd import requests +from .utils.utils import CoreParamsValidator # Display the whole dataframe <15 pd.set_option("display.max_rows", 15) @@ -61,6 +63,18 @@ def solr_request(core, params, silent=False): ) """ + # Validate core and params + print("Validating core and params...") + try: + CoreParamsValidator( + core=core, + params=params + ) + print("Validation passed ") + except ValidationError as e: + print(f"Validation failed: {e.errors()[0].get('msg')}") + + base_url = "https://www.ebi.ac.uk/mi/impc/solr/" solr_url = base_url + core + "/select" @@ -95,8 +109,9 @@ def solr_request(core, params, silent=False): return num_found, df else: - print("Error:", response.status_code, response.text) - + # print("Error:", response.status_code, response.text) + print("Error:", response.status_code) + exit() def _process_faceting(data, params): """Processes the faceting data from an API response. diff --git a/impc_api_helper/impc_api_helper/utils/__init__.py b/impc_api_helper/impc_api_helper/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/impc_api_helper/impc_api_helper/utils/core_fields.json b/impc_api_helper/impc_api_helper/utils/core_fields.json new file mode 100644 index 0000000..d3ce064 --- /dev/null +++ b/impc_api_helper/impc_api_helper/utils/core_fields.json @@ -0,0 +1,15 @@ +{ + "experimental": [ + "id", "observation_id", "specimen_id", "phenotyping_center_id", "phenotyping_center", "production_center_id", "production_center", "specimen_project_id", "specimen_project_name", "gene_accession_id", "gene_symbol", "allele_accession_id", "allele_symbol", "zygosity", "sex", "biological_model_id", "biological_sample_id", "biological_sample_group", "strain_accession_id", "strain_name", "genetic_background", "allelic_composition", "colony_id", "litter_id", "date_of_birth", "external_sample_id", "life_stage_name", "life_stage_acc", "datasource_id", "datasource_name", "project_id", "project_name", "pipeline_id", "pipeline_name", "pipeline_stable_id", "procedure_id", "procedure_name", "procedure_stable_id", "procedure_group", "parameter_id", "parameter_name", "parameter_stable_id", "procedure_sequence_id", "experiment_id", "observation_type", "data_type", "experiment_source_id", "date_of_experiment", "weight_parameter_stable_id", "weight_date", "weight_days_old", "weight", "data_point", "order_index", "dimension", "time_point", "discrete_point", "category", "raw_category", "metadata", "metadata_group", "anatomy_id", "anatomy_term", "anatomy_id_term", "anatomy_term_synonym", "top_level_anatomy_id", "top_level_anatomy_term", "top_level_anatomy_term_synonym", "selected_top_level_anatomy_id", "selected_top_level_anatomy_term", "selected_top_level_anatomy_term_synonym", "intermediate_anatomy_id", "intermediate_anatomy_term", "intermediate_anatomy_term_synonym", "parent_anatomy_id", "parent_anatomy_term", "parent_anatomy_term_synonym", "child_anatomy_id", "child_anatomy_term", "child_anatomy_term_synonym", "download_file_path", "image_link", "file_type", "increment_value", "parameter_association_stable_id", "parameter_association_sequence_id", "parameter_association_dim_id", "parameter_association_name", "parameter_association_value", "developmental_stage_acc", "developmental_stage_name", "text_value", "sub_term_id", "sub_term_name", "sub_term_description", "age_in_days", "age_in_weeks" + ], + "genotype-phenotype": [ + "doc_id", "ontology_db_id", "assertion_type", "assertion_type_id", "mpath_term_id", "mpath_term_name", "anatomy_term_id", "anatomy_term_name", "intermediate_anatomy_term_id", "intermediate_anatomy_term_name", "top_level_anatomy_term_id", "top_level_anatomy_term_name", "mp_term_id", "mp_term_name", "alt_mp_term_id", "top_level_mp_term_id", "top_level_mp_term_name", "intermediate_mp_term_id", "intermediate_mp_term_name", "marker_symbol", "marker_accession_id", "colony_id", "allele_name", "allele_symbol", "allele_accession_id", "strain_name", "strain_accession_id", "phenotyping_center", "project_external_id", "project_name", "project_fullname", "resource_name", "resource_fullname", "sex", "zygosity", "pipeline_name", "pipeline_stable_id", "pipeline_stable_key", "procedure_name", "procedure_stable_id", "procedure_stable_key", "parameter_name", "parameter_stable_id", "parameter_stable_key", "statistical_method", "percentage_change", "p_value", "effect_size", "external_id", "life_stage_acc", "life_stage_name" + ], + "impc_images": [ + "id", "observation_id", "specimen_id", "phenotyping_center_id", "phenotyping_center", "production_center_id", "production_center", "specimen_project_id", "specimen_project_name", "gene_accession_id", "gene_symbol", "allele_accession_id", "allele_symbol", "zygosity", "sex", "biological_model_id", "biological_sample_id", "biological_sample_group", "strain_accession_id", "strain_name", "genetic_background", "allelic_composition", "colony_id", "litter_id", "date_of_birth", "external_sample_id", "life_stage_name", "life_stage_acc", "datasource_id", "datasource_name", "project_id", "project_name", "pipeline_id", "pipeline_name", "pipeline_stable_id", "procedure_id", "procedure_name", "procedure_stable_id", "procedure_group", "parameter_id", "parameter_name", "parameter_stable_id", "procedure_sequence_id", "experiment_id", "observation_type", "data_type", "experiment_source_id", "date_of_experiment", "weight_parameter_stable_id", "weight_date", "weight_days_old", "weight", "data_point", "order_index", "dimension", "time_point", "discrete_point", "category", "raw_category", "metadata", "metadata_group", "mp_id", "mp_term", "top_level_mp_id", "top_level_mp_term", "intermediate_mp_id", "intermediate_mp_term", "anatomy_id", "anatomy_term", "anatomy_id_term", "anatomy_term_synonym", "top_level_anatomy_id", "top_level_anatomy_term", "top_level_anatomy_term_synonym", "selected_top_level_anatomy_id", "selected_top_level_anatomy_term", "selected_top_level_anatomy_term_synonym", "intermediate_anatomy_id", "intermediate_anatomy_term", "intermediate_anatomy_term_synonym", "parent_anatomy_id", "parent_anatomy_term", "parent_anatomy_term_synonym", "child_anatomy_id", "child_anatomy_term", "child_anatomy_term_synonym", "download_file_path", "image_link", "file_type", "parameter_association_stable_id", "parameter_association_sequence_id", "parameter_association_dim_id", "parameter_association_name", "parameter_association_value", "developmental_stage_acc", "developmental_stage_name", "text_value", "sub_term_id", "sub_term_name", "sub_term_description", "sequence_id", "age_in_days", "age_in_weeks", "download_url", "jpeg_url", "thumbnail_url", "omero_id" + ], + "phenodigm": [ + "type", "disease_id", "disease_source", "disease_term", "disease_alts", "disease_locus", "disease_classes", "disease_phenotypes", "gene_id", "gene_symbol", "gene_symbols_withdrawn", "gene_locus", "hgnc_gene_id", "hgnc_gene_symbol", "hgnc_gene_symbols_withdrawn", "hgnc_gene_locus", "mouse_model", "impc_model", "model_id", "model_source", "model_description", "model_genetic_background", "marker_id", "marker_symbol", "marker_locus", "marker_num_models", "model_phenotypes", "ontology", "phenotype_id", "phenotype_term", "phenotype_synonym", "hp_id", "hp_term", "mp_id", "mp_term", "association_curated", "association_ortholog", "marker_symbols_withdrawn", "disease_matched_phenotypes", "model_matched_phenotypes", "disease_model_avg_raw", "disease_model_avg_norm", "disease_model_max_raw", "disease_model_max_norm", "search_qf", "human_curated_gene", "impc_model_with_curated_gene", "mgi_model_with_curated_gene", "impc_model_with_computed_association", "mgi_model_with_computed_association" + ], + "statistical-result": ["doc_id", "db_id", "data_type", "anatomy_term_id", "anatomy_term_name", "intermediate_anatomy_term_id", "intermediate_anatomy_term_name", "top_level_anatomy_term_id", "top_level_anatomy_term_name", "mp_term_id_options", "mp_term_name_options", "mp_term_id", "mp_term_name", "top_level_mp_term_id", "top_level_mp_term_name", "intermediate_mp_term_id", "intermediate_mp_term_name", "male_mp_term_id", "male_mp_term_name", "male_top_level_mp_term_id", "male_top_level_mp_term_name", "male_intermediate_mp_term_id", "male_intermediate_mp_term_name", "female_mp_term_id", "female_mp_term_name", "female_top_level_mp_term_id", "female_top_level_mp_term_name", "female_intermediate_mp_term_id", "female_intermediate_mp_term_name", "resource_name", "resource_fullname", "resource_id", "project_name", "phenotyping_center", "pipeline_stable_id", "pipeline_stable_key", "pipeline_name", "pipeline_id", "procedure_stable_id", "procedure_stable_key", "procedure_name", "procedure_id", "parameter_stable_id", "parameter_stable_key", "parameter_name", "parameter_id", "colony_id", "marker_symbol", "marker_accession_id", "allele_symbol", "allele_name", "allele_accession_id", "strain_name", "strain_accession_id", "sex", "zygosity", "control_selection_method", "dependent_variable", "metadata_group", "data_frame", "genetic_background", "production_center", "external_db_id", "id", "organisation_id", "phenotyping_center_id", "project_id", "male_control_mean", "male_mutant_mean", "female_control_mean", "female_mutant_mean", "genotype_p_value_low_vs_normal_high", "genotype_p_value_low_normal_vs_high", "genotype_effect_size_low_vs_normal_high", "genotype_effect_size_low_normal_vs_high", "female_p_value_low_vs_normal_high", "female_p_value_low_normal_vs_high", "female_effect_size_low_vs_normal_high", "female_effect_size_low_normal_vs_high", "male_p_value_low_vs_normal_high", "male_p_value_low_normal_vs_high", "male_effect_size_low_vs_normal_high", "male_effect_size_low_normal_vs_high", "categories", "categorical_p_value", "categorical_effect_size", "batch_significant", "variance_significant", "null_test_p_value", "genotype_effect_p_value", "genotype_effect_stderr_estimate", "genotype_effect_parameter_estimate", "male_percentage_change", "female_percentage_change", "sex_effect_p_value", "sex_effect_stderr_estimate", "sex_effect_parameter_estimate", "weight_effect_p_value", "weight_effect_stderr_estimate", "weight_effect_parameter_estimate", "group1_genotype", "group1_residuals_normality_test", "group2_genotype", "group2_residuals_normality_test", "blups_test", "rotated_residuals_test", "intercept_estimate", "intercept_estimate_stderr_estimate", "interaction_significant", "interaction_effect_p_value", "female_ko_effect_p_value", "female_ko_effect_stderr_estimate", "female_ko_parameter_estimate", "female_effect_size", "male_ko_effect_p_value", "male_ko_effect_stderr_estimate", "male_ko_parameter_estimate", "male_effect_size", "classification_tag", "phenotype_sex", "life_stage_acc", "life_stage_name", "significant", "soft_windowing_bandwidth", "soft_windowing_shape", "soft_windowing_peaks", "soft_windowing_min_obs_required", "soft_windowing_total_obs_or_weight", "soft_windowing_threshold", "soft_windowing_number_of_doe", "soft_windowing_doe_note", "metadata"] +} diff --git a/impc_api_helper/impc_api_helper/utils/utils.py b/impc_api_helper/impc_api_helper/utils/utils.py new file mode 100644 index 0000000..81f6598 --- /dev/null +++ b/impc_api_helper/impc_api_helper/utils/utils.py @@ -0,0 +1,57 @@ +from pydantic import BaseModel, model_validator +import json +from typing import List, Dict +from pathlib import Path +import warnings + +CORE_FILE = Path('impc_api_helper', 'impc_api_helper', 'utils', 'core_fields.json') + +# Define the dictionary with available options: core:fields +def load_core_fields(filename: Path): + with open(filename, "r") as f: + validation_dict = json.load(f) + return validation_dict + +# Define the validation dict +validation_json = load_core_fields(CORE_FILE) + +# Function to parse the fields (fl) params in params +def get_fields(fields: str) -> List[str]: + return fields.split(",") + + +class CoreParamsValidator(BaseModel): + core: str + params: Dict + + @model_validator(mode='before') + @classmethod + def validate_core_and_fields(cls, values): + core = values.get("core") + params = values.get("params") + + # Validate core + if core not in validation_json.keys(): + raise ValueError(f'Invalid core: "{core}", select from the available:\n{validation_json.keys()})') + + # Compare passed fl values vs the allowed fl values for a given core + fields: str = params.get("fl") + + # If no fields were specified, pass + if fields is None: + print("No fields passed, skipping field validation...") + return values + + # Get the fields passed to params and the expected fields for the core + field_list: List[str] = get_fields(fields) + accepted_core_fields: List[str] = validation_json.get(core, []) + + # Validate each field in params + for fl in field_list: + if fl not in accepted_core_fields: + warnings.warn(f'Invalid field: "{fl}". Check spelling of fields. To see available fields check the documentation at: https://www.ebi.ac.uk/mi/impc/solrdoc/', + category=UserWarning) + # Return validated values + return values + +# Check and raise error as needed. From 7c280f664343a5f1af75382ce3af964eacf7ee63 Mon Sep 17 00:00:00 2001 From: dpavam Date: Tue, 8 Oct 2024 11:19:54 +0100 Subject: [PATCH 03/18] build: update setup.py and pyproject.toml settings to include the utils module --- impc_api_helper/pyproject.toml | 4 ++-- impc_api_helper/setup.py | 4 +++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/impc_api_helper/pyproject.toml b/impc_api_helper/pyproject.toml index 7449db5..ee6fc5e 100644 --- a/impc_api_helper/pyproject.toml +++ b/impc_api_helper/pyproject.toml @@ -26,9 +26,9 @@ dev = [ "pytest>=8.2.2" ] +[tool.setuptools.packages.find] +include = ["impc_api_helper", "impc_api_helper.*"] [project.urls] "Homepage" = "https://github.com/mpi2/impc-data-api-workshop" -[tool.setuptools] -packages = ["impc_api_helper"] \ No newline at end of file diff --git a/impc_api_helper/setup.py b/impc_api_helper/setup.py index 452d1f1..3307bf7 100644 --- a/impc_api_helper/setup.py +++ b/impc_api_helper/setup.py @@ -1,12 +1,14 @@ from setuptools import setup, find_packages + setup( name='impc_api_helper', version='0.1.0', description='A package to facilitate making API request to the IMPC Solr API', author='MPI2, Marina Kan, Diego Pava', url='https://github.com/mpi2/impc-data-api-workshop', - packages=find_packages(), + packages=find_packages(include=["impc_api_helper", "impc_api_helper.*"]), + include_package_data=True, install_requires=[ 'pandas>=2.2.0', 'requests>=2.31.0', From 1ef3ca38e6e6b5e06137e018f6694bf6e49b27c3 Mon Sep 17 00:00:00 2001 From: dpavam Date: Tue, 8 Oct 2024 11:20:46 +0100 Subject: [PATCH 04/18] fix: adds utils to __init__.py for correct imports --- impc_api_helper/impc_api_helper/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/impc_api_helper/impc_api_helper/__init__.py b/impc_api_helper/impc_api_helper/__init__.py index 4531621..c819a69 100644 --- a/impc_api_helper/impc_api_helper/__init__.py +++ b/impc_api_helper/impc_api_helper/__init__.py @@ -1,5 +1,6 @@ from .solr_request import solr_request, batch_request from .iterator_solr_request import iterator_solr_request +from .utils import validators, warnings # Control what gets imported by client __all__ = ["solr_request", "batch_request", "iterator_solr_request"] From 90908c27211cb0c9ab88003df8e31c2a292192c0 Mon Sep 17 00:00:00 2001 From: dpavam Date: Tue, 8 Oct 2024 11:22:03 +0100 Subject: [PATCH 05/18] feat: changes utils to validators. Adds a json validation class --- .../impc_api_helper/utils/utils.py | 57 ------------- .../impc_api_helper/utils/validators.py | 79 +++++++++++++++++++ 2 files changed, 79 insertions(+), 57 deletions(-) delete mode 100644 impc_api_helper/impc_api_helper/utils/utils.py create mode 100644 impc_api_helper/impc_api_helper/utils/validators.py diff --git a/impc_api_helper/impc_api_helper/utils/utils.py b/impc_api_helper/impc_api_helper/utils/utils.py deleted file mode 100644 index 81f6598..0000000 --- a/impc_api_helper/impc_api_helper/utils/utils.py +++ /dev/null @@ -1,57 +0,0 @@ -from pydantic import BaseModel, model_validator -import json -from typing import List, Dict -from pathlib import Path -import warnings - -CORE_FILE = Path('impc_api_helper', 'impc_api_helper', 'utils', 'core_fields.json') - -# Define the dictionary with available options: core:fields -def load_core_fields(filename: Path): - with open(filename, "r") as f: - validation_dict = json.load(f) - return validation_dict - -# Define the validation dict -validation_json = load_core_fields(CORE_FILE) - -# Function to parse the fields (fl) params in params -def get_fields(fields: str) -> List[str]: - return fields.split(",") - - -class CoreParamsValidator(BaseModel): - core: str - params: Dict - - @model_validator(mode='before') - @classmethod - def validate_core_and_fields(cls, values): - core = values.get("core") - params = values.get("params") - - # Validate core - if core not in validation_json.keys(): - raise ValueError(f'Invalid core: "{core}", select from the available:\n{validation_json.keys()})') - - # Compare passed fl values vs the allowed fl values for a given core - fields: str = params.get("fl") - - # If no fields were specified, pass - if fields is None: - print("No fields passed, skipping field validation...") - return values - - # Get the fields passed to params and the expected fields for the core - field_list: List[str] = get_fields(fields) - accepted_core_fields: List[str] = validation_json.get(core, []) - - # Validate each field in params - for fl in field_list: - if fl not in accepted_core_fields: - warnings.warn(f'Invalid field: "{fl}". Check spelling of fields. To see available fields check the documentation at: https://www.ebi.ac.uk/mi/impc/solrdoc/', - category=UserWarning) - # Return validated values - return values - -# Check and raise error as needed. diff --git a/impc_api_helper/impc_api_helper/utils/validators.py b/impc_api_helper/impc_api_helper/utils/validators.py new file mode 100644 index 0000000..25627d5 --- /dev/null +++ b/impc_api_helper/impc_api_helper/utils/validators.py @@ -0,0 +1,79 @@ +from pydantic import BaseModel, model_validator +import json +from typing import List, Dict +from pathlib import Path +import warnings +from dataclasses import dataclass, field +from impc_api_helper.utils.warnings import warning_config, InvalidCoreWarning, InvalidFieldWarning + +# Initialise warning config +warning_config() + +# Dataclass for the json validator +@dataclass +class ValidationJson: + CORE_FILE: Path = Path('impc_api_helper', 'impc_api_helper', 'utils', 'core_fields.json') + _validation_json: Dict[str, List[str]] = field(default_factory=dict, init=False) + + # Eager initialisation + def __post_init__(self): + self._validation_json = self.load_core_fields(self.CORE_FILE) + + def load_core_fields(self, filename: Path) -> Dict[str, List[str]]: + with open(filename, "r") as f: + return json.load(f) + + def valid_cores(self): + return self._validation_json.keys() + + def valid_fields(self, core: str) -> List[str]: + return self._validation_json.get(core, []) + +# Function to parse the fields (fl) params in params +def get_fields(fields: str) -> List[str]: + return fields.split(",") + + +class CoreParamsValidator(BaseModel): + core: str + params: Dict + + @model_validator(mode='before') + @classmethod + def validate_core_and_fields(cls, values): + core = values.get("core") + params = values.get("params") + + # Call the Validator Object + jv = ValidationJson() + + # Validate core + if core not in jv.valid_cores(): + warnings.warn( + message=f'Invalid core: "{core}", select from the available cores:\n{jv.valid_cores()})\n', + category=InvalidCoreWarning) + + # Compare passed fl values vs the allowed fl values for a given core + fields: str = params.get("fl") + + # If no fields were specified, pass + if fields is None: + print("No fields passed, skipping field validation...") + return values + + # Get the fields passed to params and the expected fields for the core + field_list: List[str] = get_fields(fields) + + + # Validate each field in params + # TODO: perhaps pass al invalid fields as a list, instead of many warning messages + for fl in field_list: + if fl not in jv.valid_fields(core): + warnings.warn(message=f"""Unexpected field name: "{fl}". Check the spelling of fields.\nTo see expected fields check the documentation at: https://www.ebi.ac.uk/mi/impc/solrdoc/""", + category=InvalidFieldWarning) + # Return validated values + return values + + + + From 7fcf7bc9dae17a587705ee12aa15440d2b78c3e1 Mon Sep 17 00:00:00 2001 From: dpavam Date: Tue, 8 Oct 2024 11:22:18 +0100 Subject: [PATCH 06/18] feat: creates custom warnings to inform the user about validation errors --- .../impc_api_helper/utils/warnings.py | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 impc_api_helper/impc_api_helper/utils/warnings.py diff --git a/impc_api_helper/impc_api_helper/utils/warnings.py b/impc_api_helper/impc_api_helper/utils/warnings.py new file mode 100644 index 0000000..d344092 --- /dev/null +++ b/impc_api_helper/impc_api_helper/utils/warnings.py @@ -0,0 +1,24 @@ +"""Module for warnings and excepton utils""" + +import warnings + + +# Custom warnings +class InvalidCoreWarning(Warning): + """Exception raised when the core is not in the expected core names""" + + +class InvalidFieldWarning(Warning): + """Exception raised when the field name is not in the expected fields""" + + +# Custom warning function +def warning_config(): + """Customises formatting and filters for warnings""" + + def custom_warning(message, category, filename, lineno, line=None): + return f'{category.__name__}: {message}\n' + + warnings.formatwarning = custom_warning + warnings.simplefilter("always", Warning) + From 04dcf73c2e3e20d23b8f6b7d4bb5c9dbfad31808 Mon Sep 17 00:00:00 2001 From: dpavam Date: Tue, 8 Oct 2024 11:22:50 +0100 Subject: [PATCH 07/18] feat: Adds an optional validation argument to check params --- .../impc_api_helper/solr_request.py | 22 +++++++------------ 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/impc_api_helper/impc_api_helper/solr_request.py b/impc_api_helper/impc_api_helper/solr_request.py index cf93135..cc4664a 100644 --- a/impc_api_helper/impc_api_helper/solr_request.py +++ b/impc_api_helper/impc_api_helper/solr_request.py @@ -1,11 +1,8 @@ from IPython.display import display -from pydantic import ValidationError from tqdm import tqdm - - import pandas as pd import requests -from .utils.utils import CoreParamsValidator +from impc_api_helper.utils.validators import CoreParamsValidator # Display the whole dataframe <15 pd.set_option("display.max_rows", 15) @@ -13,7 +10,7 @@ # Create helper function -def solr_request(core, params, silent=False): +def solr_request(core, params, silent=False, validate=False): """Performs a single Solr request to the IMPC Solr API. Args: @@ -21,7 +18,10 @@ def solr_request(core, params, silent=False): params (dict): dictionary containing the API call parameters. silent (bool, optional): default False If True, displays: URL of API call, the number of found docs - and a portion of the DataFrame. + and a portion of the DataFrame. + validate (bool, optional): default False + If True, validates the parameters against the core schema and raises warnings + if any parameter seems invalid. Returns: @@ -63,17 +63,11 @@ def solr_request(core, params, silent=False): ) """ - # Validate core and params - print("Validating core and params...") - try: + if validate: CoreParamsValidator( core=core, params=params ) - print("Validation passed ") - except ValidationError as e: - print(f"Validation failed: {e.errors()[0].get('msg')}") - base_url = "https://www.ebi.ac.uk/mi/impc/solr/" solr_url = base_url + core + "/select" @@ -111,7 +105,7 @@ def solr_request(core, params, silent=False): else: # print("Error:", response.status_code, response.text) print("Error:", response.status_code) - exit() + def _process_faceting(data, params): """Processes the faceting data from an API response. From 2eb40d64cc4a75b12e2ead303832cba5e988771c Mon Sep 17 00:00:00 2001 From: dpavam Date: Fri, 11 Oct 2024 16:10:08 +0100 Subject: [PATCH 08/18] fix: path to core_fields.json changed for relative imports and MANIFEST.ini to be included when building the package --- impc_api_helper/MANIFEST.in | 1 + impc_api_helper/impc_api_helper/utils/validators.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 impc_api_helper/MANIFEST.in diff --git a/impc_api_helper/MANIFEST.in b/impc_api_helper/MANIFEST.in new file mode 100644 index 0000000..e833b2e --- /dev/null +++ b/impc_api_helper/MANIFEST.in @@ -0,0 +1 @@ +include impc_api_helper/utils/core_fields.json \ No newline at end of file diff --git a/impc_api_helper/impc_api_helper/utils/validators.py b/impc_api_helper/impc_api_helper/utils/validators.py index 25627d5..64a49cc 100644 --- a/impc_api_helper/impc_api_helper/utils/validators.py +++ b/impc_api_helper/impc_api_helper/utils/validators.py @@ -12,7 +12,7 @@ # Dataclass for the json validator @dataclass class ValidationJson: - CORE_FILE: Path = Path('impc_api_helper', 'impc_api_helper', 'utils', 'core_fields.json') + CORE_FILE: Path = Path(__file__).resolve().parent / 'core_fields.json' _validation_json: Dict[str, List[str]] = field(default_factory=dict, init=False) # Eager initialisation From 1d3e15d693bca4fe648335c178701886924b4db3 Mon Sep 17 00:00:00 2001 From: dpavam Date: Fri, 11 Oct 2024 16:10:26 +0100 Subject: [PATCH 09/18] fix: corrects typo in experiment core key --- impc_api_helper/impc_api_helper/utils/core_fields.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/impc_api_helper/impc_api_helper/utils/core_fields.json b/impc_api_helper/impc_api_helper/utils/core_fields.json index d3ce064..48454d9 100644 --- a/impc_api_helper/impc_api_helper/utils/core_fields.json +++ b/impc_api_helper/impc_api_helper/utils/core_fields.json @@ -1,5 +1,5 @@ { - "experimental": [ + "experiment": [ "id", "observation_id", "specimen_id", "phenotyping_center_id", "phenotyping_center", "production_center_id", "production_center", "specimen_project_id", "specimen_project_name", "gene_accession_id", "gene_symbol", "allele_accession_id", "allele_symbol", "zygosity", "sex", "biological_model_id", "biological_sample_id", "biological_sample_group", "strain_accession_id", "strain_name", "genetic_background", "allelic_composition", "colony_id", "litter_id", "date_of_birth", "external_sample_id", "life_stage_name", "life_stage_acc", "datasource_id", "datasource_name", "project_id", "project_name", "pipeline_id", "pipeline_name", "pipeline_stable_id", "procedure_id", "procedure_name", "procedure_stable_id", "procedure_group", "parameter_id", "parameter_name", "parameter_stable_id", "procedure_sequence_id", "experiment_id", "observation_type", "data_type", "experiment_source_id", "date_of_experiment", "weight_parameter_stable_id", "weight_date", "weight_days_old", "weight", "data_point", "order_index", "dimension", "time_point", "discrete_point", "category", "raw_category", "metadata", "metadata_group", "anatomy_id", "anatomy_term", "anatomy_id_term", "anatomy_term_synonym", "top_level_anatomy_id", "top_level_anatomy_term", "top_level_anatomy_term_synonym", "selected_top_level_anatomy_id", "selected_top_level_anatomy_term", "selected_top_level_anatomy_term_synonym", "intermediate_anatomy_id", "intermediate_anatomy_term", "intermediate_anatomy_term_synonym", "parent_anatomy_id", "parent_anatomy_term", "parent_anatomy_term_synonym", "child_anatomy_id", "child_anatomy_term", "child_anatomy_term_synonym", "download_file_path", "image_link", "file_type", "increment_value", "parameter_association_stable_id", "parameter_association_sequence_id", "parameter_association_dim_id", "parameter_association_name", "parameter_association_value", "developmental_stage_acc", "developmental_stage_name", "text_value", "sub_term_id", "sub_term_name", "sub_term_description", "age_in_days", "age_in_weeks" ], "genotype-phenotype": [ From da18efb36eeb3952da53b277df5515a5e1609d1c Mon Sep 17 00:00:00 2001 From: dpavam Date: Fri, 11 Oct 2024 16:11:00 +0100 Subject: [PATCH 10/18] feat: adds a boolean value to avoid raising a field warning if the core is invalid --- impc_api_helper/impc_api_helper/utils/validators.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/impc_api_helper/impc_api_helper/utils/validators.py b/impc_api_helper/impc_api_helper/utils/validators.py index 64a49cc..ef2d751 100644 --- a/impc_api_helper/impc_api_helper/utils/validators.py +++ b/impc_api_helper/impc_api_helper/utils/validators.py @@ -41,6 +41,7 @@ class CoreParamsValidator(BaseModel): @model_validator(mode='before') @classmethod def validate_core_and_fields(cls, values): + invalid_core: bool = False core = values.get("core") params = values.get("params") @@ -49,6 +50,7 @@ def validate_core_and_fields(cls, values): # Validate core if core not in jv.valid_cores(): + invalid_core = True warnings.warn( message=f'Invalid core: "{core}", select from the available cores:\n{jv.valid_cores()})\n', category=InvalidCoreWarning) @@ -67,10 +69,11 @@ def validate_core_and_fields(cls, values): # Validate each field in params # TODO: perhaps pass al invalid fields as a list, instead of many warning messages - for fl in field_list: - if fl not in jv.valid_fields(core): - warnings.warn(message=f"""Unexpected field name: "{fl}". Check the spelling of fields.\nTo see expected fields check the documentation at: https://www.ebi.ac.uk/mi/impc/solrdoc/""", - category=InvalidFieldWarning) + if invalid_core is not True: + for fl in field_list: + if fl not in jv.valid_fields(core): + warnings.warn(message=f"""Unexpected field name: "{fl}". Check the spelling of fields.\nTo see expected fields check the documentation at: https://www.ebi.ac.uk/mi/impc/solrdoc/""", + category=InvalidFieldWarning) # Return validated values return values From 5b188251d9f5ab3688591bf28a2cf2e14765bb0b Mon Sep 17 00:00:00 2001 From: dpavam Date: Fri, 11 Oct 2024 16:12:04 +0100 Subject: [PATCH 11/18] tests: adds basic tests for core and params["fl"] validation --- impc_api_helper/tests/test_solr_request.py | 47 +++++++++++++++++++++- 1 file changed, 46 insertions(+), 1 deletion(-) diff --git a/impc_api_helper/tests/test_solr_request.py b/impc_api_helper/tests/test_solr_request.py index a0a33e0..68f5ed4 100644 --- a/impc_api_helper/tests/test_solr_request.py +++ b/impc_api_helper/tests/test_solr_request.py @@ -2,7 +2,7 @@ from unittest.mock import patch from solr_request import solr_request, _process_faceting from .test_helpers import check_url_status_code_and_params - +from impc_api_helper.utils.warnings import InvalidCoreWarning, InvalidFieldWarning class TestSolrRequest: """Test class for the Solr Request function @@ -286,3 +286,48 @@ def test_process_faceting(self, params, data): assert df.iloc[1, 1] == 9 assert df.iloc[2, 0] == "banana" assert df.iloc[2, 1] == 24 + + + @pytest.mark.parametrize( + "mock_response", + [ + { + "status_code": 200, + "json": { + "response": { + "numFound": 101, + "docs": [ + ], + } + }, + }, + ], + indirect=['mock_response'] + ) + def test_solr_request_core_validation(self, common_params, mock_response): + with pytest.warns(InvalidCoreWarning): + _ = solr_request(core='invalid_core', params=common_params, validate=True) + + @pytest.mark.parametrize( + "mock_response", + [ + { + "status_code": 200, + "json": { + "response": { + "numFound": 101, + "docs": [ + ], + } + }, + }, + ], + indirect=['mock_response'] + ) + def test_solr_request_fields_validation(self, mock_response): + with pytest.warns(InvalidFieldWarning): + _ = solr_request(core="experiment", + params={'q':'*:*', + 'fl': 'invalid_field,another_invalid_field'}, + validate=True) + \ No newline at end of file From 716b00180c6e3fa3a6dc82fc8a7e3a97e7f6eebc Mon Sep 17 00:00:00 2001 From: dpavam Date: Fri, 11 Oct 2024 16:26:54 +0100 Subject: [PATCH 12/18] refactor: consice params for the test_solr_validation tests --- impc_api_helper/tests/test_solr_request.py | 34 +++++----------------- 1 file changed, 7 insertions(+), 27 deletions(-) diff --git a/impc_api_helper/tests/test_solr_request.py b/impc_api_helper/tests/test_solr_request.py index 68f5ed4..92d2024 100644 --- a/impc_api_helper/tests/test_solr_request.py +++ b/impc_api_helper/tests/test_solr_request.py @@ -287,43 +287,23 @@ def test_process_faceting(self, params, data): assert df.iloc[2, 0] == "banana" assert df.iloc[2, 1] == 24 - - @pytest.mark.parametrize( - "mock_response", - [ - { + # Validation tests + def _validation_response(): + return { "status_code": 200, "json": { "response": { "numFound": 101, - "docs": [ - ], + "docs": [], } }, - }, - ], - indirect=['mock_response'] - ) + } + @pytest.mark.parametrize("mock_response",[_validation_response()], indirect=['mock_response']) def test_solr_request_core_validation(self, common_params, mock_response): with pytest.warns(InvalidCoreWarning): _ = solr_request(core='invalid_core', params=common_params, validate=True) - @pytest.mark.parametrize( - "mock_response", - [ - { - "status_code": 200, - "json": { - "response": { - "numFound": 101, - "docs": [ - ], - } - }, - }, - ], - indirect=['mock_response'] - ) + @pytest.mark.parametrize("mock_response",[_validation_response()], indirect=['mock_response']) def test_solr_request_fields_validation(self, mock_response): with pytest.warns(InvalidFieldWarning): _ = solr_request(core="experiment", From d2cd9c1241c8df8862e812c3d25d41006d1dc9d8 Mon Sep 17 00:00:00 2001 From: dpavam Date: Fri, 11 Oct 2024 16:27:44 +0100 Subject: [PATCH 13/18] style: formatted test_solr_request module --- impc_api_helper/tests/test_solr_request.py | 36 +++++++++++++--------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/impc_api_helper/tests/test_solr_request.py b/impc_api_helper/tests/test_solr_request.py index 92d2024..b0df4ca 100644 --- a/impc_api_helper/tests/test_solr_request.py +++ b/impc_api_helper/tests/test_solr_request.py @@ -4,6 +4,7 @@ from .test_helpers import check_url_status_code_and_params from impc_api_helper.utils.warnings import InvalidCoreWarning, InvalidFieldWarning + class TestSolrRequest: """Test class for the Solr Request function @@ -290,24 +291,29 @@ def test_process_faceting(self, params, data): # Validation tests def _validation_response(): return { - "status_code": 200, - "json": { - "response": { - "numFound": 101, - "docs": [], - } - }, + "status_code": 200, + "json": { + "response": { + "numFound": 101, + "docs": [], } - @pytest.mark.parametrize("mock_response",[_validation_response()], indirect=['mock_response']) + }, + } + + @pytest.mark.parametrize( + "mock_response", [_validation_response()], indirect=["mock_response"] + ) def test_solr_request_core_validation(self, common_params, mock_response): with pytest.warns(InvalidCoreWarning): - _ = solr_request(core='invalid_core', params=common_params, validate=True) + _ = solr_request(core="invalid_core", params=common_params, validate=True) - @pytest.mark.parametrize("mock_response",[_validation_response()], indirect=['mock_response']) + @pytest.mark.parametrize( + "mock_response", [_validation_response()], indirect=["mock_response"] + ) def test_solr_request_fields_validation(self, mock_response): with pytest.warns(InvalidFieldWarning): - _ = solr_request(core="experiment", - params={'q':'*:*', - 'fl': 'invalid_field,another_invalid_field'}, - validate=True) - \ No newline at end of file + _ = solr_request( + core="experiment", + params={"q": "*:*", "fl": "invalid_field,another_invalid_field"}, + validate=True, + ) From 065614053ccaaa32dfc6445a7c970d900a51cd12 Mon Sep 17 00:00:00 2001 From: dpavam Date: Fri, 11 Oct 2024 17:01:14 +0100 Subject: [PATCH 14/18] chore: update README.md with the validators use cases --- impc_api_helper/README.md | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/impc_api_helper/README.md b/impc_api_helper/README.md index aa8635b..022e82b 100644 --- a/impc_api_helper/README.md +++ b/impc_api_helper/README.md @@ -25,6 +25,35 @@ num_found, df = solr_request( core='genotype-phenotype', params={ ) ``` +#### Solr request validation +A common pitfall when writing a query is the misspelling of `core` and `fields` arguments. For this, we have included an `validate` argument that raises a warning when these values are not as expected. Note this does not prevent you from executing a query; it just alerts you to a potential issue. + +##### Core validation +``` +num_found, df = solr_request( core='invalid_core', params={ + 'q': '*:*', + 'rows': 10 + }, + validate=True +) + +> InvalidCoreWarning: Invalid core: "genotype-phenotyp", select from the available cores: +> dict_keys(['experiment', 'genotype-phenotype', 'impc_images', 'phenodigm', 'statistical-result'])) +``` + +##### Field list validation +``` +num_found, df = solr_request( core='genotype-phenotype', params={ + 'q': '*:*', + 'rows': 10, + 'fl': 'invalid_field,marker_symbol,allele_symbol' + }, + validate=True +) +> InvalidFieldWarning: Unexpected field name: "invalid_field". Check the spelling of fields. +> To see expected fields check the documentation at: https://www.ebi.ac.uk/mi/impc/solrdoc/ +``` + ### Batch request For larger requests, use the batch request function to query the API responsibly. ``` From d5a309f512a80f6d421b56c60f7a7ca5ca3d9dda Mon Sep 17 00:00:00 2001 From: dpavam Date: Mon, 14 Oct 2024 15:18:42 +0100 Subject: [PATCH 15/18] feat: Reverts to showing full error message to users --- impc_api_helper/impc_api_helper/solr_request.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/impc_api_helper/impc_api_helper/solr_request.py b/impc_api_helper/impc_api_helper/solr_request.py index cc4664a..888bf14 100644 --- a/impc_api_helper/impc_api_helper/solr_request.py +++ b/impc_api_helper/impc_api_helper/solr_request.py @@ -103,8 +103,7 @@ def solr_request(core, params, silent=False, validate=False): return num_found, df else: - # print("Error:", response.status_code, response.text) - print("Error:", response.status_code) + print("Error:", response.status_code, response.text) def _process_faceting(data, params): From 6495db35789621bc351fd0d7390ba29b5e1e0bb3 Mon Sep 17 00:00:00 2001 From: dpavam Date: Mon, 14 Oct 2024 15:25:47 +0100 Subject: [PATCH 16/18] chore: update gitignore --- .gitignore | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.gitignore b/.gitignore index c47ace4..835c29e 100644 --- a/.gitignore +++ b/.gitignore @@ -12,3 +12,7 @@ dist/ *.pytest* *.pytest_cache __pycache__ + + +# Local notes +notes.md \ No newline at end of file From f9fcfc50cd9b07206411b36ad6f059399980cd2b Mon Sep 17 00:00:00 2001 From: dpavam <93325413+dpavam@users.noreply.github.com> Date: Mon, 14 Oct 2024 15:27:31 +0100 Subject: [PATCH 17/18] Update impc_api_helper/impc_api_helper/utils/warnings.py Co-authored-by: Marina Kan <113850522+marinak-ebi@users.noreply.github.com> --- impc_api_helper/impc_api_helper/utils/warnings.py | 1 - 1 file changed, 1 deletion(-) diff --git a/impc_api_helper/impc_api_helper/utils/warnings.py b/impc_api_helper/impc_api_helper/utils/warnings.py index d344092..6f154f9 100644 --- a/impc_api_helper/impc_api_helper/utils/warnings.py +++ b/impc_api_helper/impc_api_helper/utils/warnings.py @@ -21,4 +21,3 @@ def custom_warning(message, category, filename, lineno, line=None): warnings.formatwarning = custom_warning warnings.simplefilter("always", Warning) - From 66d26eeb84c48d2894af79f33345901b7cfeaa80 Mon Sep 17 00:00:00 2001 From: dpavam <93325413+dpavam@users.noreply.github.com> Date: Mon, 14 Oct 2024 15:27:39 +0100 Subject: [PATCH 18/18] Update impc_api_helper/impc_api_helper/utils/validators.py Co-authored-by: Marina Kan <113850522+marinak-ebi@users.noreply.github.com> --- impc_api_helper/impc_api_helper/utils/validators.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/impc_api_helper/impc_api_helper/utils/validators.py b/impc_api_helper/impc_api_helper/utils/validators.py index ef2d751..1bc959a 100644 --- a/impc_api_helper/impc_api_helper/utils/validators.py +++ b/impc_api_helper/impc_api_helper/utils/validators.py @@ -76,7 +76,3 @@ def validate_core_and_fields(cls, values): category=InvalidFieldWarning) # Return validated values return values - - - -