-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #14 from mpi2/12-solr_request-should-handle-errors…
…-more-gracefully [Solr_request] - 12 solr request should handle errors more gracefully
- Loading branch information
Showing
13 changed files
with
203 additions
and
8 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -12,3 +12,7 @@ dist/ | |
*.pytest* | ||
*.pytest_cache | ||
__pycache__ | ||
|
||
|
||
# Local notes | ||
notes.md |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
include impc_api_helper/utils/core_fields.json |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,6 @@ | ||
from .solr_request import solr_request, batch_request | ||
from .iterator_solr_request import iterator_solr_request | ||
from .utils import validators, warnings | ||
|
||
# Public API of the package: the names exported by a star-import.
__all__ = ["solr_request", "batch_request", "iterator_solr_request"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
{ | ||
"experiment": [ | ||
"id", "observation_id", "specimen_id", "phenotyping_center_id", "phenotyping_center", "production_center_id", "production_center", "specimen_project_id", "specimen_project_name", "gene_accession_id", "gene_symbol", "allele_accession_id", "allele_symbol", "zygosity", "sex", "biological_model_id", "biological_sample_id", "biological_sample_group", "strain_accession_id", "strain_name", "genetic_background", "allelic_composition", "colony_id", "litter_id", "date_of_birth", "external_sample_id", "life_stage_name", "life_stage_acc", "datasource_id", "datasource_name", "project_id", "project_name", "pipeline_id", "pipeline_name", "pipeline_stable_id", "procedure_id", "procedure_name", "procedure_stable_id", "procedure_group", "parameter_id", "parameter_name", "parameter_stable_id", "procedure_sequence_id", "experiment_id", "observation_type", "data_type", "experiment_source_id", "date_of_experiment", "weight_parameter_stable_id", "weight_date", "weight_days_old", "weight", "data_point", "order_index", "dimension", "time_point", "discrete_point", "category", "raw_category", "metadata", "metadata_group", "anatomy_id", "anatomy_term", "anatomy_id_term", "anatomy_term_synonym", "top_level_anatomy_id", "top_level_anatomy_term", "top_level_anatomy_term_synonym", "selected_top_level_anatomy_id", "selected_top_level_anatomy_term", "selected_top_level_anatomy_term_synonym", "intermediate_anatomy_id", "intermediate_anatomy_term", "intermediate_anatomy_term_synonym", "parent_anatomy_id", "parent_anatomy_term", "parent_anatomy_term_synonym", "child_anatomy_id", "child_anatomy_term", "child_anatomy_term_synonym", "download_file_path", "image_link", "file_type", "increment_value", "parameter_association_stable_id", "parameter_association_sequence_id", "parameter_association_dim_id", "parameter_association_name", "parameter_association_value", "developmental_stage_acc", "developmental_stage_name", "text_value", "sub_term_id", "sub_term_name", "sub_term_description", "age_in_days", 
"age_in_weeks" | ||
], | ||
"genotype-phenotype": [ | ||
"doc_id", "ontology_db_id", "assertion_type", "assertion_type_id", "mpath_term_id", "mpath_term_name", "anatomy_term_id", "anatomy_term_name", "intermediate_anatomy_term_id", "intermediate_anatomy_term_name", "top_level_anatomy_term_id", "top_level_anatomy_term_name", "mp_term_id", "mp_term_name", "alt_mp_term_id", "top_level_mp_term_id", "top_level_mp_term_name", "intermediate_mp_term_id", "intermediate_mp_term_name", "marker_symbol", "marker_accession_id", "colony_id", "allele_name", "allele_symbol", "allele_accession_id", "strain_name", "strain_accession_id", "phenotyping_center", "project_external_id", "project_name", "project_fullname", "resource_name", "resource_fullname", "sex", "zygosity", "pipeline_name", "pipeline_stable_id", "pipeline_stable_key", "procedure_name", "procedure_stable_id", "procedure_stable_key", "parameter_name", "parameter_stable_id", "parameter_stable_key", "statistical_method", "percentage_change", "p_value", "effect_size", "external_id", "life_stage_acc", "life_stage_name" | ||
], | ||
"impc_images": [ | ||
"id", "observation_id", "specimen_id", "phenotyping_center_id", "phenotyping_center", "production_center_id", "production_center", "specimen_project_id", "specimen_project_name", "gene_accession_id", "gene_symbol", "allele_accession_id", "allele_symbol", "zygosity", "sex", "biological_model_id", "biological_sample_id", "biological_sample_group", "strain_accession_id", "strain_name", "genetic_background", "allelic_composition", "colony_id", "litter_id", "date_of_birth", "external_sample_id", "life_stage_name", "life_stage_acc", "datasource_id", "datasource_name", "project_id", "project_name", "pipeline_id", "pipeline_name", "pipeline_stable_id", "procedure_id", "procedure_name", "procedure_stable_id", "procedure_group", "parameter_id", "parameter_name", "parameter_stable_id", "procedure_sequence_id", "experiment_id", "observation_type", "data_type", "experiment_source_id", "date_of_experiment", "weight_parameter_stable_id", "weight_date", "weight_days_old", "weight", "data_point", "order_index", "dimension", "time_point", "discrete_point", "category", "raw_category", "metadata", "metadata_group", "mp_id", "mp_term", "top_level_mp_id", "top_level_mp_term", "intermediate_mp_id", "intermediate_mp_term", "anatomy_id", "anatomy_term", "anatomy_id_term", "anatomy_term_synonym", "top_level_anatomy_id", "top_level_anatomy_term", "top_level_anatomy_term_synonym", "selected_top_level_anatomy_id", "selected_top_level_anatomy_term", "selected_top_level_anatomy_term_synonym", "intermediate_anatomy_id", "intermediate_anatomy_term", "intermediate_anatomy_term_synonym", "parent_anatomy_id", "parent_anatomy_term", "parent_anatomy_term_synonym", "child_anatomy_id", "child_anatomy_term", "child_anatomy_term_synonym", "download_file_path", "image_link", "file_type", "parameter_association_stable_id", "parameter_association_sequence_id", "parameter_association_dim_id", "parameter_association_name", "parameter_association_value", "developmental_stage_acc", "developmental_stage_name", 
"text_value", "sub_term_id", "sub_term_name", "sub_term_description", "sequence_id", "age_in_days", "age_in_weeks", "download_url", "jpeg_url", "thumbnail_url", "omero_id" | ||
], | ||
"phenodigm": [ | ||
"type", "disease_id", "disease_source", "disease_term", "disease_alts", "disease_locus", "disease_classes", "disease_phenotypes", "gene_id", "gene_symbol", "gene_symbols_withdrawn", "gene_locus", "hgnc_gene_id", "hgnc_gene_symbol", "hgnc_gene_symbols_withdrawn", "hgnc_gene_locus", "mouse_model", "impc_model", "model_id", "model_source", "model_description", "model_genetic_background", "marker_id", "marker_symbol", "marker_locus", "marker_num_models", "model_phenotypes", "ontology", "phenotype_id", "phenotype_term", "phenotype_synonym", "hp_id", "hp_term", "mp_id", "mp_term", "association_curated", "association_ortholog", "marker_symbols_withdrawn", "disease_matched_phenotypes", "model_matched_phenotypes", "disease_model_avg_raw", "disease_model_avg_norm", "disease_model_max_raw", "disease_model_max_norm", "search_qf", "human_curated_gene", "impc_model_with_curated_gene", "mgi_model_with_curated_gene", "impc_model_with_computed_association", "mgi_model_with_computed_association" | ||
], | ||
"statistical-result": ["doc_id", "db_id", "data_type", "anatomy_term_id", "anatomy_term_name", "intermediate_anatomy_term_id", "intermediate_anatomy_term_name", "top_level_anatomy_term_id", "top_level_anatomy_term_name", "mp_term_id_options", "mp_term_name_options", "mp_term_id", "mp_term_name", "top_level_mp_term_id", "top_level_mp_term_name", "intermediate_mp_term_id", "intermediate_mp_term_name", "male_mp_term_id", "male_mp_term_name", "male_top_level_mp_term_id", "male_top_level_mp_term_name", "male_intermediate_mp_term_id", "male_intermediate_mp_term_name", "female_mp_term_id", "female_mp_term_name", "female_top_level_mp_term_id", "female_top_level_mp_term_name", "female_intermediate_mp_term_id", "female_intermediate_mp_term_name", "resource_name", "resource_fullname", "resource_id", "project_name", "phenotyping_center", "pipeline_stable_id", "pipeline_stable_key", "pipeline_name", "pipeline_id", "procedure_stable_id", "procedure_stable_key", "procedure_name", "procedure_id", "parameter_stable_id", "parameter_stable_key", "parameter_name", "parameter_id", "colony_id", "marker_symbol", "marker_accession_id", "allele_symbol", "allele_name", "allele_accession_id", "strain_name", "strain_accession_id", "sex", "zygosity", "control_selection_method", "dependent_variable", "metadata_group", "data_frame", "genetic_background", "production_center", "external_db_id", "id", "organisation_id", "phenotyping_center_id", "project_id", "male_control_mean", "male_mutant_mean", "female_control_mean", "female_mutant_mean", "genotype_p_value_low_vs_normal_high", "genotype_p_value_low_normal_vs_high", "genotype_effect_size_low_vs_normal_high", "genotype_effect_size_low_normal_vs_high", "female_p_value_low_vs_normal_high", "female_p_value_low_normal_vs_high", "female_effect_size_low_vs_normal_high", "female_effect_size_low_normal_vs_high", "male_p_value_low_vs_normal_high", "male_p_value_low_normal_vs_high", "male_effect_size_low_vs_normal_high", 
"male_effect_size_low_normal_vs_high", "categories", "categorical_p_value", "categorical_effect_size", "batch_significant", "variance_significant", "null_test_p_value", "genotype_effect_p_value", "genotype_effect_stderr_estimate", "genotype_effect_parameter_estimate", "male_percentage_change", "female_percentage_change", "sex_effect_p_value", "sex_effect_stderr_estimate", "sex_effect_parameter_estimate", "weight_effect_p_value", "weight_effect_stderr_estimate", "weight_effect_parameter_estimate", "group1_genotype", "group1_residuals_normality_test", "group2_genotype", "group2_residuals_normality_test", "blups_test", "rotated_residuals_test", "intercept_estimate", "intercept_estimate_stderr_estimate", "interaction_significant", "interaction_effect_p_value", "female_ko_effect_p_value", "female_ko_effect_stderr_estimate", "female_ko_parameter_estimate", "female_effect_size", "male_ko_effect_p_value", "male_ko_effect_stderr_estimate", "male_ko_parameter_estimate", "male_effect_size", "classification_tag", "phenotype_sex", "life_stage_acc", "life_stage_name", "significant", "soft_windowing_bandwidth", "soft_windowing_shape", "soft_windowing_peaks", "soft_windowing_min_obs_required", "soft_windowing_total_obs_or_weight", "soft_windowing_threshold", "soft_windowing_number_of_doe", "soft_windowing_doe_note", "metadata"] | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
from pydantic import BaseModel, model_validator | ||
import json | ||
from typing import List, Dict | ||
from pathlib import Path | ||
import warnings | ||
from dataclasses import dataclass, field | ||
from impc_api_helper.utils.warnings import warning_config, InvalidCoreWarning, InvalidFieldWarning | ||
|
||
# Apply the package's warning formatting and filters when this module is imported
warning_config()
|
||
# Dataclass for the json validator | ||
@dataclass
class ValidationJson:
    """Lookup table of the expected field names for each core.

    The table is read from ``CORE_FILE`` as soon as an instance is created
    (see ``__post_init__``), so constructing the object performs file I/O.
    """

    # JSON file mapping a core name to the list of its field names.
    # Defaults to the ``core_fields.json`` that sits next to this module.
    CORE_FILE: Path = Path(__file__).resolve().parent / 'core_fields.json'
    # Parsed content of CORE_FILE; filled in by __post_init__, not by the caller.
    _validation_json: Dict[str, List[str]] = field(default_factory=dict, init=False)

    def __post_init__(self):
        """Eagerly load the core/field table from ``CORE_FILE``."""
        self._validation_json = self.load_core_fields(self.CORE_FILE)

    def load_core_fields(self, filename: Path) -> Dict[str, List[str]]:
        """Parse ``filename`` as JSON and return its content.

        Args:
            filename: Path of the JSON file to read.

        Returns:
            The decoded JSON document.

        Raises:
            OSError: If the file cannot be opened.
            json.JSONDecodeError: If the file is not valid JSON.
        """
        # JSON is UTF-8 by specification; do not depend on the platform's
        # locale encoding, which differs between operating systems.
        with open(filename, "r", encoding="utf-8") as f:
            return json.load(f)

    def valid_cores(self):
        """Return a view of the core names present in the loaded table."""
        return self._validation_json.keys()

    def valid_fields(self, core: str) -> List[str]:
        """Return the field names listed for ``core``.

        An unknown core yields an empty list rather than raising.
        """
        return self._validation_json.get(core, [])
|
||
# Function to parse the fields (fl) params in params | ||
def get_fields(fields: str) -> List[str]:
    """Split a comma-separated ``fl`` value into individual field names.

    Whitespace around each name is stripped and empty entries (for example
    from a trailing comma or an empty string) are dropped, so that
    ``"id, sex,"`` yields ``["id", "sex"]`` instead of ``["id", " sex", ""]``.
    Without this, a correctly spelled field written after a space would be
    reported as an unexpected field name.

    Args:
        fields: The raw ``fl`` parameter value.

    Returns:
        The field names in their original order.
    """
    stripped = (part.strip() for part in fields.split(","))
    return [name for name in stripped if name]
|
||
|
||
class CoreParamsValidator(BaseModel):
    """Pydantic model that checks a core name and the ``fl`` fields requested.

    The checks are advisory: an unknown core or field name produces an
    ``InvalidCoreWarning`` / ``InvalidFieldWarning`` through ``warnings.warn``
    and the input is returned unchanged.

    Attributes:
        core: Name of the core being queried.
        params: Query parameters; only the ``fl`` entry is inspected here.
    """

    core: str
    params: Dict

    @model_validator(mode='before')
    @classmethod
    def validate_core_and_fields(cls, values):
        """Warn about an unknown core and about unknown ``fl`` field names.

        Args:
            values: Raw input given to the model, before field validation.

        Returns:
            ``values`` unchanged.
        """
        from collections.abc import Mapping

        # In "before" mode the raw input is not guaranteed to be a mapping;
        # leave anything else to pydantic's own validation.
        if not isinstance(values, Mapping):
            return values

        core = values.get("core")
        params = values.get("params")

        # Call the Validator Object
        jv = ValidationJson()

        # Validate core
        core_is_valid = core in jv.valid_cores()
        if not core_is_valid:
            warnings.warn(
                message=f'Invalid core: "{core}", select from the available cores:\n{jv.valid_cores()})\n',
                category=InvalidCoreWarning)

        # Missing or non-mapping params cannot carry an "fl" entry. Return so
        # pydantic reports the problem as a validation error instead of this
        # hook failing with AttributeError on ``params.get``.
        if not isinstance(params, Mapping):
            return values

        # Compare passed fl values vs the allowed fl values for a given core
        fields = params.get("fl")

        # If no fields were specified, pass
        if fields is None:
            print("No fields passed, skipping field validation...")
            return values

        # Get the fields passed to params
        field_list: List[str] = get_fields(fields)

        # Validate each field in params; skipped when the core itself is
        # unknown because there is no field list to compare against.
        # TODO: perhaps pass all invalid fields as a list, instead of many warning messages
        if core_is_valid:
            # Look the expected fields up once; a set keeps each check O(1).
            expected_fields = set(jv.valid_fields(core))
            for name in field_list:
                if name not in expected_fields:
                    warnings.warn(
                        message=f'Unexpected field name: "{name}". Check the spelling of fields.\nTo see expected fields check the documentation at: https://www.ebi.ac.uk/mi/impc/solrdoc/',
                        category=InvalidFieldWarning)

        # Return validated values
        return values
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
"""Module for warnings and exception utils""" | ||
|
||
import warnings | ||
|
||
|
||
# Custom warnings | ||
class InvalidCoreWarning(Warning):
    """Warning issued when the core is not in the expected core names"""
|
||
|
||
class InvalidFieldWarning(Warning):
    """Warning issued when the field name is not in the expected fields"""
|
||
|
||
# Custom warning function | ||
def warning_config():
    """Install a compact warning format and an "always" filter.

    Replaces ``warnings.formatwarning`` with a formatter that renders a
    warning as ``<CategoryName>: <message>`` followed by a newline, and adds
    a filter with action "always" for the ``Warning`` category. Both
    settings live in the ``warnings`` module, so they apply to every warning
    in the process, not only to the ones defined in this module.
    """

    def _compact_format(message, category, filename, lineno, line=None):
        # The parameter list mirrors warnings.formatwarning; the location
        # arguments are accepted but deliberately left out of the output.
        label = category.__name__
        return f'{label}: {message}\n'

    warnings.formatwarning = _compact_format
    warnings.simplefilter("always", Warning)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,4 @@ | ||
pandas>=2.2.0 | ||
requests>=2.31.0 | ||
tqdm>=4.66.4 | ||
pydantic>=2.9 |