diff --git a/impc_api_helper/MANIFEST.in b/impc_api_helper/MANIFEST.in deleted file mode 100644 index e833b2e..0000000 --- a/impc_api_helper/MANIFEST.in +++ /dev/null @@ -1 +0,0 @@ -include impc_api_helper/utils/core_fields.json \ No newline at end of file diff --git a/impc_api_helper/impc_api_helper/temp.py b/impc_api_helper/impc_api_helper/temp.py deleted file mode 100644 index 1a7fe66..0000000 --- a/impc_api_helper/impc_api_helper/temp.py +++ /dev/null @@ -1,19 +0,0 @@ -from iterator_solr_request_2 import batch_solr_request -import pandas as pd - -markers = ['"Cthrc1"', '*11'] -df = batch_solr_request( - core="genotype-phenotype", - params={ - "q": "*:*", - "fl": "marker_symbol,mp_term_name,p_value", - 'field_list': markers, - 'field_type': 'marker_symbol' - }, - download=True, -) - -df = pd.read_json('genotype-phenotype.json', nrows=80000, lines=True) -# df = pd.read_csv('genotype-phenotype.csv', nrows=80000) -# df = pd.read_xml('genotype-phenotype.xml', parser='etree') -print(df.shape) diff --git a/impc_api_helper/pytest.ini b/impc_api_helper/pytest.ini deleted file mode 100644 index 9c2fb12..0000000 --- a/impc_api_helper/pytest.ini +++ /dev/null @@ -1,2 +0,0 @@ -[pytest] -pythonpath = impc_api_helper \ No newline at end of file diff --git a/impc_api_helper/CONTRIBUTING.md b/impc_module/CONTRIBUTING.md similarity index 94% rename from impc_api_helper/CONTRIBUTING.md rename to impc_module/CONTRIBUTING.md index 7e87df1..ddf847e 100644 --- a/impc_api_helper/CONTRIBUTING.md +++ b/impc_module/CONTRIBUTING.md @@ -12,7 +12,7 @@ The package is build using [setuptools](https://setuptools.pypa.io/en/latest/use ``` -from impc_api_helper import solr_request +from impc_api import solr_request num_found, df = solr_request( core='genotype-phenotype', params={ 'q': '*:*' 'rows': 10 @@ -23,7 +23,7 @@ num_found, df = solr_request( core='genotype-phenotype', params={ ## Installing the package for development We use [pytest](https://docs.pytest.org/en/stable/) for testing. 
To install in dev mode follow [stepts 1 and 2](#installing-the-package-for-use) above and then: -3. Install the package running `pip install '.[dev]'` +3. Install the package running `pip install -e '.[dev]'` This should install `pytest` and enable you to run tests: ``` diff --git a/impc_api_helper/LICENSE b/impc_module/LICENSE similarity index 100% rename from impc_api_helper/LICENSE rename to impc_module/LICENSE diff --git a/impc_module/MANIFEST.in b/impc_module/MANIFEST.in new file mode 100644 index 0000000..b2e6650 --- /dev/null +++ b/impc_module/MANIFEST.in @@ -0,0 +1 @@ +include impc_api/utils/core_fields.json \ No newline at end of file diff --git a/impc_api_helper/README.md b/impc_module/README.md similarity index 83% rename from impc_api_helper/README.md rename to impc_module/README.md index 197b76a..0cdc541 100644 --- a/impc_api_helper/README.md +++ b/impc_module/README.md @@ -1,8 +1,14 @@ -# IMPC_API_HELPER -`impc_api_helper` is a python package. +# IMPC_API +`impc_api` is a python package. The functions in this package are intended for use on a Jupyter Notebook. +1. **Create a virtual environment (optional but recommended)**: +On Mac: + `python3 -m venv .venv` + `source .venv/bin/activate` +2. **Install the package running**: `pip install impc_api` +3. **Try it out**: Create a [Jupyter Notebook](https://jupyter.org/install#jupyter-notebook) and try some of the examples below: ## Installing the package for the first time 1. Clone the repository and navigate into it. Navigate into the package name until you can see `setup.py` and `pyproject.toml` 2. Run `python3 -m build`, this builds the package, a couple of new files/folders will appear. @@ -13,13 +19,15 @@ The functions in this package are intended for use on a Jupyter Notebook. The available functions can be imported as: ``` -from impc_api_helper import solr_request, batch_solr_request +from impc_api import solr_request, batch_solr_request ``` ## 1. 
Solr request The most basic request to the IMPC solr API ``` -num_found, df = solr_request( core='genotype-phenotype', params={ +num_found, df = solr_request( + core='genotype-phenotype', + params={ 'q': '*:*', 'rows': 10, 'fl': 'marker_symbol,allele_symbol,parameter_stable_id' @@ -32,25 +40,26 @@ num_found, df = solr_request( core='genotype-phenotype', params={ ``` num_found, df = solr_request( - core="genotype-phenotype", - params={ + core="genotype-phenotype", + params={ "q": "*:*", "rows": 0, "facet": "on", "facet.field": "zygosity", "facet.limit": 15, "facet.mincount": 1, - }, - ) + } +) ``` ### b. Solr request validation A common pitfall when writing a query is the misspelling of `core` and `fields` arguments. For this, we have included a `validate` argument that raises a warning when these values are not as expected. Note this does not prevent you from executing a query; it just alerts you to a potential issue. - #### Core validation ``` -num_found, df = solr_request( core='invalid_core', params={ +num_found, df = solr_request( + core='invalid_core', + params={ 'q': '*:*', 'rows': 10 }, @@ -58,12 +67,14 @@ num_found, df = solr_request( core='invalid_core', params={ ) > InvalidCoreWarning: Invalid core: "genotype-phenotyp", select from the available cores: -> dict_keys(['experiment', 'genotype-phenotype', 'impc_images', 'phenodigm', 'statistical-result'])) +> dict_keys(['experiment', 'genotype-phenotype', 'impc_images', 'phenodigm', 'statistical-result']) ``` #### Field list validation ``` -num_found, df = solr_request( core='genotype-phenotype', params={ +num_found, df = solr_request( + core='genotype-phenotype', + params={ 'q': '*:*', 'rows': 10, 'fl': 'invalid_field,marker_symbol,allele_symbol' @@ -126,7 +137,7 @@ Pass the list to the `field_list` param and specify the type of `fl` in `field_t ``` # List of gene symbols -genes = ["Zfp580","Firrm","Gpld1","Mbip"] +genes = ["Zfp580", "Firrm", "Gpld1", "Mbip"] df = batch_solr_request( 
core='genotype-phenotype', @@ -140,11 +151,11 @@ df = batch_solr_request( ) print(df.head()) ``` -This too can be downloaded +This can be downloaded too: ``` # List of gene symbols -genes = ["Zfp580","Firrm","Gpld1","Mbip"] +genes = ["Zfp580", "Firrm", "Gpld1", "Mbip"] df = batch_solr_request( core='genotype-phenotype', @@ -159,6 +170,3 @@ df = batch_solr_request( ) print(df.head()) ``` - - - diff --git a/impc_api_helper/impc_api_helper/__init__.py b/impc_module/impc_api/__init__.py similarity index 100% rename from impc_api_helper/impc_api_helper/__init__.py rename to impc_module/impc_api/__init__.py diff --git a/impc_api_helper/impc_api_helper/batch_solr_request.py b/impc_module/impc_api/batch_solr_request.py similarity index 98% rename from impc_api_helper/impc_api_helper/batch_solr_request.py rename to impc_module/impc_api/batch_solr_request.py index bdc4bec..c025847 100644 --- a/impc_api_helper/impc_api_helper/batch_solr_request.py +++ b/impc_module/impc_api/batch_solr_request.py @@ -1,23 +1,23 @@ -from IPython.display import display import json +import warnings +from pathlib import Path + import pandas as pd import requests from tqdm import tqdm -from .solr_request import solr_request -from pathlib import Path -import warnings -from impc_api_helper.utils.warnings import ( +from IPython.display import display + +from impc_api.utils.validators import DownloadFormatValidator +from impc_api.utils.warnings import ( warning_config, RowsParamIgnored, UnsupportedDownloadFormatError, ) -from impc_api_helper.utils.validators import DownloadFormatValidator - +from .solr_request import solr_request # Initialise warning config warning_config() - def batch_solr_request( core, params, download=False, batch_size=5000, filename="batch_request" ): diff --git a/impc_api_helper/impc_api_helper/solr_request.py b/impc_module/impc_api/solr_request.py similarity index 99% rename from impc_api_helper/impc_api_helper/solr_request.py rename to impc_module/impc_api/solr_request.py index 
01b7816..296214b 100644 --- a/impc_api_helper/impc_api_helper/solr_request.py +++ b/impc_module/impc_api/solr_request.py @@ -1,14 +1,14 @@ -from IPython.display import display -from tqdm import tqdm import pandas as pd import requests -from impc_api_helper.utils.validators import CoreParamsValidator +from IPython.display import display +from tqdm import tqdm + +from impc_api.utils.validators import CoreParamsValidator # Display the whole dataframe <15 pd.set_option("display.max_rows", 15) pd.set_option("display.max_columns", None) - # Create helper function def solr_request(core, params, silent=False, validate=False): """Performs a single Solr request to the IMPC Solr API. diff --git a/impc_api_helper/impc_api_helper/utils/__init__.py b/impc_module/impc_api/utils/__init__.py similarity index 100% rename from impc_api_helper/impc_api_helper/utils/__init__.py rename to impc_module/impc_api/utils/__init__.py diff --git a/impc_api_helper/impc_api_helper/utils/core_fields.json b/impc_module/impc_api/utils/core_fields.json similarity index 100% rename from impc_api_helper/impc_api_helper/utils/core_fields.json rename to impc_module/impc_api/utils/core_fields.json diff --git a/impc_api_helper/impc_api_helper/utils/validators.py b/impc_module/impc_api/utils/validators.py similarity index 80% rename from impc_api_helper/impc_api_helper/utils/validators.py rename to impc_module/impc_api/utils/validators.py index 665fbf2..2be08b0 100644 --- a/impc_api_helper/impc_api_helper/utils/validators.py +++ b/impc_module/impc_api/utils/validators.py @@ -1,10 +1,27 @@ -from pydantic import BaseModel, model_validator, field_validator +""" +This module provides validation for core fields and download formats using Pydantic models. + +Classes: + - ValidationJson: Loads and validates core fields from a JSON configuration file. + - CoreParamsValidator: Validates core names and associated fields (fl), issuing warnings for invalid inputs. 
+ - DownloadFormatValidator: Validates the download format (wt) to ensure it is supported (json or csv). + +Functions: + - get_fields(fields: str) -> List[str]: Parses a comma-separated string of field names into a list. + +Custom Exceptions: + - InvalidCoreWarning: Raised for invalid core names. + - InvalidFieldWarning: Raised for unexpected field names. + - UnsupportedDownloadFormatError: Raised for unsupported download formats. +""" + import json -from typing import List, Dict -from pathlib import Path import warnings from dataclasses import dataclass, field -from impc_api_helper.utils.warnings import ( +from pathlib import Path +from typing import List, Dict +from pydantic import BaseModel, model_validator, field_validator +from impc_api.utils.warnings import ( warning_config, InvalidCoreWarning, InvalidFieldWarning, @@ -14,7 +31,6 @@ # Initialise warning config warning_config() - # Dataclass for the json validator @dataclass class ValidationJson: diff --git a/impc_api_helper/impc_api_helper/utils/warnings.py b/impc_module/impc_api/utils/warnings.py similarity index 100% rename from impc_api_helper/impc_api_helper/utils/warnings.py rename to impc_module/impc_api/utils/warnings.py diff --git a/impc_api_helper/pyproject.toml b/impc_module/pyproject.toml similarity index 88% rename from impc_api_helper/pyproject.toml rename to impc_module/pyproject.toml index ee6fc5e..b4e6990 100644 --- a/impc_api_helper/pyproject.toml +++ b/impc_module/pyproject.toml @@ -3,7 +3,7 @@ requires = ["setuptools"] build-backend = "setuptools.build_meta" [project] -name = "impc_api_helper" +name = "impc_api" version = "0.1.0" description = "A package to facilitate making API requests to the IMPC Solr API" authors = [ @@ -27,7 +27,7 @@ dev = [ ] [tool.setuptools.packages.find] -include = ["impc_api_helper", "impc_api_helper.*"] +include = ["impc_api", "impc_api.*"] [project.urls] "Homepage" = "https://github.com/mpi2/impc-data-api-workshop" diff --git a/impc_module/pytest.ini 
b/impc_module/pytest.ini new file mode 100644 index 0000000..5383661 --- /dev/null +++ b/impc_module/pytest.ini @@ -0,0 +1,2 @@ +[pytest] +pythonpath = impc_api \ No newline at end of file diff --git a/impc_api_helper/setup.py b/impc_module/setup.py similarity index 88% rename from impc_api_helper/setup.py rename to impc_module/setup.py index 3307bf7..d684217 100644 --- a/impc_api_helper/setup.py +++ b/impc_module/setup.py @@ -2,12 +2,12 @@ setup( - name='impc_api_helper', + name='impc_api', version='0.1.0', description='A package to facilitate making API request to the IMPC Solr API', author='MPI2, Marina Kan, Diego Pava', url='https://github.com/mpi2/impc-data-api-workshop', - packages=find_packages(include=["impc_api_helper", "impc_api_helper.*"]), + packages=find_packages(include=["impc_api", "impc_api.*"]), include_package_data=True, install_requires=[ 'pandas>=2.2.0', diff --git a/impc_api_helper/tests/__init__.py b/impc_module/tests/__init__.py similarity index 100% rename from impc_api_helper/tests/__init__.py rename to impc_module/tests/__init__.py diff --git a/impc_api_helper/tests/test_batch_solr_request.py b/impc_module/tests/test_batch_solr_request.py similarity index 97% rename from impc_api_helper/tests/test_batch_solr_request.py rename to impc_module/tests/test_batch_solr_request.py index a595efe..f70b70f 100644 --- a/impc_api_helper/tests/test_batch_solr_request.py +++ b/impc_module/tests/test_batch_solr_request.py @@ -1,7 +1,12 @@ -import pytest +import json from pathlib import Path from unittest.mock import patch, call, Mock -from impc_api_helper.batch_solr_request import ( + +import pandas as pd +import pytest +from pandas.testing import assert_frame_equal + +from impc_api.batch_solr_request import ( batch_solr_request, _batch_solr_generator, solr_request, @@ -9,19 +14,15 @@ _solr_downloader, _read_downloaded_file, ) -from impc_api_helper.utils.warnings import ( +from impc_api.utils.warnings import ( RowsParamIgnored, 
UnsupportedDownloadFormatError, ) -import json -import pandas as pd -from pandas.testing import assert_frame_equal - # When rows is passed to batch solr request, a warning is raised. # Let's ignore this warning in all tests except the one that asserts the warning pytestmark = pytest.mark.filterwarnings( - "ignore::impc_api_helper.utils.warnings.RowsParamIgnored" + "ignore::impc_api.utils.warnings.RowsParamIgnored" ) @@ -51,7 +52,7 @@ def common_params(self): # solr_request will be mocked with different values for numFound, therefore it is passed as param @pytest.fixture def mock_solr_request(self, request): - with patch("impc_api_helper.batch_solr_request.solr_request") as mock: + with patch("impc_api.batch_solr_request.solr_request") as mock: # Mock expected return content of the solr_request (numFound and df) mock.return_value = (request.param, pd.DataFrame()) yield mock @@ -59,7 +60,7 @@ def mock_solr_request(self, request): # Fixture mocking _batch_to_df @pytest.fixture def mock_batch_to_df(self): - with patch("impc_api_helper.batch_solr_request._batch_to_df") as mock: + with patch("impc_api.batch_solr_request._batch_to_df") as mock: # Mock expected return content of the _batch_to_df (pd.DataFrame) mock.return_value = pd.DataFrame() yield mock @@ -157,13 +158,13 @@ def test_batch_solr_request_download_false_large_request( # Fixture mocking _batch_solr_generator @pytest.fixture def mock_batch_solr_generator(self): - with patch("impc_api_helper.batch_solr_request._batch_solr_generator") as mock: + with patch("impc_api.batch_solr_request._batch_solr_generator") as mock: yield mock # Fixture mocking _solr_downloader. Yields a tmp_path to write a file for the duration of the test. 
@pytest.fixture def mock_solr_downloader(self, tmp_path): - with patch("impc_api_helper.batch_solr_request._solr_downloader") as mock: + with patch("impc_api.batch_solr_request._solr_downloader") as mock: temp_dir = Path(tmp_path) / "temp_dir" temp_dir.mkdir() yield mock @@ -377,7 +378,7 @@ def test_batch_solr_request_multiple_fields( # Test the warning when params["rows"] is passed @pytest.mark.filterwarnings( - "default::impc_api_helper.utils.warnings.RowsParamIgnored" + "default::impc_api.utils.warnings.RowsParamIgnored" ) @pytest.mark.parametrize("mock_solr_request", [10000], indirect=True) def test_param_rows_warning(core, common_params, mock_solr_request): @@ -408,7 +409,7 @@ def mock_solr_request_generator(self, request): """Patches solr_request for _batch_to_df _batch_solr_generator producing a df dynamically. Creates a df in chunks (row by row) mocking incoming batches of responses. """ - with patch("impc_api_helper.batch_solr_request.solr_request") as mock: + with patch("impc_api.batch_solr_request.solr_request") as mock: # Call the generator data_generator = self.data_generator() @@ -473,7 +474,7 @@ def test_batch_to_df( # Fixture to mock the requests module @pytest.fixture def mock_requests_get(self, request): - with patch("impc_api_helper.batch_solr_request.requests.get") as mock_get: + with patch("impc_api.batch_solr_request.requests.get") as mock_get: # Capture the format of the response wt = request.param["wt"] mock_get.return_value.format = wt @@ -579,7 +580,7 @@ def test_batch_solr_generator( # Fixture to mock requests.get returning a status code. 
@pytest.fixture def mock_requests_get_error(self, request): - with patch("impc_api_helper.batch_solr_request.requests.get") as mock_get: + with patch("impc_api.batch_solr_request.requests.get") as mock_get: mock_get.return_value.status_code = request.param yield mock_get diff --git a/impc_api_helper/tests/test_helpers.py b/impc_module/tests/test_helpers.py similarity index 100% rename from impc_api_helper/tests/test_helpers.py rename to impc_module/tests/test_helpers.py diff --git a/impc_api_helper/tests/test_solr_request.py b/impc_module/tests/test_solr_request.py similarity index 99% rename from impc_api_helper/tests/test_solr_request.py rename to impc_module/tests/test_solr_request.py index b0df4ca..c9a44ee 100644 --- a/impc_api_helper/tests/test_solr_request.py +++ b/impc_module/tests/test_solr_request.py @@ -1,9 +1,8 @@ -import pytest from unittest.mock import patch +import pytest +from impc_api.utils.warnings import InvalidCoreWarning, InvalidFieldWarning from solr_request import solr_request, _process_faceting from .test_helpers import check_url_status_code_and_params -from impc_api_helper.utils.warnings import InvalidCoreWarning, InvalidFieldWarning - class TestSolrRequest: """Test class for the Solr Request function